Diffstat (limited to 'tools/perf/util')
139 files changed, 8964 insertions, 2752 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 5ec97e8d6b6d..7910d908c814 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -67,6 +67,7 @@ perf-util-y += maps.o perf-util-y += pstack.o perf-util-y += session.o perf-util-y += tool.o +perf-util-y += sample.o perf-util-y += sample-raw.o perf-util-y += s390-sample-raw.o perf-util-y += amd-sample-raw.o @@ -160,7 +161,7 @@ perf-util-y += clockid.o perf-util-y += list_sort.o perf-util-y += mutex.o perf-util-y += sharded_mutex.o -perf-util-$(CONFIG_X86_64) += intel-tpebs.o +perf-util-y += intel-tpebs.o perf-util-$(CONFIG_LIBBPF) += bpf_map.o perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o @@ -172,6 +173,10 @@ perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-flex.o perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-bison.o perf-util-$(CONFIG_PERF_BPF_SKEL) += btf.o +ifeq ($(CONFIG_TRACE),y) + perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-trace-summary.o +endif + ifeq ($(CONFIG_LIBTRACEEVENT),y) perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o endif @@ -236,9 +241,12 @@ perf-util-y += cap.o perf-util-$(CONFIG_CXX_DEMANGLE) += demangle-cxx.o perf-util-y += demangle-ocaml.o perf-util-y += demangle-java.o -perf-util-y += demangle-rust.o +perf-util-y += demangle-rust-v0.o perf-util-$(CONFIG_LIBLLVM) += llvm-c-helpers.o +CFLAGS_demangle-rust-v0.o += -Wno-shadow -Wno-declaration-after-statement \ + -Wno-switch-default -Wno-switch-enum -Wno-missing-field-initializers + ifdef CONFIG_JITDUMP perf-util-$(CONFIG_LIBELF) += jitdump.o perf-util-$(CONFIG_LIBELF) += genelf.o @@ -405,14 +413,39 @@ $(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE ifdef SHELLCHECK SHELL_TESTS := generate-cmdlist.sh - TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log) + SHELL_TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log) else SHELL_TESTS := - TEST_LOGS := + SHELL_TEST_LOGS := endif $(OUTPUT)%.shellcheck_log: % $(call rule_mkdir) $(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false) -perf-util-y += $(TEST_LOGS) +perf-util-y += $(SHELL_TEST_LOGS) + +PY_TESTS := setup.py +ifdef MYPY + MYPY_TEST_LOGS := $(PY_TESTS:%=%.mypy_log) +else + MYPY_TEST_LOGS := +endif + +$(OUTPUT)%.mypy_log: % + $(call rule_mkdir) + $(Q)$(call echo-cmd,test)mypy "$<" > $@ || (cat $@ && rm $@ && false) + +perf-util-y += $(MYPY_TEST_LOGS) + +ifdef PYLINT + PYLINT_TEST_LOGS := $(PY_TESTS:%=%.pylint_log) +else + PYLINT_TEST_LOGS := +endif + +$(OUTPUT)%.pylint_log: % + $(call rule_mkdir) + $(Q)$(call echo-cmd,test)pylint "$<" > $@ || (cat $@ && rm $@ && false) + +perf-util-y += $(PYLINT_TEST_LOGS) diff --git a/tools/perf/util/addr_location.c b/tools/perf/util/addr_location.c index 51825ef8c0ab..007a2f5df9a6 100644 --- a/tools/perf/util/addr_location.c +++ b/tools/perf/util/addr_location.c @@ -17,6 +17,7 @@ void addr_location__init(struct addr_location *al) al->cpumode = 0; al->cpu = 0; al->socket = 0; + al->parallelism = 1; } /* diff --git a/tools/perf/util/addr_location.h b/tools/perf/util/addr_location.h index d8ac0428dff2..64b551025216 100644 --- a/tools/perf/util/addr_location.h +++ b/tools/perf/util/addr_location.h @@ -17,10 +17,14 @@ struct addr_location { const char *srcline; u64 addr; char level; - u8 filtered; u8 cpumode; + u16 filtered; s32 cpu; s32 socket; + /* Same as machine.parallelism but within [1, nr_cpus]. */ + int parallelism; + /* See he_stat.latency. 
*/ + u64 latency; }; void addr_location__init(struct addr_location *al); diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c index 9d0ce88e90e4..4b540e6fb42d 100644 --- a/tools/perf/util/amd-sample-raw.c +++ b/tools/perf/util/amd-sample-raw.c @@ -9,7 +9,7 @@ #include <inttypes.h> #include <linux/string.h> -#include "../../arch/x86/include/asm/amd-ibs.h" +#include "../../arch/x86/include/asm/amd/ibs.h" #include "debug.h" #include "session.h" @@ -19,6 +19,8 @@ static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; static bool zen4_ibs_extensions; +static bool ldlat_cap; +static bool dtlb_pgsize_cap; static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) { @@ -78,14 +80,20 @@ static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg) static void pr_ibs_op_ctl(union ibs_op_ctl reg) { char l3_miss_only[sizeof(" L3MissOnly _")] = ""; + char ldlat[sizeof(" LdLatThrsh __ LdLatEn _")] = ""; if (zen4_ibs_extensions) snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only); - printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d\n", + if (ldlat_cap) { + snprintf(ldlat, sizeof(ldlat), " LdLatThrsh %2d LdLatEn %d", + reg.ldlat_thrsh, reg.ldlat_en); + } + + printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d%s\n", reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only, reg.op_en, reg.op_val, reg.cnt_ctl, - reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt); + reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt, ldlat); } static void pr_ibs_op_data(union ibs_op_data reg) @@ -154,9 +162,20 @@ static void pr_ibs_op_data2(union ibs_op_data2 reg) static void pr_ibs_op_data3(union ibs_op_data3 reg) { - char l2_miss_str[sizeof(" L2Miss _")] = ""; - char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = ""; + static const char * const dc_page_sizes[] = { + " 4K", + " 2M", + " 1G", + " ??", + }; char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = ""; + char dc_l1_l2tlb_miss_str[sizeof(" DcL1TlbMiss _ DcL2TlbMiss _")] = ""; + char dc_l1tlb_hit_str[sizeof(" DcL1TlbHit2M _ DcL1TlbHit1G _")] = ""; + char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = ""; + char dc_l2tlb_hit_2m_str[sizeof(" DcL2TlbHit2M _")] = ""; + char dc_l2tlb_hit_1g_str[sizeof(" DcL2TlbHit1G _")] = ""; + char dc_page_size_str[sizeof(" DcPageSize ____")] = ""; + char l2_miss_str[sizeof(" L2Miss _")] = ""; /* * Erratum #1293 @@ -172,16 +191,40 @@ static void pr_ibs_op_data3(union ibs_op_data3 reg) snprintf(op_mem_width_str, sizeof(op_mem_width_str), " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1)); - printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d " - "DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d " - "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d " - "DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n", - reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss, - reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss, - reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op, - reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, - reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str, - op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat); + if (dtlb_pgsize_cap) { + if (reg.dc_phy_addr_valid) { + int idx = (reg.dc_l1tlb_hit_1g << 1) | reg.dc_l1tlb_hit_2m; + + snprintf(dc_l1_l2tlb_miss_str, 
sizeof(dc_l1_l2tlb_miss_str), + " DcL1TlbMiss %d DcL2TlbMiss %d", + reg.dc_l1tlb_miss, reg.dc_l2tlb_miss); + snprintf(dc_page_size_str, sizeof(dc_page_size_str), + " DcPageSize %4s", dc_page_sizes[idx]); + } + } else { + snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str), + " DcL1TlbMiss %d DcL2TlbMiss %d", + reg.dc_l1tlb_miss, reg.dc_l2tlb_miss); + snprintf(dc_l1tlb_hit_str, sizeof(dc_l1tlb_hit_str), + " DcL1TlbHit2M %d DcL1TlbHit1G %d", + reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g); + snprintf(dc_l2tlb_hit_2m_str, sizeof(dc_l2tlb_hit_2m_str), + " DcL2TlbHit2M %d", reg.dc_l2tlb_hit_2m); + snprintf(dc_l2tlb_hit_1g_str, sizeof(dc_l2tlb_hit_1g_str), + " DcL2TlbHit1G %d", reg.dc_l2_tlb_hit_1g); + } + + printf("ibs_op_data3:\t%016llx LdOp %d StOp %d%s%s%s DcMiss %d DcMisAcc %d " + "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d " + "DcLinAddrValid %d DcPhyAddrValid %d%s%s SwPf %d%s%s " + "DcMissLat %5d TlbRefillLat %5d\n", + reg.val, reg.ld_op, reg.st_op, dc_l1_l2tlb_miss_str, + dtlb_pgsize_cap ? dc_page_size_str : dc_l1tlb_hit_str, + dc_l2tlb_hit_2m_str, reg.dc_miss, reg.dc_mis_acc, reg.dc_wc_mem_acc, + reg.dc_uc_mem_acc, reg.dc_locked_op, reg.dc_miss_no_mab_alloc, + reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, dc_l2tlb_hit_1g_str, + l2_miss_str, reg.sw_pf, op_mem_width_str, op_dc_miss_open_mem_reqs_str, + reg.dc_miss_lat, reg.tlb_refill_lat); } /* @@ -331,6 +374,12 @@ bool evlist__has_amd_ibs(struct evlist *evlist) if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions")) zen4_ibs_extensions = 1; + if (perf_env__find_pmu_cap(env, "ibs_op", "ldlat")) + ldlat_cap = 1; + + if (perf_env__find_pmu_cap(env, "ibs_op", "dtlb_pgsize")) + dtlb_pgsize_cap = 1; + if (ibs_fetch_type || ibs_op_type) { if (!cpu_family) parse_cpuid(env); diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c index 976abedca09e..1ef2edbc71d9 100644 --- a/tools/perf/util/annotate-data.c +++ b/tools/perf/util/annotate-data.c @@ -314,6 +314,40 @@ static void delete_members(struct annotated_member *member) } } +static int fill_member_name(char *buf, size_t sz, struct annotated_member *m, + int offset, bool first) +{ + struct annotated_member *child; + + if (list_empty(&m->children)) + return 0; + + list_for_each_entry(child, &m->children, node) { + int len; + + if (offset < child->offset || offset >= child->offset + child->size) + continue; + + /* It can have anonymous struct/union members */ + if (child->var_name) { + len = scnprintf(buf, sz, "%s%s", + first ? 
"" : ".", child->var_name); + first = false; + } else { + len = 0; + } + + return fill_member_name(buf + len, sz - len, child, offset, first) + len; + } + return 0; +} + +int annotated_data_type__get_member_name(struct annotated_data_type *adt, + char *buf, size_t sz, int member_offset) +{ + return fill_member_name(buf, sz, &adt->self, member_offset, /*first=*/true); +} + static struct annotated_data_type *dso__findnew_data_type(struct dso *dso, Dwarf_Die *type_die) { @@ -830,7 +864,7 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo if (!dwarf_offdie(dloc->di->dbg, var->die_off, &mem_die)) continue; - if (var->reg == DWARF_REG_FB || var->reg == fbreg) { + if (var->reg == DWARF_REG_FB || var->reg == fbreg || var->reg == state->stack_reg) { int offset = var->offset; struct type_state_stack *stack; @@ -845,8 +879,13 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo findnew_stack_state(state, offset, TSR_KIND_TYPE, &mem_die); - pr_debug_dtp("var [%"PRIx64"] -%#x(stack)", - insn_offset, -offset); + if (var->reg == state->stack_reg) { + pr_debug_dtp("var [%"PRIx64"] %#x(reg%d)", + insn_offset, offset, state->stack_reg); + } else { + pr_debug_dtp("var [%"PRIx64"] -%#x(stack)", + insn_offset, -offset); + } pr_debug_type_name(&mem_die, TSR_KIND_TYPE); } else if (has_reg_type(state, var->reg) && var->offset == 0) { struct type_state_reg *reg; @@ -1127,10 +1166,10 @@ again: } check_non_register: - if (reg == dloc->fbreg) { + if (reg == dloc->fbreg || reg == state->stack_reg) { struct type_state_stack *stack; - pr_debug_dtp("fbreg"); + pr_debug_dtp("%s", reg == dloc->fbreg ? "fbreg" : "stack"); stack = find_stack_state(state, dloc->type_offset); if (stack == NULL) { diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h index 98c80b2268dd..541fee1a5f0a 100644 --- a/tools/perf/util/annotate-data.h +++ b/tools/perf/util/annotate-data.h @@ -227,8 +227,13 @@ void annotated_data_type__tree_delete(struct rb_root *root); /* Release all global variable information in the tree */ void global_var_type__tree_delete(struct rb_root *root); +/* Print data type annotation (including members) on stdout */ int hist_entry__annotate_data_tty(struct hist_entry *he, struct evsel *evsel); +/* Get name of member field at the given offset in the data type */ +int annotated_data_type__get_member_name(struct annotated_data_type *adt, + char *buf, size_t sz, int member_offset); + bool has_reg_type(struct type_state *state, int reg); struct type_state_stack *findnew_stack_state(struct type_state *state, int offset, u8 kind, @@ -276,6 +281,14 @@ static inline int hist_entry__annotate_data_tty(struct hist_entry *he __maybe_un return -1; } +static inline int annotated_data_type__get_member_name(struct annotated_data_type *adt __maybe_unused, + char *buf __maybe_unused, + size_t sz __maybe_unused, + int member_offset __maybe_unused) +{ + return -1; +} + #endif /* HAVE_LIBDW_SUPPORT */ #ifdef HAVE_SLANG_SUPPORT diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 31bb326b07a6..264a212b47df 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -87,6 +87,8 @@ struct annotated_data_type canary_type = { }, }; +#define NO_TYPE ((struct annotated_data_type *)-1UL) + /* symbol histogram: key = offset << 16 | evsel->core.idx */ static size_t sym_hist_hash(long key, void *ctx __maybe_unused) { @@ -758,15 +760,31 @@ static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_wi return 0; } 
+static struct annotated_data_type * +__hist_entry__get_data_type(struct hist_entry *he, struct arch *arch, + struct debuginfo *dbg, struct disasm_line *dl, + int *type_offset); + +struct annotation_print_data { + struct hist_entry *he; + struct evsel *evsel; + struct arch *arch; + struct debuginfo *dbg; + u64 start; + int addr_fmt_width; +}; + static int -annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, - struct evsel *evsel, u64 len, int min_pcnt, int printed, - int max_lines, struct annotation_line *queue, int addr_fmt_width, - int percent_type) +annotation_line__print(struct annotation_line *al, struct annotation_print_data *apd, + struct annotation_options *opts, int printed, + struct annotation_line *queue) { + struct symbol *sym = apd->he->ms.sym; struct disasm_line *dl = container_of(al, struct disasm_line, al); struct annotation *notes = symbol__annotation(sym); static const char *prev_line; + int max_lines = opts->max_lines; + int percent_type = opts->percent_type; if (al->offset != -1) { double max_percent = 0.0; @@ -786,19 +804,23 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start if (al->data_nr > nr_percent) nr_percent = al->data_nr; - if (max_percent < min_pcnt) + if (max_percent < opts->min_pcnt) return -1; if (max_lines && printed >= max_lines) return 1; if (queue != NULL) { + struct annotation_options queue_opts = { + .max_lines = 1, + .percent_type = percent_type, + }; + list_for_each_entry_from(queue, ¬es->src->source, node) { if (queue == al) break; - annotation_line__print(queue, sym, start, evsel, len, - 0, 0, 1, NULL, addr_fmt_width, - percent_type); + annotation_line__print(queue, apd, &queue_opts, + /*printed=*/0, /*queue=*/NULL); } } @@ -823,7 +845,31 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start printf(" : "); - disasm_line__print(dl, start, addr_fmt_width); + disasm_line__print(dl, apd->start, apd->addr_fmt_width); + + if (opts->code_with_type && apd->dbg) { + struct annotated_data_type *data_type; + int offset = 0; + + data_type = __hist_entry__get_data_type(apd->he, apd->arch, + apd->dbg, dl, &offset); + if (data_type && data_type != NO_TYPE) { + char buf[4096]; + + printf("\t\t# data-type: %s", + data_type->self.type_name); + + if (data_type != &stackop_type && + data_type != &canary_type) + printf(" +%#x", offset); + + if (annotated_data_type__get_member_name(data_type, + buf, + sizeof(buf), + offset)) + printf(" (%s)", buf); + } + } /* * Also color the filename and line if needed, with @@ -849,7 +895,8 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start if (!*al->line) printf(" %*s:\n", width, " "); else - printf(" %*s: %-*d %s\n", width, " ", addr_fmt_width, al->line_nr, al->line); + printf(" %*s: %-*d %s\n", width, " ", apd->addr_fmt_width, + al->line_nr, al->line); } return 0; @@ -1167,8 +1214,9 @@ static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start) return 0; } -int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) +int hist_entry__annotate_printf(struct hist_entry *he, struct evsel *evsel) { + struct map_symbol *ms = &he->ms; struct map *map = ms->map; struct symbol *sym = ms->sym; struct dso *dso = map__dso(map); @@ -1179,11 +1227,14 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) struct sym_hist *h = annotation__histogram(notes, evsel); struct annotation_line *pos, *queue = NULL; struct annotation_options *opts = &annotate_opts; - u64 start = 
map__rip_2objdump(map, sym->start); - int printed = 2, queue_len = 0, addr_fmt_width; + struct annotation_print_data apd = { + .he = he, + .evsel = evsel, + .start = map__rip_2objdump(map, sym->start), + }; + int printed = 2, queue_len = 0; int more = 0; bool context = opts->context; - u64 len; int width = annotation__pcnt_width(notes); int graph_dotted_len; char buf[512]; @@ -1197,8 +1248,6 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) else d_filename = basename(filename); - len = symbol__size(sym); - if (evsel__is_group_event(evsel)) { evsel__group_desc(evsel, buf, sizeof(buf)); evsel_name = buf; @@ -1217,7 +1266,10 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) if (verbose > 0) symbol__annotate_hits(sym, evsel); - addr_fmt_width = annotated_source__addr_fmt_width(¬es->src->source, start); + apd.addr_fmt_width = annotated_source__addr_fmt_width(¬es->src->source, + apd.start); + evsel__get_arch(evsel, &apd.arch); + apd.dbg = debuginfo__new(filename); list_for_each_entry(pos, ¬es->src->source, node) { int err; @@ -1227,9 +1279,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) queue_len = 0; } - err = annotation_line__print(pos, sym, start, evsel, len, - opts->min_pcnt, printed, opts->max_lines, - queue, addr_fmt_width, opts->percent_type); + err = annotation_line__print(pos, &apd, opts, printed, queue); switch (err) { case 0: @@ -1260,6 +1310,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) } } + debuginfo__delete(apd.dbg); free(filename); return more; @@ -1597,8 +1648,9 @@ static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root) annotation__calc_lines(notes, ms, root); } -int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel) +int hist_entry__tty_annotate2(struct hist_entry *he, struct evsel *evsel) { + struct map_symbol *ms = &he->ms; struct dso *dso = map__dso(ms->map); struct symbol *sym = ms->sym; struct rb_root source_line = RB_ROOT; @@ -1632,8 +1684,9 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel) return 0; } -int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel) +int hist_entry__tty_annotate(struct hist_entry *he, struct evsel *evsel) { + struct map_symbol *ms = &he->ms; struct dso *dso = map__dso(ms->map); struct symbol *sym = ms->sym; struct rb_root source_line = RB_ROOT; @@ -1657,7 +1710,7 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel) print_summary(&source_line, dso__long_name(dso)); } - symbol__annotate_printf(ms, evsel); + hist_entry__annotate_printf(he, evsel); annotated_source__purge(symbol__annotation(sym)->src); @@ -2227,6 +2280,7 @@ void annotation_options__init(void) opt->annotate_src = true; opt->offset_level = ANNOTATION__OFFSET_JUMP_TARGETS; opt->percent_type = PERCENT_PERIOD_LOCAL; + opt->hide_src_code_on_title = true; } void annotation_options__exit(void) @@ -2643,6 +2697,92 @@ void debuginfo_cache__delete(void) di_cache.dbg = NULL; } +static struct annotated_data_type * +__hist_entry__get_data_type(struct hist_entry *he, struct arch *arch, + struct debuginfo *dbg, struct disasm_line *dl, + int *type_offset) +{ + struct map_symbol *ms = &he->ms; + struct annotated_insn_loc loc; + struct annotated_op_loc *op_loc; + struct annotated_data_type *mem_type; + struct annotated_item_stat *istat; + int i; + + istat = annotate_data_stat(&ann_insn_stat, dl->ins.name); + if (istat == NULL) { + ann_data_stat.no_insn++; + return NO_TYPE; + } + + if 
(annotate_get_insn_location(arch, dl, &loc) < 0) { + ann_data_stat.no_insn_ops++; + istat->bad++; + return NO_TYPE; + } + + if (is_stack_operation(arch, dl)) { + istat->good++; + *type_offset = 0; + return &stackop_type; + } + + for_each_insn_op_loc(&loc, i, op_loc) { + struct data_loc_info dloc = { + .arch = arch, + .thread = he->thread, + .ms = ms, + .ip = ms->sym->start + dl->al.offset, + .cpumode = he->cpumode, + .op = op_loc, + .di = dbg, + }; + + if (!op_loc->mem_ref && op_loc->segment == INSN_SEG_NONE) + continue; + + /* PC-relative addressing */ + if (op_loc->reg1 == DWARF_REG_PC) { + dloc.var_addr = annotate_calc_pcrel(ms, dloc.ip, + op_loc->offset, dl); + } + + /* This CPU access in kernel - pretend PC-relative addressing */ + if (dso__kernel(map__dso(ms->map)) && arch__is(arch, "x86") && + op_loc->segment == INSN_SEG_X86_GS && op_loc->imm) { + dloc.var_addr = op_loc->offset; + op_loc->reg1 = DWARF_REG_PC; + } + + mem_type = find_data_type(&dloc); + + if (mem_type == NULL && is_stack_canary(arch, op_loc)) { + istat->good++; + *type_offset = 0; + return &canary_type; + } + + if (mem_type) + istat->good++; + else + istat->bad++; + + if (symbol_conf.annotate_data_sample) { + struct evsel *evsel = hists_to_evsel(he->hists); + + annotated_data_type__update_samples(mem_type, evsel, + dloc.type_offset, + he->stat.nr_events, + he->stat.period); + } + *type_offset = dloc.type_offset; + return mem_type ?: NO_TYPE; + } + + /* retry with a fused instruction */ + return NULL; +} + /** * hist_entry__get_data_type - find data type for given hist entry * @he: hist entry @@ -2658,12 +2798,9 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) struct evsel *evsel = hists_to_evsel(he->hists); struct arch *arch; struct disasm_line *dl; - struct annotated_insn_loc loc; - struct annotated_op_loc *op_loc; struct annotated_data_type *mem_type; struct annotated_item_stat *istat; u64 ip = he->ip; - int i; ann_data_stat.total++; @@ -2715,77 +2852,10 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) } retry: - istat = annotate_data_stat(&ann_insn_stat, dl->ins.name); - if (istat == NULL) { - ann_data_stat.no_insn++; - return NULL; - } - - if (annotate_get_insn_location(arch, dl, &loc) < 0) { - ann_data_stat.no_insn_ops++; - istat->bad++; - return NULL; - } - - if (is_stack_operation(arch, dl)) { - istat->good++; - he->mem_type_off = 0; - return &stackop_type; - } - - for_each_insn_op_loc(&loc, i, op_loc) { - struct data_loc_info dloc = { - .arch = arch, - .thread = he->thread, - .ms = ms, - /* Recalculate IP for LOCK prefix or insn fusion */ - .ip = ms->sym->start + dl->al.offset, - .cpumode = he->cpumode, - .op = op_loc, - .di = di_cache.dbg, - }; - - if (!op_loc->mem_ref && op_loc->segment == INSN_SEG_NONE) - continue; - - /* Recalculate IP because of LOCK prefix or insn fusion */ - ip = ms->sym->start + dl->al.offset; - - /* PC-relative addressing */ - if (op_loc->reg1 == DWARF_REG_PC) { - dloc.var_addr = annotate_calc_pcrel(ms, dloc.ip, - op_loc->offset, dl); - } - - /* This CPU access in kernel - pretend PC-relative addressing */ - if (dso__kernel(map__dso(ms->map)) && arch__is(arch, "x86") && - op_loc->segment == INSN_SEG_X86_GS && op_loc->imm) { - dloc.var_addr = op_loc->offset; - op_loc->reg1 = DWARF_REG_PC; - } - - mem_type = find_data_type(&dloc); - - if (mem_type == NULL && is_stack_canary(arch, op_loc)) { - istat->good++; - he->mem_type_off = 0; - return &canary_type; - } - - if (mem_type) - istat->good++; - else - istat->bad++; - - if 
(symbol_conf.annotate_data_sample) { - annotated_data_type__update_samples(mem_type, evsel, - dloc.type_offset, - he->stat.nr_events, - he->stat.period); - } - he->mem_type_off = dloc.type_offset; - return mem_type; - } + mem_type = __hist_entry__get_data_type(he, arch, di_cache.dbg, dl, + &he->mem_type_off); + if (mem_type) + return mem_type == NO_TYPE ? NULL : mem_type; /* * Some instructions can be fused and the actual memory access came @@ -2805,7 +2875,9 @@ retry: } ann_data_stat.no_mem_ops++; - istat->bad++; + istat = annotate_data_stat(&ann_insn_stat, dl->ins.name); + if (istat) + istat->bad++; return NULL; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 98db1b88daf4..bbb89b32f398 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -44,6 +44,7 @@ enum perf_disassembler { struct annotation_options { bool hide_src_code, + hide_src_code_on_title, use_offset, jump_arrows, print_lines, @@ -55,9 +56,11 @@ struct annotation_options { show_asm_raw, show_br_cntr, annotate_src, + code_with_type, full_addr; u8 offset_level; u8 disassemblers[MAX_DISASSEMBLERS]; + u8 disassembler_used; int min_pcnt; int max_lines; int context; @@ -455,7 +458,6 @@ enum symbol_disassemble_errno { int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen); -int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel); void symbol__annotate_zero_histogram(struct symbol *sym, struct evsel *evsel); void symbol__annotate_decay_histogram(struct symbol *sym, struct evsel *evsel); void annotated_source__purge(struct annotated_source *as); @@ -464,9 +466,9 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel); bool ui__has_annotation(void); -int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel); - -int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel); +int hist_entry__annotate_printf(struct hist_entry *he, struct evsel *evsel); +int hist_entry__tty_annotate(struct hist_entry *he, struct evsel *evsel); +int hist_entry__tty_annotate2(struct hist_entry *he, struct evsel *evsel); #ifdef HAVE_SLANG_SUPPORT int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index ba807071d3c1..688fe6d75244 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -28,7 +28,8 @@ static u64 arm_spe_calc_ip(int index, u64 payload) /* Instruction virtual address or Branch target address */ if (index == SPE_ADDR_PKT_HDR_INDEX_INS || - index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) { + index == SPE_ADDR_PKT_HDR_INDEX_BRANCH || + index == SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH) { ns = SPE_ADDR_PKT_GET_NS(payload); el = SPE_ADDR_PKT_GET_EL(payload); @@ -181,6 +182,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) decoder->record.virt_addr = ip; else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) decoder->record.phys_addr = ip; + else if (idx == SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH) + decoder->record.prev_br_tgt = ip; break; case ARM_SPE_COUNTER: if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT) @@ -207,6 +210,18 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) break; case SPE_OP_PKT_HDR_CLASS_BR_ERET: decoder->record.op |= ARM_SPE_OP_BRANCH_ERET; + if (payload & SPE_OP_PKT_COND) + decoder->record.op |= ARM_SPE_OP_BR_COND; + if (payload & SPE_OP_PKT_INDIRECT_BRANCH) + decoder->record.op |= 
ARM_SPE_OP_BR_INDIRECT; + if (payload & SPE_OP_PKT_GCS) + decoder->record.op |= ARM_SPE_OP_BR_GCS; + if (SPE_OP_PKT_CR_BL(payload)) + decoder->record.op |= ARM_SPE_OP_BR_CR_BL; + if (SPE_OP_PKT_CR_RET(payload)) + decoder->record.op |= ARM_SPE_OP_BR_CR_RET; + if (SPE_OP_PKT_CR_NON_BL_RET(payload)) + decoder->record.op |= ARM_SPE_OP_BR_CR_NON_BL_RET; break; default: pr_err("Get packet error!\n"); @@ -238,6 +253,12 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) if (payload & BIT(EV_MISPRED)) decoder->record.type |= ARM_SPE_BRANCH_MISS; + if (payload & BIT(EV_NOT_TAKEN)) + decoder->record.type |= ARM_SPE_BRANCH_NOT_TAKEN; + + if (payload & BIT(EV_TRANSACTIONAL)) + decoder->record.type |= ARM_SPE_IN_TXN; + if (payload & BIT(EV_PARTIAL_PREDICATE)) decoder->record.type |= ARM_SPE_SVE_PARTIAL_PRED; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 4bcd627e859f..881d9f29c138 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -24,6 +24,8 @@ enum arm_spe_sample_type { ARM_SPE_REMOTE_ACCESS = 1 << 7, ARM_SPE_SVE_PARTIAL_PRED = 1 << 8, ARM_SPE_SVE_EMPTY_PRED = 1 << 9, + ARM_SPE_BRANCH_NOT_TAKEN = 1 << 10, + ARM_SPE_IN_TXN = 1 << 11, }; enum arm_spe_op_type { @@ -52,8 +54,12 @@ enum arm_spe_op_type { ARM_SPE_OP_SVE_SG = 1 << 27, /* Second level operation type for BRANCH_ERET */ - ARM_SPE_OP_BR_COND = 1 << 16, - ARM_SPE_OP_BR_INDIRECT = 1 << 17, + ARM_SPE_OP_BR_COND = 1 << 16, + ARM_SPE_OP_BR_INDIRECT = 1 << 17, + ARM_SPE_OP_BR_GCS = 1 << 18, + ARM_SPE_OP_BR_CR_BL = 1 << 19, + ARM_SPE_OP_BR_CR_RET = 1 << 20, + ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 21, }; enum arm_spe_common_data_source { @@ -76,6 +82,23 @@ enum arm_spe_ampereone_data_source { ARM_SPE_AMPEREONE_L2D = 0x9, }; +enum arm_spe_hisi_hip_data_source { + ARM_SPE_HISI_HIP_PEER_CPU = 0, + ARM_SPE_HISI_HIP_PEER_CPU_HITM = 1, + ARM_SPE_HISI_HIP_L3 = 2, + ARM_SPE_HISI_HIP_L3_HITM = 3, + ARM_SPE_HISI_HIP_PEER_CLUSTER = 4, + ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM = 5, + ARM_SPE_HISI_HIP_REMOTE_SOCKET = 6, + ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM = 7, + ARM_SPE_HISI_HIP_LOCAL_MEM = 8, + ARM_SPE_HISI_HIP_REMOTE_MEM = 9, + ARM_SPE_HISI_HIP_NC_DEV = 13, + ARM_SPE_HISI_HIP_L2 = 16, + ARM_SPE_HISI_HIP_L2_HITM = 17, + ARM_SPE_HISI_HIP_L1 = 18, +}; + struct arm_spe_record { enum arm_spe_sample_type type; int err; @@ -83,6 +106,7 @@ struct arm_spe_record { u32 latency; u64 from_ip; u64 to_ip; + u64 prev_br_tgt; u64 timestamp; u64 virt_addr; u64 phys_addr; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 4cef10a83962..13cadb2f1cea 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -308,6 +308,8 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); if (payload & BIT(EV_ALIGNMENT)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " ALIGNMENT"); + if (payload & BIT(EV_TRANSACTIONAL)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " TXN"); if (payload & BIT(EV_PARTIAL_PREDICATE)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-PARTIAL-PRED"); if (payload & BIT(EV_EMPTY_PREDICATE)) @@ -397,10 +399,16 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, if (payload & SPE_OP_PKT_COND) arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND"); - - if 
(SPE_OP_PKT_IS_INDIRECT_BRANCH(payload)) + if (payload & SPE_OP_PKT_INDIRECT_BRANCH) arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND"); - + if (payload & SPE_OP_PKT_GCS) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " GCS"); + if (SPE_OP_PKT_CR_BL(payload)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-BL"); + if (SPE_OP_PKT_CR_RET(payload)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-RET"); + if (SPE_OP_PKT_CR_NON_BL_RET(payload)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-NON-BL-RET"); break; default: /* Unknown index */ diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 464a912b221c..2cdf9f6da268 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -7,6 +7,7 @@ #ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__ #define INCLUDE__ARM_SPE_PKT_DECODER_H__ +#include <linux/bitfield.h> #include <stddef.h> #include <stdint.h> @@ -104,6 +105,7 @@ enum arm_spe_events { EV_LLC_MISS = 9, EV_REMOTE_ACCESS = 10, EV_ALIGNMENT = 11, + EV_TRANSACTIONAL = 16, EV_PARTIAL_PREDICATE = 17, EV_EMPTY_PREDICATE = 18, }; @@ -116,8 +118,6 @@ enum arm_spe_events { #define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8) -#define SPE_OP_PKT_COND BIT(0) - #define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1)) #define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0 #define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4 @@ -148,7 +148,13 @@ enum arm_spe_events { #define SPE_OP_PKT_SVE_PRED BIT(2) #define SPE_OP_PKT_SVE_FP BIT(1) -#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & GENMASK_ULL(7, 1)) == 0x2) +#define SPE_OP_PKT_CR_MASK GENMASK_ULL(4, 3) +#define SPE_OP_PKT_CR_BL(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 1) +#define SPE_OP_PKT_CR_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 2) +#define SPE_OP_PKT_CR_NON_BL_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 3) +#define SPE_OP_PKT_GCS BIT(2) +#define SPE_OP_PKT_INDIRECT_BRANCH BIT(1) +#define SPE_OP_PKT_COND BIT(0) const char *arm_spe_pkt_name(enum arm_spe_pkt_type); diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 12761c39788f..d46e0cccac99 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -37,6 +37,8 @@ #include "../../arch/arm64/include/asm/cputype.h" #define MAX_TIMESTAMP (~0ULL) +#define is_ldst_op(op) (!!((op) & ARM_SPE_OP_LDST)) + struct arm_spe { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -101,6 +103,7 @@ struct arm_spe_queue { struct thread *thread; u64 period_instructions; u32 flags; + struct branch_stack *last_branch; }; struct data_source_handle { @@ -231,6 +234,17 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, params.get_trace = arm_spe_get_trace; params.data = speq; + if (spe->synth_opts.last_branch) { + size_t sz = sizeof(struct branch_stack); + + /* Allocate up to two entries for PBT + TGT */ + sz += sizeof(struct branch_entry) * + min(spe->synth_opts.last_branch_sz, 2U); + speq->last_branch = zalloc(sz); + if (!speq->last_branch) + goto out_free; + } + /* create new decoder */ speq->decoder = arm_spe_decoder_new(¶ms); if (!speq->decoder) @@ -240,6 +254,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, out_free: zfree(&speq->event_buf); + zfree(&speq->last_branch); free(speq); return NULL; @@ -346,6 +361,88 @@ static void arm_spe_prep_sample(struct arm_spe *spe, event->sample.header.size = sizeof(struct perf_event_header); } +static void 
arm_spe__prep_branch_stack(struct arm_spe_queue *speq) +{ + struct arm_spe *spe = speq->spe; + struct arm_spe_record *record = &speq->decoder->record; + struct branch_stack *bstack = speq->last_branch; + struct branch_flags *bs_flags; + unsigned int last_branch_sz = spe->synth_opts.last_branch_sz; + bool have_tgt = !!(speq->flags & PERF_IP_FLAG_BRANCH); + bool have_pbt = last_branch_sz >= (have_tgt + 1U) && record->prev_br_tgt; + size_t sz = sizeof(struct branch_stack) + + sizeof(struct branch_entry) * min(last_branch_sz, 2U) /* PBT + TGT */; + int i = 0; + + /* Clean up branch stack */ + memset(bstack, 0x0, sz); + + if (!have_tgt && !have_pbt) + return; + + if (have_tgt) { + bstack->entries[i].from = record->from_ip; + bstack->entries[i].to = record->to_ip; + + bs_flags = &bstack->entries[i].flags; + bs_flags->value = 0; + + if (record->op & ARM_SPE_OP_BR_CR_BL) { + if (record->op & ARM_SPE_OP_BR_COND) + bs_flags->type |= PERF_BR_COND_CALL; + else + bs_flags->type |= PERF_BR_CALL; + /* + * Indirect branch instruction without link (e.g. BR), + * take this case as function return. + */ + } else if (record->op & ARM_SPE_OP_BR_CR_RET || + record->op & ARM_SPE_OP_BR_INDIRECT) { + if (record->op & ARM_SPE_OP_BR_COND) + bs_flags->type |= PERF_BR_COND_RET; + else + bs_flags->type |= PERF_BR_RET; + } else if (record->op & ARM_SPE_OP_BR_CR_NON_BL_RET) { + if (record->op & ARM_SPE_OP_BR_COND) + bs_flags->type |= PERF_BR_COND; + else + bs_flags->type |= PERF_BR_UNCOND; + } else { + if (record->op & ARM_SPE_OP_BR_COND) + bs_flags->type |= PERF_BR_COND; + else + bs_flags->type |= PERF_BR_UNKNOWN; + } + + if (record->type & ARM_SPE_BRANCH_MISS) { + bs_flags->mispred = 1; + bs_flags->predicted = 0; + } else { + bs_flags->mispred = 0; + bs_flags->predicted = 1; + } + + if (record->type & ARM_SPE_BRANCH_NOT_TAKEN) + bs_flags->not_taken = 1; + + if (record->type & ARM_SPE_IN_TXN) + bs_flags->in_tx = 1; + + bs_flags->cycles = min(record->latency, 0xFFFFU); + i++; + } + + if (have_pbt) { + bs_flags = &bstack->entries[i].flags; + bs_flags->type |= PERF_BR_UNKNOWN; + bstack->entries[i].to = record->prev_br_tgt; + i++; + } + + bstack->nr = i; + bstack->hw_idx = -1ULL; +} + static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type) { event->header.size = perf_event__sample_event_size(sample, type, 0); @@ -379,8 +476,10 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq, struct arm_spe *spe = speq->spe; struct arm_spe_record *record = &speq->decoder->record; union perf_event *event = speq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; + perf_sample__init(&sample, /*all=*/true); arm_spe_prep_sample(spe, speq, event, &sample); sample.id = spe_events_id; @@ -390,7 +489,9 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq, sample.data_src = data_src; sample.weight = record->latency; - return arm_spe_deliver_synth_event(spe, speq, event, &sample); + ret = arm_spe_deliver_synth_event(spe, speq, event, &sample); + perf_sample__exit(&sample); + return ret; } static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, @@ -399,8 +500,10 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, struct arm_spe *spe = speq->spe; struct arm_spe_record *record = &speq->decoder->record; union perf_event *event = speq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; + perf_sample__init(&sample, /*all=*/true); arm_spe_prep_sample(spe, speq, event, 
&sample); sample.id = spe_events_id; @@ -408,8 +511,11 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, sample.addr = record->to_ip; sample.weight = record->latency; sample.flags = speq->flags; + sample.branch_stack = speq->last_branch; - return arm_spe_deliver_synth_event(spe, speq, event, &sample); + ret = arm_spe_deliver_synth_event(spe, speq, event, &sample); + perf_sample__exit(&sample); + return ret; } static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, @@ -418,7 +524,8 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, struct arm_spe *spe = speq->spe; struct arm_spe_record *record = &speq->decoder->record; union perf_event *event = speq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; /* * Handles perf instruction sampling period. @@ -428,6 +535,7 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, return 0; speq->period_instructions = 0; + perf_sample__init(&sample, /*all=*/true); arm_spe_prep_sample(spe, speq, event, &sample); sample.id = spe_events_id; @@ -438,8 +546,11 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, sample.period = spe->instructions_sample_period; sample.weight = record->latency; sample.flags = speq->flags; + sample.branch_stack = speq->last_branch; - return arm_spe_deliver_synth_event(spe, speq, event, &sample); + ret = arm_spe_deliver_synth_event(spe, speq, event, &sample); + perf_sample__exit(&sample); + return ret; } static const struct midr_range common_ds_encoding_cpus[] = { @@ -460,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = { {}, }; +static const struct midr_range hisi_hip_ds_encoding_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_HISI_HIP12), + {}, +}; + static void arm_spe__sample_flags(struct arm_spe_queue *speq) { const struct arm_spe_record *record = &speq->decoder->record; @@ -470,6 +586,26 @@ static void arm_spe__sample_flags(struct arm_spe_queue *speq) if (record->type & ARM_SPE_BRANCH_MISS) speq->flags |= PERF_IP_FLAG_BRANCH_MISS; + + if (record->type & ARM_SPE_BRANCH_NOT_TAKEN) + speq->flags |= PERF_IP_FLAG_NOT_TAKEN; + + if (record->type & ARM_SPE_IN_TXN) + speq->flags |= PERF_IP_FLAG_IN_TX; + + if (record->op & ARM_SPE_OP_BR_COND) + speq->flags |= PERF_IP_FLAG_CONDITIONAL; + + if (record->op & ARM_SPE_OP_BR_CR_BL) + speq->flags |= PERF_IP_FLAG_CALL; + else if (record->op & ARM_SPE_OP_BR_CR_RET) + speq->flags |= PERF_IP_FLAG_RETURN; + /* + * Indirect branch instruction without link (e.g. BR), + * take it as a function return. 
+ */ + else if (record->op & ARM_SPE_OP_BR_INDIRECT) + speq->flags |= PERF_IP_FLAG_RETURN; } } @@ -587,9 +723,100 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re arm_spe__synth_data_source_common(&common_record, data_src); } +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + /* Use common synthesis method to handle store operations */ + if (record->op & ARM_SPE_OP_ST) { + arm_spe__synth_data_source_common(record, data_src); + return; + } + + switch (record->source) { + case ARM_SPE_HISI_HIP_PEER_CPU: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_PEER_CPU_HITM: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_L3: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HIT; + break; + case ARM_SPE_HISI_HIP_L3_HITM: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + break; + case ARM_SPE_HISI_HIP_PEER_CLUSTER: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_REMOTE_SOCKET: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_LOCAL_MEM: + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_HISI_HIP_REMOTE_MEM: + data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + break; + case ARM_SPE_HISI_HIP_NC_DEV: + data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_HISI_HIP_L2: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_HISI_HIP_L2_HITM: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + break; + case ARM_SPE_HISI_HIP_L1: + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + 
default: + break; + } +} + static const struct data_source_handle data_source_handles[] = { DS(common_ds_encoding_cpus, data_source_common), DS(ampereone_ds_encoding_cpus, data_source_ampereone), + DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip), }; static void arm_spe__synth_memory_level(const struct arm_spe_record *record, @@ -669,6 +896,10 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq, { union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; + /* Only synthesize data source for LDST operations */ + if (!is_ldst_op(record->op)) + return 0; + if (record->op & ARM_SPE_OP_LD) data_src.mem_op = PERF_MEM_OP_LOAD; else if (record->op & ARM_SPE_OP_ST) @@ -749,6 +980,10 @@ static int arm_spe_sample(struct arm_spe_queue *speq) } } + if (spe->synth_opts.last_branch && + (spe->sample_branch || spe->sample_instructions)) + arm_spe__prep_branch_stack(speq); + if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) { err = arm_spe__synth_branch_sample(speq, spe->branch_id); if (err) @@ -767,7 +1002,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq) * When data_src is zero it means the record is not a memory operation, * skip to synthesize memory sample for this case. */ - if (spe->sample_memory && data_src) { + if (spe->sample_memory && is_ldst_op(record->op)) { err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src); if (err) return err; @@ -1240,6 +1475,7 @@ static void arm_spe_free_queue(void *priv) thread__zput(speq->thread); arm_spe_decoder_free(speq->decoder); zfree(&speq->event_buf); + zfree(&speq->last_branch); free(speq); } @@ -1459,6 +1695,19 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) id += 1; } + if (spe->synth_opts.last_branch) { + if (spe->synth_opts.last_branch_sz > 2) + pr_debug("Arm SPE supports only two bstack entries (PBT+TGT).\n"); + + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + /* + * We don't use the hardware index, but the sample generation + * code uses the new format branch_stack with this field, + * so the event attributes must indicate that it's present. 
+ */ + attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; + } + if (spe->synth_opts.branches) { spe->sample_branch = true; diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c index 4940be4a0569..958afe8b821e 100644 --- a/tools/perf/util/arm64-frame-pointer-unwind-support.c +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c @@ -4,6 +4,7 @@ #include "event.h" #include "perf_regs.h" // SMPL_REG_MASK #include "unwind.h" +#include <string.h> #define perf_event_arm_regs perf_event_arm64_regs #include "../../arch/arm64/include/uapi/asm/perf_regs.h" @@ -16,8 +17,13 @@ struct entries { static bool get_leaf_frame_caller_enabled(struct perf_sample *sample) { - return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs - && sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR); + struct regs_dump *regs; + + if (callchain_param.record_mode != CALLCHAIN_FP) + return false; + + regs = perf_sample__user_regs(sample); + return regs->regs && regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_LR); } static int add_entry(struct unwind_entry *entry, void *arg) @@ -32,7 +38,7 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr { int ret; struct entries entries = {}; - struct regs_dump old_regs = sample->user_regs; + struct regs_dump old_regs, *regs; if (!get_leaf_frame_caller_enabled(sample)) return 0; @@ -42,19 +48,20 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr * and set its mask. SP is not used when doing the unwinding but it * still needs to be set to prevent failures. */ - - if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) { - sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC); - sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1]; + regs = perf_sample__user_regs(sample); + memcpy(&old_regs, regs, sizeof(*regs)); + if (!(regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) { + regs->cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC); + regs->cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1]; } - if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) { - sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP); - sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0; + if (!(regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) { + regs->cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP); + regs->cache_regs[PERF_REG_ARM64_SP] = 0; } ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true); - sample->user_regs = old_regs; + memcpy(regs, &old_regs, sizeof(*regs)); if (ret || entries.length != 2) return ret; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 4d1633d87eff..03211c2623de 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1173,16 +1173,19 @@ static int auxtrace_queue_data_cb(struct perf_session *session, if (!qd->samples || event->header.type != PERF_RECORD_SAMPLE) return 0; + perf_sample__init(&sample, /*all=*/false); err = evlist__parse_sample(session->evlist, event, &sample); if (err) - return err; - - if (!sample.aux_sample.size) - return 0; + goto out; - offset += sample.aux_sample.data - (void *)event; + if (sample.aux_sample.size) { + offset += sample.aux_sample.data - (void *)event; - return session->auxtrace->queue_data(session, &sample, NULL, offset); + err = session->auxtrace->queue_data(session, &sample, NULL, offset); + } +out: + perf_sample__exit(&sample); + return err; } int 
auxtrace_queue_data(struct perf_session *session, bool samples, bool events) diff --git a/tools/perf/util/bpf-filter.l b/tools/perf/util/bpf-filter.l index f313404f95a9..6aa65ade3385 100644 --- a/tools/perf/util/bpf-filter.l +++ b/tools/perf/util/bpf-filter.l @@ -76,7 +76,7 @@ static int path_or_error(void) num_dec [0-9]+ num_hex 0[Xx][0-9a-fA-F]+ space [ \t]+ -path [^ \t\n]+ +path [^ \t\n,]+ ident [_a-zA-Z][_a-zA-Z0-9]+ %% diff --git a/tools/perf/util/bpf-trace-summary.c b/tools/perf/util/bpf-trace-summary.c new file mode 100644 index 000000000000..69fb165da206 --- /dev/null +++ b/tools/perf/util/bpf-trace-summary.c @@ -0,0 +1,458 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <inttypes.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> + +#include "dwarf-regs.h" /* for EM_HOST */ +#include "syscalltbl.h" +#include "util/cgroup.h" +#include "util/hashmap.h" +#include "util/trace.h" +#include "util/util.h" +#include <bpf/bpf.h> +#include <linux/rbtree.h> +#include <linux/time64.h> +#include <tools/libc_compat.h> /* reallocarray */ + +#include "bpf_skel/syscall_summary.h" +#include "bpf_skel/syscall_summary.skel.h" + + +static struct syscall_summary_bpf *skel; +static struct rb_root cgroups = RB_ROOT; + +int trace_prepare_bpf_summary(enum trace_summary_mode mode) +{ + skel = syscall_summary_bpf__open(); + if (skel == NULL) { + fprintf(stderr, "failed to open syscall summary bpf skeleton\n"); + return -1; + } + + if (mode == SUMMARY__BY_THREAD) + skel->rodata->aggr_mode = SYSCALL_AGGR_THREAD; + else if (mode == SUMMARY__BY_CGROUP) + skel->rodata->aggr_mode = SYSCALL_AGGR_CGROUP; + else + skel->rodata->aggr_mode = SYSCALL_AGGR_CPU; + + if (cgroup_is_v2("perf_event") > 0) + skel->rodata->use_cgroup_v2 = 1; + + if (syscall_summary_bpf__load(skel) < 0) { + fprintf(stderr, "failed to load syscall summary bpf skeleton\n"); + return -1; + } + + if (syscall_summary_bpf__attach(skel) < 0) { + fprintf(stderr, "failed to attach syscall summary bpf skeleton\n"); + return -1; + } + + if (mode == SUMMARY__BY_CGROUP) + read_all_cgroups(&cgroups); + + return 0; +} + +void trace_start_bpf_summary(void) +{ + skel->bss->enabled = 1; +} + +void trace_end_bpf_summary(void) +{ + skel->bss->enabled = 0; +} + +struct syscall_node { + int syscall_nr; + struct syscall_stats stats; +}; + +static double rel_stddev(struct syscall_stats *stat) +{ + double variance, average; + + if (stat->count < 2) + return 0; + + average = (double)stat->total_time / stat->count; + + variance = stat->squared_sum; + variance -= (stat->total_time * stat->total_time) / stat->count; + variance /= stat->count - 1; + + return 100 * sqrt(variance / stat->count) / average; +} + +/* + * The syscall_data struct maintains syscall stats ordered by total time. + * It supports different summary modes like per-thread or global. + * + * For per-thread stats, it uses a two-level data structure - + * syscall_data is keyed by TID and has an array of nodes which + * represent each syscall for the thread. + * + * For global stats, it's still two-level technically but we don't need + * per-cpu analysis so it's keyed by the syscall number to combine stats + * from different CPUs. And syscall_data always has a syscall_node so + * it can effectively work as a flat hierarchy. + * + * For per-cgroup stats, it uses a two-level data structure like the + * per-thread case - syscall_data is keyed by CGROUP and has an array of + * nodes which represent each syscall for the cgroup.
+ */ +struct syscall_data { + u64 key; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU, cgroup if AGGR_CGROUP */ + int nr_events; + int nr_nodes; + u64 total_time; + struct syscall_node *nodes; +}; + +static int datacmp(const void *a, const void *b) +{ + const struct syscall_data * const *sa = a; + const struct syscall_data * const *sb = b; + + return (*sa)->total_time > (*sb)->total_time ? -1 : 1; +} + +static int nodecmp(const void *a, const void *b) +{ + const struct syscall_node *na = a; + const struct syscall_node *nb = b; + + return na->stats.total_time > nb->stats.total_time ? -1 : 1; +} + +static size_t sc_node_hash(long key, void *ctx __maybe_unused) +{ + return key; +} + +static bool sc_node_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + +static int print_common_stats(struct syscall_data *data, FILE *fp) +{ + int printed = 0; + + for (int i = 0; i < data->nr_nodes; i++) { + struct syscall_node *node = &data->nodes[i]; + struct syscall_stats *stat = &node->stats; + double total = (double)(stat->total_time) / NSEC_PER_MSEC; + double min = (double)(stat->min_time) / NSEC_PER_MSEC; + double max = (double)(stat->max_time) / NSEC_PER_MSEC; + double avg = total / stat->count; + const char *name; + + /* TODO: support other ABIs */ + name = syscalltbl__name(EM_HOST, node->syscall_nr); + if (name) + printed += fprintf(fp, " %-15s", name); + else + printed += fprintf(fp, " syscall:%-7d", node->syscall_nr); + + printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n", + stat->count, stat->error, total, min, avg, max, + rel_stddev(stat)); + } + return printed; +} + +static int update_thread_stats(struct hashmap *hash, struct syscall_key *map_key, + struct syscall_stats *map_data) +{ + struct syscall_data *data; + struct syscall_node *nodes; + + if (!hashmap__find(hash, map_key->cpu_or_tid, &data)) { + data = zalloc(sizeof(*data)); + if (data == NULL) + return -ENOMEM; + + data->key = map_key->cpu_or_tid; + if (hashmap__add(hash, data->key, data) < 0) { + free(data); + return -ENOMEM; + } + } + + /* update thread total stats */ + data->nr_events += map_data->count; + data->total_time += map_data->total_time; + + nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes)); + if (nodes == NULL) + return -ENOMEM; + + data->nodes = nodes; + nodes = &data->nodes[data->nr_nodes++]; + nodes->syscall_nr = map_key->nr; + + /* each thread has an entry for each syscall, just use the stat */ + memcpy(&nodes->stats, map_data, sizeof(*map_data)); + return 0; +} + +static int print_thread_stat(struct syscall_data *data, FILE *fp) +{ + int printed = 0; + + qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp); + + printed += fprintf(fp, " thread (%d), ", (int)data->key); + printed += fprintf(fp, "%d events\n\n", data->nr_events); + + printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); + + printed += print_common_stats(data, fp); + printed += fprintf(fp, "\n\n"); + + return printed; +} + +static int print_thread_stats(struct syscall_data **data, int nr_data, FILE *fp) +{ + int printed = 0; + + for (int i = 0; i < nr_data; i++) + printed += print_thread_stat(data[i], fp); + + return printed; +} + +static int update_total_stats(struct hashmap *hash, struct syscall_key *map_key, + struct syscall_stats *map_data) +{ + struct syscall_data *data; + 
struct syscall_stats *stat; + + if (!hashmap__find(hash, map_key->nr, &data)) { + data = zalloc(sizeof(*data)); + if (data == NULL) + return -ENOMEM; + + data->nodes = zalloc(sizeof(*data->nodes)); + if (data->nodes == NULL) { + free(data); + return -ENOMEM; + } + + data->nr_nodes = 1; + data->key = map_key->nr; + data->nodes->syscall_nr = data->key; + + if (hashmap__add(hash, data->key, data) < 0) { + free(data->nodes); + free(data); + return -ENOMEM; + } + } + + /* update total stats for this syscall */ + data->nr_events += map_data->count; + data->total_time += map_data->total_time; + + /* This is sum of the same syscall from different CPUs */ + stat = &data->nodes->stats; + + stat->total_time += map_data->total_time; + stat->squared_sum += map_data->squared_sum; + stat->count += map_data->count; + stat->error += map_data->error; + + if (stat->max_time < map_data->max_time) + stat->max_time = map_data->max_time; + if (stat->min_time > map_data->min_time || stat->min_time == 0) + stat->min_time = map_data->min_time; + + return 0; +} + +static int print_total_stats(struct syscall_data **data, int nr_data, FILE *fp) +{ + int printed = 0; + int nr_events = 0; + + for (int i = 0; i < nr_data; i++) + nr_events += data[i]->nr_events; + + printed += fprintf(fp, " total, %d events\n\n", nr_events); + + printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); + + for (int i = 0; i < nr_data; i++) + printed += print_common_stats(data[i], fp); + + printed += fprintf(fp, "\n\n"); + return printed; +} + +static int update_cgroup_stats(struct hashmap *hash, struct syscall_key *map_key, + struct syscall_stats *map_data) +{ + struct syscall_data *data; + struct syscall_node *nodes; + + if (!hashmap__find(hash, map_key->cgroup, &data)) { + data = zalloc(sizeof(*data)); + if (data == NULL) + return -ENOMEM; + + data->key = map_key->cgroup; + if (hashmap__add(hash, data->key, data) < 0) { + free(data); + return -ENOMEM; + } + } + + /* update cgroup total stats */ + data->nr_events += map_data->count; + data->total_time += map_data->total_time; + + nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes)); + if (nodes == NULL) + return -ENOMEM; + + data->nodes = nodes; + nodes = &data->nodes[data->nr_nodes++]; + nodes->syscall_nr = map_key->nr; + + /* each cgroup has an entry for each syscall, just use the stat */ + memcpy(&nodes->stats, map_data, sizeof(*map_data)); + return 0; +} + +static int print_cgroup_stat(struct syscall_data *data, FILE *fp) +{ + int printed = 0; + struct cgroup *cgrp = __cgroup__find(&cgroups, data->key); + + qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp); + + if (cgrp) + printed += fprintf(fp, " cgroup %s,", cgrp->name); + else + printed += fprintf(fp, " cgroup id:%lu,", (unsigned long)data->key); + + printed += fprintf(fp, " %d events\n\n", data->nr_events); + + printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); + + printed += print_common_stats(data, fp); + printed += fprintf(fp, "\n\n"); + + return printed; +} + +static int print_cgroup_stats(struct syscall_data **data, int nr_data, FILE *fp) +{ + int printed = 0; + + for (int i = 0; i < nr_data; i++) + printed
+= print_cgroup_stat(data[i], fp); + + return printed; +} + +int trace_print_bpf_summary(FILE *fp) +{ + struct bpf_map *map = skel->maps.syscall_stats_map; + struct syscall_key *prev_key, key; + struct syscall_data **data = NULL; + struct hashmap schash; + struct hashmap_entry *entry; + int nr_data = 0; + int printed = 0; + int i; + size_t bkt; + + hashmap__init(&schash, sc_node_hash, sc_node_equal, /*ctx=*/NULL); + + printed = fprintf(fp, "\n Summary of events:\n\n"); + + /* get stats from the bpf map */ + prev_key = NULL; + while (!bpf_map__get_next_key(map, prev_key, &key, sizeof(key))) { + struct syscall_stats stat; + + if (!bpf_map__lookup_elem(map, &key, sizeof(key), &stat, sizeof(stat), 0)) { + switch (skel->rodata->aggr_mode) { + case SYSCALL_AGGR_THREAD: + update_thread_stats(&schash, &key, &stat); + break; + case SYSCALL_AGGR_CPU: + update_total_stats(&schash, &key, &stat); + break; + case SYSCALL_AGGR_CGROUP: + update_cgroup_stats(&schash, &key, &stat); + break; + default: + break; + } + } + + prev_key = &key; + } + + nr_data = hashmap__size(&schash); + data = calloc(nr_data, sizeof(*data)); + if (data == NULL) + goto out; + + i = 0; + hashmap__for_each_entry(&schash, entry, bkt) + data[i++] = entry->pvalue; + + qsort(data, nr_data, sizeof(*data), datacmp); + + switch (skel->rodata->aggr_mode) { + case SYSCALL_AGGR_THREAD: + printed += print_thread_stats(data, nr_data, fp); + break; + case SYSCALL_AGGR_CPU: + printed += print_total_stats(data, nr_data, fp); + break; + case SYSCALL_AGGR_CGROUP: + printed += print_cgroup_stats(data, nr_data, fp); + break; + default: + break; + } + + for (i = 0; i < nr_data && data; i++) { + free(data[i]->nodes); + free(data[i]); + } + free(data); + +out: + hashmap__clear(&schash); + return printed; +} + +void trace_cleanup_bpf_summary(void) +{ + if (!RB_EMPTY_ROOT(&cgroups)) { + struct cgroup *cgrp, *tmp; + + rbtree_postorder_for_each_entry_safe(cgrp, tmp, &cgroups, node) + cgroup__put(cgrp); + + cgroups = RB_ROOT; + } + + syscall_summary_bpf__destroy(skel); +} diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c index 25fc280e414a..7324668cc83e 100644 --- a/tools/perf/util/bpf_ftrace.c +++ b/tools/perf/util/bpf_ftrace.c @@ -39,6 +39,10 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) skel->rodata->bucket_range = ftrace->bucket_range; skel->rodata->min_latency = ftrace->min_latency; + skel->rodata->bucket_num = ftrace->bucket_num; + if (ftrace->bucket_range && ftrace->bucket_num) { + bpf_map__set_max_entries(skel->maps.latency, ftrace->bucket_num); + } /* don't need to set cpu filter for system-wide mode */ if (ftrace->target.cpu_list) { @@ -124,7 +128,7 @@ int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused) return 0; } -int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, +int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace, int buckets[], struct stats *stats) { int i, fd, err; @@ -138,7 +142,7 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, if (hist == NULL) return -ENOMEM; - for (idx = 0; idx < NUM_BUCKET; idx++) { + for (idx = 0; idx < skel->rodata->bucket_num; idx++) { err = bpf_map_lookup_elem(fd, &idx, hist); if (err) { buckets[idx] = 0; @@ -154,6 +158,12 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, stats->n = skel->bss->count; stats->max = skel->bss->max; stats->min = skel->bss->min; + + if (!ftrace->use_nsec) { + stats->mean /= 1000; + stats->max /= 1000; + stats->min /= 1000; 
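		/*
		 * The skeleton now accumulates total/max/min in nanoseconds
		 * unconditionally (see the "always in nsec" change to
		 * func_latency.bpf.c below), so the usec conversion happens once
		 * here at read time.  A minimal sketch of a caller's view,
		 * assuming the usual perf_ftrace setup around this file:
		 *
		 *	struct stats st;
		 *	int buckets[NUM_BUCKET] = { 0 };
		 *
		 *	if (perf_ftrace__latency_read_bpf(ftrace, buckets, &st) == 0)
		 *		printf("avg %.3f %s\n", st.mean,
		 *		       ftrace->use_nsec ? "nsec" : "usec");
		 */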
+ } } free(hist); diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index fc8666222399..60b81d586323 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -12,6 +12,7 @@ #include "util/lock-contention.h" #include <linux/zalloc.h> #include <linux/string.h> +#include <api/fs/fs.h> #include <bpf/bpf.h> #include <bpf/btf.h> #include <inttypes.h> @@ -35,28 +36,26 @@ static bool slab_cache_equal(long key1, long key2, void *ctx __maybe_unused) static void check_slab_cache_iter(struct lock_contention *con) { - struct btf *btf = btf__load_vmlinux_btf(); s32 ret; hashmap__init(&slab_hash, slab_cache_hash, slab_cache_equal, /*ctx=*/NULL); - if (btf == NULL) { + con->btf = btf__load_vmlinux_btf(); + if (con->btf == NULL) { pr_debug("BTF loading failed: %s\n", strerror(errno)); return; } - ret = btf__find_by_name_kind(btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT); + ret = btf__find_by_name_kind(con->btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT); if (ret < 0) { bpf_program__set_autoload(skel->progs.slab_cache_iter, false); pr_debug("slab cache iterator is not available: %d\n", ret); - goto out; + return; } has_slab_iter = true; bpf_map__set_max_entries(skel->maps.slab_caches, con->map_nr_entries); -out: - btf__free(btf); } static void run_slab_cache_iter(void) @@ -109,6 +108,75 @@ static void exit_slab_cache_iter(void) hashmap__clear(&slab_hash); } +static void init_numa_data(struct lock_contention *con) +{ + struct symbol *sym; + struct map *kmap; + char *buf = NULL, *p; + size_t len; + long last = -1; + int ret; + + /* + * 'struct zone' is embedded in 'struct pglist_data' as an array. + * As we may not have full information of the struct zone in the + * (fake) vmlinux.h, let's get the actual size from BTF. + */ + ret = btf__find_by_name_kind(con->btf, "zone", BTF_KIND_STRUCT); + if (ret < 0) { + pr_debug("cannot get type of struct zone: %d\n", ret); + return; + } + + ret = btf__resolve_size(con->btf, ret); + if (ret < 0) { + pr_debug("cannot get size of struct zone: %d\n", ret); + return; + } + skel->rodata->sizeof_zone = ret; + + /* UMA system doesn't have 'node_data[]' - just use contig_page_data. */ + sym = machine__find_kernel_symbol_by_name(con->machine, + "contig_page_data", + &kmap); + if (sym) { + skel->rodata->contig_page_data_addr = map__unmap_ip(kmap, sym->start); + map__put(kmap); + return; + } + + /* + * The 'node_data' is an array of pointers to struct pglist_data. + * It needs to follow the pointer for each node in BPF to get the + * address of struct pglist_data and its zones. 
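+	 *
+	 * (On the BPF side this pointer chasing is one CO-RE read per node;
+	 * see collect_zone_lock() in lock_contention.bpf.c further down,
+	 * roughly:
+	 *
+	 *	struct pglist_data *pgdat;
+	 *	bpf_core_read(&pgdat, sizeof(pgdat), &node_data[i]);
+	 *
+	 * which is what the nr_nodes count computed below bounds.)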
+ */ + sym = machine__find_kernel_symbol_by_name(con->machine, + "node_data", + &kmap); + if (sym == NULL) + return; + + skel->rodata->node_data_addr = map__unmap_ip(kmap, sym->start); + map__put(kmap); + + /* get the number of online nodes using the last node number + 1 */ + ret = sysfs__read_str("devices/system/node/online", &buf, &len); + if (ret < 0) { + pr_debug("failed to read online node: %d\n", ret); + return; + } + + p = buf; + while (p && *p) { + last = strtol(p, &p, 0); + + if (p && (*p == ',' || *p == '-' || *p == '\n')) + p++; + } + skel->rodata->nr_nodes = last + 1; + free(buf); +} + int lock_contention_prepare(struct lock_contention *con) { int i, fd; @@ -131,10 +199,20 @@ int lock_contention_prepare(struct lock_contention *con) else bpf_map__set_max_entries(skel->maps.task_data, 1); - if (con->save_callstack) + if (con->save_callstack) { bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries); - else + if (con->owner) { + bpf_map__set_value_size(skel->maps.stack_buf, con->max_stack * sizeof(u64)); + bpf_map__set_key_size(skel->maps.owner_stacks, + con->max_stack * sizeof(u64)); + bpf_map__set_max_entries(skel->maps.owner_stacks, con->map_nr_entries); + bpf_map__set_max_entries(skel->maps.owner_data, con->map_nr_entries); + bpf_map__set_max_entries(skel->maps.owner_stat, con->map_nr_entries); + skel->rodata->max_stack = con->max_stack; + } + } else { bpf_map__set_max_entries(skel->maps.stacks, 1); + } if (target__has_cpu(target)) { skel->rodata->has_cpu = 1; @@ -183,6 +261,27 @@ int lock_contention_prepare(struct lock_contention *con) skel->rodata->has_addr = 1; } + /* resolve lock name in delays */ + if (con->nr_delays) { + struct symbol *sym; + struct map *kmap; + + for (i = 0; i < con->nr_delays; i++) { + sym = machine__find_kernel_symbol_by_name(con->machine, + con->delays[i].sym, + &kmap); + if (sym == NULL) { + pr_warning("ignore unknown symbol: %s\n", + con->delays[i].sym); + continue; + } + + con->delays[i].addr = map__unmap_ip(kmap, sym->start); + } + skel->rodata->lock_delay = 1; + bpf_map__set_max_entries(skel->maps.lock_delays, con->nr_delays); + } + bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); bpf_map__set_max_entries(skel->maps.task_filter, ntasks); bpf_map__set_max_entries(skel->maps.type_filter, ntypes); @@ -208,6 +307,8 @@ int lock_contention_prepare(struct lock_contention *con) bpf_map__set_max_entries(skel->maps.slab_filter, nslabs); + init_numa_data(con); + if (lock_contention_bpf__load(skel) < 0) { pr_err("Failed to load lock-contention BPF skeleton\n"); return -1; @@ -272,6 +373,13 @@ int lock_contention_prepare(struct lock_contention *con) bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY); } + if (con->nr_delays) { + fd = bpf_map__fd(skel->maps.lock_delays); + + for (i = 0; i < con->nr_delays; i++) + bpf_map_update_elem(fd, &con->delays[i].addr, &con->delays[i].time, BPF_ANY); + } + if (con->aggr_mode == LOCK_AGGR_CGROUP) read_all_cgroups(&con->cgroups); @@ -450,7 +558,6 @@ static const char *lock_contention_get_name(struct lock_contention *con, { int idx = 0; u64 addr; - const char *name = ""; static char name_buf[KSYM_NAME_LEN]; struct symbol *sym; struct map *kmap; @@ -465,13 +572,14 @@ static const char *lock_contention_get_name(struct lock_contention *con, if (pid) { struct thread *t = machine__findnew_thread(machine, /*pid=*/-1, pid); - if (t == NULL) - return name; - if (!bpf_map_lookup_elem(task_fd, &pid, &task) && - thread__set_comm(t, task.comm, /*timestamp=*/0)) - name = task.comm; + if (t != NULL && + 
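		/*
		 * Note the removed code returned task.comm, a pointer into a
		 * stack-local struct; copying into the static name_buf below
		 * keeps the returned name valid after this function returns.
		 */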
!bpf_map_lookup_elem(task_fd, &pid, &task) && + thread__set_comm(t, task.comm, /*timestamp=*/0)) { + snprintf(name_buf, sizeof(name_buf), "%s", task.comm); + return name_buf; + } } - return name; + return ""; } if (con->aggr_mode == LOCK_AGGR_ADDR) { @@ -495,6 +603,11 @@ static const char *lock_contention_get_name(struct lock_contention *con, return "rq_lock"; } + if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr_or_cgroup, &flags)) { + if (flags == LOCK_CLASS_ZONE_LOCK) + return "zone_lock"; + } + /* look slab_hash for dynamic locks in a slab object */ if (hashmap__find(&slab_hash, flags & LCB_F_SLAB_ID_MASK, &slab_data)) { snprintf(name_buf, sizeof(name_buf), "&%s", slab_data->name); @@ -539,6 +652,63 @@ static const char *lock_contention_get_name(struct lock_contention *con, return name_buf; } +struct lock_stat *pop_owner_stack_trace(struct lock_contention *con) +{ + int stacks_fd, stat_fd; + u64 *stack_trace = NULL; + s32 stack_id; + struct contention_key ckey = {}; + struct contention_data cdata = {}; + size_t stack_size = con->max_stack * sizeof(*stack_trace); + struct lock_stat *st = NULL; + + stacks_fd = bpf_map__fd(skel->maps.owner_stacks); + stat_fd = bpf_map__fd(skel->maps.owner_stat); + if (!stacks_fd || !stat_fd) + goto out_err; + + stack_trace = zalloc(stack_size); + if (stack_trace == NULL) + goto out_err; + + if (bpf_map_get_next_key(stacks_fd, NULL, stack_trace)) + goto out_err; + + bpf_map_lookup_elem(stacks_fd, stack_trace, &stack_id); + ckey.stack_id = stack_id; + bpf_map_lookup_elem(stat_fd, &ckey, &cdata); + + st = zalloc(sizeof(struct lock_stat)); + if (!st) + goto out_err; + + st->name = strdup(stack_trace[0] ? lock_contention_get_name(con, NULL, stack_trace, 0) : + "unknown"); + if (!st->name) + goto out_err; + + st->flags = cdata.flags; + st->nr_contended = cdata.count; + st->wait_time_total = cdata.total_time; + st->wait_time_max = cdata.max_time; + st->wait_time_min = cdata.min_time; + st->callstack = stack_trace; + + if (cdata.count) + st->avg_wait_time = cdata.total_time / cdata.count; + + bpf_map_delete_elem(stacks_fd, stack_trace); + bpf_map_delete_elem(stat_fd, &ckey); + + return st; + +out_err: + free(stack_trace); + free(st); + + return NULL; +} + int lock_contention_read(struct lock_contention *con) { int fd, stack, err = 0; @@ -676,6 +846,7 @@ int lock_contention_finish(struct lock_contention *con) } exit_slab_cache_iter(); + btf__free(con->btf); return 0; } diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c index 4269b41d1771..c367fefe6ecb 100644 --- a/tools/perf/util/bpf_off_cpu.c +++ b/tools/perf/util/bpf_off_cpu.c @@ -13,6 +13,8 @@ #include "util/cgroup.h" #include "util/strlist.h" #include <bpf/bpf.h> +#include <internal/xyarray.h> +#include <linux/time64.h> #include "bpf_skel/off_cpu.skel.h" @@ -36,34 +38,25 @@ union off_cpu_data { u64 array[1024 / sizeof(u64)]; }; +u64 off_cpu_raw[MAX_STACKS + 5]; + static int off_cpu_config(struct evlist *evlist) { + char off_cpu_event[64]; struct evsel *evsel; - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_BPF_OUTPUT, - .size = sizeof(attr), /* to capture ABI version */ - }; - char *evname = strdup(OFFCPU_EVENT); - if (evname == NULL) - return -ENOMEM; - - evsel = evsel__new(&attr); - if (!evsel) { - free(evname); - return -ENOMEM; + scnprintf(off_cpu_event, sizeof(off_cpu_event), "bpf-output/name=%s/", OFFCPU_EVENT); + if (parse_event(evlist, off_cpu_event)) { + pr_err("Failed to open off-cpu event\n"); + return -1; } - evsel->core.attr.freq = 
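	/*
	 * The removed lines below hand-built a software bpf-output evsel and
	 * its name (OFFCPU_EVENT); the "bpf-output/name=.../" string above
	 * now requests the event via parse_event() instead.
	 */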
1; - evsel->core.attr.sample_period = 1; - /* off-cpu analysis depends on stack trace */ - evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN; - - evlist__add(evlist, evsel); - - free(evsel->name); - evsel->name = evname; + evlist__for_each_entry(evlist, evsel) { + if (evsel__is_offcpu_event(evsel)) { + evsel->core.system_wide = true; + break; + } + } return 0; } @@ -71,6 +64,9 @@ static int off_cpu_config(struct evlist *evlist) static void off_cpu_start(void *arg) { struct evlist *evlist = arg; + struct evsel *evsel; + struct perf_cpu pcpu; + int i; /* update task filter for the given workload */ if (skel->rodata->has_task && skel->rodata->uses_tgid && @@ -84,6 +80,26 @@ static void off_cpu_start(void *arg) bpf_map_update_elem(fd, &pid, &val, BPF_ANY); } + /* update BPF perf_event map */ + evsel = evlist__find_evsel_by_str(evlist, OFFCPU_EVENT); + if (evsel == NULL) { + pr_err("%s evsel not found\n", OFFCPU_EVENT); + return; + } + + perf_cpu_map__for_each_cpu(pcpu, i, evsel->core.cpus) { + int err; + int cpu_nr = pcpu.cpu; + + err = bpf_map__update_elem(skel->maps.offcpu_output, &cpu_nr, sizeof(int), + xyarray__entry(evsel->core.fd, cpu_nr, 0), + sizeof(int), BPF_ANY); + if (err) { + pr_err("Failed to update perf event map for direct off-cpu dumping\n"); + return; + } + } + skel->bss->enabled = 1; } @@ -277,6 +293,8 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, } } + skel->bss->offcpu_thresh_ns = opts->off_cpu_thresh_ns; + err = off_cpu_bpf__attach(skel); if (err) { pr_err("Failed to attach off-cpu BPF skeleton\n"); @@ -300,6 +318,7 @@ int off_cpu_write(struct perf_session *session) { int bytes = 0, size; int fd, stack; + u32 raw_size; u64 sample_type, val, sid = 0; struct evsel *evsel; struct perf_data_file *file = &session->data->file; @@ -339,46 +358,54 @@ int off_cpu_write(struct perf_session *session) while (!bpf_map_get_next_key(fd, &prev, &key)) { int n = 1; /* start from perf_event_header */ - int ip_pos = -1; bpf_map_lookup_elem(fd, &key, &val); + /* zero-fill some of the fields, will be overwritten by raw_data when parsing */ if (sample_type & PERF_SAMPLE_IDENTIFIER) data.array[n++] = sid; - if (sample_type & PERF_SAMPLE_IP) { - ip_pos = n; + if (sample_type & PERF_SAMPLE_IP) data.array[n++] = 0; /* will be updated */ - } if (sample_type & PERF_SAMPLE_TID) - data.array[n++] = (u64)key.pid << 32 | key.tgid; + data.array[n++] = 0; if (sample_type & PERF_SAMPLE_TIME) data.array[n++] = tstamp; - if (sample_type & PERF_SAMPLE_ID) - data.array[n++] = sid; if (sample_type & PERF_SAMPLE_CPU) data.array[n++] = 0; if (sample_type & PERF_SAMPLE_PERIOD) - data.array[n++] = val; - if (sample_type & PERF_SAMPLE_CALLCHAIN) { - int len = 0; - - /* data.array[n] is callchain->nr (updated later) */ - data.array[n + 1] = PERF_CONTEXT_USER; - data.array[n + 2] = 0; - - bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]); - while (data.array[n + 2 + len]) + data.array[n++] = 0; + if (sample_type & PERF_SAMPLE_RAW) { + /* + * [ size ][ data ] + * [ data ] + * [ data ] + * [ data ] + * [ data ][ empty] + */ + int len = 0, i = 0; + void *raw_data = (void *)data.array + n * sizeof(u64); + + off_cpu_raw[i++] = (u64)key.pid << 32 | key.tgid; + off_cpu_raw[i++] = val; + + /* off_cpu_raw[i] is callchain->nr (updated later) */ + off_cpu_raw[i + 1] = PERF_CONTEXT_USER; + off_cpu_raw[i + 2] = 0; + + bpf_map_lookup_elem(stack, &key.stack_id, &off_cpu_raw[i + 2]); + while (off_cpu_raw[i + 2 + len]) len++; - /* update length of callchain */ - data.array[n] = len + 1; + 
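+			/*
+			 * Raw area being assembled here, in u64 words after a
+			 * u32 size (this is the diagram above; the extra 4
+			 * bytes noted below keep the record 8-byte aligned):
+			 *
+			 *	u32 raw_size;
+			 *	u64 pid << 32 | tgid;
+			 *	u64 period (val);
+			 *	u64 nr;			callchain length + 1
+			 *	u64 PERF_CONTEXT_USER;
+			 *	u64 ips[len];
+			 *	u64 cgroup_id;
+			 */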
off_cpu_raw[i] = len + 1; + i += len + 2; + + off_cpu_raw[i++] = key.cgroup_id; - /* update sample ip with the first callchain entry */ - if (ip_pos >= 0) - data.array[ip_pos] = data.array[n + 2]; + raw_size = i * sizeof(u64) + sizeof(u32); /* 4 bytes for alignment */ + memcpy(raw_data, &raw_size, sizeof(raw_size)); + memcpy(raw_data + sizeof(u32), off_cpu_raw, i * sizeof(u64)); - /* calculate sample callchain data array length */ - n += len + 2; + n += i + 1; } if (sample_type & PERF_SAMPLE_CGROUP) data.array[n++] = key.cgroup_id; diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c index fb144811b34f..e731a79a753a 100644 --- a/tools/perf/util/bpf_skel/func_latency.bpf.c +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c @@ -50,6 +50,7 @@ const volatile int use_nsec = 0; const volatile unsigned int bucket_range; const volatile unsigned int min_latency; const volatile unsigned int max_latency; +const volatile unsigned int bucket_num = NUM_BUCKET; SEC("kprobe/func") int BPF_PROG(func_begin) @@ -101,6 +102,7 @@ int BPF_PROG(func_end) start = bpf_map_lookup_elem(&functime, &tid); if (start) { __s64 delta = bpf_ktime_get_ns() - *start; + __u64 val = delta; __u32 key = 0; __u64 *hist; @@ -110,30 +112,27 @@ int BPF_PROG(func_end) return 0; if (bucket_range != 0) { - delta /= cmp_base; + val = delta / cmp_base; if (min_latency > 0) { - if (delta > min_latency) - delta -= min_latency; + if (val > min_latency) + val -= min_latency; else goto do_lookup; } // Less than 1 unit (ms or ns), or, in the future, // than the min latency desired. - if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units ) - // clang 12 doesn't like s64 / u32 division - key = (__u64)delta / bucket_range + 1; - if (key >= NUM_BUCKET || - delta >= max_latency - min_latency) - key = NUM_BUCKET - 1; + if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units ) + key = val / bucket_range + 1; + if (key >= bucket_num) + key = bucket_num - 1; } - delta += min_latency; goto do_lookup; } // calculate index using delta - for (key = 0; key < (NUM_BUCKET - 1); key++) { + for (key = 0; key < (bucket_num - 1); key++) { if (delta < (cmp_base << key)) break; } @@ -143,12 +142,9 @@ do_lookup: if (!hist) return 0; - *hist += 1; + __sync_fetch_and_add(hist, 1); - if (bucket_range == 0) - delta /= cmp_base; - - __sync_fetch_and_add(&total, delta); + __sync_fetch_and_add(&total, delta); // always in nsec __sync_fetch_and_add(&count, 1); if (delta > max) diff --git a/tools/perf/util/bpf_skel/kwork_trace.bpf.c b/tools/perf/util/bpf_skel/kwork_trace.bpf.c index cbd79bc4b330..9ce9c8dddc4b 100644 --- a/tools/perf/util/bpf_skel/kwork_trace.bpf.c +++ b/tools/perf/util/bpf_skel/kwork_trace.bpf.c @@ -80,7 +80,7 @@ static __always_inline int local_strncmp(const char *s1, for (i = 0; i < sz; i++) { ret = (unsigned char)s1[i] - (unsigned char)s2[i]; - if (ret || !s1[i] || !s2[i]) + if (ret || !s1[i]) break; } diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index 6533ea9b044c..96e7d853b9ed 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -11,6 +11,12 @@ /* for collect_lock_syms(). 4096 was rejected by the verifier */ #define MAX_CPUS 1024 +/* for collect_zone_lock(). It should be more than the actual zones. */ +#define MAX_ZONES 10 + +/* for do_lock_delay(). Arbitrarily set to 1 million. 
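+ * bpf_loop() needs a verifier-visible upper bound, but the callback
+ * returns 1 (stop) as soon as the target timestamp passes, so only
+ * enough iterations run to burn the requested delay; MAX_LOOP merely
+ * caps the worst case.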
*/ +#define MAX_LOOP (1U << 20) + /* lock contention flags from include/trace/events/lock.h */ #define LCB_F_SPIN (1U << 0) #define LCB_F_READ (1U << 1) @@ -27,6 +33,38 @@ struct { __uint(max_entries, MAX_ENTRIES); } stacks SEC(".maps"); +/* buffer for owner stacktrace */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, 1); +} stack_buf SEC(".maps"); + +/* a map for tracing owner stacktrace to owner stack id */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); // owner stacktrace + __uint(value_size, sizeof(__s32)); // owner stack id + __uint(max_entries, 1); +} owner_stacks SEC(".maps"); + +/* a map for tracing lock address to owner data */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); // lock address + __uint(value_size, sizeof(struct owner_tracing_data)); + __uint(max_entries, 1); +} owner_data SEC(".maps"); + +/* a map for contention_key (stores owner stack id) to contention data */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct contention_key)); + __uint(value_size, sizeof(struct contention_data)); + __uint(max_entries, 1); +} owner_stat SEC(".maps"); + /* maintain timestamp at the beginning of contention */ struct { __uint(type, BPF_MAP_TYPE_HASH); @@ -114,6 +152,13 @@ struct { __uint(max_entries, 1); } slab_caches SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, 1); +} lock_delays SEC(".maps"); + struct rw_semaphore___old { struct task_struct *owner; } __attribute__((preserve_access_index)); @@ -143,6 +188,8 @@ const volatile int needs_callstack; const volatile int stack_skip; const volatile int lock_owner; const volatile int use_cgroup_v2; +const volatile int max_stack; +const volatile int lock_delay; /* determine the key of lock stat */ const volatile int aggr_mode; @@ -164,6 +211,9 @@ int data_fail; int task_map_full; int data_map_full; +struct task_struct *bpf_task_from_pid(s32 pid) __ksym __weak; +void bpf_task_release(struct task_struct *p) __ksym __weak; + static inline __u64 get_current_cgroup_id(void) { struct task_struct *task; @@ -348,6 +398,35 @@ static inline __u32 check_lock_type(__u64 lock, __u32 flags) return 0; } +static inline long delay_callback(__u64 idx, void *arg) +{ + __u64 target = *(__u64 *)arg; + + if (target <= bpf_ktime_get_ns()) + return 1; + + /* just to kill time */ + (void)bpf_get_prandom_u32(); + + return 0; +} + +static inline void do_lock_delay(__u64 duration) +{ + __u64 target = bpf_ktime_get_ns() + duration; + + bpf_loop(MAX_LOOP, delay_callback, &target, /*flags=*/0); +} + +static inline void check_lock_delay(__u64 lock) +{ + __u64 *delay; + + delay = bpf_map_lookup_elem(&lock_delays, &lock); + if (delay) + do_lock_delay(*delay); +} + static inline struct tstamp_data *get_tstamp_elem(__u32 flags) { __u32 pid; @@ -387,6 +466,61 @@ static inline struct tstamp_data *get_tstamp_elem(__u32 flags) return pelem; } +static inline s32 get_owner_stack_id(u64 *stacktrace) +{ + s32 *id, new_id; + static s64 id_gen = 1; + + id = bpf_map_lookup_elem(&owner_stacks, stacktrace); + if (id) + return *id; + + new_id = (s32)__sync_fetch_and_add(&id_gen, 1); + + bpf_map_update_elem(&owner_stacks, stacktrace, &new_id, BPF_NOEXIST); + + id = bpf_map_lookup_elem(&owner_stacks, stacktrace); + if (id) + return *id; + + return -1; +} + +static inline void 
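+/*
+ * Shared helper: the open-coded total/count/max/min update that used to
+ * sit at the `found:` label in contention_end() (see the hunk further
+ * down) now lives here so the owner_stat path can reuse it.
+ */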
update_contention_data(struct contention_data *data, u64 duration, u32 count)
+{
+	__sync_fetch_and_add(&data->total_time, duration);
+	__sync_fetch_and_add(&data->count, count);
+
+	/* FIXME: need atomic operations */
+	if (data->max_time < duration)
+		data->max_time = duration;
+	if (data->min_time > duration)
+		data->min_time = duration;
+}
+
+static inline void update_owner_stat(u32 id, u64 duration, u32 flags)
+{
+	struct contention_key key = {
+		.stack_id = id,
+		.pid = 0,
+		.lock_addr_or_cgroup = 0,
+	};
+	struct contention_data *data = bpf_map_lookup_elem(&owner_stat, &key);
+
+	if (!data) {
+		struct contention_data first = {
+			.total_time = duration,
+			.max_time = duration,
+			.min_time = duration,
+			.count = 1,
+			.flags = flags,
+		};
+		bpf_map_update_elem(&owner_stat, &key, &first, BPF_NOEXIST);
+	} else {
+		update_contention_data(data, duration, 1);
+	}
+}
+
 SEC("tp_btf/contention_begin")
 int contention_begin(u64 *ctx)
 {
@@ -404,6 +538,72 @@ int contention_begin(u64 *ctx)
 	pelem->flags = (__u32)ctx[1];
 
 	if (needs_callstack) {
+		u32 i = 0;
+		u32 id = 0;
+		int owner_pid;
+		u64 *buf;
+		struct task_struct *task;
+		struct owner_tracing_data *otdata;
+
+		if (!lock_owner)
+			goto skip_owner;
+
+		task = get_lock_owner(pelem->lock, pelem->flags);
+		if (!task)
+			goto skip_owner;
+
+		owner_pid = BPF_CORE_READ(task, pid);
+
+		buf = bpf_map_lookup_elem(&stack_buf, &i);
+		if (!buf)
+			goto skip_owner;
+		for (i = 0; i < max_stack; i++)
+			buf[i] = 0x0;
+
+		if (!bpf_task_from_pid)
+			goto skip_owner;
+
+		task = bpf_task_from_pid(owner_pid);
+		if (!task)
+			goto skip_owner;
+
+		bpf_get_task_stack(task, buf, max_stack * sizeof(unsigned long), 0);
+		bpf_task_release(task);
+
+		otdata = bpf_map_lookup_elem(&owner_data, &pelem->lock);
+		id = get_owner_stack_id(buf);
+
+		/*
+		 * Contention has just begun, or (corner case) `lock` is owned by
+		 * a process other than `owner_pid`. We treat the corner case as
+		 * an unexpected internal error and just ignore the previous
+		 * tracing record.
+		 */
+		if (!otdata || otdata->pid != owner_pid) {
+			struct owner_tracing_data first = {
+				.pid = owner_pid,
+				.timestamp = pelem->timestamp,
+				.count = 1,
+				.stack_id = id,
+			};
+			bpf_map_update_elem(&owner_data, &pelem->lock, &first, BPF_ANY);
+		}
+		/* Contention is ongoing and a new waiter joins */
+		else {
+			__sync_fetch_and_add(&otdata->count, 1);
+
+			/*
+			 * The owner is the same, but its stacktrace might have
+			 * changed. In that case we store/update `owner_stat`
+			 * based on the current owner stack id.
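+			 * (get_owner_stack_id() hands out ids from a global
+			 * counter via __sync_fetch_and_add(), so one id names
+			 * one observed owner stack for the run's lifetime.)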
+ */ + if (id != otdata->stack_id) { + update_owner_stat(id, pelem->timestamp - otdata->timestamp, + pelem->flags); + + otdata->timestamp = pelem->timestamp; + otdata->stack_id = id; + } + } +skip_owner: pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP | stack_skip); if (pelem->stack_id < 0) @@ -440,6 +640,7 @@ int contention_end(u64 *ctx) struct tstamp_data *pelem; struct contention_key key = {}; struct contention_data *data; + __u64 timestamp; __u64 duration; bool need_delete = false; @@ -467,12 +668,88 @@ int contention_end(u64 *ctx) need_delete = true; } - duration = bpf_ktime_get_ns() - pelem->timestamp; + timestamp = bpf_ktime_get_ns(); + duration = timestamp - pelem->timestamp; if ((__s64)duration < 0) { __sync_fetch_and_add(&time_fail, 1); goto out; } + if (needs_callstack && lock_owner) { + struct owner_tracing_data *otdata = bpf_map_lookup_elem(&owner_data, &pelem->lock); + + if (!otdata) + goto skip_owner; + + /* Update `owner_stat` */ + update_owner_stat(otdata->stack_id, timestamp - otdata->timestamp, pelem->flags); + + /* No contention is occurring, delete `lock` entry in `owner_data` */ + if (otdata->count <= 1) + bpf_map_delete_elem(&owner_data, &pelem->lock); + /* + * Contention is still ongoing, with a new owner (current task). `owner_data` + * should be updated accordingly. + */ + else { + u32 i = 0; + s32 ret = (s32)ctx[1]; + u64 *buf; + + otdata->timestamp = timestamp; + __sync_fetch_and_add(&otdata->count, -1); + + buf = bpf_map_lookup_elem(&stack_buf, &i); + if (!buf) + goto skip_owner; + for (i = 0; i < (u32)max_stack; i++) + buf[i] = 0x0; + + /* + * `ret` has the return code of the lock function. + * If `ret` is negative, the current task terminates lock waiting without + * acquiring it. Owner is not changed, but we still need to update the owner + * stack. + */ + if (ret < 0) { + s32 id = 0; + struct task_struct *task; + + if (!bpf_task_from_pid) + goto skip_owner; + + task = bpf_task_from_pid(otdata->pid); + if (!task) + goto skip_owner; + + bpf_get_task_stack(task, buf, + max_stack * sizeof(unsigned long), 0); + bpf_task_release(task); + + id = get_owner_stack_id(buf); + + /* + * If owner stack is changed, update owner stack id for this lock. + */ + if (id != otdata->stack_id) + otdata->stack_id = id; + } + /* + * Otherwise, update tracing data with the current task, which is the new + * owner. + */ + else { + otdata->pid = pid; + /* + * We don't want to retrieve callstack here, since it is where the + * current task acquires the lock and provides no additional + * information. We simply assign -1 to invalidate it. 
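+				 * (-1 is also what get_owner_stack_id() returns
+				 * on failure, so readers already treat it as
+				 * "no stack".)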
+ */ + otdata->stack_id = -1; + } + } + } +skip_owner: switch (aggr_mode) { case LOCK_AGGR_CALLER: key.stack_id = pelem->stack_id; @@ -556,16 +833,12 @@ int contention_end(u64 *ctx) } found: - __sync_fetch_and_add(&data->total_time, duration); - __sync_fetch_and_add(&data->count, 1); - - /* FIXME: need atomic operations */ - if (data->max_time < duration) - data->max_time = duration; - if (data->min_time > duration) - data->min_time = duration; + update_contention_data(data, duration, 1); out: + if (lock_delay) + check_lock_delay(pelem->lock); + pelem->lock = 0; if (need_delete) bpf_map_delete_elem(&tstamp, &pid); @@ -574,6 +847,11 @@ out: extern struct rq runqueues __ksym; +const volatile __u64 contig_page_data_addr; +const volatile __u64 node_data_addr; +const volatile int nr_nodes; +const volatile int sizeof_zone; + struct rq___old { raw_spinlock_t lock; } __attribute__((preserve_access_index)); @@ -582,6 +860,59 @@ struct rq___new { raw_spinlock_t __lock; } __attribute__((preserve_access_index)); +static void collect_zone_lock(void) +{ + __u64 nr_zones, zone_off; + __u64 lock_addr, lock_off; + __u32 lock_flag = LOCK_CLASS_ZONE_LOCK; + + zone_off = offsetof(struct pglist_data, node_zones); + lock_off = offsetof(struct zone, lock); + + if (contig_page_data_addr) { + struct pglist_data *contig_page_data; + + contig_page_data = (void *)(long)contig_page_data_addr; + nr_zones = BPF_CORE_READ(contig_page_data, nr_zones); + + for (int i = 0; i < MAX_ZONES; i++) { + __u64 zone_addr; + + if (i >= nr_zones) + break; + + zone_addr = contig_page_data_addr + (sizeof_zone * i) + zone_off; + lock_addr = zone_addr + lock_off; + + bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY); + } + } else if (nr_nodes > 0) { + struct pglist_data **node_data = (void *)(long)node_data_addr; + + for (int i = 0; i < nr_nodes; i++) { + struct pglist_data *pgdat = NULL; + int err; + + err = bpf_core_read(&pgdat, sizeof(pgdat), &node_data[i]); + if (err < 0 || pgdat == NULL) + break; + + nr_zones = BPF_CORE_READ(pgdat, nr_zones); + for (int k = 0; k < MAX_ZONES; k++) { + __u64 zone_addr; + + if (k >= nr_zones) + break; + + zone_addr = (__u64)(void *)pgdat + (sizeof_zone * k) + zone_off; + lock_addr = zone_addr + lock_off; + + bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY); + } + } + } +} + SEC("raw_tp/bpf_test_finish") int BPF_PROG(collect_lock_syms) { @@ -603,6 +934,9 @@ int BPF_PROG(collect_lock_syms) lock_flag = LOCK_CLASS_RQLOCK; bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY); } + + collect_zone_lock(); + return 0; } diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h index c15f734d7fc4..28c5e5aced7f 100644 --- a/tools/perf/util/bpf_skel/lock_data.h +++ b/tools/perf/util/bpf_skel/lock_data.h @@ -3,6 +3,13 @@ #ifndef UTIL_BPF_SKEL_LOCK_DATA_H #define UTIL_BPF_SKEL_LOCK_DATA_H +struct owner_tracing_data { + u32 pid; // Who has the lock. + u32 count; // How many waiters for this lock. + u64 timestamp; // The time while the owner acquires lock and contention is going on. 
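+	// (count only tracks current waiters; contention_end() deletes the
+	// whole entry once the last waiter leaves.)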
+ s32 stack_id; // Identifier for `owner_stat`, which stores as value in `owner_stacks` +}; + struct tstamp_data { u64 timestamp; u64 lock; @@ -60,6 +67,7 @@ enum lock_aggr_mode { enum lock_class_sym { LOCK_CLASS_NONE, LOCK_CLASS_RQLOCK, + LOCK_CLASS_ZONE_LOCK, }; struct slab_cache_data { diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c index c152116df72f..72763bb8d1de 100644 --- a/tools/perf/util/bpf_skel/off_cpu.bpf.c +++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c @@ -18,10 +18,19 @@ #define MAX_STACKS 32 #define MAX_ENTRIES 102400 +#define MAX_CPUS 4096 +#define MAX_OFFCPU_LEN 37 + +// We have a 'struct stack' in vmlinux.h when building with GEN_VMLINUX_H=1 +struct __stack { + u64 array[MAX_STACKS]; +}; + struct tstamp_data { __u32 stack_id; __u32 state; __u64 timestamp; + struct __stack stack; }; struct offcpu_key { @@ -39,6 +48,24 @@ struct { __uint(max_entries, MAX_ENTRIES); } stacks SEC(".maps"); +struct offcpu_data { + u64 array[MAX_OFFCPU_LEN]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __uint(max_entries, MAX_CPUS); +} offcpu_output SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct offcpu_data)); + __uint(max_entries, 1); +} offcpu_payload SEC(".maps"); + struct { __uint(type, BPF_MAP_TYPE_TASK_STORAGE); __uint(map_flags, BPF_F_NO_PREALLOC); @@ -97,6 +124,8 @@ const volatile bool uses_cgroup_v1 = false; int perf_subsys_id = -1; +__u64 offcpu_thresh_ns; + /* * Old kernel used to call it task_struct->state and now it's '__state'. * Use BPF CO-RE "ignored suffix rule" to deal with it like below: @@ -183,6 +212,47 @@ static inline int can_record(struct task_struct *t, int state) return 1; } +static inline int copy_stack(struct __stack *from, struct offcpu_data *to, int n) +{ + int len = 0; + + for (int i = 0; i < MAX_STACKS && from->array[i]; ++i, ++len) + to->array[n + 2 + i] = from->array[i]; + + return len; +} + +/** + * off_cpu_dump - dump off-cpu samples to ring buffer + * @data: payload for dumping off-cpu samples + * @key: off-cpu data + * @stack: stack trace of the task before being scheduled out + * + * If the threshold of off-cpu time is reached, acquire tid, period, callchain, and cgroup id + * information of the task, and dump it as a raw sample to perf ring buffer + */ +static int off_cpu_dump(void *ctx, struct offcpu_data *data, struct offcpu_key *key, + struct __stack *stack, __u64 delta) +{ + int n = 0, len = 0; + + data->array[n++] = (u64)key->tgid << 32 | key->pid; + data->array[n++] = delta; + + /* data->array[n] is callchain->nr (updated later) */ + data->array[n + 1] = PERF_CONTEXT_USER; + data->array[n + 2] = 0; + len = copy_stack(stack, data, n); + + /* update length of callchain */ + data->array[n] = len + 1; + n += len + 2; + + data->array[n++] = key->cgroup_id; + + return bpf_perf_event_output(ctx, &offcpu_output, BPF_F_CURRENT_CPU, data, n * sizeof(u64)); +} + static int off_cpu_stat(u64 *ctx, struct task_struct *prev, struct task_struct *next, int state) { @@ -207,6 +277,16 @@ static int off_cpu_stat(u64 *ctx, struct task_struct *prev, pelem->state = state; pelem->stack_id = stack_id; + /* + * If stacks are successfully collected by bpf_get_stackid(), collect them once more + * in task_storage for direct off-cpu sample dumping + */ + if (stack_id > 0 && bpf_get_stack(ctx, &pelem->stack, MAX_STACKS * sizeof(u64), BPF_F_USER_STACK)) { + /* + 
* This empty if block is used to avoid 'result unused warning' from bpf_get_stack(). + * If the collection fails, continue with the logic for the next task. + */ + } next: pelem = bpf_task_storage_get(&tstamp, next, NULL, 0); @@ -221,11 +301,19 @@ next: __u64 delta = ts - pelem->timestamp; __u64 *total; - total = bpf_map_lookup_elem(&off_cpu, &key); - if (total) - *total += delta; - else - bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY); + if (delta >= offcpu_thresh_ns) { + int zero = 0; + struct offcpu_data *data = bpf_map_lookup_elem(&offcpu_payload, &zero); + + if (data) + off_cpu_dump(ctx, data, &key, &pelem->stack, delta); + } else { + total = bpf_map_lookup_elem(&off_cpu, &key); + if (total) + *total += delta; + else + bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY); + } /* prevent to reuse the timestamp later */ pelem->timestamp = 0; diff --git a/tools/perf/util/bpf_skel/syscall_summary.bpf.c b/tools/perf/util/bpf_skel/syscall_summary.bpf.c new file mode 100644 index 000000000000..1bcd066a5199 --- /dev/null +++ b/tools/perf/util/bpf_skel/syscall_summary.bpf.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Trace raw_syscalls tracepoints to collect system call statistics. + */ + +#include "vmlinux.h" +#include "syscall_summary.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +/* This is to calculate a delta between sys-enter and sys-exit for each thread */ +struct syscall_trace { + int nr; /* syscall number is only available at sys-enter */ + int unused; + u64 timestamp; +}; + +#define MAX_ENTRIES (128 * 1024) + +struct syscall_trace_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); /* tid */ + __type(value, struct syscall_trace); + __uint(max_entries, MAX_ENTRIES); +} syscall_trace_map SEC(".maps"); + +struct syscall_stats_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct syscall_key); + __type(value, struct syscall_stats); + __uint(max_entries, MAX_ENTRIES); +} syscall_stats_map SEC(".maps"); + +int enabled; /* controlled from userspace */ + +const volatile enum syscall_aggr_mode aggr_mode; +const volatile int use_cgroup_v2; + +int perf_subsys_id = -1; + +static inline __u64 get_current_cgroup_id(void) +{ + struct task_struct *task; + struct cgroup *cgrp; + + if (use_cgroup_v2) + return bpf_get_current_cgroup_id(); + + task = bpf_get_current_task_btf(); + + if (perf_subsys_id == -1) { +#if __has_builtin(__builtin_preserve_enum_value) + perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id, + perf_event_cgrp_id); +#else + perf_subsys_id = perf_event_cgrp_id; +#endif + } + + cgrp = BPF_CORE_READ(task, cgroups, subsys[perf_subsys_id], cgroup); + return BPF_CORE_READ(cgrp, kn, id); +} + +static void update_stats(int cpu_or_tid, u64 cgroup_id, int nr, s64 duration, + long ret) +{ + struct syscall_key key = { + .cpu_or_tid = cpu_or_tid, + .cgroup = cgroup_id, + .nr = nr, + }; + struct syscall_stats *stats; + + stats = bpf_map_lookup_elem(&syscall_stats_map, &key); + if (stats == NULL) { + struct syscall_stats zero = {}; + + bpf_map_update_elem(&syscall_stats_map, &key, &zero, BPF_NOEXIST); + stats = bpf_map_lookup_elem(&syscall_stats_map, &key); + if (stats == NULL) + return; + } + + __sync_fetch_and_add(&stats->count, 1); + if (ret < 0) + __sync_fetch_and_add(&stats->error, 1); + + if (duration > 0) { + __sync_fetch_and_add(&stats->total_time, duration); + __sync_fetch_and_add(&stats->squared_sum, duration * duration); + if (stats->max_time < duration) + stats->max_time = duration; + 
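+	/*
+	 * min_time == 0 doubles as "unset": the map entry was created from
+	 * the zeroed struct inserted above, so the first recorded duration
+	 * always wins this comparison.
+	 */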
if (stats->min_time > duration || stats->min_time == 0) + stats->min_time = duration; + } + + return; +} + +SEC("tp_btf/sys_enter") +int sys_enter(u64 *ctx) +{ + int tid; + struct syscall_trace st; + + if (!enabled) + return 0; + + st.nr = ctx[1]; /* syscall number */ + st.unused = 0; + st.timestamp = bpf_ktime_get_ns(); + + tid = bpf_get_current_pid_tgid(); + bpf_map_update_elem(&syscall_trace_map, &tid, &st, BPF_ANY); + + return 0; +} + +SEC("tp_btf/sys_exit") +int sys_exit(u64 *ctx) +{ + int tid; + int key = 0; + u64 cgroup = 0; + long ret = ctx[1]; /* return value of the syscall */ + struct syscall_trace *st; + s64 delta; + + if (!enabled) + return 0; + + tid = bpf_get_current_pid_tgid(); + st = bpf_map_lookup_elem(&syscall_trace_map, &tid); + if (st == NULL) + return 0; + + if (aggr_mode == SYSCALL_AGGR_THREAD) + key = tid; + else if (aggr_mode == SYSCALL_AGGR_CGROUP) + cgroup = get_current_cgroup_id(); + else + key = bpf_get_smp_processor_id(); + + delta = bpf_ktime_get_ns() - st->timestamp; + update_stats(key, cgroup, st->nr, delta, ret); + + bpf_map_delete_elem(&syscall_trace_map, &tid); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/util/bpf_skel/syscall_summary.h b/tools/perf/util/bpf_skel/syscall_summary.h new file mode 100644 index 000000000000..72ccccb45925 --- /dev/null +++ b/tools/perf/util/bpf_skel/syscall_summary.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Data structures shared between BPF and tools. */ +#ifndef UTIL_BPF_SKEL_SYSCALL_SUMMARY_H +#define UTIL_BPF_SKEL_SYSCALL_SUMMARY_H + +enum syscall_aggr_mode { + SYSCALL_AGGR_THREAD, + SYSCALL_AGGR_CPU, + SYSCALL_AGGR_CGROUP, +}; + +struct syscall_key { + u64 cgroup; + int cpu_or_tid; + int nr; +}; + +struct syscall_stats { + u64 total_time; + u64 squared_sum; + u64 max_time; + u64 min_time; + u32 count; + u32 error; +}; + +#endif /* UTIL_BPF_SKEL_SYSCALL_SUMMARY_H */ diff --git a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h index 7b81d3173917..a59ce912be18 100644 --- a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h +++ b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h @@ -203,4 +203,13 @@ struct bpf_iter__kmem_cache { struct kmem_cache *s; } __attribute__((preserve_access_index)); +struct zone { + spinlock_t lock; +} __attribute__((preserve_access_index)); + +struct pglist_data { + struct zone node_zones[6]; /* value for all possible config */ + int nr_zones; +} __attribute__((preserve_access_index)); + #endif // __VMLINUX_H diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index b80c12c74bbb..7429530fa774 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -25,7 +25,8 @@ struct branch_flags { u64 spec:2; u64 new_type:4; u64 priv:3; - u64 reserved:31; + u64 not_taken:1; + u64 reserved:30; }; }; }; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 0c7564747a14..d7b7eef740b9 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -589,9 +589,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) return -ENOMEM; } call->ip = cursor_node->ip; - call->ms = cursor_node->ms; - call->ms.map = map__get(call->ms.map); - call->ms.maps = maps__get(call->ms.maps); + map_symbol__copy(&call->ms, &cursor_node->ms); call->srcline = cursor_node->srcline; if (cursor_node->branch) { @@ -1094,9 +1092,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor, node->ip = ip; map_symbol__exit(&node->ms); - node->ms = *ms; - 
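	/*
	 * map_symbol__copy() takes its own references on ms->maps and
	 * ms->map, replacing the manual assignment plus the explicit gets
	 * removed here (the same pattern as in fill_node() above).
	 */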
node->ms.maps = maps__get(ms->maps); - node->ms.map = map__get(ms->map); + map_symbol__copy(&node->ms, ms); node->branch = branch; node->nr_loop_iter = nr_loop_iter; node->iter_cycles = iter_cycles; @@ -1564,7 +1560,7 @@ int callchain_node__make_parent_list(struct callchain_node *node) goto out; *new = *chain; new->has_children = false; - new->ms.map = map__get(new->ms.map); + map_symbol__copy(&new->ms, &chain->ms); list_add_tail(&new->list, &head); } parent = parent->parent; diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 9a7248dbe2d7..0319546decca 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -30,11 +30,6 @@ extern int perf_use_color_default; -/* - * Use this instead of perf_default_config if you need the value of color.ui. - */ -int perf_color_default_config(const char *var, const char *value, void *cb); - int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty); int color_vsnprintf(char *bf, size_t size, const char *color, const char *fmt, va_list args); diff --git a/tools/perf/util/color_config.c b/tools/perf/util/color_config.c index dc09ba7cb31e..301031ddc025 100644 --- a/tools/perf/util/color_config.c +++ b/tools/perf/util/color_config.c @@ -35,14 +35,3 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) } return 0; } - -int perf_color_default_config(const char *var, const char *value, - void *cb __maybe_unused) -{ - if (!strcmp(var, "color.ui")) { - perf_use_color_default = perf_config_colorbool(var, value, -1); - return 0; - } - - return 0; -} diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 49b79cf0c5cc..8aa456d7c2cd 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -5,6 +5,8 @@ #include <internal/rc_check.h> #include <linux/refcount.h> #include <linux/zalloc.h> +#include <tools/libc_compat.h> // reallocarray + #include "rwsem.h" DECLARE_RC_STRUCT(comm_str) { diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h index b29109cd3609..6cfecfca16f2 100644 --- a/tools/perf/util/compress.h +++ b/tools/perf/util/compress.h @@ -4,7 +4,9 @@ #include <stdbool.h> #include <stddef.h> +#include <stdio.h> #include <sys/types.h> +#include <linux/compiler.h> #ifdef HAVE_ZSTD_SUPPORT #include <zstd.h> #endif @@ -15,8 +17,26 @@ bool gzip_is_compressed(const char *input); #endif #ifdef HAVE_LZMA_SUPPORT +int lzma_decompress_stream_to_file(FILE *input, int output_fd); int lzma_decompress_to_file(const char *input, int output_fd); bool lzma_is_compressed(const char *input); +#else +static inline +int lzma_decompress_stream_to_file(FILE *input __maybe_unused, + int output_fd __maybe_unused) +{ + return -1; +} +static inline +int lzma_decompress_to_file(const char *input __maybe_unused, + int output_fd __maybe_unused) +{ + return -1; +} +static inline int lzma_is_compressed(const char *input __maybe_unused) +{ + return false; +} #endif struct zstd_data { diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 2d07c9257a1a..ae72b66b6ded 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -856,12 +856,6 @@ void perf_config__exit(void) config_set = NULL; } -void perf_config__refresh(void) -{ - perf_config__exit(); - perf_config__init(); -} - static void perf_config_item__delete(struct perf_config_item *item) { zfree(&item->name); diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index a727c95cb119..987b47cf54c3 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -49,7 +49,6 @@ void 
perf_config_set__delete(struct perf_config_set *set); int perf_config_set__collect(struct perf_config_set *set, const char *file_name, const char *var, const char *value); void perf_config__exit(void); -void perf_config__refresh(void); int perf_config__set_variable(const char *var, const char *value); /** diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 5c329ad614e9..89570397a4b3 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -67,19 +67,23 @@ static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_m struct perf_cpu_map *map; map = perf_cpu_map__empty_new(data->cpus_data.nr); - if (map) { - unsigned i; - - for (i = 0; i < data->cpus_data.nr; i++) { - /* - * Special treatment for -1, which is not real cpu number, - * and we need to use (int) -1 to initialize map[i], - * otherwise it would become 65535. - */ - if (data->cpus_data.cpu[i] == (u16) -1) - RC_CHK_ACCESS(map)->map[i].cpu = -1; - else - RC_CHK_ACCESS(map)->map[i].cpu = (int) data->cpus_data.cpu[i]; + if (!map) + return NULL; + + for (unsigned int i = 0; i < data->cpus_data.nr; i++) { + /* + * Special treatment for -1, which is not real cpu number, + * and we need to use (int) -1 to initialize map[i], + * otherwise it would become 65535. + */ + if (data->cpus_data.cpu[i] == (u16) -1) { + RC_CHK_ACCESS(map)->map[i].cpu = -1; + } else if (data->cpus_data.cpu[i] < INT16_MAX) { + RC_CHK_ACCESS(map)->map[i].cpu = (int16_t) data->cpus_data.cpu[i]; + } else { + pr_err("Invalid cpumap entry %u\n", data->cpus_data.cpu[i]); + perf_cpu_map__put(map); + return NULL; } } @@ -106,8 +110,15 @@ static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_ int cpu; perf_record_cpu_map_data__read_one_mask(data, i, local_copy); - for_each_set_bit(cpu, local_copy, 64) - RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i; + for_each_set_bit(cpu, local_copy, 64) { + if (cpu + cpus_per_i < INT16_MAX) { + RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i; + } else { + pr_err("Invalid cpumap entry %d\n", cpu + cpus_per_i); + perf_cpu_map__put(map); + return NULL; + } + } } return map; @@ -127,8 +138,15 @@ static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map RC_CHK_ACCESS(map)->map[i++].cpu = -1; for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu; - i++, cpu++) - RC_CHK_ACCESS(map)->map[i].cpu = cpu; + i++, cpu++) { + if (cpu < INT16_MAX) { + RC_CHK_ACCESS(map)->map[i].cpu = cpu; + } else { + pr_err("Invalid cpumap entry %d\n", cpu); + perf_cpu_map__put(map); + return NULL; + } + } return map; } @@ -427,7 +445,7 @@ static void set_max_cpu_num(void) { const char *mnt; char path[PATH_MAX]; - int ret = -1; + int max, ret = -1; /* set up default */ max_cpu_num.cpu = 4096; @@ -444,10 +462,12 @@ static void set_max_cpu_num(void) goto out; } - ret = get_max_num(path, &max_cpu_num.cpu); + ret = get_max_num(path, &max); if (ret) goto out; + max_cpu_num.cpu = max; + /* get the highest present cpu number for a sparse allocation */ ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt); if (ret >= PATH_MAX) { @@ -455,8 +475,14 @@ static void set_max_cpu_num(void) goto out; } - ret = get_max_num(path, &max_present_cpu_num.cpu); + ret = get_max_num(path, &max); + if (!ret && max > INT16_MAX) { + pr_err("Read out of bounds max cpus of %d\n", max); + ret = -1; + } + if (!ret) + max_present_cpu_num.cpu = (int16_t)max; out: if (ret) pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu); @@ 
-606,7 +632,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size) #define COMMA first ? "" : "," for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) { - struct perf_cpu cpu = { .cpu = INT_MAX }; + struct perf_cpu cpu = { .cpu = INT16_MAX }; bool last = i == perf_cpu_map__nr(map); if (!last) @@ -696,7 +722,7 @@ struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ if (!online) online = perf_cpu_map__new_online_cpus(); /* from /sys/devices/system/cpu/online */ - return online; + return perf_cpu_map__get(online); } bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 0bf9e5c27b59..30f4bb3e7fa3 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -506,20 +506,27 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session, evsel = evlist__event2evsel(session->evlist, event); if (!evsel) return -EINVAL; + perf_sample__init(&sample, /*all=*/false); err = evsel__parse_sample(evsel, event, &sample); if (err) - return err; + goto out; cpu = sample.cpu; if (cpu == -1) { /* no CPU in the sample - possibly recorded with an old version of perf */ pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record."); - return -EINVAL; + err = -EINVAL; + goto out; } - if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) - return cs_etm__process_trace_id_v0(etm, cpu, hw_id); + if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) { + err = cs_etm__process_trace_id_v0(etm, cpu, hw_id); + goto out; + } - return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id); + err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id); +out: + perf_sample__exit(&sample); + return err; } void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, @@ -1560,8 +1567,9 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, int ret = 0; struct cs_etm_auxtrace *etm = etmq->etm; union perf_event *event = tidq->event_buf; - struct perf_sample sample = {.ip = 0,}; + struct perf_sample sample; + perf_sample__init(&sample, /*all=*/true); event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el); event->sample.header.size = sizeof(struct perf_event_header); @@ -1598,6 +1606,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, "CS ETM Trace: failed to deliver instruction event, error %d\n", ret); + perf_sample__exit(&sample); return ret; } @@ -3151,9 +3160,10 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf evsel = evlist__event2evsel(session->evlist, event); if (!evsel) return -EINVAL; + perf_sample__init(&sample, /*all=*/false); ret = evsel__parse_sample(evsel, event, &sample); if (ret) - return ret; + goto out; /* * Loop through the auxtrace index to find the buffer that matches up with this aux event. 
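	 * (All error paths in this function now leave through the new `out:`
	 * label so perf_sample__exit() releases whatever evsel__parse_sample()
	 * allocated, mirroring the fix in cs_etm__process_aux_output_hw_id()
	 * above.)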
@@ -3168,7 +3178,7 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf * 1 ('not found') */ if (ret != 1) - return ret; + goto out; } } @@ -3178,7 +3188,10 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf */ pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64 " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu); - return 0; + ret = 0; +out: + perf_sample__exit(&sample); + return ret; } static int cs_etm__queue_aux_records(struct perf_session *session) diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 98661ede2a73..164eb45a0b36 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -158,26 +158,6 @@ out_err: return ret; } -int perf_data__update_dir(struct perf_data *data) -{ - int i; - - if (WARN_ON(!data->is_dir)) - return -EINVAL; - - for (i = 0; i < data->dir.nr; i++) { - struct perf_data_file *file = &data->dir.files[i]; - struct stat st; - - if (fstat(file->fd, &st)) - return -1; - - file->size = st.st_size; - } - - return 0; -} - static bool check_pipe(struct perf_data *data) { struct stat st; diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 110f3ebde30f..1438e32e0451 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -97,7 +97,6 @@ int perf_data__switch(struct perf_data *data, int perf_data__create_dir(struct perf_data *data, int nr); int perf_data__open_dir(struct perf_data *data); void perf_data__close_dir(struct perf_data *data); -int perf_data__update_dir(struct perf_data *data); unsigned long perf_data__size(struct perf_data *data); int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz); bool has_kcore_dir(const char *path); diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 995f6bb05b5f..f9ef7d045c92 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -46,8 +46,8 @@ int debug_type_profile; FILE *debug_file(void) { if (!_debug_file) { - pr_warning_once("debug_file not set"); debug_set_file(stderr); + pr_warning_once("debug_file not set"); } return _debug_file; } diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c index 19acf4775d35..b5deea7cbdf2 100644 --- a/tools/perf/util/debuginfo.c +++ b/tools/perf/util/debuginfo.c @@ -125,8 +125,12 @@ struct debuginfo *debuginfo__new(const char *path) dso__put(dso); out: + if (dinfo) + return dinfo; + /* if failed to open all distro debuginfo, open given binary */ - return dinfo ? : __debuginfo__new(path); + symbol__join_symfs(buf, path); + return __debuginfo__new(buf); } void debuginfo__delete(struct debuginfo *dbg) diff --git a/tools/perf/util/demangle-cxx.h b/tools/perf/util/demangle-cxx.h index 26b5b66c0b4e..9359937a881a 100644 --- a/tools/perf/util/demangle-cxx.h +++ b/tools/perf/util/demangle-cxx.h @@ -2,6 +2,8 @@ #ifndef __PERF_DEMANGLE_CXX #define __PERF_DEMANGLE_CXX 1 +#include <stdbool.h> + #ifdef __cplusplus extern "C" { #endif diff --git a/tools/perf/util/demangle-rust-v0.c b/tools/perf/util/demangle-rust-v0.c new file mode 100644 index 000000000000..19924d85407d --- /dev/null +++ b/tools/perf/util/demangle-rust-v0.c @@ -0,0 +1,2042 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// The contents of this file come from the Rust rustc-demangle library, hosted +// in the <https://github.com/rust-lang/rustc-demangle> repository, licensed +// under "Apache-2.0 OR MIT". For copyright details, see +// <https://github.com/rust-lang/rustc-demangle/blob/main/README.md>. 
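+// A quick orientation: v0-mangled Rust symbols begin with "_R" (see
+// rust_demangle_v0_demangle() below). As an example taken from the
+// upstream test suite, "_RNvC6_123foo3bar" demangles to "123foo::bar".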
+// Please note that the file should be kept as close as possible to upstream. + +// Code for demangling Rust symbols. This code is mostly +// a line-by-line translation of the Rust code in `rustc-demangle`. + +// you can find the latest version of this code in https://github.com/rust-lang/rustc-demangle + +#include <stdint.h> +#include <stddef.h> +#include <string.h> +#include <stdbool.h> +#include <sys/param.h> +#include <stdio.h> + +#include "demangle-rust-v0.h" + +#if defined(__GNUC__) || defined(__clang__) +#define NODISCARD __attribute__((warn_unused_result)) +#else +#define NODISCARD +#endif + +#define MAX_DEPTH 500 + +typedef enum { + DemangleOk, + DemangleInvalid, + DemangleRecursed, + DemangleBug, +} demangle_status; + +struct demangle_v0 { + const char *mangled; + size_t mangled_len; +}; + +struct demangle_legacy { + const char *mangled; + size_t mangled_len; + size_t elements; +}; + +// private version of memrchr to avoid _GNU_SOURCE +static void *demangle_memrchr(const void *s, int c, size_t n) { + const uint8_t *s_ = s; + for (; n != 0; n--) { + if (s_[n-1] == c) { + return (void*)&s_[n-1]; + } + } + return NULL; +} + + +static bool unicode_iscontrol(uint32_t ch) { + // this is *technically* a unicode table, but + // some unicode properties are simpler than you might think + return ch < 0x20 || (ch >= 0x7f && ch < 0xa0); +} + +// "good enough" tables, the only consequence is that when printing +// *constant strings*, some characters are printed as `\u{abcd}` rather than themselves. +// +// I'm leaving these here to allow easily replacing them with actual +// tables if desired. +static bool unicode_isprint(uint32_t ch) { + if (ch < 0x20) { + return false; + } + if (ch < 0x7f) { + return true; + } + return false; +} + +static bool unicode_isgraphemextend(uint32_t ch) { + (void)ch; + return false; +} + +static bool str_isascii(const char *s, size_t s_len) { + for (size_t i = 0; i < s_len; i++) { + if (s[i] & 0x80) { + return false; + } + } + + return true; +} + +typedef enum { + PunycodeOk, + PunycodeError +} punycode_status; + +struct parser { + // the parser assumes that `sym` has a safe "terminating byte". It might be NUL, + // but it might also be something else if a symbol is "truncated". 
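+    // `next` is the parser's cursor into `sym`; `depth` counts nested
+    // parse calls and is bounded (MAX_DEPTH above, DemangleRecursed) so
+    // malformed input cannot recurse without limit.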
+ const char *sym; + size_t sym_len; + size_t next; + uint32_t depth; +}; + +struct printer { + demangle_status status; // if status == 0 parser is valid + struct parser parser; + char *out; // NULL for no output [in which case out_len is not decremented] + size_t out_len; + uint32_t bound_lifetime_depth; + bool alternate; +}; + +static NODISCARD overflow_status printer_print_path(struct printer *printer, bool in_value); +static NODISCARD overflow_status printer_print_type(struct printer *printer); +static NODISCARD overflow_status printer_print_const(struct printer *printer, bool in_value); + +static NODISCARD demangle_status try_parse_path(struct parser *parser) { + struct printer printer = { + DemangleOk, + *parser, + NULL, + SIZE_MAX, + 0, + false + }; + overflow_status ignore = printer_print_path(&printer, false); // can't fail since no output + (void)ignore; + *parser = printer.parser; + return printer.status; +} + +NODISCARD static demangle_status rust_demangle_v0_demangle(const char *s, size_t s_len, struct demangle_v0 *res, const char **rest) { + if (s_len > strlen(s)) { + // s_len only exists to shorten the string, this is not a buffer API + return DemangleInvalid; + } + + const char *inner; + size_t inner_len; + if (s_len >= 2 && !strncmp(s, "_R", strlen("_R"))) { + inner = s+2; + inner_len = s_len - 2; + } else if (s_len >= 1 && !strncmp(s, "R", strlen("R"))) { + // On Windows, dbghelp strips leading underscores, so we accept "R..." + // form too. + inner = s+1; + inner_len = s_len - 1; + } else if (s_len >= 3 && !strncmp(s, "__R", strlen("__R"))) { + // On OSX, symbols are prefixed with an extra _ + inner = s+3; + inner_len = s_len - 3; + } else { + return DemangleInvalid; + } + + // Paths always start with uppercase characters. + if (*inner < 'A' || *inner > 'Z') { + return DemangleInvalid; + } + + if (!str_isascii(inner, inner_len)) { + return DemangleInvalid; + } + + struct parser parser = { inner, inner_len, 0, 0 }; + + demangle_status status = try_parse_path(&parser); + if (status != DemangleOk) return status; + char next = parser.sym[parser.next]; + + // Instantiating crate (paths always start with uppercase characters). 
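+	// For example, `_RNvC6_123foo3bar` (an RFC 2603 sample) encodes `123foo::bar`;
+	// if a second uppercase-led path follows the main one, it names the
+	// instantiating crate and is validated here but never printed.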
+ if (parser.next < parser.sym_len && next >= 'A' && next <= 'Z') { + status = try_parse_path(&parser); + if (status != DemangleOk) return status; + } + + res->mangled = inner; + res->mangled_len = inner_len; + if (rest) { + *rest = parser.sym + parser.next; + } + + return DemangleOk; +} + +// This might require `len` to be up to 3 characters bigger than the real output len in case of utf-8 +NODISCARD static overflow_status rust_demangle_v0_display_demangle(struct demangle_v0 res, char *out, size_t len, bool alternate) { + struct printer printer = { + DemangleOk, + { + res.mangled, + res.mangled_len, + 0, + 0 + }, + out, + len, + 0, + alternate + }; + if (printer_print_path(&printer, true) == OverflowOverflow) { + return OverflowOverflow; + } + if (printer.out_len < OVERFLOW_MARGIN) { + return OverflowOverflow; + } + *printer.out = '\0'; + return OverflowOk; +} + +static size_t code_to_utf8(unsigned char *buffer, uint32_t code) +{ + if (code <= 0x7F) { + buffer[0] = code; + return 1; + } + if (code <= 0x7FF) { + buffer[0] = 0xC0 | (code >> 6); /* 110xxxxx */ + buffer[1] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + return 2; + } + if (code <= 0xFFFF) { + buffer[0] = 0xE0 | (code >> 12); /* 1110xxxx */ + buffer[1] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ + buffer[2] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + return 3; + } + if (code <= 0x10FFFF) { + buffer[0] = 0xF0 | (code >> 18); /* 11110xxx */ + buffer[1] = 0x80 | ((code >> 12) & 0x3F); /* 10xxxxxx */ + buffer[2] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ + buffer[3] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + return 4; + } + return 0; +} + + +// return length of char at byte, or SIZE_MAX if invalid. buf should have 4 valid characters +static NODISCARD size_t utf8_next_char(uint8_t *s, uint32_t *ch) { + uint8_t byte = *s; + // UTF8-1 = %x00-7F + // UTF8-2 = %xC2-DF UTF8-tail + // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / + // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) + // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / + // %xF4 %x80-8F 2( UTF8-tail ) + if (byte < 0x80) { + *ch = byte; + return 1; + } else if (byte < 0xc2) { + return SIZE_MAX; + } else if (byte < 0xe0) { + if (s[1] >= 0x80 && s[1] < 0xc0) { + *ch = ((byte&0x1f)<<6) + (s[1] & 0x3f); + return 2; + } + return SIZE_MAX; + } if (byte < 0xf0) { + if (!(s[1] >= 0x80 && s[1] < 0xc0) || !(s[2] >= 0x80 && s[2] < 0xc0)) { + return SIZE_MAX; // basic validation + } + if (byte == 0xe0 && s[1] < 0xa0) { + return SIZE_MAX; // overshort + } + if (byte == 0xed && s[1] >= 0xa0) { + return SIZE_MAX; // surrogate + } + *ch = ((byte&0x0f)<<12) + ((s[1] & 0x3f)<<6) + (s[2] & 0x3f); + return 3; + } else if (byte < 0xf5) { + if (!(s[1] >= 0x80 && s[1] < 0xc0) || !(s[2] >= 0x80 && s[2] < 0xc0) || !(s[3] >= 0x80 && s[3] < 0xc0)) { + return SIZE_MAX; // basic validation + } + if (byte == 0xf0 && s[1] < 0x90) { + return SIZE_MAX; // overshort + } + if (byte == 0xf4 && s[1] >= 0x90) { + return SIZE_MAX; // over max + } + *ch = ((byte&0x07)<<18) + ((s[1] & 0x3f)<<12) + ((s[2] & 0x3f)<<6) + (s[3]&0x3f); + return 4; + } else { + return SIZE_MAX; + } +} + +static NODISCARD bool validate_char(uint32_t n) { + return ((n ^ 0xd800) - 0x800) < 0x110000 - 0x800; +} + +#define SMALL_PUNYCODE_LEN 128 + +static NODISCARD punycode_status punycode_decode(const char *start, size_t ascii_len, const char *punycode_start, size_t punycode_len, uint32_t (*out_)[SMALL_PUNYCODE_LEN], size_t *out_len) { + uint32_t *out = *out_; + + if (punycode_len == 0) { + return PunycodeError; + } + 
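+	// What follows is the RFC 3492 punycode decoder: each delta is a
+	// variable-length base-36 integer whose digit thresholds depend on `bias`,
+	// and selects an (index, codepoint) insertion into `out`. The constants
+	// below (t_min 1, t_max 26, skew 38, damp 700, initial bias 72, initial
+	// n 0x80) are the RFC's standard parameters.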
+ if (ascii_len > SMALL_PUNYCODE_LEN) { + return PunycodeError; + } + for (size_t i = 0; i < ascii_len; i++) { + out[i] = start[i]; + } + size_t len = ascii_len; + + size_t base = 36, t_min = 1, t_max = 26, skew = 38, damp = 700, bias = 72, i = 0, n = 0x80; + for (;;) { + size_t delta = 0, w = 1, k = 0; + for (;;) { + k += base; + size_t biased = k < bias ? 0 : k - bias; + size_t t = MIN(MAX(biased, t_min), t_max); + size_t d; + if (punycode_len == 0) { + return PunycodeError; + } + char nx = *punycode_start++; + punycode_len--; + if ('a' <= nx && nx <= 'z') { + d = nx - 'a'; + } else if ('0' <= nx && nx <= '9') { + d = 26 + (nx - '0'); + } else { + return PunycodeError; + } + if (w == 0 || d > SIZE_MAX / w || d*w > SIZE_MAX - delta) { + return PunycodeError; + } + delta += d * w; + if (d < t) { + break; + } + if (base < t || w == 0 || (base - t) > SIZE_MAX / w) { + return PunycodeError; + } + w *= (base - t); + } + + len += 1; + if (i > SIZE_MAX - delta) { + return PunycodeError; + } + i += delta; + if (n > SIZE_MAX - i / len) { + return PunycodeError; + } + n += i / len; + i %= len; + + // char validation + if (n > UINT32_MAX || !validate_char((uint32_t)n)) { + return PunycodeError; + } + + // insert new character + if (len > SMALL_PUNYCODE_LEN) { + return PunycodeError; + } + memmove(out + i + 1, out + i, (len - i - 1) * sizeof(uint32_t)); + out[i] = (uint32_t)n; + + // start i index at incremented position + i++; + + // If there are no more deltas, decoding is complete. + if (punycode_len == 0) { + *out_len = len; + return PunycodeOk; + } + + // Perform bias adaptation. + delta /= damp; + damp = 2; + + delta += delta / len; + k = 0; + while (delta > ((base - t_min) * t_max) / 2) { + delta /= base - t_min; + k += base; + } + bias = k + ((base - t_min + 1) * delta) / (delta + skew); + } +} + +struct ident { + const char *ascii_start; + size_t ascii_len; + const char *punycode_start; + size_t punycode_len; +}; + +static NODISCARD overflow_status display_ident(const char *ascii_start, size_t ascii_len, const char *punycode_start, size_t punycode_len, uint8_t *out, size_t *out_len) { + uint32_t outbuf[SMALL_PUNYCODE_LEN]; + + size_t wide_len; + size_t out_buflen = *out_len; + + if (punycode_len == 0) { + if (ascii_len > out_buflen) { + return OverflowOverflow; + } + memcpy(out, ascii_start, ascii_len); + *out_len = ascii_len; + } else if (punycode_decode(ascii_start, ascii_len, punycode_start, punycode_len, &outbuf, &wide_len) == PunycodeOk) { + size_t narrow_len = 0; + for (size_t i = 0; i < wide_len; i++) { + if (out_buflen - narrow_len < 4) { + return OverflowOverflow; + } + unsigned char *pos = &out[narrow_len]; + narrow_len += code_to_utf8(pos, outbuf[i]); + } + *out_len = narrow_len; + } else { + size_t narrow_len = 0; + if (out_buflen < strlen("punycode{")) { + return OverflowOverflow; + } + memcpy(out, "punycode{", strlen("punycode{")); + narrow_len = strlen("punycode{"); + if (ascii_len > 0) { + if (out_buflen - narrow_len < ascii_len || out_buflen - narrow_len - ascii_len < 1) { + return OverflowOverflow; + } + memcpy(out + narrow_len, ascii_start, ascii_len); + narrow_len += ascii_len; + out[narrow_len] = '-'; + narrow_len++; + } + if (out_buflen - narrow_len < punycode_len || out_buflen - narrow_len - punycode_len < 1) { + return OverflowOverflow; + } + memcpy(out + narrow_len, punycode_start, punycode_len); + narrow_len += punycode_len; + out[narrow_len] = '}'; + narrow_len++; + *out_len = narrow_len; + } + + return OverflowOk; +} + +static NODISCARD bool try_parse_uint(const 
char *buf, size_t len, uint64_t *result) { + size_t cur = 0; + for(;cur < len && buf[cur] == '0';cur++); + uint64_t result_val = 0; + if (len - cur > 16) return false; + for(;cur < len;cur++) { + char c = buf[cur]; + result_val <<= 4; + if ('0' <= c && c <= '9') { + result_val += c - '0'; + } else if ('a' <= c && c <= 'f') { + result_val += 10 + (c - 'a'); + } else { + return false; + } + } + *result = result_val; + return true; +} + +static NODISCARD bool dinibble2int(const char *buf, uint8_t *result) { + uint8_t result_val = 0; + for (int i = 0; i < 2; i++) { + char c = buf[i]; + result_val <<= 4; + if ('0' <= c && c <= '9') { + result_val += c - '0'; + } else if ('a' <= c && c <= 'f') { + result_val += 10 + (c - 'a'); + } else { + return false; + } + } + *result = result_val; + return true; +} + + +typedef enum { + NtsOk = 0, + NtsOverflow = 1, + NtsInvalid = 2 +} nibbles_to_string_status; + +// '\u{10ffff}', +margin +#define ESCAPED_SIZE 12 + +static NODISCARD size_t char_to_string(uint32_t ch, uint8_t quote, bool first, char (*buf)[ESCAPED_SIZE]) { + // encode the character + char *escaped_buf = *buf; + escaped_buf[0] = '\\'; + size_t escaped_len = 2; + switch (ch) { + case '\0': + escaped_buf[1] = '0'; + break; + case '\t': + escaped_buf[1] = 't'; + break; + case '\r': + escaped_buf[1] = 'r'; + break; + case '\n': + escaped_buf[1] = 'n'; + break; + case '\\': + escaped_buf[1] = '\\'; + break; + default: + if (ch == quote) { + escaped_buf[1] = ch; + } else if (!unicode_isprint(ch) || (first && unicode_isgraphemextend(ch))) { + int hexlen = snprintf(escaped_buf, ESCAPED_SIZE, "\\u{%x}", (unsigned int)ch); + if (hexlen < 0) { + return 0; // (snprintf shouldn't fail!) + } + escaped_len = hexlen; + } else { + // printable character + escaped_buf[0] = ch; + escaped_len = 1; + } + break; + } + + return escaped_len; +} + +// convert nibbles to a single/double-quoted string +static NODISCARD nibbles_to_string_status nibbles_to_string(const char *buf, size_t len, uint8_t *out, size_t *out_len) { + uint8_t quote = '"'; + bool first = true; + + if ((len % 2) != 0) { + return NtsInvalid; // odd number of nibbles + } + + size_t cur_out_len = 0; + + // write starting quote + if (out != NULL) { + cur_out_len = *out_len; + if (cur_out_len == 0) { + return NtsOverflow; + } + *out++ = quote; + cur_out_len--; + } + + uint8_t conv_buf[4] = {0}; + size_t conv_buf_len = 0; + while (len > 1 || conv_buf_len > 0) { + while (len > 1 && conv_buf_len < sizeof(conv_buf)) { + if (!dinibble2int(buf, &conv_buf[conv_buf_len])) { + return NtsInvalid; + } + conv_buf_len++; + buf += 2; + len -= 2; + } + + // conv_buf is full here if possible, process 1 UTF-8 character + uint32_t ch = 0; + size_t consumed = utf8_next_char(conv_buf, &ch); + if (consumed > conv_buf_len) { + // either SIZE_MAX (invalid UTF-8) or finished input buffer and + // there are still bytes remaining, in both cases invalid + return NtsInvalid; + } + + // "consume" the character + memmove(conv_buf, conv_buf+consumed, conv_buf_len-consumed); + conv_buf_len -= consumed; + + char escaped_buf[ESCAPED_SIZE]; + size_t escaped_len = char_to_string(ch, '"', first, &escaped_buf); + if (out != NULL) { + if (cur_out_len < escaped_len) { + return NtsOverflow; + } + memcpy(out, escaped_buf, escaped_len); + out += escaped_len; + cur_out_len -= escaped_len; + } + first = false; + } + + // write ending quote + if (out != NULL) { + if (cur_out_len == 0) { + return NtsOverflow; + } + *out++ = quote; + cur_out_len--; + *out_len -= cur_out_len; // subtract remaining 
space to get used space + } + + return NtsOk; +} + +static const char* basic_type(uint8_t tag) { + switch(tag) { + case 'b': + return "bool"; + case 'c': + return "char"; + case 'e': + return "str"; + case 'u': + return "()"; + case 'a': + return "i8"; + case 's': + return "i16"; + case 'l': + return "i32"; + case 'x': + return "i64"; + case 'n': + return "i128"; + case 'i': + return "isize"; + case 'h': + return "u8"; + case 't': + return "u16"; + case 'm': + return "u32"; + case 'y': + return "u64"; + case 'o': + return "u128"; + case 'j': + return "usize"; + case 'f': + return "f32"; + case 'd': + return "f64"; + case 'z': + return "!"; + case 'p': + return "_"; + case 'v': + return "..."; + default: + return NULL; + } +} + +static NODISCARD demangle_status parser_push_depth(struct parser *parser) { + parser->depth++; + if (parser->depth > MAX_DEPTH) { + return DemangleRecursed; + } else { + return DemangleOk; + } +} + +static demangle_status parser_pop_depth(struct parser *parser) { + parser->depth--; + return DemangleOk; +} + +static uint8_t parser_peek(struct parser const *parser) { + if (parser->next == parser->sym_len) { + return 0; // add a "pseudo nul terminator" to avoid peeking past the end of a symbol + } else { + return parser->sym[parser->next]; + } +} + +static bool parser_eat(struct parser *parser, uint8_t ch) { + if (parser_peek(parser) == ch) { + if (ch != 0) { // safety: make sure we don't skip past the NUL terminator + parser->next++; + } + return true; + } else { + return false; + } +} + +static uint8_t parser_next(struct parser *parser) { + // don't advance after end of input, and return an imaginary NUL terminator + if (parser->next == parser->sym_len) { + return 0; + } else { + return parser->sym[parser->next++]; + } +} + +static NODISCARD demangle_status parser_ch(struct parser *parser, uint8_t *next) { + // don't advance after end of input + if (parser->next == parser->sym_len) { + return DemangleInvalid; + } else { + *next = parser->sym[parser->next++]; + return DemangleOk; + } +} + +struct buf { + const char *start; + size_t len; +}; + +static NODISCARD demangle_status parser_hex_nibbles(struct parser *parser, struct buf *buf) { + size_t start = parser->next; + for (;;) { + uint8_t ch = parser_next(parser); + if (ch == '_') { + break; + } + if (!(('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f'))) { + return DemangleInvalid; + } + } + buf->start = parser->sym + start; + buf->len = parser->next - start - 1; // skip final _ + return DemangleOk; +} + +static NODISCARD demangle_status parser_digit_10(struct parser *parser, uint8_t *out) { + uint8_t ch = parser_peek(parser); + if ('0' <= ch && ch <= '9') { + *out = ch - '0'; + parser->next++; + return DemangleOk; + } else { + return DemangleInvalid; + } +} + +static NODISCARD demangle_status parser_digit_62(struct parser *parser, uint64_t *out) { + uint8_t ch = parser_peek(parser); + if ('0' <= ch && ch <= '9') { + *out = ch - '0'; + parser->next++; + return DemangleOk; + } else if ('a' <= ch && ch <= 'z') { + *out = 10 + (ch - 'a'); + parser->next++; + return DemangleOk; + } else if ('A' <= ch && ch <= 'Z') { + *out = 10 + 26 + (ch - 'A'); + parser->next++; + return DemangleOk; + } else { + return DemangleInvalid; + } +} + +static NODISCARD demangle_status parser_integer_62(struct parser *parser, uint64_t *out) { + if (parser_eat(parser, '_')) { + *out = 0; + return DemangleOk; + } + + uint64_t x = 0; + demangle_status status; + while (!parser_eat(parser, '_')) { + uint64_t d; + if ((status = 
parser_digit_62(parser, &d)) != DemangleOk) { + return status; + } + if (x > UINT64_MAX / 62) { + return DemangleInvalid; + } + x *= 62; + if (x > UINT64_MAX - d) { + return DemangleInvalid; + } + x += d; + } + if (x == UINT64_MAX) { + return DemangleInvalid; + } + *out = x + 1; + return DemangleOk; +} + +static NODISCARD demangle_status parser_opt_integer_62(struct parser *parser, uint8_t tag, uint64_t *out) { + if (!parser_eat(parser, tag)) { + *out = 0; + return DemangleOk; + } + + demangle_status status; + if ((status = parser_integer_62(parser, out)) != DemangleOk) { + return status; + } + if (*out == UINT64_MAX) { + return DemangleInvalid; + } + *out = *out + 1; + return DemangleOk; +} + +static NODISCARD demangle_status parser_disambiguator(struct parser *parser, uint64_t *out) { + return parser_opt_integer_62(parser, 's', out); +} + +typedef uint8_t parser_namespace_type; + +static NODISCARD demangle_status parser_namespace(struct parser *parser, parser_namespace_type *out) { + uint8_t next = parser_next(parser); + if ('A' <= next && next <= 'Z') { + *out = next; + return DemangleOk; + } else if ('a' <= next && next <= 'z') { + *out = 0; + return DemangleOk; + } else { + return DemangleInvalid; + } +} + +static NODISCARD demangle_status parser_backref(struct parser *parser, struct parser *out) { + size_t start = parser->next; + if (start == 0) { + return DemangleBug; + } + size_t s_start = start - 1; + uint64_t i; + demangle_status status = parser_integer_62(parser, &i); + if (status != DemangleOk) { + return status; + } + if (i >= s_start) { + return DemangleInvalid; + } + struct parser res = { + .sym = parser->sym, + .sym_len = parser->sym_len, + .next = (size_t)i, + .depth = parser->depth + }; + status = parser_push_depth(&res); + if (status != DemangleOk) { + return status; + } + *out = res; + return DemangleOk; +} + +static NODISCARD demangle_status parser_ident(struct parser *parser, struct ident *out) { + bool is_punycode = parser_eat(parser, 'u'); + size_t len; + uint8_t d; + demangle_status status = parser_digit_10(parser, &d); + len = d; + if (status != DemangleOk) { + return status; + } + if (len) { + for (;;) { + status = parser_digit_10(parser, &d); + if (status != DemangleOk) { + break; + } + if (len > SIZE_MAX / 10) { + return DemangleInvalid; + } + len *= 10; + if (len > SIZE_MAX - d) { + return DemangleInvalid; + } + len += d; + } + } + + // Skip past the optional `_` separator. 
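+	// (Per the v0 grammar the separator is only mandatory when the identifier
+	// itself begins with a digit or `_`; eating it whenever present is enough.)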
+ parser_eat(parser, '_'); + + size_t start = parser->next; + if (parser->sym_len - parser->next < len) { + return DemangleInvalid; + } + parser->next += len; + + const char *ident = &parser->sym[start]; + + if (is_punycode) { + const char *underscore = demangle_memrchr(ident, '_', (size_t)len); + if (underscore == NULL) { + *out = (struct ident){ + .ascii_start="", + .ascii_len=0, + .punycode_start=ident, + .punycode_len=len + }; + } else { + size_t ascii_len = underscore - ident; + // ascii_len <= len - 1 since `_` is in the first len bytes + size_t punycode_len = len - 1 - ascii_len; + *out = (struct ident){ + .ascii_start=ident, + .ascii_len=ascii_len, + .punycode_start=underscore + 1, + .punycode_len=punycode_len + }; + } + if (out->punycode_len == 0) { + return DemangleInvalid; + } + return DemangleOk; + } else { + *out = (struct ident) { + .ascii_start=ident, + .ascii_len=(size_t)len, + .punycode_start="", + .punycode_len=0, + }; + return DemangleOk; + } +} + +#define INVALID_SYNTAX "{invalid syntax}" + +static const char *demangle_error_message(demangle_status status) { + switch (status) { + case DemangleInvalid: + return INVALID_SYNTAX; + case DemangleBug: + return "{bug}"; + case DemangleRecursed: + return "{recursion limit reached}"; + default: + return "{unknown error}"; + } +} + +#define PRINT(print_fn) \ + do { \ + if ((print_fn) == OverflowOverflow) { \ + return OverflowOverflow; \ + } \ + } while(0) + +#define PRINT_CH(printer, s) PRINT(printer_print_ch((printer), (s))) +#define PRINT_STR(printer, s) PRINT(printer_print_str((printer), (s))) +#define PRINT_U64(printer, s) PRINT(printer_print_u64((printer), (s))) +#define PRINT_IDENT(printer, s) PRINT(printer_print_ident((printer), (s))) + +#define INVALID(printer) \ + do { \ + PRINT_STR((printer), INVALID_SYNTAX); \ + (printer)->status = DemangleInvalid; \ + return OverflowOk; \ + } while(0) + +#define PARSE(printer, method, ...) 
\ + do { \ + if ((printer)->status != DemangleOk) { \ + PRINT_STR((printer), "?"); \ + return OverflowOk; \ + } else { \ + demangle_status _parse_status = method(&(printer)->parser, ## __VA_ARGS__); \ + if (_parse_status != DemangleOk) { \ + PRINT_STR((printer), demangle_error_message(_parse_status)); \ + (printer)->status = _parse_status; \ + return OverflowOk; \ + } \ + } \ + } while(0) + +#define PRINT_SEP_LIST(printer, body, sep) \ + do { \ + size_t _sep_list_i; \ + PRINT_SEP_LIST_COUNT(printer, _sep_list_i, body, sep); \ + } while(0) + +#define PRINT_SEP_LIST_COUNT(printer, count, body, sep) \ + do { \ + count = 0; \ + while ((printer)->status == DemangleOk && !printer_eat((printer), 'E')) { \ + if (count > 0) { PRINT_STR(printer, sep); } \ + body; \ + count++; \ + } \ + } while(0) + +static bool printer_eat(struct printer *printer, uint8_t b) { + if (printer->status != DemangleOk) { + return false; + } + + return parser_eat(&printer->parser, b); +} + +static void printer_pop_depth(struct printer *printer) { + if (printer->status == DemangleOk) { + parser_pop_depth(&printer->parser); + } +} + +static NODISCARD overflow_status printer_print_buf(struct printer *printer, const char *start, size_t len) { + if (printer->out == NULL) { + return OverflowOk; + } + if (printer->out_len < len) { + return OverflowOverflow; + } + + memcpy(printer->out, start, len); + printer->out += len; + printer->out_len -= len; + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_str(struct printer *printer, const char *buf) { + return printer_print_buf(printer, buf, strlen(buf)); +} + +static NODISCARD overflow_status printer_print_ch(struct printer *printer, char ch) { + return printer_print_buf(printer, &ch, 1); +} + +static NODISCARD overflow_status printer_print_u64(struct printer *printer, uint64_t n) { + char buf[32] = {0}; + sprintf(buf, "%llu", (unsigned long long)n); // printing uint64 uses 21 < 32 chars + return printer_print_str(printer, buf); +} + +static NODISCARD overflow_status printer_print_ident(struct printer *printer, struct ident *ident) { + if (printer->out == NULL) { + return OverflowOk; + } + + size_t out_len = printer->out_len; + overflow_status status; + if ((status = display_ident(ident->ascii_start, ident->ascii_len, ident->punycode_start, ident->punycode_len, (uint8_t*)printer->out, &out_len)) != OverflowOk) { + return status; + } + printer->out += out_len; + printer->out_len -= out_len; + return OverflowOk; +} + +typedef overflow_status (*printer_fn)(struct printer *printer); +typedef overflow_status (*backref_fn)(struct printer *printer, bool *arg); + +static NODISCARD overflow_status printer_print_backref(struct printer *printer, backref_fn func, bool *arg) { + struct parser backref; + PARSE(printer, parser_backref, &backref); + + if (printer->out == NULL) { + return OverflowOk; + } + + struct parser orig_parser = printer->parser; + demangle_status orig_status = printer->status; // fixme not sure this is needed match for Ok on the Rust side + printer->parser = backref; + printer->status = DemangleOk; + overflow_status status = func(printer, arg); + printer->parser = orig_parser; + printer->status = orig_status; + + return status; +} + +static NODISCARD overflow_status printer_print_lifetime_from_index(struct printer *printer, uint64_t lt) { + // Bound lifetimes aren't tracked when skipping printing. 
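+	// `lt` is a de Bruijn-style index: 1 refers to the innermost binder, and at
+	// bound_lifetime_depth d an index prints as letter 'a' + (d - lt). Under
+	// `for<'a, 'b>` (d == 2), index 2 therefore prints as 'a and index 1 as 'b.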
+	if (printer->out == NULL) {
+		return OverflowOk;
+	}
+
+	PRINT_STR(printer, "'");
+	if (lt == 0) {
+		PRINT_STR(printer, "_");
+		return OverflowOk;
+	}
+
+	if (printer->bound_lifetime_depth < lt) {
+		INVALID(printer);
+	} else {
+		uint64_t depth = printer->bound_lifetime_depth - lt;
+		if (depth < 26) {
+			PRINT_CH(printer, 'a' + depth);
+		} else {
+			PRINT_STR(printer, "_");
+			PRINT_U64(printer, depth);
+		}
+
+		return OverflowOk;
+	}
+}
+
+static NODISCARD overflow_status printer_in_binder(struct printer *printer, printer_fn func) {
+	uint64_t bound_lifetimes;
+	PARSE(printer, parser_opt_integer_62, 'G', &bound_lifetimes);
+
+	// Don't track bound lifetimes when skipping printing.
+	if (printer->out == NULL) {
+		return func(printer);
+	}
+
+	if (bound_lifetimes > 0) {
+		PRINT_STR(printer, "for<");
+		for (uint64_t i = 0; i < bound_lifetimes; i++) {
+			if (i > 0) {
+				PRINT_STR(printer, ", ");
+			}
+			printer->bound_lifetime_depth++;
+			PRINT(printer_print_lifetime_from_index(printer, 1));
+		}
+		PRINT_STR(printer, "> ");
+	}
+
+	overflow_status r = func(printer);
+	printer->bound_lifetime_depth -= bound_lifetimes;
+
+	return r;
+}
+
+static NODISCARD overflow_status printer_print_generic_arg(struct printer *printer) {
+	if (printer_eat(printer, 'L')) {
+		uint64_t lt;
+		PARSE(printer, parser_integer_62, &lt);
+		return printer_print_lifetime_from_index(printer, lt);
+	} else if (printer_eat(printer, 'K')) {
+		return printer_print_const(printer, false);
+	} else {
+		return printer_print_type(printer);
+	}
+}
+
+static NODISCARD overflow_status printer_print_generic_args(struct printer *printer) {
+	PRINT_STR(printer, "<");
+	PRINT_SEP_LIST(printer, PRINT(printer_print_generic_arg(printer)), ", ");
+	PRINT_STR(printer, ">");
+	return OverflowOk;
+}
+
+static NODISCARD overflow_status printer_print_path_out_of_value(struct printer *printer, bool *_arg) {
+	(void)_arg;
+	return printer_print_path(printer, false);
+}
+
+static NODISCARD overflow_status printer_print_path_in_value(struct printer *printer, bool *_arg) {
+	(void)_arg;
+	return printer_print_path(printer, true);
+}
+
+static NODISCARD overflow_status printer_print_path(struct printer *printer, bool in_value) {
+	PARSE(printer, parser_push_depth);
+	uint8_t tag;
+	PARSE(printer, parser_ch, &tag);
+
+	overflow_status st;
+	uint64_t dis;
+	struct ident name;
+	parser_namespace_type ns;
+	char *orig_out;
+
+	switch(tag) {
+	case 'C':
+		PARSE(printer, parser_disambiguator, &dis);
+		PARSE(printer, parser_ident, &name);
+
+		PRINT_IDENT(printer, &name);
+
+		if (printer->out != NULL && !printer->alternate && dis != 0) {
+			PRINT_STR(printer, "[");
+			char buf[24] = {0};
+			sprintf(buf, "%llx", (unsigned long long)dis);
+			PRINT_STR(printer, buf);
+			PRINT_STR(printer, "]");
+		}
+		break;
+	case 'N':
+		PARSE(printer, parser_namespace, &ns);
+		if ((st = printer_print_path(printer, in_value)) != OverflowOk) {
+			return st;
+		}
+
+		// HACK(eddyb) if the parser is already marked as having errored,
+		// `parse!` below will print a `?` without its preceding `::`
+		// (because printing the `::` is skipped in certain conditions,
+		// i.e. a lowercase namespace with an empty identifier),
+		// so in order to get `::?`, the `::` has to be printed here.
+ if (printer->status != DemangleOk) { + PRINT_STR(printer, "::"); + } + + PARSE(printer, parser_disambiguator, &dis); + PARSE(printer, parser_ident, &name); + // Special namespace, like closures and shims + if (ns) { + PRINT_STR(printer, "::{"); + if (ns == 'C') { + PRINT_STR(printer, "closure"); + } else if (ns == 'S') { + PRINT_STR(printer, "shim"); + } else { + PRINT_CH(printer, ns); + } + if (name.ascii_len != 0 || name.punycode_len != 0) { + PRINT_STR(printer, ":"); + PRINT_IDENT(printer, &name); + } + PRINT_STR(printer, "#"); + PRINT_U64(printer, dis); + PRINT_STR(printer, "}"); + } else { + // Implementation-specific/unspecified namespaces + if (name.ascii_len != 0 || name.punycode_len != 0) { + PRINT_STR(printer, "::"); + PRINT_IDENT(printer, &name); + } + } + break; + case 'M': + case 'X': + // for impls, ignore the impls own path + PARSE(printer, parser_disambiguator, &dis); + orig_out = printer->out; + printer->out = NULL; + PRINT(printer_print_path(printer, false)); + printer->out = orig_out; + + // fallthru + case 'Y': + PRINT_STR(printer, "<"); + PRINT(printer_print_type(printer)); + if (tag != 'M') { + PRINT_STR(printer, " as "); + PRINT(printer_print_path(printer, false)); + } + PRINT_STR(printer, ">"); + break; + case 'I': + PRINT(printer_print_path(printer, in_value)); + if (in_value) { + PRINT_STR(printer, "::"); + } + PRINT(printer_print_generic_args(printer)); + break; + case 'B': + PRINT(printer_print_backref(printer, in_value ? printer_print_path_in_value : printer_print_path_out_of_value, NULL)); + break; + default: + INVALID(printer); + break; + } + + printer_pop_depth(printer); + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_const_uint(struct printer *printer, uint8_t tag) { + struct buf hex; + PARSE(printer, parser_hex_nibbles, &hex); + + uint64_t val; + if (try_parse_uint(hex.start, hex.len, &val)) { + PRINT_U64(printer, val); + } else { + PRINT_STR(printer, "0x"); + PRINT(printer_print_buf(printer, hex.start, hex.len)); + } + + if (printer->out != NULL && !printer->alternate) { + const char *ty = basic_type(tag); + if (/* safety */ ty != NULL) { + PRINT_STR(printer, ty); + } + } + + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_const_str_literal(struct printer *printer) { + struct buf hex; + PARSE(printer, parser_hex_nibbles, &hex); + + size_t out_len = SIZE_MAX; + nibbles_to_string_status nts_status = nibbles_to_string(hex.start, hex.len, NULL, &out_len); + switch (nts_status) { + case NtsOk: + if (printer->out != NULL) { + out_len = printer->out_len; + nts_status = nibbles_to_string(hex.start, hex.len, (uint8_t*)printer->out, &out_len); + if (nts_status != NtsOk) { + return OverflowOverflow; + } + printer->out += out_len; + printer->out_len -= out_len; + } + return OverflowOk; + case NtsOverflow: + // technically if there is a string of size `SIZE_MAX/6` whose escaped version overflows + // SIZE_MAX but has an invalid char, this will be a "fake" overflow. In practice, + // that is not going to happen and a fuzzer will not generate strings of this length. 
+ return OverflowOverflow; + case NtsInvalid: + default: + INVALID(printer); + } +} + +static NODISCARD overflow_status printer_print_const_struct(struct printer *printer) { + uint64_t dis; + struct ident name; + PARSE(printer, parser_disambiguator, &dis); + PARSE(printer, parser_ident, &name); + PRINT_IDENT(printer, &name); + PRINT_STR(printer, ": "); + return printer_print_const(printer, true); +} + +static NODISCARD overflow_status printer_print_const_out_of_value(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_const(printer, false); +} + +static NODISCARD overflow_status printer_print_const_in_value(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_const(printer, true); +} + +static NODISCARD overflow_status printer_print_const(struct printer *printer, bool in_value) { + uint8_t tag; + + PARSE(printer, parser_ch, &tag); + PARSE(printer, parser_push_depth); + + struct buf hex; + uint64_t val; + size_t count; + + bool opened_brace = false; +#define OPEN_BRACE_IF_OUTSIDE_EXPR \ + do { if (!in_value) { \ + opened_brace = true; \ + PRINT_STR(printer, "{"); \ + } } while(0) + + switch(tag) { + case 'p': + PRINT_STR(printer, "_"); + break; + // Primitive leaves with hex-encoded values (see `basic_type`). + case 'a': + case 's': + case 'l': + case 'x': + case 'n': + case 'i': + if (printer_eat(printer, 'n')) { + PRINT_STR(printer, "-"); + } + /* fallthrough */ + case 'h': + case 't': + case 'm': + case 'y': + case 'o': + case 'j': + PRINT(printer_print_const_uint(printer, tag)); + break; + case 'b': + PARSE(printer, parser_hex_nibbles, &hex); + if (try_parse_uint(hex.start, hex.len, &val)) { + if (val == 0) { + PRINT_STR(printer, "false"); + } else if (val == 1) { + PRINT_STR(printer, "true"); + } else { + INVALID(printer); + } + } else { + INVALID(printer); + } + break; + case 'c': + PARSE(printer, parser_hex_nibbles, &hex); + if (try_parse_uint(hex.start, hex.len, &val) + && val < UINT32_MAX + && validate_char((uint32_t)val)) + { + char escaped_buf[ESCAPED_SIZE]; + size_t escaped_size = char_to_string((uint32_t)val, '\'', true, &escaped_buf); + + PRINT_STR(printer, "'"); + PRINT(printer_print_buf(printer, escaped_buf, escaped_size)); + PRINT_STR(printer, "'"); + } else { + INVALID(printer); + } + break; + case 'e': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "*"); + PRINT(printer_print_const_str_literal(printer)); + break; + case 'R': + case 'Q': + if (tag == 'R' && printer_eat(printer, 'e')) { + PRINT(printer_print_const_str_literal(printer)); + } else { + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "&"); + if (tag != 'R') { + PRINT_STR(printer, "mut "); + } + PRINT(printer_print_const(printer, true)); + } + break; + case 'A': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "["); + PRINT_SEP_LIST(printer, PRINT(printer_print_const(printer, true)), ", "); + PRINT_STR(printer, "]"); + break; + case 'T': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "("); + PRINT_SEP_LIST_COUNT(printer, count, PRINT(printer_print_const(printer, true)), ", "); + if (count == 1) { + PRINT_STR(printer, ","); + } + PRINT_STR(printer, ")"); + break; + case 'V': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT(printer_print_path(printer, true)); + PARSE(printer, parser_ch, &tag); + switch(tag) { + case 'U': + break; + case 'T': + PRINT_STR(printer, "("); + PRINT_SEP_LIST(printer, PRINT(printer_print_const(printer, true)), ", "); + PRINT_STR(printer, ")"); + break; + case 'S': + PRINT_STR(printer, " { "); + PRINT_SEP_LIST(printer, 
PRINT(printer_print_const_struct(printer)), ", "); + PRINT_STR(printer, " }"); + break; + default: + INVALID(printer); + } + break; + case 'B': + PRINT(printer_print_backref(printer, in_value ? printer_print_const_in_value : printer_print_const_out_of_value, NULL)); + break; + default: + INVALID(printer); + } +#undef OPEN_BRACE_IF_OUTSIDE_EXPR + + if (opened_brace) { + PRINT_STR(printer, "}"); + } + printer_pop_depth(printer); + + return OverflowOk; +} + +/// A trait in a trait object may have some "existential projections" +/// (i.e. associated type bindings) after it, which should be printed +/// in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`. +/// To this end, this method will keep the `<...>` of an 'I' path +/// open, by omitting the `>`, and return `Ok(true)` in that case. +static NODISCARD overflow_status printer_print_maybe_open_generics(struct printer *printer, bool *open) { + if (printer_eat(printer, 'B')) { + // NOTE(eddyb) the closure may not run if printing is being skipped, + // but in that case the returned boolean doesn't matter. + *open = false; + return printer_print_backref(printer, printer_print_maybe_open_generics, open); + } else if(printer_eat(printer, 'I')) { + PRINT(printer_print_path(printer, false)); + PRINT_STR(printer, "<"); + PRINT_SEP_LIST(printer, PRINT(printer_print_generic_arg(printer)), ", "); + *open = true; + return OverflowOk; + } else { + PRINT(printer_print_path(printer, false)); + *open = false; + return OverflowOk; + } +} + +static NODISCARD overflow_status printer_print_dyn_trait(struct printer *printer) { + bool open; + PRINT(printer_print_maybe_open_generics(printer, &open)); + + while (printer_eat(printer, 'p')) { + if (!open) { + PRINT_STR(printer, "<"); + open = true; + } else { + PRINT_STR(printer, ", "); + } + + struct ident name; + PARSE(printer, parser_ident, &name); + + PRINT_IDENT(printer, &name); + PRINT_STR(printer, " = "); + PRINT(printer_print_type(printer)); + } + + if (open) { + PRINT_STR(printer, ">"); + } + + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_object_bounds(struct printer *printer) { + PRINT_SEP_LIST(printer, PRINT(printer_print_dyn_trait(printer)), " + "); + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_function_type(struct printer *printer) { + bool is_unsafe = printer_eat(printer, 'U'); + const char *abi; + size_t abi_len; + if (printer_eat(printer, 'K')) { + if (printer_eat(printer, 'C')) { + abi = "C"; + abi_len = 1; + } else { + struct ident abi_ident; + PARSE(printer, parser_ident, &abi_ident); + if (abi_ident.ascii_len == 0 || abi_ident.punycode_len != 0) { + INVALID(printer); + } + abi = abi_ident.ascii_start; + abi_len = abi_ident.ascii_len; + } + } else { + abi = NULL; + abi_len = 0; + } + + if (is_unsafe) { + PRINT_STR(printer, "unsafe "); + } + + if (abi != NULL) { + PRINT_STR(printer, "extern \""); + + // replace _ with - + while (abi_len > 0) { + const char *minus = memchr(abi, '_', abi_len); + if (minus == NULL) { + PRINT(printer_print_buf(printer, (const char*)abi, abi_len)); + break; + } else { + size_t space_to_minus = minus - abi; + PRINT(printer_print_buf(printer, (const char*)abi, space_to_minus)); + PRINT_STR(printer, "-"); + abi = minus + 1; + abi_len -= (space_to_minus + 1); + } + } + + PRINT_STR(printer, "\" "); + } + + PRINT_STR(printer, "fn("); + PRINT_SEP_LIST(printer, PRINT(printer_print_type(printer)), ", "); + PRINT_STR(printer, ")"); + + if (printer_eat(printer, 'u')) { + // Skip printing the return type if it's 'u', i.e. 
`()`.
+	} else {
+		PRINT_STR(printer, " -> ");
+		PRINT(printer_print_type(printer));
+	}
+
+	return OverflowOk;
+}
+
+static NODISCARD overflow_status printer_print_type_backref(struct printer *printer, bool *_arg) {
+	(void)_arg;
+	return printer_print_type(printer);
+}
+
+static NODISCARD overflow_status printer_print_type(struct printer *printer) {
+	uint8_t tag;
+	PARSE(printer, parser_ch, &tag);
+
+	const char *basic_ty = basic_type(tag);
+	if (basic_ty) {
+		return printer_print_str(printer, basic_ty);
+	}
+
+	uint64_t count;
+	uint64_t lt;
+
+	PARSE(printer, parser_push_depth);
+	switch (tag) {
+	case 'R':
+	case 'Q':
+		PRINT_STR(printer, "&");
+		if (printer_eat(printer, 'L')) {
+			PARSE(printer, parser_integer_62, &lt);
+			if (lt != 0) {
+				PRINT(printer_print_lifetime_from_index(printer, lt));
+				PRINT_STR(printer, " ");
+			}
+		}
+		if (tag != 'R') {
+			PRINT_STR(printer, "mut ");
+		}
+		PRINT(printer_print_type(printer));
+		break;
+	case 'P':
+	case 'O':
+		PRINT_STR(printer, "*");
+		if (tag != 'P') {
+			PRINT_STR(printer, "mut ");
+		} else {
+			PRINT_STR(printer, "const ");
+		}
+		PRINT(printer_print_type(printer));
+		break;
+	case 'A':
+	case 'S':
+		PRINT_STR(printer, "[");
+		PRINT(printer_print_type(printer));
+		if (tag == 'A') {
+			PRINT_STR(printer, "; ");
+			PRINT(printer_print_const(printer, true));
+		}
+		PRINT_STR(printer, "]");
+		break;
+	case 'T':
+		PRINT_STR(printer, "(");
+		PRINT_SEP_LIST_COUNT(printer, count, PRINT(printer_print_type(printer)), ", ");
+		if (count == 1) {
+			PRINT_STR(printer, ",");
+		}
+		PRINT_STR(printer, ")");
+		break;
+	case 'F':
+		PRINT(printer_in_binder(printer, printer_print_function_type));
+		break;
+	case 'D':
+		PRINT_STR(printer, "dyn ");
+		PRINT(printer_in_binder(printer, printer_print_object_bounds));
+
+		if (!printer_eat(printer, 'L')) {
+			INVALID(printer);
+		}
+		PARSE(printer, parser_integer_62, &lt);
+
+		if (lt != 0) {
+			PRINT_STR(printer, " + ");
+			PRINT(printer_print_lifetime_from_index(printer, lt));
+		}
+		break;
+	case 'B':
+		PRINT(printer_print_backref(printer, printer_print_type_backref, NULL));
+		break;
+	default:
+		// Go back to the tag, so `print_path` also sees it.
+		if (printer->status == DemangleOk && /* safety */ printer->parser.next > 0) {
+			printer->parser.next--;
+		}
+		PRINT(printer_print_path(printer, false));
+	}
+
+	printer_pop_depth(printer);
+	return OverflowOk;
+}
+
+NODISCARD static demangle_status rust_demangle_legacy_demangle(const char *s, size_t s_len, struct demangle_legacy *res, const char **rest)
+{
+	if (s_len > strlen(s)) {
+		// s_len only exists to shorten the string, this is not a buffer API
+		return DemangleInvalid;
+	}
+
+	const char *inner;
+	size_t inner_len;
+	if (s_len >= 3 && !strncmp(s, "_ZN", 3)) {
+		inner = s + 3;
+		inner_len = s_len - 3;
+	} else if (s_len >= 2 && !strncmp(s, "ZN", 2)) {
+		// On Windows, dbghelp strips leading underscores, so we accept "ZN...E"
+		// form too.
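+		// (e.g. `ZN3foo17h05af221e174051e9E`, the stripped form of
+		// `_ZN3foo17h05af221e174051e9E`, still yields `foo::h05af221e174051e9`,
+		// or just `foo` with `alternate` set.)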
+ inner = s + 2; + inner_len = s_len - 2; + } else if (s_len >= 4 && !strncmp(s, "__ZN", 4)) { + // On OSX, symbols are prefixed with an extra _ + inner = s + 4; + inner_len = s_len - 4; + } else { + return DemangleInvalid; + } + + if (!str_isascii(inner, inner_len)) { + return DemangleInvalid; + } + + size_t elements = 0; + const char *chars = inner; + size_t chars_len = inner_len; + if (chars_len == 0) { + return DemangleInvalid; + } + char c; + while ((c = *chars) != 'E') { + // Decode an identifier element's length + if (c < '0' || c > '9') { + return DemangleInvalid; + } + size_t len = 0; + while (c >= '0' && c <= '9') { + size_t d = c - '0'; + if (len > SIZE_MAX / 10) { + return DemangleInvalid; + } + len *= 10; + if (len > SIZE_MAX - d) { + return DemangleInvalid; + } + len += d; + + chars++; + chars_len--; + if (chars_len == 0) { + return DemangleInvalid; + } + c = *chars; + } + + // Advance by the length + if (chars_len <= len) { + return DemangleInvalid; + } + chars += len; + chars_len -= len; + elements++; + } + *res = (struct demangle_legacy) { inner, inner_len, elements }; + *rest = chars + 1; + return DemangleOk; +} + +static bool is_rust_hash(const char *s, size_t len) { + if (len == 0 || s[0] != 'h') { + return false; + } + + for (size_t i = 1; i < len; i++) { + if (!((s[i] >= '0' && s[i] <= '9') || (s[i] >= 'a' && s[i] <= 'f') || (s[i] >= 'A' && s[i] <= 'F'))) { + return false; + } + } + + return true; +} + +NODISCARD static overflow_status rust_demangle_legacy_display_demangle(struct demangle_legacy res, char *out, size_t len, bool alternate) +{ + struct printer printer = { + // not actually using the parser part of the printer, just keeping it to share the format functions + DemangleOk, + { NULL }, + out, + len, + 0, + alternate + }; + const char *inner = res.mangled; + for (size_t element = 0; element < res.elements; element++) { + size_t i = 0; + const char *rest; + for (rest = inner; rest < res.mangled + res.mangled_len && *rest >= '0' && *rest <= '9'; rest++) { + i *= 10; + i += *rest - '0'; + } + if ((size_t)(res.mangled + res.mangled_len - rest) < i) { + // safety: shouldn't reach this place if the input string is validated. bail out. 
+			// safety: we know rest <= res.mangled + res.mangled_len from the for-loop above
+			break;
+		}
+
+		size_t len = i;
+		inner = rest + len;
+
+		// From here on, inner contains a pointer to the next element, rest[:len] to the current one
+		if (alternate && element + 1 == res.elements && is_rust_hash(rest, i)) {
+			break;
+		}
+		if (element != 0) {
+			PRINT_STR(&printer, "::");
+		}
+
+		if (len >= 2 && !strncmp(rest, "_$", 2)) {
+			rest++;
+			len--;
+		}
+
+		while (len > 0) {
+			if (rest[0] == '.') {
+				if (len >= 2 && rest[1] == '.') {
+					PRINT_STR(&printer, "::");
+					rest += 2;
+					len -= 2;
+				} else {
+					PRINT_STR(&printer, ".");
+					rest += 1;
+					len -= 1;
+				}
+			} else if (rest[0] == '$') {
+				const char *escape = memchr(rest + 1, '$', len - 1);
+				if (escape == NULL) {
+					break;
+				}
+				const char *escape_start = rest + 1;
+				size_t escape_len = escape - (rest + 1);
+
+				size_t next_len = len - (escape + 1 - rest);
+				const char *next_rest = escape + 1;
+
+				char ch;
+				if ((escape_len == 2 && escape_start[0] == 'S' && escape_start[1] == 'P')) {
+					ch = '@';
+				} else if ((escape_len == 2 && escape_start[0] == 'B' && escape_start[1] == 'P')) {
+					ch = '*';
+				} else if ((escape_len == 2 && escape_start[0] == 'R' && escape_start[1] == 'F')) {
+					ch = '&';
+				} else if ((escape_len == 2 && escape_start[0] == 'L' && escape_start[1] == 'T')) {
+					ch = '<';
+				} else if ((escape_len == 2 && escape_start[0] == 'G' && escape_start[1] == 'T')) {
+					ch = '>';
+				} else if ((escape_len == 2 && escape_start[0] == 'L' && escape_start[1] == 'P')) {
+					ch = '(';
+				} else if ((escape_len == 2 && escape_start[0] == 'R' && escape_start[1] == 'P')) {
+					ch = ')';
+				} else if ((escape_len == 1 && escape_start[0] == 'C')) {
+					ch = ',';
+				} else {
+					if (escape_len > 1 && escape_start[0] == 'u') {
+						escape_start++;
+						escape_len--;
+						uint64_t val;
+						if (try_parse_uint(escape_start, escape_len, &val)
+						    && val < UINT32_MAX
+						    && validate_char((uint32_t)val))
+						{
+							if (!unicode_iscontrol(val)) {
+								uint8_t wchr[4];
+								size_t wchr_len = code_to_utf8(wchr, (uint32_t)val);
+								PRINT(printer_print_buf(&printer, (const char*)wchr, wchr_len));
+								len = next_len;
+								rest = next_rest;
+								continue;
+							}
+						}
+					}
+					break; // print the rest of this element raw
+				}
+				PRINT_CH(&printer, ch);
+				len = next_len;
+				rest = next_rest;
+			} else {
+				size_t j = 0;
+				for (;j < len && rest[j] != '$' && rest[j] != '.';j++);
+				if (j == len) {
+					break;
+				}
+				PRINT(printer_print_buf(&printer, rest, j));
+				rest += j;
+				len -= j;
+			}
+		}
+		PRINT(printer_print_buf(&printer, rest, len));
+	}
+
+	if (printer.out_len < OVERFLOW_MARGIN) {
+		return OverflowOverflow;
+	}
+	*printer.out = '\0';
+	return OverflowOk;
+}
+
+static bool is_symbol_like(const char *s, size_t len) {
+	// rust-demangle's definition of symbol-like: control characters and space
+	// are not symbol-like, all else is.
+	for (size_t i = 0; i < len; i++) {
+		char ch = s[i];
+		if (!(ch >= 0x21 && ch <= 0x7e)) {
+			return false;
+		}
+	}
+	return true;
+}
+
+void rust_demangle_demangle(const char *s, struct demangle *res)
+{
+	// During ThinLTO, LLVM may import and rename internal symbols, so strip out
+	// those endings first as they're one of the last manglings applied to symbol
+	// names.
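+	// e.g. `_ZN3fooE.llvm.9D1C9369@@16` (the suffix form mentioned in
+	// demangle-rust-v0.h) is demangled as if it were plain `_ZN3fooE`.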
+ const char *llvm = ".llvm."; + const char *found_llvm = strstr(s, llvm); + size_t s_len = strlen(s); + if (found_llvm) { + const char *all_hex_ptr = found_llvm + strlen(".llvm."); + bool all_hex = true; + for (;*all_hex_ptr;all_hex_ptr++) { + if (!(('0' <= *all_hex_ptr && *all_hex_ptr <= '9') || + ('A' <= *all_hex_ptr && *all_hex_ptr <= 'F') || + *all_hex_ptr == '@')) { + all_hex = false; + break; + } + } + + if (all_hex) { + s_len = found_llvm - s; + } + } + + const char *suffix; + struct demangle_legacy legacy; + demangle_status st = rust_demangle_legacy_demangle(s, s_len, &legacy, &suffix); + if (st == DemangleOk) { + *res = (struct demangle) { + .style=DemangleStyleLegacy, + .mangled=legacy.mangled, + .mangled_len=legacy.mangled_len, + .elements=legacy.elements, + .original=s, + .original_len=s_len, + .suffix=suffix, + .suffix_len=s_len - (suffix - s), + }; + } else { + struct demangle_v0 v0; + st = rust_demangle_v0_demangle(s, s_len, &v0, &suffix); + if (st == DemangleOk) { + *res = (struct demangle) { + .style=DemangleStyleV0, + .mangled=v0.mangled, + .mangled_len=v0.mangled_len, + .elements=0, + .original=s, + .original_len=s_len, + .suffix=suffix, + .suffix_len=s_len - (suffix - s), + }; + } else { + *res = (struct demangle) { + .style=DemangleStyleUnknown, + .mangled=NULL, + .mangled_len=0, + .elements=0, + .original=s, + .original_len=s_len, + .suffix=s, + .suffix_len=0, + }; + } + } + + // Output like LLVM IR adds extra period-delimited words. See if + // we are in that case and save the trailing words if so. + if (res->suffix_len) { + if (res->suffix[0] == '.' && is_symbol_like(res->suffix, res->suffix_len)) { + // Keep the suffix + } else { + // Reset the suffix and invalidate the demangling + res->style = DemangleStyleUnknown; + res->suffix_len = 0; + } + } +} + +bool rust_demangle_is_known(struct demangle *res) { + return res->style != DemangleStyleUnknown; +} + +overflow_status rust_demangle_display_demangle(struct demangle const *res, char *out, size_t len, bool alternate) { + size_t original_len = res->original_len; + size_t out_len; + switch (res->style) { + case DemangleStyleUnknown: + if (len < original_len) { + return OverflowOverflow; + } else { + memcpy(out, res->original, original_len); + out += original_len; + len -= original_len; + break; + } + break; + case DemangleStyleLegacy: { + struct demangle_legacy legacy = { + res->mangled, + res->mangled_len, + res->elements + }; + if (rust_demangle_legacy_display_demangle(legacy, out, len, alternate) == OverflowOverflow) { + return OverflowOverflow; + } + out_len = strlen(out); + out += out_len; + len -= out_len; + break; + } + case DemangleStyleV0: { + struct demangle_v0 v0 = { + res->mangled, + res->mangled_len + }; + if (rust_demangle_v0_display_demangle(v0, out, len, alternate) == OverflowOverflow) { + return OverflowOverflow; + } + out_len = strlen(out); + out += out_len; + len -= out_len; + break; + } + } + size_t suffix_len = res->suffix_len; + if (len < suffix_len || len - suffix_len < OVERFLOW_MARGIN) { + return OverflowOverflow; + } + memcpy(out, res->suffix, suffix_len); + out[suffix_len] = 0; + return OverflowOk; +} diff --git a/tools/perf/util/demangle-rust-v0.h b/tools/perf/util/demangle-rust-v0.h new file mode 100644 index 000000000000..d0092818610a --- /dev/null +++ b/tools/perf/util/demangle-rust-v0.h @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// The contents of this file come from the Rust rustc-demangle library, hosted +// in the <https://github.com/rust-lang/rustc-demangle> 
repository, licensed
+// under "Apache-2.0 OR MIT". For copyright details, see
+// <https://github.com/rust-lang/rustc-demangle/blob/main/README.md>.
+// Please note that the file should be kept as close as possible to upstream.
+
+#ifndef _H_DEMANGLE_V0_H
+#define _H_DEMANGLE_V0_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#if defined(__GNUC__) || defined(__clang__)
+#define DEMANGLE_NODISCARD __attribute__((warn_unused_result))
+#else
+#define DEMANGLE_NODISCARD
+#endif
+
+typedef enum {
+	OverflowOk,
+	OverflowOverflow
+} overflow_status;
+
+enum demangle_style {
+	DemangleStyleUnknown = 0,
+	DemangleStyleLegacy,
+	DemangleStyleV0,
+};
+
+// Not using a union here to make the struct easier to copy-paste if needed.
+struct demangle {
+	enum demangle_style style;
+	// points to the "mangled" part of the name,
+	// not including `ZN` or `R` prefixes.
+	const char *mangled;
+	size_t mangled_len;
+	// In DemangleStyleLegacy, is the number of path elements
+	size_t elements;
+	// While it's called "original", it will not contain `.llvm.9D1C9369@@16`
+	// suffixes that are to be ignored.
+	const char *original;
+	size_t original_len;
+	// Contains the part after the mangled name that is to be output,
+	// which can be `.exit.i.i` suffixes LLVM sometimes adds.
+	const char *suffix;
+	size_t suffix_len;
+};
+
+// If fewer than OVERFLOW_MARGIN bytes would remain in the output buffer, the
+// demangler returns `OverflowOverflow` even if there is no actual overflow.
+#define OVERFLOW_MARGIN 4
+
+/// Demangle a C string that refers to a Rust symbol and put the demangle intermediate result in `res`.
+/// Beware that `res` contains references into `s`. If `s` is modified (or free'd) before calling
+/// `rust_demangle_display_demangle`, behavior is undefined.
+///
+/// Use `rust_demangle_display_demangle` to convert it to an actual string.
+void rust_demangle_demangle(const char *s, struct demangle *res);
+
+/// Write the string in a `struct demangle` into a buffer.
+///
+/// Return `OverflowOk` if the output buffer was sufficiently big, `OverflowOverflow` if it wasn't.
+/// This function is `O(n)` in the length of the input + *output* [$], but the demangled output of
+/// a symbol can be exponentially[$$] large, therefore it is recommended to have a sane bound
+/// (`rust-demangle` uses 1,000,000 bytes) on `len`.
+///
+/// `alternate`, if true, selects the less verbose alternate formatting (Rust `{:#}`), which does
+/// not show symbol hashes and types of constant ints.
+///
+/// [$] It's `O(n * MAX_DEPTH)`, but `MAX_DEPTH` is a constant (500 in this port) and therefore it's `O(n)`.
+/// [$$] Technically, bounded by `O(n^MAX_DEPTH)`, but this is practically exponential.
+DEMANGLE_NODISCARD overflow_status rust_demangle_display_demangle(struct demangle const *res, char *out, size_t len, bool alternate);
+
+/// Returns true if `res` refers to a known valid Rust demangling style, false if it's an unknown style.
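+///
+/// An illustrative call sequence for this header's API (the 256-byte buffer
+/// size is this example's choice, not an upstream recommendation):
+///
+///	struct demangle dem;
+///	char buf[256];
+///
+///	rust_demangle_demangle(mangled_name, &dem);
+///	if (rust_demangle_is_known(&dem) &&
+///	    rust_demangle_display_demangle(&dem, buf, sizeof(buf), /*alternate=*/true) == OverflowOk)
+///		puts(buf);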
+bool rust_demangle_is_known(struct demangle *res); + +#undef DEMANGLE_NODISCARD + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/perf/util/demangle-rust.c b/tools/perf/util/demangle-rust.c deleted file mode 100644 index a659fc69f73a..000000000000 --- a/tools/perf/util/demangle-rust.c +++ /dev/null @@ -1,269 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <string.h> -#include "debug.h" - -#include "demangle-rust.h" - -/* - * Mangled Rust symbols look like this: - * - * _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a - * - * The original symbol is: - * - * <std::sys::fd::FileDesc as core::ops::Drop>::drop - * - * The last component of the path is a 64-bit hash in lowercase hex, prefixed - * with "h". Rust does not have a global namespace between crates, an illusion - * which Rust maintains by using the hash to distinguish things that would - * otherwise have the same symbol. - * - * Any path component not starting with a XID_Start character is prefixed with - * "_". - * - * The following escape sequences are used: - * - * "," => $C$ - * "@" => $SP$ - * "*" => $BP$ - * "&" => $RF$ - * "<" => $LT$ - * ">" => $GT$ - * "(" => $LP$ - * ")" => $RP$ - * " " => $u20$ - * "'" => $u27$ - * "[" => $u5b$ - * "]" => $u5d$ - * "~" => $u7e$ - * - * A double ".." means "::" and a single "." means "-". - * - * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$ - */ - -static const char *hash_prefix = "::h"; -static const size_t hash_prefix_len = 3; -static const size_t hash_len = 16; - -static bool is_prefixed_hash(const char *start); -static bool looks_like_rust(const char *sym, size_t len); -static bool unescape(const char **in, char **out, const char *seq, char value); - -/* - * INPUT: - * sym: symbol that has been through BFD-demangling - * - * This function looks for the following indicators: - * - * 1. The hash must consist of "h" followed by 16 lowercase hex digits. - * - * 2. As a sanity check, the hash must use between 5 and 15 of the 16 possible - * hex digits. This is true of 99.9998% of hashes so once in your life you - * may see a false negative. The point is to notice path components that - * could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In - * this case a false positive (non-Rust symbol has an important path - * component removed because it looks like a Rust hash) is worse than a - * false negative (the rare Rust symbol is not demangled) so this sets the - * balance in favor of false negatives. - * - * 3. There must be no characters other than a-zA-Z0-9 and _.:$ - * - * 4. There must be no unrecognized $-sign sequences. - * - * 5. There must be no sequence of three or more dots in a row ("..."). - */ -bool -rust_is_mangled(const char *sym) -{ - size_t len, len_without_hash; - - if (!sym) - return false; - - len = strlen(sym); - if (len <= hash_prefix_len + hash_len) - /* Not long enough to contain "::h" + hash + something else */ - return false; - - len_without_hash = len - (hash_prefix_len + hash_len); - if (!is_prefixed_hash(sym + len_without_hash)) - return false; - - return looks_like_rust(sym, len_without_hash); -} - -/* - * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex - * digits must comprise between 5 and 15 (inclusive) distinct digits. 
- */ -static bool is_prefixed_hash(const char *str) -{ - const char *end; - bool seen[16]; - size_t i; - int count; - - if (strncmp(str, hash_prefix, hash_prefix_len)) - return false; - str += hash_prefix_len; - - memset(seen, false, sizeof(seen)); - for (end = str + hash_len; str < end; str++) - if (*str >= '0' && *str <= '9') - seen[*str - '0'] = true; - else if (*str >= 'a' && *str <= 'f') - seen[*str - 'a' + 10] = true; - else - return false; - - /* Count how many distinct digits seen */ - count = 0; - for (i = 0; i < 16; i++) - if (seen[i]) - count++; - - return count >= 5 && count <= 15; -} - -static bool looks_like_rust(const char *str, size_t len) -{ - const char *end = str + len; - - while (str < end) - switch (*str) { - case '$': - if (!strncmp(str, "$C$", 3)) - str += 3; - else if (!strncmp(str, "$SP$", 4) - || !strncmp(str, "$BP$", 4) - || !strncmp(str, "$RF$", 4) - || !strncmp(str, "$LT$", 4) - || !strncmp(str, "$GT$", 4) - || !strncmp(str, "$LP$", 4) - || !strncmp(str, "$RP$", 4)) - str += 4; - else if (!strncmp(str, "$u20$", 5) - || !strncmp(str, "$u27$", 5) - || !strncmp(str, "$u5b$", 5) - || !strncmp(str, "$u5d$", 5) - || !strncmp(str, "$u7e$", 5)) - str += 5; - else - return false; - break; - case '.': - /* Do not allow three or more consecutive dots */ - if (!strncmp(str, "...", 3)) - return false; - /* Fall through */ - case 'a' ... 'z': - case 'A' ... 'Z': - case '0' ... '9': - case '_': - case ':': - str++; - break; - default: - return false; - } - - return true; -} - -/* - * INPUT: - * sym: symbol for which rust_is_mangled(sym) returns true - * - * The input is demangled in-place because the mangled name is always longer - * than the demangled one. - */ -void -rust_demangle_sym(char *sym) -{ - const char *in; - char *out; - const char *end; - - if (!sym) - return; - - in = sym; - out = sym; - end = sym + strlen(sym) - (hash_prefix_len + hash_len); - - while (in < end) - switch (*in) { - case '$': - if (!(unescape(&in, &out, "$C$", ',') - || unescape(&in, &out, "$SP$", '@') - || unescape(&in, &out, "$BP$", '*') - || unescape(&in, &out, "$RF$", '&') - || unescape(&in, &out, "$LT$", '<') - || unescape(&in, &out, "$GT$", '>') - || unescape(&in, &out, "$LP$", '(') - || unescape(&in, &out, "$RP$", ')') - || unescape(&in, &out, "$u20$", ' ') - || unescape(&in, &out, "$u27$", '\'') - || unescape(&in, &out, "$u5b$", '[') - || unescape(&in, &out, "$u5d$", ']') - || unescape(&in, &out, "$u7e$", '~'))) { - pr_err("demangle-rust: unexpected escape sequence"); - goto done; - } - break; - case '_': - /* - * If this is the start of a path component and the next - * character is an escape sequence, ignore the - * underscore. The mangler inserts an underscore to make - * sure the path component begins with a XID_Start - * character. - */ - if ((in == sym || in[-1] == ':') && in[1] == '$') - in++; - else - *out++ = *in++; - break; - case '.': - if (in[1] == '.') { - /* ".." becomes "::" */ - *out++ = ':'; - *out++ = ':'; - in += 2; - } else { - /* "." becomes "-" */ - *out++ = '-'; - in++; - } - break; - case 'a' ... 'z': - case 'A' ... 'Z': - case '0' ... 
'9': - case ':': - *out++ = *in++; - break; - default: - pr_err("demangle-rust: unexpected character '%c' in symbol\n", - *in); - goto done; - } - -done: - *out = '\0'; -} - -static bool unescape(const char **in, char **out, const char *seq, char value) -{ - size_t len = strlen(seq); - - if (strncmp(*in, seq, len)) - return false; - - **out = value; - - *in += len; - *out += 1; - - return true; -} diff --git a/tools/perf/util/demangle-rust.h b/tools/perf/util/demangle-rust.h deleted file mode 100644 index 2fca618b1aa5..000000000000 --- a/tools/perf/util/demangle-rust.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PERF_DEMANGLE_RUST -#define __PERF_DEMANGLE_RUST 1 - -bool rust_is_mangled(const char *str); -void rust_demangle_sym(char *str); - -#endif /* __PERF_DEMANGLE_RUST */ diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index 50c5c206b70e..8f0eb56c6fc6 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -48,7 +48,7 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size, static void ins__sort(struct arch *arch); static int disasm_line__parse(char *line, const char **namep, char **rawp); -static int disasm_line__parse_powerpc(struct disasm_line *dl); +static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args); static char *expand_tabs(char *line, char **storage, size_t *storage_len); static __attribute__((constructor)) void symbol__init_regexpr(void) @@ -968,24 +968,25 @@ out: #define PPC_OP(op) (((op) >> 26) & 0x3F) #define RAW_BYTES 11 -static int disasm_line__parse_powerpc(struct disasm_line *dl) +static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args) { char *line = dl->al.line; const char **namep = &dl->ins.name; char **rawp = &dl->ops.raw; char *tmp_raw_insn, *name_raw_insn = skip_spaces(line); char *name = skip_spaces(name_raw_insn + RAW_BYTES); - int objdump = 0; + int disasm = 0; + int ret = 0; - if (strlen(line) > RAW_BYTES) - objdump = 1; + if (args->options->disassembler_used) + disasm = 1; if (name_raw_insn[0] == '\0') return -1; - if (objdump) { - disasm_line__parse(name, namep, rawp); - } else + if (disasm) + ret = disasm_line__parse(name, namep, rawp); + else *namep = ""; tmp_raw_insn = strndup(name_raw_insn, 11); @@ -995,10 +996,10 @@ static int disasm_line__parse_powerpc(struct disasm_line *dl) remove_spaces(tmp_raw_insn); sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn); - if (objdump) + if (disasm) dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn); - return 0; + return ret; } static void annotation_line__init(struct annotation_line *al, @@ -1054,7 +1055,7 @@ struct disasm_line *disasm_line__new(struct annotate_args *args) if (args->offset != -1) { if (arch__is(args->arch, "powerpc")) { - if (disasm_line__parse_powerpc(dl) < 0) + if (disasm_line__parse_powerpc(dl, args) < 0) goto out_free_line; } else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; @@ -2289,16 +2290,20 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) switch (dis) { case PERF_DISASM_LLVM: + args->options->disassembler_used = PERF_DISASM_LLVM; err = symbol__disassemble_llvm(symfs_filename, sym, args); break; case PERF_DISASM_CAPSTONE: + args->options->disassembler_used = PERF_DISASM_CAPSTONE; err = symbol__disassemble_capstone(symfs_filename, sym, args); break; case PERF_DISASM_OBJDUMP: + args->options->disassembler_used = PERF_DISASM_OBJDUMP; err = symbol__disassemble_objdump(symfs_filename, 
sym, args); break; case PERF_DISASM_UNKNOWN: /* End of disassemblers. */ default: + args->options->disassembler_used = PERF_DISASM_UNKNOWN; goto out_remove_tmp; } if (err == 0) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 5c6e85fdae0d..057fcf4225ac 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -67,6 +67,7 @@ char dso__symtab_origin(const struct dso *dso) [DSO_BINARY_TYPE__GUEST_KMODULE] = 'G', [DSO_BINARY_TYPE__GUEST_KMODULE_COMP] = 'M', [DSO_BINARY_TYPE__GUEST_VMLINUX] = 'V', + [DSO_BINARY_TYPE__GNU_DEBUGDATA] = 'n', }; if (dso == NULL || dso__symtab_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) @@ -93,6 +94,7 @@ bool dso__is_object_file(const struct dso *dso) case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: + case DSO_BINARY_TYPE__GNU_DEBUGDATA: case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: case DSO_BINARY_TYPE__GUEST_KMODULE: case DSO_BINARY_TYPE__GUEST_KMODULE_COMP: @@ -224,6 +226,7 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__VMLINUX: case DSO_BINARY_TYPE__GUEST_VMLINUX: case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: + case DSO_BINARY_TYPE__GNU_DEBUGDATA: __symbol__join_symfs(filename, size, dso__long_name(dso)); break; @@ -490,11 +493,25 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m, /* * Global list of open DSOs and the counter. */ +struct mutex _dso__data_open_lock; static LIST_HEAD(dso__data_open); -static long dso__data_open_cnt; -static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER; +static long dso__data_open_cnt GUARDED_BY(_dso__data_open_lock); -static void dso__list_add(struct dso *dso) +static void dso__data_open_lock_init(void) +{ + mutex_init(&_dso__data_open_lock); +} + +static struct mutex *dso__data_open_lock(void) LOCK_RETURNED(_dso__data_open_lock) +{ + static pthread_once_t data_open_lock_once = PTHREAD_ONCE_INIT; + + pthread_once(&data_open_lock_once, dso__data_open_lock_init); + + return &_dso__data_open_lock; +} + +static void dso__list_add(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { list_add_tail(&dso__data(dso)->open_entry, &dso__data_open); #ifdef REFCNT_CHECKING @@ -505,11 +522,13 @@ static void dso__list_add(struct dso *dso) dso__data_open_cnt++; } -static void dso__list_del(struct dso *dso) +static void dso__list_del(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { list_del_init(&dso__data(dso)->open_entry); #ifdef REFCNT_CHECKING + mutex_unlock(dso__data_open_lock()); dso__put(dso__data(dso)->dso); + mutex_lock(dso__data_open_lock()); #endif WARN_ONCE(dso__data_open_cnt <= 0, "DSO data fd counter out of bounds."); @@ -518,7 +537,7 @@ static void dso__list_del(struct dso *dso) static void close_first_dso(void); -static int do_open(char *name) +static int do_open(char *name) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { int fd; char sbuf[STRERR_BUFSIZE]; @@ -545,6 +564,7 @@ char *dso__filename_with_chroot(const struct dso *dso, const char *filename) } static int __open_dso(struct dso *dso, struct machine *machine) + EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { int fd = -EINVAL; char *root_dir = (char *)""; @@ -610,6 +630,7 @@ static void check_data_close(void); * list/count of open DSO objects. 
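(The lock bootstrap just introduced in dso.c follows a common lazy-initialization shape. A minimal sketch of the same pthread_once() pattern, with illustrative names; demo_lock and its helpers are not symbols in the tree.)

#include <pthread.h>

static pthread_mutex_t demo_lock;

static void demo_lock_init(void)
{
	pthread_mutex_init(&demo_lock, NULL);
}

static pthread_mutex_t *demo_lock_get(void)
{
	/* pthread_once() runs demo_lock_init() exactly once, even when
	 * several threads race to take the lock for the first time. */
	static pthread_once_t once = PTHREAD_ONCE_INIT;

	pthread_once(&once, demo_lock_init);
	return &demo_lock;
}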
*/ static int open_dso(struct dso *dso, struct machine *machine) + EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { int fd; struct nscookie nsc; @@ -635,7 +656,7 @@ static int open_dso(struct dso *dso, struct machine *machine) return fd; } -static void close_data_fd(struct dso *dso) +static void close_data_fd(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { if (dso__data(dso)->fd >= 0) { close(dso__data(dso)->fd); @@ -652,12 +673,12 @@ static void close_data_fd(struct dso *dso) * Close @dso's data file descriptor and updates * list/count of open DSO objects. */ -static void close_dso(struct dso *dso) +static void close_dso(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { close_data_fd(dso); } -static void close_first_dso(void) +static void close_first_dso(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { struct dso_data *dso_data; struct dso *dso; @@ -702,7 +723,7 @@ void reset_fd_limit(void) fd_limit = 0; } -static bool may_cache_fd(void) +static bool may_cache_fd(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { if (!fd_limit) fd_limit = get_fd_limit(); @@ -718,7 +739,7 @@ static bool may_cache_fd(void) * for opened dso file descriptors. The limit is half * of the RLIMIT_NOFILE files opened. */ -static void check_data_close(void) +static void check_data_close(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { bool cache_fd = may_cache_fd(); @@ -734,12 +755,13 @@ static void check_data_close(void) */ void dso__data_close(struct dso *dso) { - pthread_mutex_lock(&dso__data_open_lock); + mutex_lock(dso__data_open_lock()); close_dso(dso); - pthread_mutex_unlock(&dso__data_open_lock); + mutex_unlock(dso__data_open_lock()); } static void try_to_open_dso(struct dso *dso, struct machine *machine) + EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { enum dso_binary_type binary_type_data[] = { DSO_BINARY_TYPE__BUILD_ID_CACHE, @@ -781,25 +803,27 @@ out: * returns file descriptor. It should be paired with * dso__data_put_fd() if it returns non-negative value. 
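(may_cache_fd() above bounds the descriptor cache to half of RLIMIT_NOFILE. The policy in isolation, as a sketch; demo_fd_cache_limit() is illustrative and, unlike the tree, does not cache the computed limit in a static.)

#include <sys/resource.h>

static long demo_fd_cache_limit(void)
{
	struct rlimit rlim;

	if (getrlimit(RLIMIT_NOFILE, &rlim) != 0)
		return 0;
	/* Keep at most half of the soft open-file limit cached. */
	return (long)(rlim.rlim_cur / 2);
}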
*/ -int dso__data_get_fd(struct dso *dso, struct machine *machine) +bool dso__data_get_fd(struct dso *dso, struct machine *machine, int *fd) { + *fd = -1; if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR) - return -1; + return false; - if (pthread_mutex_lock(&dso__data_open_lock) < 0) - return -1; + mutex_lock(dso__data_open_lock()); try_to_open_dso(dso, machine); - if (dso__data(dso)->fd < 0) - pthread_mutex_unlock(&dso__data_open_lock); + *fd = dso__data(dso)->fd; + if (*fd >= 0) + return true; - return dso__data(dso)->fd; + mutex_unlock(dso__data_open_lock()); + return false; } void dso__data_put_fd(struct dso *dso __maybe_unused) { - pthread_mutex_unlock(&dso__data_open_lock); + mutex_unlock(dso__data_open_lock()); } bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by) @@ -951,7 +975,7 @@ static ssize_t file_read(struct dso *dso, struct machine *machine, { ssize_t ret; - pthread_mutex_lock(&dso__data_open_lock); + mutex_lock(dso__data_open_lock()); /* * dso__data(dso)->fd might be closed if other thread opened another @@ -967,7 +991,7 @@ static ssize_t file_read(struct dso *dso, struct machine *machine, ret = pread(dso__data(dso)->fd, data, DSO__DATA_CACHE_SIZE, offset); out: - pthread_mutex_unlock(&dso__data_open_lock); + mutex_unlock(dso__data_open_lock()); return ret; } @@ -1075,7 +1099,7 @@ static int file_size(struct dso *dso, struct machine *machine) struct stat st; char sbuf[STRERR_BUFSIZE]; - pthread_mutex_lock(&dso__data_open_lock); + mutex_lock(dso__data_open_lock()); /* * dso__data(dso)->fd might be closed if other thread opened another @@ -1099,7 +1123,7 @@ static int file_size(struct dso *dso, struct machine *machine) dso__data(dso)->file_size = st.st_size; out: - pthread_mutex_unlock(&dso__data_open_lock); + mutex_unlock(dso__data_open_lock()); return ret; } @@ -1170,6 +1194,68 @@ ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, return data_read_write_offset(dso, machine, offset, data, size, true); } +uint16_t dso__e_machine(struct dso *dso, struct machine *machine) +{ + uint16_t e_machine = EM_NONE; + int fd; + + switch (dso__binary_type(dso)) { + case DSO_BINARY_TYPE__KALLSYMS: + case DSO_BINARY_TYPE__GUEST_KALLSYMS: + case DSO_BINARY_TYPE__VMLINUX: + case DSO_BINARY_TYPE__GUEST_VMLINUX: + case DSO_BINARY_TYPE__GUEST_KMODULE: + case DSO_BINARY_TYPE__GUEST_KMODULE_COMP: + case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: + case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: + case DSO_BINARY_TYPE__KCORE: + case DSO_BINARY_TYPE__GUEST_KCORE: + case DSO_BINARY_TYPE__BPF_PROG_INFO: + case DSO_BINARY_TYPE__BPF_IMAGE: + case DSO_BINARY_TYPE__OOL: + case DSO_BINARY_TYPE__JAVA_JIT: + return EM_HOST; + case DSO_BINARY_TYPE__DEBUGLINK: + case DSO_BINARY_TYPE__BUILD_ID_CACHE: + case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: + case DSO_BINARY_TYPE__GNU_DEBUGDATA: + case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: + case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: + case DSO_BINARY_TYPE__FEDORA_DEBUGINFO: + case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: + case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: + case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: + break; + case DSO_BINARY_TYPE__NOT_FOUND: + default: + return EM_NONE; + } + + mutex_lock(dso__data_open_lock()); + + /* + * dso__data(dso)->fd might be closed if other thread opened another + * file (dso) due to open file limit (RLIMIT_NOFILE). 
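(With the reworked API above, dso__data_get_fd() reports success as a bool and returns the descriptor through an out parameter, holding the open lock until dso__data_put_fd(). Callers take the shape of the dso__type() conversion later in this patch:)

	int fd = -1;
	enum dso_type type = DSO__TYPE_UNKNOWN;

	if (dso__data_get_fd(dso, machine, &fd)) {
		/* fd stays valid only until the matching put drops the lock. */
		type = dso__type_fd(fd);
		dso__data_put_fd(dso);
	}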
+ */ + try_to_open_dso(dso, machine); + fd = dso__data(dso)->fd; + if (fd >= 0) { + _Static_assert(offsetof(Elf32_Ehdr, e_machine) == 18, "Unexpected offset"); + _Static_assert(offsetof(Elf64_Ehdr, e_machine) == 18, "Unexpected offset"); + if (dso__needs_swap(dso) == DSO_SWAP__UNSET) { + unsigned char eidata; + + if (pread(fd, &eidata, sizeof(eidata), EI_DATA) == sizeof(eidata)) + dso__swap_init(dso, eidata); + } + if (dso__needs_swap(dso) != DSO_SWAP__UNSET && + pread(fd, &e_machine, sizeof(e_machine), 18) == sizeof(e_machine)) + e_machine = DSO__SWAP(dso, uint16_t, e_machine); + } + mutex_unlock(dso__data_open_lock()); + return e_machine; +} + /** * dso__data_read_addr - Read data from dso address * @dso: dso object @@ -1263,6 +1349,16 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name, return dso; } +static void __dso__set_long_name_id(struct dso *dso, const char *name, bool name_allocated) +{ + if (dso__long_name_allocated(dso)) + free((char *)dso__long_name(dso)); + + RC_CHK_ACCESS(dso)->long_name = name; + RC_CHK_ACCESS(dso)->long_name_len = strlen(name); + dso__set_long_name_allocated(dso, name_allocated); +} + static void dso__set_long_name_id(struct dso *dso, const char *name, bool name_allocated) { struct dsos *dsos = dso__dsos(dso); @@ -1276,18 +1372,11 @@ static void dso__set_long_name_id(struct dso *dso, const char *name, bool name_a * renaming the dso. */ down_write(&dsos->lock); - } - - if (dso__long_name_allocated(dso)) - free((char *)dso__long_name(dso)); - - RC_CHK_ACCESS(dso)->long_name = name; - RC_CHK_ACCESS(dso)->long_name_len = strlen(name); - dso__set_long_name_allocated(dso, name_allocated); - - if (dsos) { + __dso__set_long_name_id(dso, name, name_allocated); dsos->sorted = false; up_write(&dsos->lock); + } else { + __dso__set_long_name_id(dso, name, name_allocated); } } @@ -1365,6 +1454,16 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) dso__set_long_name_id(dso, name, name_allocated); } +static void __dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) +{ + if (dso__short_name_allocated(dso)) + free((char *)dso__short_name(dso)); + + RC_CHK_ACCESS(dso)->short_name = name; + RC_CHK_ACCESS(dso)->short_name_len = strlen(name); + dso__set_short_name_allocated(dso, name_allocated); +} + void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) { struct dsos *dsos = dso__dsos(dso); @@ -1378,17 +1477,11 @@ void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) * renaming the dso. */ down_write(&dsos->lock); - } - if (dso__short_name_allocated(dso)) - free((char *)dso__short_name(dso)); - - RC_CHK_ACCESS(dso)->short_name = name; - RC_CHK_ACCESS(dso)->short_name_len = strlen(name); - dso__set_short_name_allocated(dso, name_allocated); - - if (dsos) { + __dso__set_short_name(dso, name, name_allocated); dsos->sorted = false; up_write(&dsos->lock); + } else { + __dso__set_short_name(dso, name, name_allocated); } } @@ -1525,6 +1618,33 @@ void dso__put(struct dso *dso) RC_CHK_PUT(dso); } +int dso__swap_init(struct dso *dso, unsigned char eidata) +{ + static unsigned int const endian = 1; + + dso__set_needs_swap(dso, DSO_SWAP__NO); + + switch (eidata) { + case ELFDATA2LSB: + /* We are big endian, DSO is little endian. */ + if (*(unsigned char const *)&endian != 1) + dso__set_needs_swap(dso, DSO_SWAP__YES); + break; + + case ELFDATA2MSB: + /* We are little endian, DSO is big endian. 
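(dso__e_machine() above relies on e_machine sitting at byte offset 18 in both ELF header classes, so a single pread() serves 32- and 64-bit objects alike. A standalone sketch; demo_read_e_machine() is illustrative and, as in the tree, leaves byte swapping to the caller.)

#include <elf.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>

static uint16_t demo_read_e_machine(int fd)
{
	uint16_t e_machine = EM_NONE;

	_Static_assert(offsetof(Elf32_Ehdr, e_machine) == 18, "ELF32 layout");
	_Static_assert(offsetof(Elf64_Ehdr, e_machine) == 18, "ELF64 layout");

	if (pread(fd, &e_machine, sizeof(e_machine), 18) != sizeof(e_machine))
		return EM_NONE;
	/* Still in the file's byte order at this point. */
	return e_machine;
}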
*/ + if (*(unsigned char const *)&endian != 0) + dso__set_needs_swap(dso, DSO_SWAP__YES); + break; + + default: + pr_err("unrecognized DSO data encoding %d\n", eidata); + return -EINVAL; + } + + return 0; +} + void dso__set_build_id(struct dso *dso, struct build_id *bid) { RC_CHK_ACCESS(dso)->bid = *bid; @@ -1608,11 +1728,10 @@ size_t dso__fprintf(struct dso *dso, FILE *fp) enum dso_type dso__type(struct dso *dso, struct machine *machine) { - int fd; + int fd = -1; enum dso_type type = DSO__TYPE_UNKNOWN; - fd = dso__data_get_fd(dso, machine); - if (fd >= 0) { + if (dso__data_get_fd(dso, machine, &fd)) { type = dso__type_fd(fd); dso__data_put_fd(dso); } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index bb8e8f444054..c87564471f9b 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -20,30 +20,88 @@ struct perf_env; #define DSO__NAME_KALLSYMS "[kernel.kallsyms]" #define DSO__NAME_KCORE "[kernel.kcore]" +/** + * enum dso_binary_type - The kind of DSO generally associated with a memory + * region (struct map). + */ enum dso_binary_type { + /** @DSO_BINARY_TYPE__KALLSYMS: Symbols from /proc/kallsyms file. */ DSO_BINARY_TYPE__KALLSYMS = 0, + /** @DSO_BINARY_TYPE__GUEST_KALLSYMS: Guest /proc/kallsyms file. */ DSO_BINARY_TYPE__GUEST_KALLSYMS, + /** @DSO_BINARY_TYPE__VMLINUX: Path to kernel /boot/vmlinux file. */ DSO_BINARY_TYPE__VMLINUX, + /** @DSO_BINARY_TYPE__GUEST_VMLINUX: Path to guest kernel /boot/vmlinux file. */ DSO_BINARY_TYPE__GUEST_VMLINUX, + /** @DSO_BINARY_TYPE__JAVA_JIT: Symbols from /tmp/perf.map file. */ DSO_BINARY_TYPE__JAVA_JIT, + /** + * @DSO_BINARY_TYPE__DEBUGLINK: Debug file readable from the file path + * in the .gnu_debuglink ELF section of the dso. + */ DSO_BINARY_TYPE__DEBUGLINK, + /** + * @DSO_BINARY_TYPE__BUILD_ID_CACHE: File named after buildid located in + * the buildid cache with an elf filename. + */ DSO_BINARY_TYPE__BUILD_ID_CACHE, + /** + * @DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: File named after buildid + * located in the buildid cache with a debug filename. + */ DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO, + /** + * @DSO_BINARY_TYPE__FEDORA_DEBUGINFO: Debug file in /usr/lib/debug + * with .debug suffix. + */ DSO_BINARY_TYPE__FEDORA_DEBUGINFO, + /** @DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: Debug file in /usr/lib/debug. */ DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, + /** + * @DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: dso__long_name debuginfo + * file in /usr/lib/debug/lib rather than the expected + * /usr/lib/debug/usr/lib. + */ DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, + /** + * @DSO_BINARY_TYPE__BUILDID_DEBUGINFO: File named after buildid located + * in /usr/lib/debug/.build-id/. + */ DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + /** + * @DSO_BINARY_TYPE__GNU_DEBUGDATA: MiniDebuginfo where a compressed + * ELF file is placed in a .gnu_debugdata section. + */ + DSO_BINARY_TYPE__GNU_DEBUGDATA, + /** @DSO_BINARY_TYPE__SYSTEM_PATH_DSO: A regular executable/shared-object file. */ DSO_BINARY_TYPE__SYSTEM_PATH_DSO, + /** @DSO_BINARY_TYPE__GUEST_KMODULE: Guest kernel module .ko file. */ DSO_BINARY_TYPE__GUEST_KMODULE, + /** @DSO_BINARY_TYPE__GUEST_KMODULE_COMP: Guest kernel module .ko.gz file. */ DSO_BINARY_TYPE__GUEST_KMODULE_COMP, + /** @DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: Kernel module .ko file. */ DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE, + /** @DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: Kernel module .ko.gz file. */ DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP, + /** @DSO_BINARY_TYPE__KCORE: /proc/kcore file. 
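(The endianness probe used by dso__swap_init() above, shown in isolation: store a known integer and inspect its first byte. host_is_little_endian() is a sketch, not a helper from the tree.)

#include <stdbool.h>

static bool host_is_little_endian(void)
{
	static const unsigned int probe = 1;

	/* On a little-endian host the least significant byte comes first. */
	return *(const unsigned char *)&probe == 1;
}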
*/ DSO_BINARY_TYPE__KCORE, + /** @DSO_BINARY_TYPE__GUEST_KCORE: Guest /proc/kcore file. */ DSO_BINARY_TYPE__GUEST_KCORE, + /** + * @DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: Openembedded/Yocto -dbg + * package debug info. + */ DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, + /** @DSO_BINARY_TYPE__BPF_PROG_INFO: jitted BPF code. */ DSO_BINARY_TYPE__BPF_PROG_INFO, + /** @DSO_BINARY_TYPE__BPF_IMAGE: jitted BPF trampoline or dispatcher code. */ DSO_BINARY_TYPE__BPF_IMAGE, + /** + * @DSO_BINARY_TYPE__OOL: out of line code such as kprobe-replaced + * instructions or optimized kprobes or ftrace trampolines. + */ DSO_BINARY_TYPE__OOL, + /** @DSO_BINARY_TYPE__NOT_FOUND: Unknown DSO kind. */ DSO_BINARY_TYPE__NOT_FOUND, }; @@ -154,10 +212,12 @@ struct dso_data { int status; u32 status_seen; u64 file_size; +#ifdef HAVE_LIBUNWIND_SUPPORT u64 elf_base_addr; u64 debug_frame_offset; u64 eh_frame_hdr_addr; u64 eh_frame_hdr_offset; +#endif }; struct dso_bpf_prog { @@ -231,6 +291,8 @@ DECLARE_RC_STRUCT(dso) { char name[]; }; +extern struct mutex _dso__data_open_lock; + /* dso__for_each_symbol - iterate over the symbols of given type * * @dso: the 'struct dso *' in which symbols are iterated @@ -652,7 +714,7 @@ void __dso__inject_id(struct dso *dso, const struct dso_id *id); int dso__name_len(const struct dso *dso); struct dso *dso__get(struct dso *dso); -void dso__put(struct dso *dso); +void dso__put(struct dso *dso) LOCKS_EXCLUDED(_dso__data_open_lock); static inline void __dso__zput(struct dso **dso) { @@ -675,6 +737,8 @@ bool dso__sorted_by_name(const struct dso *dso); void dso__set_sorted_by_name(struct dso *dso); void dso__sort_by_name(struct dso *dso); +int dso__swap_init(struct dso *dso, unsigned char eidata); + void dso__set_build_id(struct dso *dso, struct build_id *bid); bool dso__build_id_equal(const struct dso *dso, struct build_id *bid); void dso__read_running_kernel_build_id(struct dso *dso, @@ -732,8 +796,8 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m, * The current usage of the dso__data_* interface is as follows: * * Get DSO's fd: - * int fd = dso__data_get_fd(dso, machine); - * if (fd >= 0) { + * int fd; + * if (dso__data_get_fd(dso, machine, &fd)) { * USE 'fd' SOMEHOW * dso__data_put_fd(dso); * } @@ -755,14 +819,16 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m, * * TODO */ -int dso__data_get_fd(struct dso *dso, struct machine *machine); -void dso__data_put_fd(struct dso *dso); -void dso__data_close(struct dso *dso); +bool dso__data_get_fd(struct dso *dso, struct machine *machine, int *fd) + EXCLUSIVE_TRYLOCK_FUNCTION(true, _dso__data_open_lock); +void dso__data_put_fd(struct dso *dso) UNLOCK_FUNCTION(_dso__data_open_lock); +void dso__data_close(struct dso *dso) LOCKS_EXCLUDED(_dso__data_open_lock); int dso__data_file_size(struct dso *dso, struct machine *machine); off_t dso__data_size(struct dso *dso, struct machine *machine); ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, u64 offset, u8 *data, ssize_t size); +uint16_t dso__e_machine(struct dso *dso, struct machine *machine); ssize_t dso__data_read_addr(struct dso *dso, struct map *map, struct machine *machine, u64 addr, u8 *data, ssize_t size); @@ -808,7 +874,9 @@ static inline bool dso__is_kcore(const struct dso *dso) static inline bool dso__is_kallsyms(const struct dso *dso) { - return RC_CHK_ACCESS(dso)->kernel && RC_CHK_ACCESS(dso)->long_name[0] != '/'; + enum dso_binary_type bt = dso__binary_type(dso); + + return bt == DSO_BINARY_TYPE__KALLSYMS || bt == 
DSO_BINARY_TYPE__GUEST_KALLSYMS; } bool dso__is_object_file(const struct dso *dso); diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index e0998e2a7c4e..4d213017d202 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -157,6 +157,7 @@ static struct dso *__dsos__find_by_longname_id(struct dsos *dsos, const char *name, const struct dso_id *id, bool write_locked) + SHARED_LOCKS_REQUIRED(dsos->lock) { struct dsos__key key = { .long_name = name, @@ -262,6 +263,7 @@ static int dsos__find_id_cb(struct dso *dso, void *data) static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, const struct dso_id *id, bool cmp_short, bool write_locked) + SHARED_LOCKS_REQUIRED(dsos->lock) { struct dso *res; @@ -338,6 +340,7 @@ static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, const } static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, const struct dso_id *id) + SHARED_LOCKS_REQUIRED(dsos->lock) { struct dso *dso = __dsos__find_id(dsos, name, id, false, /*write_locked=*/true); diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index cae4f6d63318..36411749e007 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -543,7 +543,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu) for (i = 0; i < env->nr_numa_nodes; i++) { nn = &env->numa_nodes[i]; - nr = max(nr, perf_cpu_map__max(nn->map).cpu); + nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu); } nr++; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index aac96d5d1917..7544a3104e21 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -77,6 +77,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_HEADER_FEATURE] = "FEATURE", [PERF_RECORD_COMPRESSED] = "COMPRESSED", [PERF_RECORD_FINISHED_INIT] = "FINISHED_INIT", + [PERF_RECORD_COMPRESSED2] = "COMPRESSED2", }; const char *perf_event__name(unsigned int id) @@ -448,12 +449,13 @@ int perf_event__exit_del_thread(const struct perf_tool *tool __maybe_unused, size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp) { - return fprintf(fp, " offset: %#"PRI_lx64" size: %#"PRI_lx64" flags: %#"PRI_lx64" [%s%s%s]\n", + return fprintf(fp, " offset: %#"PRI_lx64" size: %#"PRI_lx64" flags: %#"PRI_lx64" [%s%s%s%s]\n", event->aux.aux_offset, event->aux.aux_size, event->aux.flags, event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "", event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "", - event->aux.flags & PERF_AUX_FLAG_PARTIAL ? "P" : ""); + event->aux.flags & PERF_AUX_FLAG_PARTIAL ? "P" : "", + event->aux.flags & PERF_AUX_FLAG_COLLISION ? "C" : ""); } size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) @@ -767,6 +769,17 @@ int machine__resolve(struct machine *machine, struct addr_location *al, al->socket = env->cpu[al->cpu].socket_id; } + /* Account for possible out-of-order switch events. */ + al->parallelism = max(1, min(machine->parallelism, machine__nr_cpus_avail(machine))); + if (test_bit(al->parallelism, symbol_conf.parallelism_filter)) + al->filtered |= (1 << HIST_FILTER__PARALLELISM); + /* + * Multiply it by some const to avoid precision loss or dealing + * with floats. The multiplier does not matter otherwise since + * we only print it as percents. 
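(To make the integer trick concrete: the assignment just below weighs each sample's period by 1000/parallelism, so a 4000-period sample observed at parallelism 8 contributes 4000 * 1000 / 8 = 500000 latency units, and the constant cancels once totals are reported as percentages. A sketch, with an illustrative helper name:)

#include <stdint.h>

static uint64_t demo_scaled_latency(uint64_t period, int parallelism)
{
	/* Integer-only weighting; the 1000 multiplier limits precision
	 * loss and drops out of any percentage computation. */
	return period * 1000 / (uint64_t)parallelism;
}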
+ */ + al->latency = sample->period * 1000 / al->parallelism; + if (al->map) { if (symbol_conf.dso_list && (!dso || !(strlist__has_entry(symbol_conf.dso_list, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 2744c54f404e..664bf39567ce 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -67,9 +67,15 @@ enum { PERF_IP_FLAG_INTR_DISABLE = 1ULL << 13, PERF_IP_FLAG_INTR_TOGGLE = 1ULL << 14, PERF_IP_FLAG_BRANCH_MISS = 1ULL << 15, + PERF_IP_FLAG_NOT_TAKEN = 1ULL << 16, }; -#define PERF_IP_FLAG_CHARS "bcrosyiABExghDt" +#define PERF_IP_FLAG_CHARS "bcrosyiABExghDtmn" + +#define PERF_ADDITIONAL_STATE_MASK \ + (PERF_IP_FLAG_IN_TX | \ + PERF_IP_FLAG_INTR_DISABLE | \ + PERF_IP_FLAG_INTR_TOGGLE) #define PERF_BRANCH_MASK (\ PERF_IP_FLAG_BRANCH |\ @@ -85,6 +91,10 @@ enum { PERF_IP_FLAG_VMENTRY |\ PERF_IP_FLAG_VMEXIT) +#define PERF_IP_FLAG_BRANCH_EVENT_MASK \ + (PERF_IP_FLAG_BRANCH_MISS | \ + PERF_IP_FLAG_NOT_TAKEN) + #define PERF_MEM_DATA_SRC_NONE \ (PERF_MEM_S(OP, NA) |\ PERF_MEM_S(LVL, NA) |\ diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h index eabd7913c309..dcff697ed252 100644 --- a/tools/perf/util/events_stats.h +++ b/tools/perf/util/events_stats.h @@ -57,6 +57,8 @@ struct events_stats { struct hists_stats { u64 total_period; u64 total_non_filtered_period; + u64 total_latency; + u64 total_non_filtered_latency; u32 nr_samples; u32 nr_non_filtered_samples; u32 nr_lost_samples; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f0dd174e2deb..dcd1130502df 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -35,6 +35,7 @@ #include "util/util.h" #include "util/env.h" #include "util/intel-tpebs.h" +#include "util/strbuf.h" #include <signal.h> #include <unistd.h> #include <sched.h> @@ -183,7 +184,6 @@ void evlist__delete(struct evlist *evlist) if (evlist == NULL) return; - tpebs_delete(); evlist__free_stats(evlist); evlist__munmap(evlist); evlist__close(evlist); @@ -1373,19 +1373,18 @@ static int evlist__create_syswide_maps(struct evlist *evlist) */ cpus = perf_cpu_map__new_online_cpus(); if (!cpus) - goto out; + return -ENOMEM; threads = perf_thread_map__new_dummy(); - if (!threads) - goto out_put; + if (!threads) { + perf_cpu_map__put(cpus); + return -ENOMEM; + } perf_evlist__set_maps(&evlist->core, cpus, threads); - perf_thread_map__put(threads); -out_put: perf_cpu_map__put(cpus); -out: - return -ENOMEM; + return 0; } int evlist__open(struct evlist *evlist) @@ -2469,23 +2468,36 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx) return NULL; } -int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf) +void evlist__format_evsels(struct evlist *evlist, struct strbuf *sb, size_t max_length) { - struct evsel *evsel; - int printed = 0; + struct evsel *evsel, *leader = NULL; + bool first = true; evlist__for_each_entry(evlist, evsel) { + struct evsel *new_leader = evsel__leader(evsel); + if (evsel__is_dummy_event(evsel)) continue; - if (size > (strlen(evsel__name(evsel)) + (printed ? 2 : 1))) { - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "," : "", evsel__name(evsel)); - } else { - printed += scnprintf(bf + printed, size - printed, "%s...", printed ? 
"," : ""); - break; + + if (leader != new_leader && leader && leader->core.nr_members > 1) + strbuf_addch(sb, '}'); + + if (!first) + strbuf_addch(sb, ','); + + if (sb->len > max_length) { + strbuf_addstr(sb, "..."); + return; } - } + if (leader != new_leader && new_leader->core.nr_members > 1) + strbuf_addch(sb, '{'); - return printed; + strbuf_addstr(sb, evsel__name(evsel)); + first = false; + leader = new_leader; + } + if (leader && leader->core.nr_members > 1) + strbuf_addch(sb, '}'); } void evlist__check_mem_load_aux(struct evlist *evlist) @@ -2535,10 +2547,10 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis return; evlist__for_each_entry(evlist, pos) { - struct perf_cpu_map *intersect, *to_test; + struct perf_cpu_map *intersect, *to_test, *online = cpu_map__online(); const struct perf_pmu *pmu = evsel__find_pmu(pos); - to_test = pmu && pmu->is_core ? pmu->cpus : cpu_map__online(); + to_test = pmu && pmu->is_core ? pmu->cpus : online; intersect = perf_cpu_map__intersect(to_test, user_requested_cpus); if (!perf_cpu_map__equal(intersect, user_requested_cpus)) { char buf[128]; @@ -2548,38 +2560,61 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis cpu_list, pmu ? pmu->name : "cpu", buf, evsel__name(pos)); } perf_cpu_map__put(intersect); + perf_cpu_map__put(online); } perf_cpu_map__put(user_requested_cpus); } -void evlist__uniquify_name(struct evlist *evlist) +/* Should uniquify be disabled for the evlist? */ +static bool evlist__disable_uniquify(const struct evlist *evlist) { - char *new_name, empty_attributes[2] = ":", *attributes; - struct evsel *pos; + struct evsel *counter; + struct perf_pmu *last_pmu = NULL; + bool first = true; - if (perf_pmus__num_core_pmus() == 1) - return; + evlist__for_each_entry(evlist, counter) { + /* If PMUs vary then uniquify can be useful. */ + if (!first && counter->pmu != last_pmu) + return false; + first = false; + if (counter->pmu) { + /* Allow uniquify for uncore PMUs. */ + if (!counter->pmu->is_core) + return false; + /* Keep hybrid event names uniquified for clarity. */ + if (perf_pmus__num_core_pmus() > 1) + return false; + } + last_pmu = counter->pmu; + } + return true; +} - evlist__for_each_entry(evlist, pos) { - if (!evsel__is_hybrid(pos)) - continue; +static bool evlist__set_needs_uniquify(struct evlist *evlist, const struct perf_stat_config *config) +{ + struct evsel *counter; + bool needs_uniquify = false; - if (strchr(pos->name, '/')) - continue; + if (evlist__disable_uniquify(evlist)) { + evlist__for_each_entry(evlist, counter) + counter->uniquified_name = true; + return false; + } - attributes = strchr(pos->name, ':'); - if (attributes) - *attributes = '\0'; - else - attributes = empty_attributes; + evlist__for_each_entry(evlist, counter) { + if (evsel__set_needs_uniquify(counter, config)) + needs_uniquify = true; + } + return needs_uniquify; +} - if (asprintf(&new_name, "%s/%s/%s", pos->pmu ? 
pos->pmu->name : "", - pos->name, attributes + 1)) { - free(pos->name); - pos->name = new_name; - } else { - *attributes = ':'; - } +void evlist__uniquify_evsel_names(struct evlist *evlist, const struct perf_stat_config *config) +{ + if (evlist__set_needs_uniquify(evlist, config)) { + struct evsel *pos; + + evlist__for_each_entry(evlist, pos) + evsel__uniquify_counter(pos); } } @@ -2594,3 +2629,17 @@ bool evlist__has_bpf_output(struct evlist *evlist) return false; } + +bool evlist__needs_bpf_sb_event(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel__is_dummy_event(evsel)) + continue; + if (!evsel->core.attr.exclude_kernel) + return true; + } + + return false; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index adddb1db1ad2..85859708393e 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -19,7 +19,9 @@ struct pollfd; struct thread_map; struct perf_cpu_map; +struct perf_stat_config; struct record_opts; +struct strbuf; struct target; /* @@ -430,10 +432,11 @@ int event_enable_timer__process(struct event_enable_timer *eet); struct evsel *evlist__find_evsel(struct evlist *evlist, int idx); -int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf); +void evlist__format_evsels(struct evlist *evlist, struct strbuf *sb, size_t max_length); void evlist__check_mem_load_aux(struct evlist *evlist); void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list); -void evlist__uniquify_name(struct evlist *evlist); +void evlist__uniquify_evsel_names(struct evlist *evlist, const struct perf_stat_config *config); bool evlist__has_bpf_output(struct evlist *evlist); +bool evlist__needs_bpf_sb_event(struct evlist *evlist); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index bc144388f892..d55482f094bf 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -237,6 +237,16 @@ set_methods: return 0; } +const char *evsel__pmu_name(const struct evsel *evsel) +{ + struct perf_pmu *pmu = evsel__find_pmu(evsel); + + if (pmu) + return pmu->name; + + return event_type(evsel->core.attr.type); +} + #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) int __evsel__sample_size(u64 sample_type) @@ -511,6 +521,16 @@ struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig) } evsel->cgrp = cgroup__get(orig->cgrp); #ifdef HAVE_LIBTRACEEVENT + if (orig->tp_sys) { + evsel->tp_sys = strdup(orig->tp_sys); + if (evsel->tp_sys == NULL) + goto out_err; + } + if (orig->tp_name) { + evsel->tp_name = strdup(orig->tp_name); + if (evsel->tp_name == NULL) + goto out_err; + } evsel->tp_format = orig->tp_format; #endif evsel->handler = orig->handler; @@ -532,11 +552,11 @@ struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig) evsel->exclude_GH = orig->exclude_GH; evsel->sample_read = orig->sample_read; - evsel->auto_merge_stats = orig->auto_merge_stats; evsel->collect_stat = orig->collect_stat; evsel->weak_group = orig->weak_group; evsel->use_config_name = orig->use_config_name; evsel->pmu = orig->pmu; + evsel->first_wildcard_match = orig->first_wildcard_match; if (evsel__copy_config_terms(evsel, orig) < 0) goto out_err; @@ -634,7 +654,11 @@ struct tep_event *evsel__tp_format(struct evsel *evsel) if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) return NULL; - tp_format = trace_event__tp_format(evsel->tp_sys, evsel->tp_name); + if (!evsel->tp_sys) + tp_format = trace_event__tp_format_id(evsel->core.attr.config); + else 
+ tp_format = trace_event__tp_format(evsel->tp_sys, evsel->tp_name); + if (IS_ERR(tp_format)) { int err = -PTR_ERR(evsel->tp_format); @@ -1251,9 +1275,10 @@ static void evsel__set_default_freq_period(struct record_opts *opts, } } -static bool evsel__is_offcpu_event(struct evsel *evsel) +bool evsel__is_offcpu_event(struct evsel *evsel) { - return evsel__is_bpf_output(evsel) && evsel__name_is(evsel, OFFCPU_EVENT); + return evsel__is_bpf_output(evsel) && evsel__name_is(evsel, OFFCPU_EVENT) && + evsel->core.attr.sample_type & PERF_SAMPLE_RAW; } /* @@ -1401,7 +1426,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, evsel__set_sample_bit(evsel, CPU); } - if (opts->sample_address) + if (opts->sample_data_src) evsel__set_sample_bit(evsel, DATA_SRC); if (opts->sample_phys_addr) @@ -1416,9 +1441,10 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, attr->branch_sample_type = opts->branch_stack; } - if (opts->sample_weight) + if (opts->sample_weight || evsel->retire_lat) { arch_evsel__set_sample_weight(evsel); - + evsel->retire_lat = false; + } attr->task = track; attr->mmap = track; attr->mmap2 = track && !perf_missing_features.mmap2; @@ -1530,8 +1556,10 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, if (evsel__is_dummy_event(evsel)) evsel__reset_sample_bit(evsel, BRANCH_STACK); - if (evsel__is_offcpu_event(evsel)) + if (evsel__is_offcpu_event(evsel)) { evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES; + attr->inherit = 0; + } arch__post_evsel_config(evsel, attr); } @@ -1632,6 +1660,8 @@ void evsel__exit(struct evsel *evsel) { assert(list_empty(&evsel->core.node)); assert(evsel->evlist == NULL); + if (evsel__is_retire_lat(evsel)) + evsel__tpebs_close(evsel); bpf_counter__destroy(evsel); perf_bpf_filter__destroy(evsel); evsel__free_counts(evsel); @@ -1694,11 +1724,6 @@ static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread) return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count); } -static int evsel__read_retire_lat(struct evsel *evsel, int cpu_map_idx, int thread) -{ - return tpebs_set_evsel(evsel, cpu_map_idx, thread); -} - static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread, u64 val, u64 ena, u64 run, u64 lost) { @@ -1706,8 +1731,8 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread, count = perf_counts(counter->counts, cpu_map_idx, thread); - if (counter->retire_lat) { - evsel__read_retire_lat(counter, cpu_map_idx, thread); + if (evsel__is_retire_lat(counter)) { + evsel__tpebs_read(counter, cpu_map_idx, thread); perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true); return; } @@ -1865,7 +1890,7 @@ int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread) return evsel__hwmon_pmu_read(evsel, cpu_map_idx, thread); if (evsel__is_retire_lat(evsel)) - return evsel__read_retire_lat(evsel, cpu_map_idx, thread); + return evsel__tpebs_read(evsel, cpu_map_idx, thread); if (evsel->core.attr.read_format & PERF_FORMAT_GROUP) return evsel__read_group(evsel, cpu_map_idx, thread); @@ -2542,25 +2567,6 @@ check: return false; } -static bool evsel__handle_error_quirks(struct evsel *evsel, int error) -{ - /* - * AMD core PMU tries to forward events with precise_ip to IBS PMU - * implicitly. But IBS PMU has more restrictions so it can fail with - * supported event attributes. Let's forward it back to the core PMU - * by clearing precise_ip only if it's from precise_max (:P). 
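(The fallback added in evsel__tp_format() above lets tracepoint evsels created without a sys/name pair — e.g. from a raw tracepoint id — still resolve their format. The lookup shape in isolation; demo_tp_format() is an illustrative name.)

#ifdef HAVE_LIBTRACEEVENT
static struct tep_event *demo_tp_format(struct evsel *evsel)
{
	/* Prefer the recorded sys/name pair, else fall back to the id
	 * encoded in attr.config when the event was parsed. */
	if (!evsel->tp_sys)
		return trace_event__tp_format_id(evsel->core.attr.config);
	return trace_event__tp_format(evsel->tp_sys, evsel->tp_name);
}
#endif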
- */ - if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() && - evsel->core.attr.precise_ip && evsel->precise_max) { - evsel->core.attr.precise_ip = 0; - pr_debug2_peo("removing precise_ip on AMD\n"); - display_attr(&evsel->core.attr); - return true; - } - - return false; -} - static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads, int start_cpu_map_idx, int end_cpu_map_idx) @@ -2571,7 +2577,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_cpu cpu; if (evsel__is_retire_lat(evsel)) - return tpebs_start(evsel->evlist); + return evsel__tpebs_open(evsel); err = __evsel__prepare_open(evsel, cpus, threads); if (err) @@ -2706,9 +2712,6 @@ try_fallback: if (evsel__precise_ip_fallback(evsel)) goto retry_open; - if (evsel__handle_error_quirks(evsel, err)) - goto retry_open; - out_close: if (err) threads->err_thread = thread; @@ -2735,7 +2738,7 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, void evsel__close(struct evsel *evsel) { if (evsel__is_retire_lat(evsel)) - tpebs_delete(); + evsel__tpebs_close(evsel); perf_evsel__close(&evsel->core); perf_evsel__free_id(&evsel->core); } @@ -2921,6 +2924,35 @@ static inline bool evsel__has_branch_counters(const struct evsel *evsel) return false; } +static int __set_offcpu_sample(struct perf_sample *data) +{ + u64 *array = data->raw_data; + u32 max_size = data->raw_size, *p32; + const void *endp = (void *)array + max_size; + + if (array == NULL) + return -EFAULT; + + OVERFLOW_CHECK_u64(array); + p32 = (void *)array++; + data->pid = p32[0]; + data->tid = p32[1]; + + OVERFLOW_CHECK_u64(array); + data->period = *array++; + + OVERFLOW_CHECK_u64(array); + data->callchain = (struct ip_callchain *)array++; + OVERFLOW_CHECK(array, data->callchain->nr * sizeof(u64), max_size); + data->ip = data->callchain->ips[1]; + array += data->callchain->nr; + + OVERFLOW_CHECK_u64(array); + data->cgroup = *array; + + return 0; +} + int evsel__parse_sample(struct evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -3164,17 +3196,19 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, } if (type & PERF_SAMPLE_REGS_USER) { + struct regs_dump *regs = perf_sample__user_regs(data); + OVERFLOW_CHECK_u64(array); - data->user_regs.abi = *array; + regs->abi = *array; array++; - if (data->user_regs.abi) { + if (regs->abi) { u64 mask = evsel->core.attr.sample_regs_user; sz = hweight64(mask) * sizeof(u64); OVERFLOW_CHECK(array, sz, max_size); - data->user_regs.mask = mask; - data->user_regs.regs = (u64 *)array; + regs->mask = mask; + regs->regs = (u64 *)array; array = (void *)array + sz; } } @@ -3218,19 +3252,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, array++; } - data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE; if (type & PERF_SAMPLE_REGS_INTR) { + struct regs_dump *regs = perf_sample__intr_regs(data); + OVERFLOW_CHECK_u64(array); - data->intr_regs.abi = *array; + regs->abi = *array; array++; - if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) { + if (regs->abi != PERF_SAMPLE_REGS_ABI_NONE) { u64 mask = evsel->core.attr.sample_regs_intr; sz = hweight64(mask) * sizeof(u64); OVERFLOW_CHECK(array, sz, max_size); - data->intr_regs.mask = mask; - data->intr_regs.regs = (u64 *)array; + regs->mask = mask; + regs->regs = (u64 *)array; array = (void *)array + sz; } } @@ -3272,6 +3307,9 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, array = (void *)array + sz; } + if 
(evsel__is_offcpu_event(evsel)) + return __set_offcpu_sample(data); + return 0; } @@ -3747,6 +3785,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "%s", "No hardware sampling interrupt available.\n"); #endif + if (!target__has_cpu(target)) + return scnprintf(msg, size, + "Unsupported event (%s) in per-thread mode, enable system wide with '-a'.", + evsel__name(evsel)); break; case EBUSY: if (find_process("oprofiled")) @@ -3856,10 +3898,10 @@ void evsel__zero_per_pkg(struct evsel *evsel) */ bool evsel__is_hybrid(const struct evsel *evsel) { - if (perf_pmus__num_core_pmus() == 1) + if (!evsel->core.is_pmu_core) return false; - return evsel->core.is_pmu_core; + return perf_pmus__num_core_pmus() > 1; } struct evsel *evsel__leader(const struct evsel *evsel) @@ -3912,3 +3954,120 @@ void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader) leader->core.nr_members--; } } + +bool evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config) +{ + struct evsel *evsel; + + if (counter->needs_uniquify) { + /* Already set. */ + return true; + } + + if (counter->use_config_name || counter->is_libpfm_event) { + /* Original name will be used. */ + return false; + } + + if (!config->hybrid_merge && evsel__is_hybrid(counter)) { + /* Unique hybrid counters necessary. */ + counter->needs_uniquify = true; + return true; + } + + if (counter->core.attr.type < PERF_TYPE_MAX && counter->core.attr.type != PERF_TYPE_RAW) { + /* Legacy event, don't uniquify. */ + return false; + } + + if (counter->pmu && counter->pmu->is_core && + counter->alternate_hw_config != PERF_COUNT_HW_MAX) { + /* A sysfs or json event replacing a legacy event, don't uniquify. */ + return false; + } + + if (config->aggr_mode == AGGR_NONE) { + /* Always unique with no aggregation. */ + counter->needs_uniquify = true; + return true; + } + + if (counter->first_wildcard_match != NULL) { + /* + * If stats are merged then only the first_wildcard_match is + * displayed, there is no need to uniquify this evsel as the + * name won't be shown. + */ + return false; + } + + /* + * Do other non-merged events in the evlist have the same name? If so + * uniquify is necessary. + */ + evlist__for_each_entry(counter->evlist, evsel) { + if (evsel == counter || evsel->first_wildcard_match || evsel->pmu == counter->pmu) + continue; + + if (evsel__name_is(counter, evsel__name(evsel))) { + counter->needs_uniquify = true; + return true; + } + } + return false; +} + +void evsel__uniquify_counter(struct evsel *counter) +{ + const char *name, *pmu_name; + char *new_name, *config; + int ret; + + /* No uniquification necessary. */ + if (!counter->needs_uniquify) + return; + + /* The evsel was already uniquified. */ + if (counter->uniquified_name) + return; + + /* Avoid checking to uniquify twice. */ + counter->uniquified_name = true; + + name = evsel__name(counter); + pmu_name = counter->pmu->name; + /* Already prefixed by the PMU name. */ + if (!strncmp(name, pmu_name, strlen(pmu_name))) + return; + + config = strchr(name, '/'); + if (config) { + int len = config - name; + + if (config[1] == '/') { + /* case: event// */ + ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 2); + } else { + /* case: event/.../ */ + ret = asprintf(&new_name, "%s/%.*s,%s", pmu_name, len, name, config + 1); + } + } else { + config = strchr(name, ':'); + if (config) { + /* case: event:.. 
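(Concretely, the rewriting in evsel__uniquify_counter() yields names of the following shapes; the examples use a hypothetical core PMU called "cpu":

	inst_retired.any        ->  cpu/inst_retired.any/
	inst_retired.any:u      ->  cpu/inst_retired.any/u
	mem-loads/ldlat=30/     ->  cpu/mem-loads,ldlat=30/

Names already prefixed with the PMU name are left untouched by the strncmp() check above.)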
*/ + int len = config - name; + + ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 1); + } else { + /* case: event */ + ret = asprintf(&new_name, "%s/%s/", pmu_name, name); + } + } + if (ret > 0) { + free(counter->name); + counter->name = new_name; + } else { + /* ENOMEM from asprintf. */ + counter->uniquified_name = false; + } +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 5e789fa80590..6dbc9690e0c9 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -16,6 +16,7 @@ struct bpf_object; struct cgroup; struct perf_counts; +struct perf_stat_config; struct perf_stat_evsel; union perf_event; struct bpf_counter_ops; @@ -69,6 +70,11 @@ struct evsel { const char *unit; struct cgroup *cgrp; const char *metric_id; + /* + * This point to the first evsel with the same name, intended to store the + * aggregated counts in aggregation mode. + */ + struct evsel *first_wildcard_match; /* parse modifier helper */ int exclude_GH; int sample_read; @@ -77,7 +83,6 @@ struct evsel { bool percore; bool precise_max; bool is_libpfm_event; - bool auto_merge_stats; bool collect_stat; bool weak_group; bool bpf_counter; @@ -114,11 +119,11 @@ struct evsel { bool ignore_missing_thread; bool forced_leader; bool cmdline_group_boundary; - bool merged_stat; bool reset_group; bool errored; bool needs_auxtrace_mmap; bool default_metricgroup; /* A member of the Default metricgroup */ + bool needs_uniquify; struct hashmap *per_pkg_mask; int err; int script_output_type; @@ -176,6 +181,12 @@ struct evsel { /* For tool events */ /* Beginning time subtracted when the counter is read. */ union { + /* Defaults for retirement latency events. */ + struct _retirement_latency { + double mean; + double min; + double max; + } retirement_latency; /* duration_time is a single global time. */ __u64 start_time; /* @@ -236,6 +247,7 @@ int evsel__object_config(size_t object_size, void (*fini)(struct evsel *evsel)); struct perf_pmu *evsel__find_pmu(const struct evsel *evsel); +const char *evsel__pmu_name(const struct evsel *evsel); bool evsel__is_aux_event(const struct evsel *evsel); struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx); @@ -540,6 +552,9 @@ void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader); bool arch_evsel__must_be_in_group(const struct evsel *evsel); +bool evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config); +void evsel__uniquify_counter(struct evsel *counter); + /* * Macro to swap the bit-field postition and size. 
* Used when, @@ -555,4 +570,6 @@ u64 evsel__bitfield_swap_branch_flags(u64 value); void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel, const char *config_name, u64 val); +bool evsel__is_offcpu_event(struct evsel *evsel); + #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index af52a1516d0b..94a1e9cf73d6 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -48,6 +48,7 @@ struct evsel_config_term { u32 aux_sample_size; u64 cfg_chg; char *str; + int cpu; } val; bool weak; }; diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index c221dcce6666..6413537442aa 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -215,6 +215,8 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) int expr__get_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **data) { + if (!ctx || !id) + return -1; return hashmap__find(ctx->ids, id, data) ? 0 : -1; } diff --git a/tools/perf/util/fncache.c b/tools/perf/util/fncache.c index 6225cbc52310..bf9559c55c63 100644 --- a/tools/perf/util/fncache.c +++ b/tools/perf/util/fncache.c @@ -1,53 +1,58 @@ // SPDX-License-Identifier: GPL-2.0-only /* Manage a cache of file names' existence */ +#include <pthread.h> #include <stdlib.h> -#include <unistd.h> #include <string.h> -#include <linux/list.h> +#include <unistd.h> +#include <linux/compiler.h> #include "fncache.h" +#include "hashmap.h" -struct fncache { - struct hlist_node nd; - bool res; - char name[]; -}; +static struct hashmap *fncache; -#define FNHSIZE 61 +static size_t fncache__hash(long key, void *ctx __maybe_unused) +{ + return str_hash((const char *)key); +} -static struct hlist_head fncache_hash[FNHSIZE]; +static bool fncache__equal(long key1, long key2, void *ctx __maybe_unused) +{ + return strcmp((const char *)key1, (const char *)key2) == 0; +} -unsigned shash(const unsigned char *s) +static void fncache__init(void) { - unsigned h = 0; - while (*s) - h = 65599 * h + *s++; - return h ^ (h >> 16); + fncache = hashmap__new(fncache__hash, fncache__equal, /*ctx=*/NULL); +} + +static struct hashmap *fncache__get(void) +{ + static pthread_once_t fncache_once = PTHREAD_ONCE_INIT; + + pthread_once(&fncache_once, fncache__init); + + return fncache; } static bool lookup_fncache(const char *name, bool *res) { - int h = shash((const unsigned char *)name) % FNHSIZE; - struct fncache *n; - - hlist_for_each_entry(n, &fncache_hash[h], nd) { - if (!strcmp(n->name, name)) { - *res = n->res; - return true; - } - } - return false; + long val; + + if (!hashmap__find(fncache__get(), name, &val)) + return false; + + *res = (val != 0); + return true; } static void update_fncache(const char *name, bool res) { - struct fncache *n = malloc(sizeof(struct fncache) + strlen(name) + 1); - int h = shash((const unsigned char *)name) % FNHSIZE; - - if (!n) - return; - strcpy(n->name, name); - n->res = res; - hlist_add_head(&n->nd, &fncache_hash[h]); + char *old_key = NULL, *key = strdup(name); + + if (key) { + hashmap__set(fncache__get(), key, res, &old_key, /*old_value*/NULL); + free(old_key); + } } /* No LRU, only use when bounded in some other way. 
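(Taken together, the fncache rewrite above swaps the fixed 61-bucket hlist for a growable hashmap keyed by strdup()ed path names. A usage sketch of the resulting cache; demo_file_available() is illustrative, with access(2) standing in for whatever probe the real file_available() performs.)

#include <stdbool.h>
#include <unistd.h>

static bool demo_file_available(const char *name)
{
	bool res;

	/* Serve repeated queries from the cache; otherwise probe once
	 * and remember the outcome. */
	if (lookup_fncache(name, &res))
		return res;
	res = access(name, F_OK) == 0;
	update_fncache(name, res);
	return res;
}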
*/ diff --git a/tools/perf/util/fncache.h b/tools/perf/util/fncache.h index fe020beaefb1..b6a0f209493e 100644 --- a/tools/perf/util/fncache.h +++ b/tools/perf/util/fncache.h @@ -1,7 +1,6 @@ #ifndef _FCACHE_H #define _FCACHE_H 1 -unsigned shash(const unsigned char *s); bool file_available(const char *name); #endif diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h index 5dee2caba0fe..a9bc47da83a5 100644 --- a/tools/perf/util/ftrace.h +++ b/tools/perf/util/ftrace.h @@ -24,6 +24,8 @@ struct perf_ftrace { unsigned int bucket_range; unsigned int min_latency; unsigned int max_latency; + unsigned int bucket_num; + bool hide_empty; int graph_depth; int func_stack_trace; int func_irq_info; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d06aa86352d3..e3cdc3b7b4ab 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -44,6 +44,7 @@ #include "build-id.h" #include "data.h" #include <api/fs/fs.h> +#include <api/io_dir.h> #include "asm/bug.h" #include "tool.h" #include "time-utils.h" @@ -1311,11 +1312,11 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) { unsigned int phys, size = 0; char path[PATH_MAX]; - struct dirent *ent; - DIR *dir; + struct io_dirent64 *ent; + struct io_dir dir; #define for_each_memory(mem, dir) \ - while ((ent = readdir(dir))) \ + while ((ent = io_dir__readdir(&dir)) != NULL) \ if (strcmp(ent->d_name, ".") && \ strcmp(ent->d_name, "..") && \ sscanf(ent->d_name, "memory%u", &mem) == 1) @@ -1324,9 +1325,9 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) "%s/devices/system/node/node%lu", sysfs__mountpoint(), idx); - dir = opendir(path); - if (!dir) { - pr_warning("failed: can't open memory sysfs data\n"); + io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (dir.dirfd < 0) { + pr_warning("failed: can't open memory sysfs data '%s'\n", path); return -1; } @@ -1338,20 +1339,20 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) n->set = bitmap_zalloc(size); if (!n->set) { - closedir(dir); + close(dir.dirfd); return -ENOMEM; } n->node = idx; n->size = size; - rewinddir(dir); + io_dir__rewinddir(&dir); for_each_memory(phys, dir) { __set_bit(phys, n->set); } - closedir(dir); + close(dir.dirfd); return 0; } @@ -1374,8 +1375,8 @@ static int memory_node__sort(const void *a, const void *b) static int build_mem_topology(struct memory_node **nodesp, u64 *cntp) { char path[PATH_MAX]; - struct dirent *ent; - DIR *dir; + struct io_dirent64 *ent; + struct io_dir dir; int ret = 0; size_t cnt = 0, size = 0; struct memory_node *nodes = NULL; @@ -1383,14 +1384,14 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp) scnprintf(path, PATH_MAX, "%s/devices/system/node/", sysfs__mountpoint()); - dir = opendir(path); - if (!dir) { + io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (dir.dirfd < 0) { pr_debug2("%s: couldn't read %s, does this arch have topology information?\n", __func__, path); return -1; } - while (!ret && (ent = readdir(dir))) { + while (!ret && (ent = io_dir__readdir(&dir))) { unsigned int idx; int r; @@ -1419,7 +1420,7 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp) cnt += 1; } out: - closedir(dir); + close(dir.dirfd); if (!ret) { *cntp = cnt; *nodesp = nodes; @@ -2769,6 +2770,8 @@ static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused) free(name); pmu_num--; } + /* AMD may set it by evlist__has_amd_ibs() from perf_session__new() */ + 
free(ff->ph->env.pmu_mappings); ff->ph->env.pmu_mappings = strbuf_detach(&sb, NULL); return 0; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 0f30f843c566..afc6855327ab 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -43,6 +43,8 @@ static bool hists__filter_entry_by_symbol(struct hists *hists, struct hist_entry *he); static bool hists__filter_entry_by_socket(struct hists *hists, struct hist_entry *he); +static bool hists__filter_entry_by_parallelism(struct hists *hists, + struct hist_entry *he); u16 hists__col_len(struct hists *hists, enum hist_column col) { @@ -207,6 +209,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_CGROUP, 6); hists__new_col_len(hists, HISTC_CGROUP_ID, 20); + hists__new_col_len(hists, HISTC_PARALLELISM, 11); hists__new_col_len(hists, HISTC_CPU, 3); hists__new_col_len(hists, HISTC_SOCKET, 6); hists__new_col_len(hists, HISTC_MEM_LOCKED, 6); @@ -302,9 +305,10 @@ static long hist_time(unsigned long htime) return htime; } -static void he_stat__add_period(struct he_stat *he_stat, u64 period) +static void he_stat__add_period(struct he_stat *he_stat, u64 period, u64 latency) { he_stat->period += period; + he_stat->latency += latency; he_stat->nr_events += 1; } @@ -319,6 +323,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->weight2 += src->weight2; dest->weight3 += src->weight3; dest->nr_events += src->nr_events; + dest->latency += src->latency; } static void he_stat__decay(struct he_stat *he_stat) @@ -328,6 +333,70 @@ static void he_stat__decay(struct he_stat *he_stat) he_stat->weight1 = (he_stat->weight1 * 7) / 8; he_stat->weight2 = (he_stat->weight2 * 7) / 8; he_stat->weight3 = (he_stat->weight3 * 7) / 8; + he_stat->latency = (he_stat->latency * 7) / 8; +} + +static int hists__update_mem_stat(struct hists *hists, struct hist_entry *he, + struct mem_info *mi, u64 period) +{ + if (hists->nr_mem_stats == 0) + return 0; + + if (he->mem_stat == NULL) { + he->mem_stat = calloc(hists->nr_mem_stats, sizeof(*he->mem_stat)); + if (he->mem_stat == NULL) + return -1; + } + + for (int i = 0; i < hists->nr_mem_stats; i++) { + int idx = mem_stat_index(hists->mem_stat_types[i], + mem_info__const_data_src(mi)->val); + + assert(0 <= idx && idx < MEM_STAT_LEN); + he->mem_stat[i].entries[idx] += period; + hists->mem_stat_total[i].entries[idx] += period; + } + return 0; +} + +static void hists__add_mem_stat(struct hists *hists, struct hist_entry *dst, + struct hist_entry *src) +{ + if (hists->nr_mem_stats == 0) + return; + + for (int i = 0; i < hists->nr_mem_stats; i++) { + for (int k = 0; k < MEM_STAT_LEN; k++) + dst->mem_stat[i].entries[k] += src->mem_stat[i].entries[k]; + } +} + +static int hists__clone_mem_stat(struct hists *hists, struct hist_entry *dst, + struct hist_entry *src) +{ + if (hists->nr_mem_stats == 0) + return 0; + + dst->mem_stat = calloc(hists->nr_mem_stats, sizeof(*dst->mem_stat)); + if (dst->mem_stat == NULL) + return -1; + + for (int i = 0; i < hists->nr_mem_stats; i++) { + for (int k = 0; k < MEM_STAT_LEN; k++) + dst->mem_stat[i].entries[k] = src->mem_stat[i].entries[k]; + } + return 0; +} + +static void hists__decay_mem_stat(struct hists *hists, struct hist_entry *he) +{ + if (hists->nr_mem_stats == 0) + return; + + for (int i = 0; i < hists->nr_mem_stats; i++) { + for (int k = 0; k < MEM_STAT_LEN; k++) + he->mem_stat[i].entries[k] = (he->mem_stat[i].entries[k] * 7) / 8; + } } static void hists__delete_entry(struct hists *hists, struct 
hist_entry *he); @@ -335,7 +404,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he); static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) { u64 prev_period = he->stat.period; - u64 diff; + u64 prev_latency = he->stat.latency; if (prev_period == 0) return true; @@ -344,13 +413,18 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) if (symbol_conf.cumulate_callchain) he_stat__decay(he->stat_acc); decay_callchain(he->callchain); - - diff = prev_period - he->stat.period; + hists__decay_mem_stat(hists, he); if (!he->depth) { - hists->stats.total_period -= diff; - if (!he->filtered) - hists->stats.total_non_filtered_period -= diff; + u64 period_diff = prev_period - he->stat.period; + u64 latency_diff = prev_latency - he->stat.latency; + + hists->stats.total_period -= period_diff; + hists->stats.total_latency -= latency_diff; + if (!he->filtered) { + hists->stats.total_non_filtered_period -= period_diff; + hists->stats.total_non_filtered_latency -= latency_diff; + } } if (!he->leaf) { @@ -365,7 +439,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) } } - return he->stat.period == 0; + return he->stat.period == 0 && he->stat.latency == 0; } static void hists__delete_entry(struct hists *hists, struct hist_entry *he) @@ -584,21 +658,24 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, return he; } -static u8 symbol__parent_filter(const struct symbol *parent) +static filter_mask_t symbol__parent_filter(const struct symbol *parent) { if (symbol_conf.exclude_other && parent == NULL) return 1 << HIST_FILTER__PARENT; return 0; } -static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period) +static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period, u64 latency) { if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain) return; he->hists->callchain_period += period; - if (!he->filtered) + he->hists->callchain_latency += latency; + if (!he->filtered) { he->hists->callchain_non_filtered_period += period; + he->hists->callchain_non_filtered_latency += latency; + } } static struct hist_entry *hists__findnew_entry(struct hists *hists, @@ -611,6 +688,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, struct hist_entry *he; int64_t cmp; u64 period = entry->stat.period; + u64 latency = entry->stat.latency; bool leftmost = true; p = &hists->entries_in->rb_root.rb_node; @@ -629,10 +707,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!cmp) { if (sample_self) { he_stat__add_stat(&he->stat, &entry->stat); - hist_entry__add_callchain_period(he, period); + hist_entry__add_callchain_period(he, period, latency); } if (symbol_conf.cumulate_callchain) - he_stat__add_period(he->stat_acc, period); + he_stat__add_period(he->stat_acc, period, latency); block_info__delete(entry->block_info); @@ -669,7 +747,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, return NULL; if (sample_self) - hist_entry__add_callchain_period(he, period); + hist_entry__add_callchain_period(he, period, latency); hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); @@ -679,6 +757,10 @@ out: he_stat__add_cpumode_period(&he->stat, al->cpumode, period); if (symbol_conf.cumulate_callchain) he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period); + if (hists__update_mem_stat(hists, he, entry->mem_info, period) < 0) { + hist_entry__delete(he); + return NULL; + } return he; } @@ -741,12 
+823,14 @@ __hists__add_entry(struct hists *hists, .ip = al->addr, .level = al->level, .code_page_size = sample->code_page_size, + .parallelism = al->parallelism, .stat = { .nr_events = 1, .period = sample->period, .weight1 = sample->weight, .weight2 = sample->ins_lat, .weight3 = sample->p_stage_cyc, + .latency = al->latency, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent) | al->filtered, @@ -975,8 +1059,6 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a if (he == NULL) return -ENOMEM; - hists__inc_nr_samples(hists, he->filtered); - out: iter->he = he; iter->curr++; @@ -995,9 +1077,15 @@ static int iter_finish_branch_entry(struct hist_entry_iter *iter, struct addr_location *al __maybe_unused) { + struct evsel *evsel = iter->evsel; + struct hists *hists = evsel__hists(evsel); + for (int i = 0; i < iter->total; i++) branch_info__exit(&iter->bi[i]); + if (iter->he) + hists__inc_nr_samples(hists, iter->he->filtered); + zfree(&iter->bi); iter->he = NULL; @@ -1365,6 +1453,16 @@ void hist_entry__delete(struct hist_entry *he) { struct hist_entry_ops *ops = he->ops; + if (symbol_conf.report_hierarchy) { + struct rb_root *root = &he->hroot_out.rb_root; + struct hist_entry *child, *tmp; + + rbtree_postorder_for_each_entry_safe(child, tmp, root, rb_node) + hist_entry__delete(child); + + *root = RB_ROOT; + } + thread__zput(he->thread); map_symbol__exit(&he->ms); @@ -1393,6 +1491,7 @@ void hist_entry__delete(struct hist_entry *he) free_callchain(he->callchain); zfree(&he->trace_output); zfree(&he->raw_data); + zfree(&he->mem_stat); ops->free(he); } @@ -1455,6 +1554,10 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he, if (symbol_conf.sym_list == NULL) return; break; + case HIST_FILTER__PARALLELISM: + if (__bitmap_weight(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1) == 0) + return; + break; case HIST_FILTER__PARENT: case HIST_FILTER__GUEST: case HIST_FILTER__HOST: @@ -1513,6 +1616,9 @@ static void hist_entry__apply_hierarchy_filters(struct hist_entry *he) hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL, perf_hpp__is_sym_entry); + hist_entry__check_and_remove_filter(he, HIST_FILTER__PARALLELISM, + perf_hpp__is_parallelism_entry); + hists__apply_filters(he->hists, he); } @@ -1535,6 +1641,7 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he); if (!cmp) { he_stat__add_stat(&iter->stat, &he->stat); + hists__add_mem_stat(hists, iter, he); return iter; } @@ -1576,6 +1683,11 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, new->srcfile = NULL; } + if (hists__clone_mem_stat(hists, new, he) < 0) { + hist_entry__delete(new); + return NULL; + } + rb_link_node(&new->rb_node_in, parent, p); rb_insert_color_cached(&new->rb_node_in, root, leftmost); return new; @@ -1658,6 +1770,7 @@ static int hists__collapse_insert_entry(struct hists *hists, he_stat__add_stat(&iter->stat, &he->stat); if (symbol_conf.cumulate_callchain) he_stat__add_stat(iter->stat_acc, he->stat_acc); + hists__add_mem_stat(hists, iter, he); if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) { struct callchain_cursor *cursor = get_tls_callchain_cursor(); @@ -1709,6 +1822,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he) hists__filter_entry_by_thread(hists, he); hists__filter_entry_by_symbol(hists, he); hists__filter_entry_by_socket(hists, he); + hists__filter_entry_by_parallelism(hists, he); } int 
hists__collapse_resort(struct hists *hists, struct ui_progress *prog) @@ -1756,12 +1870,14 @@ static void hists__reset_filter_stats(struct hists *hists) { hists->nr_non_filtered_entries = 0; hists->stats.total_non_filtered_period = 0; + hists->stats.total_non_filtered_latency = 0; } void hists__reset_stats(struct hists *hists) { hists->nr_entries = 0; hists->stats.total_period = 0; + hists->stats.total_latency = 0; hists__reset_filter_stats(hists); } @@ -1770,6 +1886,7 @@ static void hists__inc_filter_stats(struct hists *hists, struct hist_entry *h) { hists->nr_non_filtered_entries++; hists->stats.total_non_filtered_period += h->stat.period; + hists->stats.total_non_filtered_latency += h->stat.latency; } void hists__inc_stats(struct hists *hists, struct hist_entry *h) @@ -1779,6 +1896,7 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h) hists->nr_entries++; hists->stats.total_period += h->stat.period; + hists->stats.total_latency += h->stat.latency; } static void hierarchy_recalc_total_periods(struct hists *hists) @@ -1790,6 +1908,8 @@ static void hierarchy_recalc_total_periods(struct hists *hists) hists->stats.total_period = 0; hists->stats.total_non_filtered_period = 0; + hists->stats.total_latency = 0; + hists->stats.total_non_filtered_latency = 0; /* * recalculate total period using top-level entries only @@ -1801,8 +1921,11 @@ static void hierarchy_recalc_total_periods(struct hists *hists) node = rb_next(node); hists->stats.total_period += he->stat.period; - if (!he->filtered) + hists->stats.total_latency += he->stat.latency; + if (!he->filtered) { hists->stats.total_non_filtered_period += he->stat.period; + hists->stats.total_non_filtered_latency += he->stat.latency; + } } } @@ -2195,6 +2318,16 @@ static bool hists__filter_entry_by_socket(struct hists *hists, return false; } +static bool hists__filter_entry_by_parallelism(struct hists *hists, + struct hist_entry *he) +{ + if (test_bit(he->parallelism, hists->parallelism_filter)) { + he->filtered |= (1 << HIST_FILTER__PARALLELISM); + return true; + } + return false; +} + typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he); static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter) @@ -2364,6 +2497,16 @@ void hists__filter_by_socket(struct hists *hists) hists__filter_entry_by_socket); } +void hists__filter_by_parallelism(struct hists *hists) +{ + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__PARALLELISM, + hists->parallelism_filter); + else + hists__filter_by_type(hists, HIST_FILTER__PARALLELISM, + hists__filter_entry_by_parallelism); +} + void events_stats__inc(struct events_stats *stats, u32 type) { ++stats->nr_events[0]; @@ -2759,6 +2902,12 @@ u64 hists__total_period(struct hists *hists) hists->stats.total_period; } +u64 hists__total_latency(struct hists *hists) +{ + return symbol_conf.filter_relative ? 
hists->stats.total_non_filtered_latency : + hists->stats.total_latency; +} + int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq) { char unit; @@ -2870,6 +3019,7 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) hists->entries = RB_ROOT_CACHED; mutex_init(&hists->lock); hists->socket_filter = -1; + hists->parallelism_filter = symbol_conf.parallelism_filter; hists->hpp_list = hpp_list; INIT_LIST_HEAD(&hists->hpp_formats); return 0; @@ -2904,6 +3054,8 @@ static void hists_evsel__exit(struct evsel *evsel) struct perf_hpp_list_node *node, *tmp; hists__delete_all_entries(hists); + zfree(&hists->mem_stat_types); + zfree(&hists->mem_stat_total); list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) { perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 46c8373e3146..c64254088fc7 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -9,6 +9,7 @@ #include "events_stats.h" #include "evsel.h" #include "map_symbol.h" +#include "mem-events.h" #include "mutex.h" #include "sample.h" #include "spark.h" @@ -31,17 +32,22 @@ enum hist_filter { HIST_FILTER__HOST, HIST_FILTER__SOCKET, HIST_FILTER__C2C, + HIST_FILTER__PARALLELISM, }; +typedef u16 filter_mask_t; + enum hist_column { HISTC_SYMBOL, HISTC_TIME, HISTC_DSO, HISTC_THREAD, + HISTC_TGID, HISTC_COMM, HISTC_CGROUP_ID, HISTC_CGROUP, HISTC_PARENT, + HISTC_PARALLELISM, HISTC_CPU, HISTC_SOCKET, HISTC_SRCLINE, @@ -96,6 +102,13 @@ enum hist_column { struct thread; struct dso; +#define MEM_STAT_LEN 8 + +struct he_mem_stat { + /* meaning of entries depends on enum mem_stat_type */ + u64 entries[MEM_STAT_LEN]; +}; + struct hists { struct rb_root_cached entries_in_array[2]; struct rb_root_cached *entries_in; @@ -105,10 +118,13 @@ struct hists { u64 nr_non_filtered_entries; u64 callchain_period; u64 callchain_non_filtered_period; + u64 callchain_latency; + u64 callchain_non_filtered_latency; struct thread *thread_filter; const struct dso *dso_filter; const char *uid_filter_str; const char *symbol_filter_str; + unsigned long *parallelism_filter; struct mutex lock; struct hists_stats stats; u64 event_stream; @@ -118,6 +134,9 @@ struct hists { struct perf_hpp_list *hpp_list; struct list_head hpp_formats; int nr_hpp_node; + int nr_mem_stats; + enum mem_stat_type *mem_stat_types; + struct he_mem_stat *mem_stat_total; }; #define hists__has(__h, __f) (__h)->hpp_list->__f @@ -165,6 +184,12 @@ struct res_sample { struct he_stat { u64 period; + /* + * Period re-scaled from CPU time to wall-clock time (divided by the + * parallelism at the time of the sample). This represents effect of + * the event on latency rather than CPU consumption. + */ + u64 latency; u64 period_sys; u64 period_us; u64 period_guest_sys; @@ -219,6 +244,7 @@ struct hist_entry { } pairs; struct he_stat stat; struct he_stat *stat_acc; + struct he_mem_stat *mem_stat; struct map_symbol ms; struct thread *thread; struct comm *comm; @@ -226,15 +252,16 @@ struct hist_entry { u64 cgroup; u64 ip; u64 transaction; - s32 socket; - s32 cpu; u64 code_page_size; u64 weight; u64 ins_lat; u64 p_stage_cyc; + s32 socket; + s32 cpu; + int parallelism; + int mem_type_off; u8 cpumode; u8 depth; - int mem_type_off; struct simd_flags simd_flags; /* We are added by hists__add_dummy_entry. 
*/ @@ -242,7 +269,7 @@ struct hist_entry { bool leaf; char level; - u8 filtered; + filter_mask_t filtered; u16 callchain_size; union { @@ -368,6 +395,7 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows); struct hist_entry *hists__get_entry(struct hists *hists, int idx); u64 hists__total_period(struct hists *hists); +u64 hists__total_latency(struct hists *hists); void hists__reset_stats(struct hists *hists); void hists__inc_stats(struct hists *hists, struct hist_entry *h); void hists__inc_nr_events(struct hists *hists); @@ -384,11 +412,13 @@ void hists__filter_by_dso(struct hists *hists); void hists__filter_by_thread(struct hists *hists); void hists__filter_by_symbol(struct hists *hists); void hists__filter_by_socket(struct hists *hists); +void hists__filter_by_parallelism(struct hists *hists); static inline bool hists__has_filter(struct hists *hists) { return hists->thread_filter || hists->dso_filter || - hists->symbol_filter_str || (hists->socket_filter > -1); + hists->symbol_filter_str || (hists->socket_filter > -1) || + hists->parallelism_filter; } u16 hists__col_len(struct hists *hists, enum hist_column col); @@ -547,27 +577,37 @@ extern struct perf_hpp_fmt perf_hpp__format[]; enum { /* Matches perf_hpp__format array. */ PERF_HPP__OVERHEAD, + PERF_HPP__LATENCY, PERF_HPP__OVERHEAD_SYS, PERF_HPP__OVERHEAD_US, PERF_HPP__OVERHEAD_GUEST_SYS, PERF_HPP__OVERHEAD_GUEST_US, PERF_HPP__OVERHEAD_ACC, + PERF_HPP__LATENCY_ACC, PERF_HPP__SAMPLES, PERF_HPP__PERIOD, PERF_HPP__WEIGHT1, PERF_HPP__WEIGHT2, PERF_HPP__WEIGHT3, + PERF_HPP__MEM_STAT_OP, + PERF_HPP__MEM_STAT_CACHE, + PERF_HPP__MEM_STAT_MEMORY, + PERF_HPP__MEM_STAT_SNOOP, + PERF_HPP__MEM_STAT_DTLB, PERF_HPP__MAX_INDEX }; void perf_hpp__init(void); -void perf_hpp__cancel_cumulate(void); +void perf_hpp__cancel_cumulate(struct evlist *evlist); +void perf_hpp__cancel_latency(struct evlist *evlist); void perf_hpp__setup_output_field(struct perf_hpp_list *list); void perf_hpp__reset_output_field(struct perf_hpp_list *list); void perf_hpp__append_sort_keys(struct perf_hpp_list *list); int perf_hpp__setup_hists_formats(struct perf_hpp_list *list, struct evlist *evlist); +int perf_hpp__alloc_mem_stats(struct perf_hpp_list *list, + struct evlist *evlist); bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); @@ -580,6 +620,7 @@ bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_parallelism_entry(struct perf_hpp_fmt *fmt); struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt); @@ -606,6 +647,7 @@ void hists__reset_column_width(struct hists *hists); enum perf_hpp_fmt_type { PERF_HPP_FMT_TYPE__RAW, PERF_HPP_FMT_TYPE__PERCENT, + PERF_HPP_FMT_TYPE__LATENCY, PERF_HPP_FMT_TYPE__AVERAGE, }; @@ -621,6 +663,9 @@ int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he, hpp_field_fn get_field, const char *fmtstr, hpp_snprint_fn print_fn, enum perf_hpp_fmt_type fmtype); +int hpp__fmt_mem_stat(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he, enum mem_stat_type mst, + const char *fmtstr, hpp_snprint_fn print_fn); static inline void advance_hpp(struct perf_hpp *hpp, int inc) { diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c index 4acb9bb19b84..c25e7296f1c1 100644 --- a/tools/perf/util/hwmon_pmu.c +++ b/tools/perf/util/hwmon_pmu.c @@ -11,13 +11,13 @@ #include 
<sys/types.h> #include <assert.h> #include <ctype.h> -#include <dirent.h> #include <fcntl.h> #include <stddef.h> #include <stdlib.h> #include <string.h> #include <api/fs/fs.h> #include <api/io.h> +#include <api/io_dir.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/zalloc.h> @@ -108,20 +108,6 @@ struct hwmon_pmu { }; /** - * union hwmon_pmu_event_key: Key for hwmon_pmu->events as such each key - * represents an event. - * - * Related hwmon files start <type><number> that this key represents. - */ -union hwmon_pmu_event_key { - long type_and_num; - struct { - int num :16; - enum hwmon_type type :8; - }; -}; - -/** * struct hwmon_pmu_event_value: Value in hwmon_pmu->events. * * Hwmon files are of the form <type><number>_<item> and may have a suffix @@ -249,31 +235,22 @@ static void fix_name(char *p) static int hwmon_pmu__read_events(struct hwmon_pmu *pmu) { - DIR *dir; - struct dirent *ent; - int dup_fd, err = 0; + int err = 0; struct hashmap_entry *cur, *tmp; size_t bkt; + struct io_dirent64 *ent; + struct io_dir dir; if (pmu->pmu.sysfs_aliases_loaded) return 0; - /* - * Use a dup-ed fd as closedir will close it. Use openat so that the - * directory contents are refreshed. - */ - dup_fd = openat(pmu->hwmon_dir_fd, ".", O_DIRECTORY); - - if (dup_fd == -1) - return -ENOMEM; + /* Use openat so that the directory contents are refreshed. */ + io_dir__init(&dir, openat(pmu->hwmon_dir_fd, ".", O_CLOEXEC | O_DIRECTORY | O_RDONLY)); - dir = fdopendir(dup_fd); - if (!dir) { - close(dup_fd); - return -ENOMEM; - } + if (dir.dirfd < 0) + return -ENOENT; - while ((ent = readdir(dir)) != NULL) { + while ((ent = io_dir__readdir(&dir)) != NULL) { enum hwmon_type type; int number; enum hwmon_item item; @@ -361,7 +338,7 @@ static int hwmon_pmu__read_events(struct hwmon_pmu *pmu) pmu->pmu.sysfs_aliases_loaded = true; err_out: - closedir(dir); + close(dir.dirfd); return err; } @@ -369,42 +346,43 @@ struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, int hwmon_dir, const cha { char buf[32]; struct hwmon_pmu *hwm; + __u32 type = PERF_PMU_TYPE_HWMON_START + strtoul(sysfs_name + 5, NULL, 10); + + if (type > PERF_PMU_TYPE_HWMON_END) { + pr_err("Unable to encode hwmon type from %s in valid PMU type\n", sysfs_name); + return NULL; + } + + snprintf(buf, sizeof(buf), "hwmon_%s", name); + fix_name(buf + 6); hwm = zalloc(sizeof(*hwm)); if (!hwm) return NULL; - hwm->hwmon_dir_fd = hwmon_dir; - hwm->pmu.type = PERF_PMU_TYPE_HWMON_START + strtoul(sysfs_name + 5, NULL, 10); - if (hwm->pmu.type > PERF_PMU_TYPE_HWMON_END) { - pr_err("Unable to encode hwmon type from %s in valid PMU type\n", sysfs_name); - goto err_out; + if (perf_pmu__init(&hwm->pmu, type, buf) != 0) { + perf_pmu__delete(&hwm->pmu); + return NULL; } - snprintf(buf, sizeof(buf), "hwmon_%s", name); - fix_name(buf + 6); - hwm->pmu.name = strdup(buf); - if (!hwm->pmu.name) - goto err_out; + + hwm->hwmon_dir_fd = hwmon_dir; hwm->pmu.alias_name = strdup(sysfs_name); - if (!hwm->pmu.alias_name) - goto err_out; + if (!hwm->pmu.alias_name) { + perf_pmu__delete(&hwm->pmu); + return NULL; + } hwm->pmu.cpus = perf_cpu_map__new("0"); - if (!hwm->pmu.cpus) - goto err_out; + if (!hwm->pmu.cpus) { + perf_pmu__delete(&hwm->pmu); + return NULL; + } INIT_LIST_HEAD(&hwm->pmu.format); - INIT_LIST_HEAD(&hwm->pmu.aliases); INIT_LIST_HEAD(&hwm->pmu.caps); hashmap__init(&hwm->events, hwmon_pmu__event_hashmap_hash, hwmon_pmu__event_hashmap_equal, /*ctx=*/NULL); list_add_tail(&hwm->pmu.list, pmus); return &hwm->pmu; -err_out: - free((char *)hwm->pmu.name); - 
free(hwm->pmu.alias_name); - free(hwm); - close(hwmon_dir); - return NULL; } void hwmon_pmu__exit(struct perf_pmu *pmu) @@ -716,8 +694,8 @@ int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_inf int perf_pmus__read_hwmon_pmus(struct list_head *pmus) { char *line = NULL; - DIR *class_hwmon_dir; - struct dirent *class_hwmon_ent; + struct io_dirent64 *class_hwmon_ent; + struct io_dir class_hwmon_dir; char buf[PATH_MAX]; const char *sysfs = sysfs__mountpoint(); @@ -725,11 +703,12 @@ int perf_pmus__read_hwmon_pmus(struct list_head *pmus) return 0; scnprintf(buf, sizeof(buf), "%s/class/hwmon/", sysfs); - class_hwmon_dir = opendir(buf); - if (!class_hwmon_dir) + io_dir__init(&class_hwmon_dir, open(buf, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + + if (class_hwmon_dir.dirfd < 0) return 0; - while ((class_hwmon_ent = readdir(class_hwmon_dir)) != NULL) { + while ((class_hwmon_ent = io_dir__readdir(&class_hwmon_dir)) != NULL) { size_t line_len; int hwmon_dir, name_fd; struct io io; @@ -759,7 +738,7 @@ int perf_pmus__read_hwmon_pmus(struct list_head *pmus) close(name_fd); } free(line); - closedir(class_hwmon_dir); + close(class_hwmon_dir.dirfd); return 0; } diff --git a/tools/perf/util/hwmon_pmu.h b/tools/perf/util/hwmon_pmu.h index 882566846df4..b3329774d2b2 100644 --- a/tools/perf/util/hwmon_pmu.h +++ b/tools/perf/util/hwmon_pmu.h @@ -91,6 +91,22 @@ enum hwmon_item { HWMON_ITEM__MAX, }; +/** + * union hwmon_pmu_event_key: Key for hwmon_pmu->events as such each key + * represents an event. + * union is exposed for testing to ensure problems are avoided on big + * endian machines. + * + * Related hwmon files start <type><number> that this key represents. + */ +union hwmon_pmu_event_key { + long type_and_num; + struct { + int num :16; + enum hwmon_type type :8; + }; +}; + bool perf_pmu__is_hwmon(const struct perf_pmu *pmu); bool evsel__is_hwmon(const struct evsel *evsel); diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index 178b00205fe6..89979ca23c3f 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -132,4 +132,8 @@ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif +#ifndef SYM_PIC_ALIAS +#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_T_FUNC, SYM_L_GLOBAL) +#endif + #endif /* PERF_LINUX_LINKAGE_H_ */ diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index a7c589fecb98..3625c6224750 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -275,12 +275,13 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, int ret; struct intel_bts *bts = btsq->bts; union perf_event event; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; if (bts->synth_opts.initial_skip && bts->num_events++ <= bts->synth_opts.initial_skip) return 0; + perf_sample__init(&sample, /*all=*/true); sample.ip = le64_to_cpu(branch->from); sample.cpumode = intel_bts_cpumode(bts, sample.ip); sample.pid = btsq->pid; @@ -312,6 +313,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, pr_err("Intel BTS: failed to deliver branch event, error %d\n", ret); + perf_sample__exit(&sample); return ret; } diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 30be6dfe09eb..9b1011fe4826 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -127,6 +127,7 @@ struct intel_pt { bool single_pebs; bool sample_pebs; + int pebs_data_src_fmt; struct evsel *pebs_evsel; u64 
evt_sample_type; @@ -175,6 +176,7 @@ enum switch_state { struct intel_pt_pebs_event { struct evsel *evsel; u64 id; + int data_src_fmt; }; struct intel_pt_queue { @@ -1764,12 +1766,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct dummy_branch_stack { u64 nr; u64 hw_idx; struct branch_entry entries; } dummy_bs; + int ret; if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) return 0; @@ -1777,6 +1780,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_b_sample(pt, ptq, event, &sample); sample.id = ptq->pt->branches_id; @@ -1806,8 +1810,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; } - return intel_pt_deliver_synth_event(pt, event, &sample, + ret = intel_pt_deliver_synth_event(pt, event, &sample, pt->branches_sample_type); + perf_sample__exit(&sample); + return ret; } static void intel_pt_prep_sample(struct intel_pt *pt, @@ -1835,11 +1841,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_sample(pt, ptq, event, &sample); sample.id = ptq->pt->instructions_id; @@ -1859,16 +1867,19 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) ptq->last_insn_cnt = ptq->state->tot_insn_cnt; - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->instructions_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->instructions_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; u64 period = 0; + int ret; if (ptq->sample_ipc) period = ptq->ipc_cyc_cnt - ptq->last_cy_cyc_cnt; @@ -1876,6 +1887,7 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq) if (!period || intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_sample(pt, ptq, event, &sample); sample.id = ptq->pt->cycles_id; @@ -1887,25 +1899,31 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq) ptq->last_cy_insn_cnt = ptq->ipc_insn_cnt; ptq->last_cy_cyc_cnt = ptq->ipc_cyc_cnt; - return intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_sample(pt, ptq, event, &sample); sample.id = ptq->pt->transactions_id; sample.stream_id = ptq->pt->transactions_id; - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->transactions_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, 
&sample, + pt->transactions_sample_type); + perf_sample__exit(&sample); + return ret; } static void intel_pt_prep_p_sample(struct intel_pt *pt, @@ -1953,15 +1971,17 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_cbr raw; u32 flags; + int ret; if (intel_pt_skip_cbr_event(pt)) return 0; ptq->cbr_seen = ptq->state->cbr; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->cbr_id; @@ -1975,20 +1995,24 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_psb raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->psb_id; @@ -2001,20 +2025,24 @@ static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_mwait raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->mwait_id; @@ -2026,20 +2054,24 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_pwre raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->pwre_id; @@ -2051,20 +2083,24 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int 
intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_exstop raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->exstop_id; @@ -2076,20 +2112,24 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_pwrx raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->pwrx_id; @@ -2101,8 +2141,10 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } /* @@ -2232,19 +2274,160 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack, } } -static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id) +#define P(a, b) PERF_MEM_S(a, b) +#define OP_LH (P(OP, LOAD) | P(LVL, HIT)) +#define LEVEL(x) P(LVLNUM, x) +#define REM P(REMOTE, REMOTE) +#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) + +#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) + +/* Based on kernel __intel_pmu_pebs_data_source_grt() and pebs_data_source */ +static const u64 pebs_data_source_grt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* RAM hit|SNP Hit */ + OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* Remote L3 hit|SNP Hit */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* RAM hit|SNP None or Miss */ + OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* Remote RAM hit|SNP None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, 
NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel __intel_pmu_pebs_data_source_cmt() and pebs_data_source */ +static const u64 pebs_data_source_cmt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* L3 hit|SNP Miss */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* RAM hit|SNP None */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE), /* Remote RAM hit|SNP None */ + OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD), /* Remote RAM hit|SNP Fwd */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM), /* Remote RAM hit|SNP HitM */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel pebs_set_tlb_lock() */ +static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) +{ + /* + * TLB access + * 0 = did not miss 2nd level TLB + * 1 = missed 2nd level TLB + */ + if (tlb) + *val |= P(TLB, MISS) | P(TLB, L2); + else + *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + /* locked prefix */ + if (lock) + *val |= P(LOCK, LOCKED); +} + +/* Based on kernel __grt_latency_data() */ +static u64 intel_pt_grt_latency_data(u8 dse, bool tlb, bool lock, bool blk, + const u64 *pebs_data_source) +{ + u64 val; + + dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK; + val = pebs_data_source[dse]; + + pebs_set_tlb_lock(&val, tlb, lock); + + if (blk) + val |= P(BLK, DATA); + else + val |= P(BLK, NA); + + return val; +} + +/* Default value for data source */ +#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ + PERF_MEM_S(LVL, NA) |\ + PERF_MEM_S(SNOOP, NA) |\ + PERF_MEM_S(LOCK, NA) |\ + PERF_MEM_S(TLB, NA) |\ + PERF_MEM_S(LVLNUM, NA)) + +enum DATA_SRC_FORMAT { + DATA_SRC_FORMAT_ERR = -1, + DATA_SRC_FORMAT_NA = 0, + DATA_SRC_FORMAT_GRT = 1, + DATA_SRC_FORMAT_CMT = 2, +}; + +/* Based on kernel grt_latency_data() and cmt_latency_data() */ +static u64 intel_pt_get_data_src(u64 mem_aux_info, int data_src_fmt) +{ + switch (data_src_fmt) { + case DATA_SRC_FORMAT_GRT: { + union { + u64 val; + struct { + unsigned int dse:4; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:25; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_grt); + } + case DATA_SRC_FORMAT_CMT: { + union { + u64 val; + struct { + unsigned int dse:5; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:24; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_cmt); + } + default: + return PERF_MEM_NA; + } +} + +static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, + u64 id, int data_src_fmt) { const struct intel_pt_blk_items *items = 
&ptq->state->items; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; union perf_event *event = ptq->event_buf; struct intel_pt *pt = ptq->pt; u64 sample_type = evsel->core.attr.sample_type; u8 cpumode; - u64 regs[8 * sizeof(sample.intr_regs.mask)]; + u64 regs[8 * sizeof(sample.intr_regs->mask)]; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_a_sample(ptq, event, &sample); sample.id = id; @@ -2291,15 +2474,16 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse items->mask[INTEL_PT_XMM_POS])) { u64 regs_mask = evsel->core.attr.sample_regs_intr; u64 *pos; + struct regs_dump *intr_regs = perf_sample__intr_regs(&sample); - sample.intr_regs.abi = items->is_32_bit ? + intr_regs->abi = items->is_32_bit ? PERF_SAMPLE_REGS_ABI_32 : PERF_SAMPLE_REGS_ABI_64; - sample.intr_regs.regs = regs; + intr_regs->regs = regs; - pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask); + pos = intel_pt_add_gp_regs(intr_regs, regs, items, regs_mask); - intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask); + intel_pt_add_xmm(intr_regs, pos, items, regs_mask); } if (sample_type & PERF_SAMPLE_BRANCH_STACK) { @@ -2350,6 +2534,18 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse } } + if (sample_type & PERF_SAMPLE_DATA_SRC) { + if (items->has_mem_aux_info && data_src_fmt) { + if (data_src_fmt < 0) { + pr_err("Intel PT missing data_src info\n"); + return -1; + } + sample.data_src = intel_pt_get_data_src(items->mem_aux_info, data_src_fmt); + } else { + sample.data_src = PERF_MEM_NA; + } + } + if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) { u64 ax = items->has_rax ? items->rax : 0; /* Refer kernel's intel_hsw_transaction() */ @@ -2361,16 +2557,19 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse sample.transaction = txn; } - return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; struct evsel *evsel = pt->pebs_evsel; + int data_src_fmt = pt->pebs_data_src_fmt; u64 id = evsel->core.id[0]; - return intel_pt_do_synth_pebs_sample(ptq, evsel, id); + return intel_pt_do_synth_pebs_sample(ptq, evsel, id, data_src_fmt); } static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) @@ -2395,7 +2594,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) hw_id); return intel_pt_synth_single_pebs_sample(ptq); } - err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id); + err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id, pe->data_src_fmt); if (err) return err; } @@ -2407,16 +2606,17 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct { struct perf_synth_intel_evt cfe; struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS]; } raw; - int i; + int i, ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->evt_id; @@ -2438,20 +2638,24 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq) ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd); sample.raw_data = 
perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->evt_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->evt_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_iflag_chg raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->iflag_chg_id; @@ -2471,8 +2675,10 @@ static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->iflag_chg_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->iflag_chg_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, @@ -3355,6 +3561,49 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +/* + * Events with data_src are identified by L1_Hit_Indication + * refer https://github.com/intel/perfmon + */ +static int intel_pt_data_src_fmt(struct intel_pt *pt, struct evsel *evsel) +{ + struct perf_env *env = pt->machine->env; + int fmt = DATA_SRC_FORMAT_NA; + + if (!env->cpuid) + return DATA_SRC_FORMAT_ERR; + + /* + * PEBS-via-PT is only supported on E-core non-hybrid. Of those only + * Gracemont and Crestmont have data_src. Check for: + * Alderlake N (Gracemont) + * Sierra Forest (Crestmont) + * Grand Ridge (Crestmont) + */ + + if (!strncmp(env->cpuid, "GenuineIntel,6,190,", 19)) + fmt = DATA_SRC_FORMAT_GRT; + + if (!strncmp(env->cpuid, "GenuineIntel,6,175,", 19) || + !strncmp(env->cpuid, "GenuineIntel,6,182,", 19)) + fmt = DATA_SRC_FORMAT_CMT; + + if (fmt == DATA_SRC_FORMAT_NA) + return fmt; + + /* + * Only data_src events are: + * mem-loads event=0xd0,umask=0x5 + * mem-stores event=0xd0,umask=0x6 + */ + if (evsel->core.attr.type == PERF_TYPE_RAW && + ((evsel->core.attr.config & 0xffff) == 0x5d0 || + (evsel->core.attr.config & 0xffff) == 0x6d0)) + return fmt; + + return DATA_SRC_FORMAT_NA; +} + static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) @@ -3375,6 +3624,7 @@ static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, ptq->pebs[hw_id].evsel = evsel; ptq->pebs[hw_id].id = sample->id; + ptq->pebs[hw_id].data_src_fmt = intel_pt_data_src_fmt(pt, evsel); return 0; } @@ -3924,6 +4174,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) } pt->single_pebs = true; pt->sample_pebs = true; + pt->pebs_data_src_fmt = intel_pt_data_src_fmt(pt, evsel); pt->pebs_evsel = evsel; } } diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c index 50a3c3e07160..3b92ebf5c112 100644 --- a/tools/perf/util/intel-tpebs.c +++ b/tools/perf/util/intel-tpebs.c @@ -3,7 +3,7 @@ * intel_tpebs.c: Intel TPEBS support */ - +#include <api/fs/fs.h> #include <sys/param.h> #include <subcmd/run-command.h> #include <thread.h> @@ -12,13 +12,17 @@ #include <linux/zalloc.h> #include <linux/err.h> #include "sample.h" +#include "counts.h" #include "debug.h" #include "evlist.h" #include "evsel.h" +#include "mutex.h" #include "session.h" +#include "stat.h" #include "tool.h" 
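As a side note on intel_pt_data_src_fmt() above: the cpuid strings encode vendor, family and model ("GenuineIntel,6,190" is Alderlake N), and the config test relies on the raw event encoding packing the umask into bits 15:8 and the event code into bits 7:0. A hypothetical standalone version of that check (the helper name is illustrative only, not part of the patch):

	/* Sketch: does a raw config select mem-loads or mem-stores? */
	#include <stdbool.h>
	#include <linux/types.h>

	static bool config_is_mem_loads_or_stores(__u64 config)
	{
		/* Low 16 bits of a raw config are umask << 8 | event code. */
		__u64 event_umask = config & 0xffff;

		return event_umask == 0x5d0 ||	/* mem-loads: event=0xd0,umask=0x5 */
		       event_umask == 0x6d0;	/* mem-stores: event=0xd0,umask=0x6 */
	}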
#include "cpumap.h" #include "metricgroup.h" +#include "stat.h" #include <sys/stat.h> #include <sys/file.h> #include <poll.h> @@ -27,95 +31,155 @@ #define PERF_DATA "-" bool tpebs_recording; -static pid_t tpebs_pid = -1; -static size_t tpebs_event_size; +enum tpebs_mode tpebs_mode; static LIST_HEAD(tpebs_results); static pthread_t tpebs_reader_thread; -static struct child_process *tpebs_cmd; +static struct child_process tpebs_cmd; +static int control_fd[2], ack_fd[2]; +static struct mutex tpebs_mtx; struct tpebs_retire_lat { struct list_head nd; - /* Event name */ - const char *name; - /* Event name with the TPEBS modifier R */ - const char *tpebs_name; - /* Count of retire_latency values found in sample data */ - size_t count; - /* Sum of all the retire_latency values in sample data */ - int sum; - /* Average of retire_latency, val = sum / count */ - double val; + /** @evsel: The evsel that opened the retire_lat event. */ + struct evsel *evsel; + /** @event: Event passed to perf record. */ + char *event; + /** @stats: Recorded retirement latency stats. */ + struct stats stats; + /** @last: Last retirement latency read. */ + uint64_t last; + /* Has the event been sent to perf record? */ + bool started; }; -static int get_perf_record_args(const char **record_argv, char buf[], - const char *cpumap_buf) +static void tpebs_mtx_init(void) +{ + mutex_init(&tpebs_mtx); +} + +static struct mutex *tpebs_mtx_get(void) { - struct tpebs_retire_lat *e; - int i = 0; + static pthread_once_t tpebs_mtx_once = PTHREAD_ONCE_INIT; + + pthread_once(&tpebs_mtx_once, tpebs_mtx_init); + return &tpebs_mtx; +} - pr_debug("tpebs: Prepare perf record for retire_latency\n"); +static struct tpebs_retire_lat *tpebs_retire_lat__find(struct evsel *evsel) + EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get()); + +static int evsel__tpebs_start_perf_record(struct evsel *evsel) +{ + const char **record_argv; + int tpebs_event_size = 0, i = 0, ret; + char control_fd_buf[32]; + char cpumap_buf[50]; + struct tpebs_retire_lat *t; + + list_for_each_entry(t, &tpebs_results, nd) + tpebs_event_size++; + + record_argv = malloc((10 + 2 * tpebs_event_size) * sizeof(*record_argv)); + if (!record_argv) + return -ENOMEM; record_argv[i++] = "perf"; record_argv[i++] = "record"; record_argv[i++] = "-W"; record_argv[i++] = "--synth=no"; - record_argv[i++] = buf; - if (!cpumap_buf) { - pr_err("tpebs: Require cpumap list to run sampling\n"); - return -ECANCELED; - } - /* Use -C when cpumap_buf is not "-1" */ - if (strcmp(cpumap_buf, "-1")) { + scnprintf(control_fd_buf, sizeof(control_fd_buf), "--control=fd:%d,%d", + control_fd[0], ack_fd[1]); + record_argv[i++] = control_fd_buf; + + record_argv[i++] = "-o"; + record_argv[i++] = PERF_DATA; + + if (!perf_cpu_map__is_any_cpu_or_is_empty(evsel->evlist->core.user_requested_cpus)) { + cpu_map__snprint(evsel->evlist->core.user_requested_cpus, cpumap_buf, + sizeof(cpumap_buf)); record_argv[i++] = "-C"; record_argv[i++] = cpumap_buf; } - list_for_each_entry(e, &tpebs_results, nd) { + list_for_each_entry(t, &tpebs_results, nd) { record_argv[i++] = "-e"; - record_argv[i++] = e->name; + record_argv[i++] = t->event; } + record_argv[i++] = NULL; + assert(i == 10 + 2 * tpebs_event_size || i == 8 + 2 * tpebs_event_size); + /* Note, no workload given so system wide is implied. 
*/ + + assert(tpebs_cmd.pid == 0); + tpebs_cmd.argv = record_argv; + tpebs_cmd.out = -1; + ret = start_command(&tpebs_cmd); + zfree(&tpebs_cmd.argv); + list_for_each_entry(t, &tpebs_results, nd) + t->started = true; - record_argv[i++] = "-o"; - record_argv[i++] = PERF_DATA; - - return 0; + return ret; } -static int prepare_run_command(const char **argv) +static bool is_child_pid(pid_t parent, pid_t child) { - tpebs_cmd = zalloc(sizeof(struct child_process)); - if (!tpebs_cmd) - return -ENOMEM; - tpebs_cmd->argv = argv; - tpebs_cmd->out = -1; - return 0; + if (parent < 0 || child < 0) + return false; + + while (true) { + char path[PATH_MAX]; + char line[256]; + FILE *fp; + +new_child: + if (parent == child) + return true; + + if (child <= 0) + return false; + + scnprintf(path, sizeof(path), "%s/%d/status", procfs__mountpoint(), child); + fp = fopen(path, "r"); + if (!fp) { + /* Presumably the process went away. Assume not a child. */ + return false; + } + while (fgets(line, sizeof(line), fp) != NULL) { + if (strncmp(line, "PPid:", 5) == 0) { + fclose(fp); + if (sscanf(line + 5, "%d", &child) != 1) { + /* Unexpected error parsing. */ + return false; + } + goto new_child; + } + } + /* Unexpected EOF. */ + fclose(fp); + return false; + } } -static int start_perf_record(int control_fd[], int ack_fd[], - const char *cpumap_buf) +static bool should_ignore_sample(const struct perf_sample *sample, const struct tpebs_retire_lat *t) { - const char **record_argv; - int ret; - char buf[32]; + pid_t workload_pid, sample_pid = sample->pid; - scnprintf(buf, sizeof(buf), "--control=fd:%d,%d", control_fd[0], ack_fd[1]); + /* + * During evlist__purge the evlist will be removed prior to the + * evsel__exit calling evsel__tpebs_close and taking the + * tpebs_mtx. Avoid a segfault by ignoring samples in this case. + */ + if (t->evsel->evlist == NULL) + return true; - record_argv = calloc(12 + 2 * tpebs_event_size, sizeof(char *)); - if (!record_argv) - return -ENOMEM; + workload_pid = t->evsel->evlist->workload.pid; + if (workload_pid < 0 || workload_pid == sample_pid) + return false; - ret = get_perf_record_args(record_argv, buf, cpumap_buf); - if (ret) - goto out; + if (!t->evsel->core.attr.inherit) + return true; - ret = prepare_run_command(record_argv); - if (ret) - goto out; - ret = start_command(tpebs_cmd); -out: - free(record_argv); - return ret; + return !is_child_pid(workload_pid, sample_pid); } static int process_sample_event(const struct perf_tool *tool __maybe_unused, @@ -124,27 +188,32 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused, struct evsel *evsel, struct machine *machine __maybe_unused) { - int ret = 0; - const char *evname; struct tpebs_retire_lat *t; - evname = evsel__name(evsel); - + mutex_lock(tpebs_mtx_get()); + if (tpebs_cmd.pid == 0) { + /* Record has terminated. */ + mutex_unlock(tpebs_mtx_get()); + return 0; + } + t = tpebs_retire_lat__find(evsel); + if (!t) { + mutex_unlock(tpebs_mtx_get()); + return -EINVAL; + } + if (should_ignore_sample(sample, t)) { + mutex_unlock(tpebs_mtx_get()); + return 0; + } /* * Need to handle per core results? We are assuming average retire * latency value will be used. Save the number of samples and the sum of * retire latency value for each event. 
*/ - list_for_each_entry(t, &tpebs_results, nd) { - if (!strcmp(evname, t->name)) { - t->count += 1; - t->sum += sample->retire_lat; - t->val = (double) t->sum / t->count; - break; - } - } - - return ret; + t->last = sample->retire_lat; + update_stats(&t->stats, sample->retire_lat); + mutex_unlock(tpebs_mtx_get()); + return 0; } static int process_feature_event(struct perf_session *session, @@ -155,14 +224,13 @@ static int process_feature_event(struct perf_session *session, return 0; } -static void *__sample_reader(void *arg) +static void *__sample_reader(void *arg __maybe_unused) { - struct child_process *child = arg; struct perf_session *session; struct perf_data data = { .mode = PERF_DATA_MODE_READ, .path = PERF_DATA, - .file.fd = child->out, + .file.fd = tpebs_cmd.out, }; struct perf_tool tool; @@ -180,94 +248,277 @@ static void *__sample_reader(void *arg) return NULL; } +static int tpebs_send_record_cmd(const char *msg) EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get()) +{ + struct pollfd pollfd = { .events = POLLIN, }; + int ret, len, retries = 0; + char ack_buf[8]; + + /* Check if the command exited before the send, done with the lock held. */ + if (tpebs_cmd.pid == 0) + return 0; + + /* + * Let go of the lock while sending/receiving as blocking can starve the + * sample reading thread. + */ + mutex_unlock(tpebs_mtx_get()); + + /* Send perf record command.*/ + len = strlen(msg); + ret = write(control_fd[1], msg, len); + if (ret != len) { + pr_err("perf record control write control message '%s' failed\n", msg); + ret = -EPIPE; + goto out; + } + + if (!strcmp(msg, EVLIST_CTL_CMD_STOP_TAG)) { + ret = 0; + goto out; + } + + /* Wait for an ack. */ + pollfd.fd = ack_fd[0]; + + /* + * We need this poll to ensure the ack_fd PIPE will not hang + * when perf record failed for any reason. The timeout value + * 3000ms is an empirical selection. + */ +again: + if (!poll(&pollfd, 1, 500)) { + if (check_if_command_finished(&tpebs_cmd)) { + ret = 0; + goto out; + } + + if (retries++ < 6) + goto again; + pr_err("tpebs failed: perf record ack timeout for '%s'\n", msg); + ret = -ETIMEDOUT; + goto out; + } + + if (!(pollfd.revents & POLLIN)) { + if (check_if_command_finished(&tpebs_cmd)) { + ret = 0; + goto out; + } + + pr_err("tpebs failed: did not receive an ack for '%s'\n", msg); + ret = -EPIPE; + goto out; + } + + ret = read(ack_fd[0], ack_buf, sizeof(ack_buf)); + if (ret > 0) + ret = strcmp(ack_buf, EVLIST_CTL_CMD_ACK_TAG); + else + pr_err("tpebs: perf record control ack failed\n"); +out: + /* Re-take lock as expected by caller. */ + mutex_lock(tpebs_mtx_get()); + return ret; +} + /* * tpebs_stop - stop the sample data read thread and the perf record process. */ -static int tpebs_stop(void) +static int tpebs_stop(void) EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get()) { int ret = 0; /* Like tpebs_start, we should only run tpebs_end once. */ - if (tpebs_pid != -1) { - kill(tpebs_cmd->pid, SIGTERM); - tpebs_pid = -1; + if (tpebs_cmd.pid != 0) { + tpebs_send_record_cmd(EVLIST_CTL_CMD_STOP_TAG); + tpebs_cmd.pid = 0; + mutex_unlock(tpebs_mtx_get()); pthread_join(tpebs_reader_thread, NULL); - close(tpebs_cmd->out); - ret = finish_command(tpebs_cmd); + mutex_lock(tpebs_mtx_get()); + close(control_fd[0]); + close(control_fd[1]); + close(ack_fd[0]); + close(ack_fd[1]); + close(tpebs_cmd.out); + ret = finish_command(&tpebs_cmd); + tpebs_cmd.pid = 0; if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL) ret = 0; } return ret; } -/* - * tpebs_start - start tpebs execution. 
- * @evsel_list: retire_latency evsels in this list will be selected and sampled - * to get the average retire_latency value. - * - * This function will be called from evlist level later when evlist__open() is - * called consistently. +/** + * evsel__tpebs_event() - Create string event encoding to pass to `perf record`. */ -int tpebs_start(struct evlist *evsel_list) +static int evsel__tpebs_event(struct evsel *evsel, char **event) { - int ret = 0; - struct evsel *evsel; - char cpumap_buf[50]; + char *name, *modifier; + int ret; + + name = strdup(evsel->name); + if (!name) + return -ENOMEM; + + modifier = strrchr(name, 'R'); + if (!modifier) { + ret = -EINVAL; + goto out; + } + *modifier = 'p'; + modifier = strchr(name, ':'); + if (!modifier) + modifier = strrchr(name, '/'); + if (!modifier) { + ret = -EINVAL; + goto out; + } + *modifier = '\0'; + if (asprintf(event, "%s/name=tpebs_event_%p/%s", name, evsel, modifier + 1) > 0) + ret = 0; + else + ret = -ENOMEM; +out: + if (ret) + pr_err("Tpebs event modifier broken '%s'\n", evsel->name); + free(name); + return ret; +} + +static struct tpebs_retire_lat *tpebs_retire_lat__new(struct evsel *evsel) +{ + struct tpebs_retire_lat *result = zalloc(sizeof(*result)); + int ret; + + if (!result) + return NULL; + + ret = evsel__tpebs_event(evsel, &result->event); + if (ret) { + free(result); + return NULL; + } + result->evsel = evsel; + return result; +} + +static void tpebs_retire_lat__delete(struct tpebs_retire_lat *r) +{ + zfree(&r->event); + free(r); +} + +static struct tpebs_retire_lat *tpebs_retire_lat__find(struct evsel *evsel) +{ + struct tpebs_retire_lat *t; + unsigned long num; + const char *evsel_name; /* - * We should only run tpebs_start when tpebs_recording is enabled. - * And we should only run it once with all the required events. + * Evsels will match for evlist with the retirement latency event. The + * name with "tpebs_event_" prefix will be present on events being read + * from `perf record`. */ - if (tpebs_pid != -1 || !tpebs_recording) + if (evsel__is_retire_lat(evsel)) { + list_for_each_entry(t, &tpebs_results, nd) { + if (t->evsel == evsel) + return t; + } + return NULL; + } + evsel_name = strstr(evsel->name, "tpebs_event_"); + if (!evsel_name) { + /* Unexpected that the perf record should have other events. */ + return NULL; + } + errno = 0; + num = strtoull(evsel_name + 12, NULL, 16); + if (errno) { + pr_err("Bad evsel for tpebs find '%s'\n", evsel->name); + return NULL; + } + list_for_each_entry(t, &tpebs_results, nd) { + if ((unsigned long)t->evsel == num) + return t; + } + return NULL; +} + +/** + * evsel__tpebs_prepare - create tpebs data structures ready for opening. + * @evsel: retire_latency evsel, all evsels on its list will be prepared. + */ +static int evsel__tpebs_prepare(struct evsel *evsel) +{ + struct evsel *pos; + struct tpebs_retire_lat *tpebs_event; + + mutex_lock(tpebs_mtx_get()); + tpebs_event = tpebs_retire_lat__find(evsel); + if (tpebs_event) { + /* evsel, or an identically named one, was already prepared. 
*/ + mutex_unlock(tpebs_mtx_get()); return 0; + } + tpebs_event = tpebs_retire_lat__new(evsel); + if (!tpebs_event) { + mutex_unlock(tpebs_mtx_get()); + return -ENOMEM; + } + list_add_tail(&tpebs_event->nd, &tpebs_results); + mutex_unlock(tpebs_mtx_get()); - cpu_map__snprint(evsel_list->core.user_requested_cpus, cpumap_buf, sizeof(cpumap_buf)); /* - * Prepare perf record for sampling event retire_latency before fork and - * prepare workload + * Eagerly prepare all other evsels on the list to try to ensure that by + * open they are all known. */ - evlist__for_each_entry(evsel_list, evsel) { - int i; - char *name; - struct tpebs_retire_lat *new; + evlist__for_each_entry(evsel->evlist, pos) { + int ret; - if (!evsel->retire_lat) + if (pos == evsel || !pos->retire_lat) continue; - pr_debug("tpebs: Retire_latency of event %s is required\n", evsel->name); - for (i = strlen(evsel->name) - 1; i > 0; i--) { - if (evsel->name[i] == 'R') - break; - } - if (i <= 0 || evsel->name[i] != 'R') { - ret = -1; - goto err; - } + ret = evsel__tpebs_prepare(pos); + if (ret) + return ret; + } + return 0; +} - name = strdup(evsel->name); - if (!name) { - ret = -ENOMEM; - goto err; - } - name[i] = 'p'; +/** + * evsel__tpebs_open - starts tpebs execution. + * @evsel: retire_latency evsel, all evsels on its list will be selected. Each + * evsel is sampled to get the average retire_latency value. + */ +int evsel__tpebs_open(struct evsel *evsel) +{ + int ret; + bool tpebs_empty; - new = zalloc(sizeof(*new)); - if (!new) { - ret = -1; - zfree(name); - goto err; - } - new->name = name; - new->tpebs_name = evsel->name; - list_add_tail(&new->nd, &tpebs_results); - tpebs_event_size += 1; + /* We should only run tpebs_start when tpebs_recording is enabled. */ + if (!tpebs_recording) + return 0; + /* Only start the events once. */ + if (tpebs_cmd.pid != 0) { + struct tpebs_retire_lat *t; + bool valid; + + mutex_lock(tpebs_mtx_get()); + t = tpebs_retire_lat__find(evsel); + valid = t && t->started; + mutex_unlock(tpebs_mtx_get()); + /* May fail as the event wasn't started. */ + return valid ? 0 : -EBUSY; } - if (tpebs_event_size > 0) { - struct pollfd pollfd = { .events = POLLIN, }; - int control_fd[2], ack_fd[2], len; - char ack_buf[8]; + ret = evsel__tpebs_prepare(evsel); + if (ret) + return ret; + mutex_lock(tpebs_mtx_get()); + tpebs_empty = list_empty(&tpebs_results); + if (!tpebs_empty) { /*Create control and ack fd for --control*/ if (pipe(control_fd) < 0) { pr_err("tpebs: Failed to create control fifo"); @@ -280,153 +531,131 @@ int tpebs_start(struct evlist *evsel_list) goto out; } - ret = start_perf_record(control_fd, ack_fd, cpumap_buf); + ret = evsel__tpebs_start_perf_record(evsel); if (ret) goto out; - tpebs_pid = tpebs_cmd->pid; - if (pthread_create(&tpebs_reader_thread, NULL, __sample_reader, tpebs_cmd)) { - kill(tpebs_cmd->pid, SIGTERM); - close(tpebs_cmd->out); - pr_err("Could not create thread to process sample data.\n"); - ret = -1; - goto out; - } - /* Wait for perf record initialization.*/ - len = strlen(EVLIST_CTL_CMD_ENABLE_TAG); - ret = write(control_fd[1], EVLIST_CTL_CMD_ENABLE_TAG, len); - if (ret != len) { - pr_err("perf record control write control message failed\n"); - goto out; - } - - /* wait for an ack */ - pollfd.fd = ack_fd[0]; - - /* - * We need this poll to ensure the ack_fd PIPE will not hang - * when perf record failed for any reason. The timeout value - * 3000ms is an empirical selection. 
- */ - if (!poll(&pollfd, 1, 3000)) { - pr_err("tpebs failed: perf record ack timeout\n"); - ret = -1; - goto out; - } - if (!(pollfd.revents & POLLIN)) { - pr_err("tpebs failed: did not received an ack\n"); + if (pthread_create(&tpebs_reader_thread, /*attr=*/NULL, __sample_reader, + /*arg=*/NULL)) { + kill(tpebs_cmd.pid, SIGTERM); + close(tpebs_cmd.out); + pr_err("Could not create thread to process sample data.\n"); ret = -1; goto out; } - - ret = read(ack_fd[0], ack_buf, sizeof(ack_buf)); - if (ret > 0) - ret = strcmp(ack_buf, EVLIST_CTL_CMD_ACK_TAG); - else { - pr_err("tpebs: perf record control ack failed\n"); - goto out; - } + ret = tpebs_send_record_cmd(EVLIST_CTL_CMD_ENABLE_TAG); + } out: - close(control_fd[0]); - close(control_fd[1]); - close(ack_fd[0]); - close(ack_fd[1]); + if (ret) { + struct tpebs_retire_lat *t = tpebs_retire_lat__find(evsel); + + list_del_init(&t->nd); + tpebs_retire_lat__delete(t); } -err: - if (ret) - tpebs_delete(); + mutex_unlock(tpebs_mtx_get()); return ret; } - -int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread) +int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread) { - __u64 val; - bool found = false; + struct perf_counts_values *count, *old_count = NULL; struct tpebs_retire_lat *t; - struct perf_counts_values *count; + uint64_t val; + int ret; - /* Non reitre_latency evsel should never enter this function. */ - if (!evsel__is_retire_lat(evsel)) - return -1; + /* Only set retire_latency value to the first CPU and thread. */ + if (cpu_map_idx != 0 || thread != 0) + return 0; + + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); - /* - * Need to stop the forked record to ensure get sampled data from the - * PIPE to process and get non-zero retire_lat value for hybrid. - */ - tpebs_stop(); count = perf_counts(evsel->counts, cpu_map_idx, thread); - list_for_each_entry(t, &tpebs_results, nd) { - if (t->tpebs_name == evsel->name || - (evsel->metric_id && !strcmp(t->tpebs_name, evsel->metric_id))) { - found = true; + mutex_lock(tpebs_mtx_get()); + t = tpebs_retire_lat__find(evsel); + /* + * If reading the first tpebs result, send a ping to the record + * process. Allow the sample reader a chance to read by releasing and + * reacquiring the lock. + */ + if (t && &t->nd == tpebs_results.next) { + ret = tpebs_send_record_cmd(EVLIST_CTL_CMD_PING_TAG); + mutex_unlock(tpebs_mtx_get()); + if (ret) + return ret; + mutex_lock(tpebs_mtx_get()); + } + if (t == NULL || t->stats.n == 0) { + /* No sample data, use default. */ + if (tpebs_recording) { + pr_warning_once( + "Using precomputed retirement latency data as no samples\n"); + } + val = 0; + switch (tpebs_mode) { + case TPEBS_MODE__MIN: + val = rint(evsel->retirement_latency.min); + break; + case TPEBS_MODE__MAX: + val = rint(evsel->retirement_latency.max); + break; + default: + case TPEBS_MODE__LAST: + case TPEBS_MODE__MEAN: + val = rint(evsel->retirement_latency.mean); + break; + } + } else { + switch (tpebs_mode) { + case TPEBS_MODE__MIN: + val = t->stats.min; + break; + case TPEBS_MODE__MAX: + val = t->stats.max; + break; + case TPEBS_MODE__LAST: + val = t->last; + break; + default: + case TPEBS_MODE__MEAN: + val = rint(t->stats.mean); break; } } - - /* Set ena and run to non-zero */ - count->ena = count->run = 1; - count->lost = 0; - - if (!found) { - /* - * Set default value or 0 when retire_latency for this event is - * not found from sampling data (record_tpebs not set or 0 - * sample recorded). 
- */ - count->val = 0; - return 0; + mutex_unlock(tpebs_mtx_get()); + + if (old_count) { + count->val = old_count->val + val; + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + } else { + count->val = val; + count->run++; + count->ena++; } - - /* - * Only set retire_latency value to the first CPU and thread. - */ - if (cpu_map_idx == 0 && thread == 0) - val = rint(t->val); - else - val = 0; - - count->val = val; return 0; } -static void tpebs_retire_lat__delete(struct tpebs_retire_lat *r) -{ - zfree(&r->name); - free(r); -} - - -/* - * tpebs_delete - delete tpebs related data and stop the created thread and - * process by calling tpebs_stop(). +/** + * evsel__tpebs_close() - delete tpebs related data. If the last event, stop the + * created thread and process by calling tpebs_stop(). * - * This function is called from evlist_delete() and also from builtin-stat - * stat_handle_error(). If tpebs_start() is called from places other then perf - * stat, need to ensure tpebs_delete() is also called to safely free mem and - * close the data read thread and the forked perf record process. - * - * This function is also called in evsel__close() to be symmetric with - * tpebs_start() being called in evsel__open(). We will update this call site - * when move tpebs_start() to evlist level. + * This function is called in evsel__close() to be symmetric with + * evsel__tpebs_open() being called in evsel__open(). */ -void tpebs_delete(void) +void evsel__tpebs_close(struct evsel *evsel) { - struct tpebs_retire_lat *r, *rtmp; - - if (tpebs_pid == -1) - return; - - tpebs_stop(); + struct tpebs_retire_lat *t; - list_for_each_entry_safe(r, rtmp, &tpebs_results, nd) { - list_del_init(&r->nd); - tpebs_retire_lat__delete(r); - } + mutex_lock(tpebs_mtx_get()); + t = tpebs_retire_lat__find(evsel); + if (t) { + list_del_init(&t->nd); + tpebs_retire_lat__delete(t); - if (tpebs_cmd) { - free(tpebs_cmd); - tpebs_cmd = NULL; + if (list_empty(&tpebs_results)) + tpebs_stop(); } + mutex_unlock(tpebs_mtx_get()); } diff --git a/tools/perf/util/intel-tpebs.h b/tools/perf/util/intel-tpebs.h index 766b3fbd79f1..9475e2e6ea74 100644 --- a/tools/perf/util/intel-tpebs.h +++ b/tools/perf/util/intel-tpebs.h @@ -2,34 +2,24 @@ /* * intel_tpebs.h: Intel TEPBS support */ -#ifndef INCLUDE__PERF_INTEL_TPEBS_H__ -#define INCLUDE__PERF_INTEL_TPEBS_H__ +#ifndef __INTEL_TPEBS_H +#define __INTEL_TPEBS_H -#include "stat.h" -#include "evsel.h" +struct evlist; +struct evsel; -#ifdef HAVE_ARCH_X86_64_SUPPORT +enum tpebs_mode { + TPEBS_MODE__MEAN, + TPEBS_MODE__MIN, + TPEBS_MODE__MAX, + TPEBS_MODE__LAST, +}; extern bool tpebs_recording; -int tpebs_start(struct evlist *evsel_list); -void tpebs_delete(void); -int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread); +extern enum tpebs_mode tpebs_mode; -#else +int evsel__tpebs_open(struct evsel *evsel); +void evsel__tpebs_close(struct evsel *evsel); +int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread); -static inline int tpebs_start(struct evlist *evsel_list __maybe_unused) -{ - return 0; -} - -static inline void tpebs_delete(void) {}; - -static inline int tpebs_set_evsel(struct evsel *evsel __maybe_unused, - int cpu_map_idx __maybe_unused, - int thread __maybe_unused) -{ - return 0; -} - -#endif -#endif +#endif /* __INTEL_TPEBS_H */ diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index f23e21502bf8..624964f01b5f 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -516,7 +516,7 @@ static int 
jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) * create pseudo sample to induce dso hit increment * use first address as sample address */ - memset(&sample, 0, sizeof(sample)); + perf_sample__init(&sample, /*all=*/true); sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = pid; sample.tid = tid; @@ -535,6 +535,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); out: + perf_sample__exit(&sample); free(event); return ret; } @@ -611,7 +612,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) * create pseudo sample to induce dso hit increment * use first address as sample address */ - memset(&sample, 0, sizeof(sample)); + perf_sample__init(&sample, /*all=*/true); sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = pid; sample.tid = tid; @@ -620,12 +621,13 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) ret = perf_event__process_mmap2(tool, event, &sample, jd->machine); if (ret) - return ret; + goto out; ret = jit_inject_event(jd, event); if (!ret) build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); - +out: + perf_sample__exit(&sample); return ret; } diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index a09f7fe877df..59c94190b092 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -18,6 +18,12 @@ struct lock_filter { char **slabs; }; +struct lock_delay { + char *sym; + unsigned long addr; + unsigned long time; +}; + struct lock_stat { struct hlist_node hash_entry; struct rb_node rb; /* used for sorting */ @@ -140,14 +146,17 @@ struct lock_contention { struct machine *machine; struct hlist_head *result; struct lock_filter *filters; + struct lock_delay *delays; struct lock_contention_fails fails; struct rb_root cgroups; + void *btf; unsigned long map_nr_entries; int max_stack; int stack_skip; int aggr_mode; int owner; int nr_filtered; + int nr_delays; bool save_callstack; }; @@ -168,6 +177,8 @@ int lock_contention_stop(void); int lock_contention_read(struct lock_contention *con); int lock_contention_finish(struct lock_contention *con); +struct lock_stat *pop_owner_stack_trace(struct lock_contention *con); + #else /* !HAVE_BPF_SKEL */ static inline int lock_contention_prepare(struct lock_contention *con __maybe_unused) @@ -187,6 +198,11 @@ static inline int lock_contention_read(struct lock_contention *con __maybe_unuse return 0; } +static inline struct lock_stat *pop_owner_stack_trace(struct lock_contention *con __maybe_unused) +{ + return NULL; +} + #endif /* HAVE_BPF_SKEL */ #endif /* PERF_LOCK_CONTENTION_H */ diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c index af9a97612f9d..bbcd2ffcf4bd 100644 --- a/tools/perf/util/lzma.c +++ b/tools/perf/util/lzma.c @@ -32,7 +32,7 @@ static const char *lzma_strerror(lzma_ret ret) } } -int lzma_decompress_to_file(const char *input, int output_fd) +int lzma_decompress_stream_to_file(FILE *infile, int output_fd) { lzma_action action = LZMA_RUN; lzma_stream strm = LZMA_STREAM_INIT; @@ -41,18 +41,11 @@ int lzma_decompress_to_file(const char *input, int output_fd) u8 buf_in[BUFSIZE]; u8 buf_out[BUFSIZE]; - FILE *infile; - - infile = fopen(input, "rb"); - if (!infile) { - pr_debug("lzma: fopen failed on %s: '%s'\n", input, strerror(errno)); - return -1; - } ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED); if (ret != LZMA_OK) { pr_debug("lzma: lzma_stream_decoder failed %s 
(%d)\n", lzma_strerror(ret), ret); - goto err_fclose; + return err; } strm.next_in = NULL; @@ -100,11 +93,25 @@ int lzma_decompress_to_file(const char *input, int output_fd) err = 0; err_lzma_end: lzma_end(&strm); -err_fclose: - fclose(infile); return err; } +int lzma_decompress_to_file(const char *input, int output_fd) +{ + FILE *infile; + int ret; + + infile = fopen(input, "rb"); + if (!infile) { + pr_debug("lzma: fopen failed on %s: '%s'\n", input, strerror(errno)); + return -1; + } + + ret = lzma_decompress_stream_to_file(infile, output_fd); + fclose(infile); + return ret; +} + bool lzma_is_compressed(const char *input) { int fd = open(input, O_RDONLY); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 2d51badfbf2e..7ec12c207970 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -20,6 +20,7 @@ #include "path.h" #include "srcline.h" #include "symbol.h" +#include "synthetic-events.h" #include "sort.h" #include "strlist.h" #include "target.h" @@ -37,6 +38,7 @@ #include <internal/lib.h> // page_size #include "cgroup.h" #include "arm64-frame-pointer-unwind-support.h" +#include <api/io_dir.h> #include <linux/ctype.h> #include <symbol/kallsyms.h> @@ -94,6 +96,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->comm_exec = false; machine->kernel_start = 0; machine->vmlinux_map = NULL; + /* There is no initial context switch in, so we start at 1. */ + machine->parallelism = 1; machine->root_dir = strdup(root_dir); if (machine->root_dir == NULL) @@ -125,23 +129,57 @@ out: return 0; } -struct machine *machine__new_host(void) +static struct machine *__machine__new_host(bool kernel_maps) { struct machine *machine = malloc(sizeof(*machine)); - if (machine != NULL) { - machine__init(machine, "", HOST_KERNEL_ID); + if (!machine) + return NULL; - if (machine__create_kernel_maps(machine) < 0) - goto out_delete; + machine__init(machine, "", HOST_KERNEL_ID); - machine->env = &perf_env; + if (kernel_maps && machine__create_kernel_maps(machine) < 0) { + free(machine); + return NULL; } + machine->env = &perf_env; + return machine; +} + +struct machine *machine__new_host(void) +{ + return __machine__new_host(/*kernel_maps=*/true); +} + +static int mmap_handler(const struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + return machine__process_mmap2_event(machine, event, sample); +} +static int machine__init_live(struct machine *machine, pid_t pid) +{ + union perf_event event; + + memset(&event, 0, sizeof(event)); + return perf_event__synthesize_mmap_events(NULL, &event, pid, pid, + mmap_handler, machine, true); +} + +struct machine *machine__new_live(bool kernel_maps, pid_t pid) +{ + struct machine *machine = __machine__new_host(kernel_maps); + + if (!machine) + return NULL; + + if (machine__init_live(machine, pid)) { + machine__delete(machine); + return NULL; + } return machine; -out_delete: - free(machine); - return NULL; } struct machine *machine__new_kallsyms(void) @@ -677,8 +715,11 @@ int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unus int machine__process_switch_event(struct machine *machine __maybe_unused, union perf_event *event) { + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + if (dump_trace) perf_event__fprintf_switch(event, stdout); + machine->parallelism += out ? 
-1 : 1; return 0; } @@ -712,7 +753,7 @@ static int machine__process_ksymbol_register(struct machine *machine, map__set_start(map, event->ksymbol.addr); map__set_end(map, map__start(map) + event->ksymbol.len); - err = maps__insert(machine__kernel_maps(machine), map); + err = maps__fixup_overlap_and_insert(machine__kernel_maps(machine), map); if (err) { err = -ENOMEM; goto out; @@ -773,6 +814,10 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused, if (dump_trace) perf_event__fprintf_ksymbol(event, stdout); + /* no need to process non-JIT BPF as it cannot get samples */ + if (event->ksymbol.len == 0) + return 0; + if (event->ksymbol.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER) return machine__process_ksymbol_unregister(machine, event, sample); @@ -886,26 +931,6 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, return ret; } -size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) -{ - int i; - size_t printed = 0; - struct dso *kdso = machine__kernel_dso(machine); - - if (dso__has_build_id(kdso)) { - char filename[PATH_MAX]; - - if (dso__build_id_filename(kdso, filename, sizeof(filename), false)) - printed += fprintf(fp, "[0] %s\n", filename); - } - - for (i = 0; i < vmlinux_path__nr_entries; ++i) { - printed += fprintf(fp, "[%d] %s\n", i + dso__has_build_id(kdso), - vmlinux_path[i]); - } - return printed; -} - struct machine_fprintf_cb_args { FILE *fp; size_t printed; @@ -1352,27 +1377,24 @@ static int maps__set_module_path(struct maps *maps, const char *path, struct kmo return 0; } -static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, int depth) +static int maps__set_modules_path_dir(struct maps *maps, char *path, size_t path_size, int depth) { - struct dirent *dent; - DIR *dir = opendir(dir_name); + struct io_dirent64 *dent; + struct io_dir iod; + size_t root_len = strlen(path); int ret = 0; - if (!dir) { - pr_debug("%s: cannot open %s dir\n", __func__, dir_name); + io_dir__init(&iod, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (iod.dirfd < 0) { + pr_debug("%s: cannot open %s dir\n", __func__, path); return -1; } - - while ((dent = readdir(dir)) != NULL) { - char path[PATH_MAX]; - struct stat st; - - /*sshfs might return bad dent->d_type, so we have to stat*/ - path__join(path, sizeof(path), dir_name, dent->d_name); - if (stat(path, &st)) - continue; - - if (S_ISDIR(st.st_mode)) { + /* Bounds check, should never happen. */ + if (root_len >= path_size) + return -1; + path[root_len++] = '/'; + while ((dent = io_dir__readdir(&iod)) != NULL) { + if (io_dir__is_dir(&iod, dent)) { if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) continue; @@ -1384,7 +1406,12 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i continue; } - ret = maps__set_modules_path_dir(maps, path, depth + 1); + /* Bounds check, should never happen. */ + if (root_len + strlen(dent->d_name) >= path_size) + continue; + + strcpy(path + root_len, dent->d_name); + ret = maps__set_modules_path_dir(maps, path, path_size, depth + 1); if (ret < 0) goto out; } else { @@ -1394,9 +1421,14 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i if (ret) goto out; - if (m.kmod) - ret = maps__set_module_path(maps, path, &m); + if (m.kmod) { + /* Bounds check, should never happen. 
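path is a caller-provided buffer of path_size bytes, so a single d_name should always fit.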
*/ + if (root_len + strlen(dent->d_name) < path_size) { + strcpy(path + root_len, dent->d_name); + ret = maps__set_module_path(maps, path, &m); + } + } zfree(&m.name); if (ret) @@ -1405,7 +1437,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i } out: - closedir(dir); + close(iod.dirfd); return ret; } @@ -1422,7 +1454,8 @@ static int machine__set_modules_path(struct machine *machine) machine->root_dir, version); free(version); - return maps__set_modules_path_dir(machine__kernel_maps(machine), modules_path, 0); + return maps__set_modules_path_dir(machine__kernel_maps(machine), + modules_path, sizeof(modules_path), 0); } int __weak arch__fix_module_text_start(u64 *start __maybe_unused, u64 *size __maybe_unused, @@ -1468,8 +1501,6 @@ static int machine__create_modules(struct machine *machine) if (modules__parse(modules, machine, machine__create_module)) return -1; - maps__fixup_end(machine__kernel_maps(machine)); - if (!machine__set_modules_path(machine)) return 0; @@ -1563,6 +1594,8 @@ int machine__create_kernel_maps(struct machine *machine) } } + maps__fixup_end(machine__kernel_maps(machine)); + out_put: dso__put(kernel); return ret; @@ -1900,6 +1933,8 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event if (dump_trace) perf_event__fprintf_task(event, stdout); + /* There is no context switch out before exit, so we decrement here. */ + machine->parallelism--; if (thread != NULL) { if (symbol_conf.keep_exited_threads) thread__set_exited(thread, /*exited=*/true); @@ -1976,7 +2011,7 @@ static void ip__resolve_ams(struct thread *thread, * Thus, we have to try consecutively until we find a match * or else, the symbol is unknown */ - thread__find_cpumode_addr_location(thread, ip, &al); + thread__find_cpumode_addr_location(thread, ip, /*symbols=*/true, &al); ams->addr = ip; ams->al_addr = al.addr; @@ -2078,7 +2113,7 @@ static int add_callchain_ip(struct thread *thread, al.sym = NULL; al.srcline = NULL; if (!cpumode) { - thread__find_cpumode_addr_location(thread, ip, &al); + thread__find_cpumode_addr_location(thread, ip, symbols, &al); } else { if (ip >= PERF_CONTEXT_MAX) { switch (ip) { @@ -2106,6 +2141,8 @@ static int add_callchain_ip(struct thread *thread, } if (symbols) thread__find_symbol(thread, *cpumode, ip, &al); + else + thread__find_map(thread, *cpumode, ip, &al); } if (al.sym != NULL) { @@ -2929,8 +2966,8 @@ static int thread__resolve_callchain_unwind(struct thread *thread, return 0; /* Bail out if nothing was captured. */ - if ((!sample->user_regs.regs) || - (!sample->user_stack.size)) + if (!sample->user_regs || !sample->user_regs->regs || + !sample->user_stack.size) return 0; if (!symbols) diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 2e5a4cb342d8..180b369c366c 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -50,6 +50,12 @@ struct machine { u64 text_start; u64 text_end; } sched, lock, traceiter, trace; + /* + * The current parallelism level (number of threads that run on CPUs). + * This value can be less than 1, or larger than the total number + * of CPUs, if events are poorly ordered. 
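+	 * It is incremented on a context-switch in and decremented on a
+	 * context-switch out and on task exit.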
+ */ + int parallelism; pid_t *current_tid; size_t current_tid_sz; union { /* Tool specific area */ @@ -165,6 +171,7 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); struct machine *machine__new_kallsyms(void); +struct machine *machine__new_live(bool kernel_maps, pid_t pid); int machine__init(struct machine *machine, const char *root_dir, pid_t pid); void machine__exit(struct machine *machine); void machine__delete_threads(struct machine *machine); @@ -266,8 +273,6 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid); int machines__create_guest_kernel_maps(struct machines *machines); void machines__destroy_kernel_maps(struct machines *machines); -size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); - typedef int (*machine__dso_t)(struct dso *dso, struct machine *machine, void *priv); int machine__for_each_dso(struct machine *machine, machine__dso_t fn, diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 09c9cc326c08..85b2a93a59ac 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -428,11 +428,29 @@ static unsigned int maps__by_name_index(const struct maps *maps, const struct ma return -1; } +static void map__set_kmap_maps(struct map *map, struct maps *maps) +{ + struct dso *dso; + + if (map == NULL) + return; + + dso = map__dso(map); + + if (dso && dso__kernel(dso)) { + struct kmap *kmap = map__kmap(map); + + if (kmap) + kmap->kmaps = maps; + else + pr_err("Internal error: kernel dso with non kernel map\n"); + } +} + static int __maps__insert(struct maps *maps, struct map *new) { struct map **maps_by_address = maps__maps_by_address(maps); struct map **maps_by_name = maps__maps_by_name(maps); - const struct dso *dso = map__dso(new); unsigned int nr_maps = maps__nr_maps(maps); unsigned int nr_allocate = RC_CHK_ACCESS(maps)->nr_maps_allocated; @@ -483,14 +501,9 @@ static int __maps__insert(struct maps *maps, struct map *new) } if (map__end(new) < map__start(new)) RC_CHK_ACCESS(maps)->ends_broken = true; - if (dso && dso__kernel(dso)) { - struct kmap *kmap = map__kmap(new); - if (kmap) - kmap->kmaps = maps; - else - pr_err("Internal error: kernel dso with non kernel map\n"); - } + map__set_kmap_maps(new, maps); + return 0; } @@ -785,6 +798,9 @@ static int __maps__insert_sorted(struct maps *maps, unsigned int first_after_ind } RC_CHK_ACCESS(maps)->nr_maps = nr_maps + to_add; maps__set_maps_by_name_sorted(maps, false); + map__set_kmap_maps(new1, maps); + map__set_kmap_maps(new2, maps); + check_invariants(maps); return 0; } @@ -797,7 +813,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) { int err = 0; FILE *fp = debug_file(); - unsigned int i; + unsigned int i, ni = INT_MAX; // Some gcc complain, but depends on maps_by_name... 
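+	/* ni is pos's index in maps_by_name, refreshed per overlap so both arrays stay in step. */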
if (!maps__maps_by_address_sorted(maps)) __maps__sort_by_address(maps); @@ -808,6 +824,7 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) */ for (i = first_ending_after(maps, new); i < maps__nr_maps(maps); ) { struct map **maps_by_address = maps__maps_by_address(maps); + struct map **maps_by_name = maps__maps_by_name(maps); struct map *pos = maps_by_address[i]; struct map *before = NULL, *after = NULL; @@ -827,6 +844,9 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) map__fprintf(pos, fp); } + if (maps_by_name) + ni = maps__by_name_index(maps, pos); + /* * Now check if we need to create new maps for areas not * overlapped by the new map: @@ -871,6 +891,12 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) if (before) { map__put(maps_by_address[i]); maps_by_address[i] = before; + + if (maps_by_name) { + map__put(maps_by_name[ni]); + maps_by_name[ni] = map__get(before); + } + /* Maps are still ordered, go to next one. */ i++; if (after) { @@ -892,6 +918,12 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) */ map__put(maps_by_address[i]); maps_by_address[i] = map__get(new); + + if (maps_by_name) { + map__put(maps_by_name[ni]); + maps_by_name[ni] = map__get(new); + } + err = __maps__insert_sorted(maps, i + 1, after, NULL); map__put(after); check_invariants(maps); @@ -910,6 +942,14 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) */ map__put(maps_by_address[i]); maps_by_address[i] = map__get(new); + + if (maps_by_name) { + map__put(maps_by_name[ni]); + maps_by_name[ni] = map__get(new); + } + + map__set_kmap_maps(new, maps); + check_invariants(maps); return err; } @@ -1042,10 +1082,13 @@ struct map *maps__find(struct maps *maps, u64 ip) while (!done) { down_read(maps__lock(maps)); if (maps__maps_by_address_sorted(maps)) { - struct map **mapp = - bsearch(&ip, maps__maps_by_address(maps), maps__nr_maps(maps), - sizeof(*mapp), map__addr_cmp); + struct map **mapp = NULL; + struct map **maps_by_address = maps__maps_by_address(maps); + unsigned int nr_maps = maps__nr_maps(maps); + if (maps_by_address && nr_maps) + mapp = bsearch(&ip, maps_by_address, nr_maps, sizeof(*mapp), + map__addr_cmp); if (mapp) result = map__get(*mapp); done = true; diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 3692e988c86e..80b3069427bc 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -31,9 +31,6 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { bool perf_mem_record[PERF_MEM_EVENTS__MAX] = { 0 }; -static char mem_loads_name[100]; -static char mem_stores_name[100]; - struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i) { if (i >= PERF_MEM_EVENTS__MAX || !pmu) @@ -81,7 +78,8 @@ int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu) return num; } -static const char *perf_pmu__mem_events_name(int i, struct perf_pmu *pmu) +static const char *perf_pmu__mem_events_name(struct perf_pmu *pmu, int i, + char *buf, size_t buf_size) { struct perf_mem_event *e; @@ -96,31 +94,31 @@ static const char *perf_pmu__mem_events_name(int i, struct perf_pmu *pmu) if (e->ldlat) { if (!e->aux_event) { /* ARM and Most of Intel */ - scnprintf(mem_loads_name, sizeof(mem_loads_name), + scnprintf(buf, buf_size, e->name, pmu->name, perf_mem_events__loads_ldlat); } else { /* Intel with mem-loads-aux event */ - scnprintf(mem_loads_name, sizeof(mem_loads_name), + scnprintf(buf, buf_size, 
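/* the event name template consumes the PMU name twice plus the ldlat value */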
e->name, pmu->name, pmu->name, perf_mem_events__loads_ldlat); } } else { if (!e->aux_event) { /* AMD and POWER */ - scnprintf(mem_loads_name, sizeof(mem_loads_name), + scnprintf(buf, buf_size, e->name, pmu->name); - } else + } else { return NULL; + } } - - return mem_loads_name; + return buf; } if (i == PERF_MEM_EVENTS__STORE) { - scnprintf(mem_stores_name, sizeof(mem_stores_name), + scnprintf(buf, buf_size, e->name, pmu->name); - return mem_stores_name; + return buf; } return NULL; @@ -189,7 +187,7 @@ static bool perf_pmu__mem_events_supported(const char *mnt, struct perf_pmu *pmu if (!e->event_name) return true; - scnprintf(path, PATH_MAX, "%s/devices/%s/events/%s", mnt, pmu->name, e->event_name); + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/events/%s", mnt, pmu->name, e->event_name); return !stat(path, &st); } @@ -238,55 +236,69 @@ void perf_pmu__mem_events_list(struct perf_pmu *pmu) int j; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + char buf[128]; struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j); fprintf(stderr, "%-*s%-*s%s", e->tag ? 13 : 0, e->tag ? : "", e->tag && verbose > 0 ? 25 : 0, - e->tag && verbose > 0 ? perf_pmu__mem_events_name(j, pmu) : "", + e->tag && verbose > 0 + ? perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf)) + : "", e->supported ? ": available\n" : ""); } } -int perf_mem_events__record_args(const char **rec_argv, int *argv_nr) +int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **event_name_storage_out) { const char *mnt = sysfs__mount(); struct perf_pmu *pmu = NULL; - struct perf_mem_event *e; int i = *argv_nr; - const char *s; - char *copy; struct perf_cpu_map *cpu_map = NULL; - int ret; + size_t event_name_storage_size = + perf_pmu__mem_events_num_mem_pmus(NULL) * PERF_MEM_EVENTS__MAX * 128; + size_t event_name_storage_remaining = event_name_storage_size; + char *event_name_storage = malloc(event_name_storage_size); + char *event_name_storage_ptr = event_name_storage; + if (!event_name_storage) + return -ENOMEM; + + *event_name_storage_out = NULL; while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) { for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - e = perf_pmu__mem_events_ptr(pmu, j); + const char *s; + struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j); + int ret; if (!perf_mem_record[j]) continue; if (!e->supported) { + char buf[128]; + pr_err("failed: event '%s' not supported\n", - perf_pmu__mem_events_name(j, pmu)); + perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf))); + free(event_name_storage); return -1; } - s = perf_pmu__mem_events_name(j, pmu); + s = perf_pmu__mem_events_name(pmu, j, event_name_storage_ptr, + event_name_storage_remaining); if (!s || !perf_pmu__mem_events_supported(mnt, pmu, e)) continue; - copy = strdup(s); - if (!copy) - return -1; - rec_argv[i++] = "-e"; - rec_argv[i++] = copy; + rec_argv[i++] = event_name_storage_ptr; + event_name_storage_remaining -= strlen(event_name_storage_ptr) + 1; + event_name_storage_ptr += strlen(event_name_storage_ptr) + 1; ret = perf_cpu_map__merge(&cpu_map, pmu->cpus); - if (ret < 0) + if (ret < 0) { + free(event_name_storage); return ret; + } } } @@ -301,6 +313,7 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr) } *argv_nr = i; + *event_name_storage_out = event_name_storage; return 0; } @@ -664,7 +677,10 @@ do { \ if (lvl & P(LVL, LFB)) stats->ld_fbhit++; if (lvl & P(LVL, L1 )) stats->ld_l1hit++; if (lvl & P(LVL, L2)) { - stats->ld_l2hit++; + if (snoop & P(SNOOP, HITM)) + HITM_INC(lcl_hitm); + else + 
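/* no HITM snoop, so count a plain L2 hit */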
stats->ld_l2hit++; if (snoopx & P(SNOOPX, PEER)) PEER_INC(lcl_peer); @@ -783,3 +799,181 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add) stats->nomap += add->nomap; stats->noparse += add->noparse; } + +/* + * It returns an index in hist_entry->mem_stat array for the given val which + * represents a data-src based on the mem_stat_type. + */ +int mem_stat_index(const enum mem_stat_type mst, const u64 val) +{ + union perf_mem_data_src src = { + .val = val, + }; + + switch (mst) { + case PERF_MEM_STAT_OP: + switch (src.mem_op) { + case PERF_MEM_OP_LOAD: + return MEM_STAT_OP_LOAD; + case PERF_MEM_OP_STORE: + return MEM_STAT_OP_STORE; + case PERF_MEM_OP_LOAD | PERF_MEM_OP_STORE: + return MEM_STAT_OP_LDST; + default: + if (src.mem_op & PERF_MEM_OP_PFETCH) + return MEM_STAT_OP_PFETCH; + if (src.mem_op & PERF_MEM_OP_EXEC) + return MEM_STAT_OP_EXEC; + return MEM_STAT_OP_OTHER; + } + case PERF_MEM_STAT_CACHE: + switch (src.mem_lvl_num) { + case PERF_MEM_LVLNUM_L1: + return MEM_STAT_CACHE_L1; + case PERF_MEM_LVLNUM_L2: + return MEM_STAT_CACHE_L2; + case PERF_MEM_LVLNUM_L3: + return MEM_STAT_CACHE_L3; + case PERF_MEM_LVLNUM_L4: + return MEM_STAT_CACHE_L4; + case PERF_MEM_LVLNUM_LFB: + return MEM_STAT_CACHE_L1_BUF; + case PERF_MEM_LVLNUM_L2_MHB: + return MEM_STAT_CACHE_L2_BUF; + default: + return MEM_STAT_CACHE_OTHER; + } + case PERF_MEM_STAT_MEMORY: + switch (src.mem_lvl_num) { + case PERF_MEM_LVLNUM_MSC: + return MEM_STAT_MEMORY_MSC; + case PERF_MEM_LVLNUM_RAM: + return MEM_STAT_MEMORY_RAM; + case PERF_MEM_LVLNUM_UNC: + return MEM_STAT_MEMORY_UNC; + case PERF_MEM_LVLNUM_CXL: + return MEM_STAT_MEMORY_CXL; + case PERF_MEM_LVLNUM_IO: + return MEM_STAT_MEMORY_IO; + case PERF_MEM_LVLNUM_PMEM: + return MEM_STAT_MEMORY_PMEM; + default: + return MEM_STAT_MEMORY_OTHER; + } + case PERF_MEM_STAT_SNOOP: + switch (src.mem_snoop) { + case PERF_MEM_SNOOP_HIT: + return MEM_STAT_SNOOP_HIT; + case PERF_MEM_SNOOP_HITM: + return MEM_STAT_SNOOP_HITM; + case PERF_MEM_SNOOP_MISS: + return MEM_STAT_SNOOP_MISS; + default: + return MEM_STAT_SNOOP_OTHER; + } + case PERF_MEM_STAT_DTLB: + switch (src.mem_dtlb) { + case PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT: + return MEM_STAT_DTLB_L1_HIT; + case PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT: + return MEM_STAT_DTLB_L2_HIT; + case PERF_MEM_TLB_L1 | PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT: + return MEM_STAT_DTLB_ANY_HIT; + default: + if (src.mem_dtlb & PERF_MEM_TLB_MISS) + return MEM_STAT_DTLB_MISS; + return MEM_STAT_DTLB_OTHER; + } + default: + break; + } + return -1; +} + +/* To align output, returned string should be shorter than MEM_STAT_PRINT_LEN */ +const char *mem_stat_name(const enum mem_stat_type mst, const int idx) +{ + switch (mst) { + case PERF_MEM_STAT_OP: + switch (idx) { + case MEM_STAT_OP_LOAD: + return "Load"; + case MEM_STAT_OP_STORE: + return "Store"; + case MEM_STAT_OP_LDST: + return "Ld+St"; + case MEM_STAT_OP_PFETCH: + return "Pfetch"; + case MEM_STAT_OP_EXEC: + return "Exec"; + case MEM_STAT_OP_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_CACHE: + switch (idx) { + case MEM_STAT_CACHE_L1: + return "L1"; + case MEM_STAT_CACHE_L2: + return "L2"; + case MEM_STAT_CACHE_L3: + return "L3"; + case MEM_STAT_CACHE_L4: + return "L4"; + case MEM_STAT_CACHE_L1_BUF: + return "L1-buf"; + case MEM_STAT_CACHE_L2_BUF: + return "L2-buf"; + case MEM_STAT_CACHE_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_MEMORY: + switch (idx) { + case MEM_STAT_MEMORY_RAM: + return "RAM"; + case MEM_STAT_MEMORY_MSC: + return "MSC"; + case MEM_STAT_MEMORY_UNC: + 
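/* kept short to fit MEM_STAT_PRINT_LEN columns */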
return "Uncach"; + case MEM_STAT_MEMORY_CXL: + return "CXL"; + case MEM_STAT_MEMORY_IO: + return "IO"; + case MEM_STAT_MEMORY_PMEM: + return "PMEM"; + case MEM_STAT_MEMORY_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_SNOOP: + switch (idx) { + case MEM_STAT_SNOOP_HIT: + return "Hit"; + case MEM_STAT_SNOOP_HITM: + return "HitM"; + case MEM_STAT_SNOOP_MISS: + return "Miss"; + case MEM_STAT_SNOOP_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_DTLB: + switch (idx) { + case MEM_STAT_DTLB_L1_HIT: + return "L1-Hit"; + case MEM_STAT_DTLB_L2_HIT: + return "L2-Hit"; + case MEM_STAT_DTLB_ANY_HIT: + return "L?-Hit"; + case MEM_STAT_DTLB_MISS: + return "Miss"; + case MEM_STAT_DTLB_OTHER: + default: + return "Other"; + } + default: + break; + } + return "N/A"; +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 8dc27db9fd52..5b98076904b0 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -38,7 +38,8 @@ int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu); bool is_mem_loads_aux_event(struct evsel *leader); void perf_pmu__mem_events_list(struct perf_pmu *pmu); -int perf_mem_events__record_args(const char **rec_argv, int *argv_nr); +int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, + char **event_name_storage_out); int perf_mem__tlb_scnprintf(char *out, size_t sz, const struct mem_info *mem_info); int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_info); @@ -88,4 +89,61 @@ struct hist_entry; int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi); void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add); +enum mem_stat_type { + PERF_MEM_STAT_OP, + PERF_MEM_STAT_CACHE, + PERF_MEM_STAT_MEMORY, + PERF_MEM_STAT_SNOOP, + PERF_MEM_STAT_DTLB, +}; + +#define MEM_STAT_PRINT_LEN 7 /* 1 space + 5 digits + 1 percent sign */ + +enum mem_stat_op { + MEM_STAT_OP_LOAD, + MEM_STAT_OP_STORE, + MEM_STAT_OP_LDST, + MEM_STAT_OP_PFETCH, + MEM_STAT_OP_EXEC, + MEM_STAT_OP_OTHER, +}; + +enum mem_stat_cache { + MEM_STAT_CACHE_L1, + MEM_STAT_CACHE_L2, + MEM_STAT_CACHE_L3, + MEM_STAT_CACHE_L4, + MEM_STAT_CACHE_L1_BUF, + MEM_STAT_CACHE_L2_BUF, + MEM_STAT_CACHE_OTHER, +}; + +enum mem_stat_memory { + MEM_STAT_MEMORY_RAM, + MEM_STAT_MEMORY_MSC, + MEM_STAT_MEMORY_UNC, + MEM_STAT_MEMORY_CXL, + MEM_STAT_MEMORY_IO, + MEM_STAT_MEMORY_PMEM, + MEM_STAT_MEMORY_OTHER, +}; + +enum mem_stat_snoop { + MEM_STAT_SNOOP_HIT, + MEM_STAT_SNOOP_HITM, + MEM_STAT_SNOOP_MISS, + MEM_STAT_SNOOP_OTHER, +}; + +enum mem_stat_dtlb { + MEM_STAT_DTLB_L1_HIT, + MEM_STAT_DTLB_L2_HIT, + MEM_STAT_DTLB_ANY_HIT, + MEM_STAT_DTLB_MISS, + MEM_STAT_DTLB_OTHER, +}; + +int mem_stat_index(const enum mem_stat_type mst, const u64 data_src); +const char *mem_stat_name(const enum mem_stat_type mst, const int idx); + #endif /* __PERF_MEM_EVENTS_H */ diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 46920ebadfd1..43d35f956a33 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -353,7 +353,7 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids, return 0; } -static bool match_metric(const char *metric_or_groups, const char *sought) +static bool match_metric_or_groups(const char *metric_or_groups, const char *sought) { int len; char *m; @@ -369,18 +369,19 @@ static bool match_metric(const char *metric_or_groups, const char *sought) (metric_or_groups[len] == 0 || metric_or_groups[len] == ';')) return true; m = strchr(metric_or_groups, ';'); - return m 
&& match_metric(m + 1, sought); + return m && match_metric_or_groups(m + 1, sought); } -static bool match_pm_metric(const struct pmu_metric *pm, const char *pmu, const char *metric) +static bool match_pm_metric_or_groups(const struct pmu_metric *pm, const char *pmu, + const char *metric_or_groups) { const char *pm_pmu = pm->pmu ?: "cpu"; if (strcmp(pmu, "all") && strcmp(pm_pmu, pmu)) return false; - return match_metric(pm->metric_group, metric) || - match_metric(pm->metric_name, metric); + return match_metric_or_groups(pm->metric_group, metric_or_groups) || + match_metric_or_groups(pm->metric_name, metric_or_groups); } /** struct mep - RB-tree node for building printing information. */ @@ -395,6 +396,7 @@ struct mep { const char *metric_expr; const char *metric_threshold; const char *metric_unit; + const char *pmu_name; }; static int mep_cmp(struct rb_node *rb_node, const void *entry) @@ -475,6 +477,7 @@ static int metricgroup__add_to_mep_groups(const struct pmu_metric *pm, me->metric_expr = pm->metric_expr; me->metric_threshold = pm->metric_threshold; me->metric_unit = pm->unit; + me->pmu_name = pm->pmu; } } free(omg); @@ -550,7 +553,8 @@ void metricgroup__print(const struct print_callbacks *print_cb, void *print_stat me->metric_long_desc, me->metric_expr, me->metric_threshold, - me->metric_unit); + me->metric_unit, + me->pmu_name); next = rb_next(node); rblist__remove_node(&groups, node); } @@ -802,11 +806,6 @@ struct metricgroup_add_iter_data { const struct pmu_metrics_table *table; }; -static bool metricgroup__find_metric(const char *pmu, - const char *metric, - const struct pmu_metrics_table *table, - struct pmu_metric *pm); - static int add_metric(struct list_head *metric_list, const struct pmu_metric *pm, const char *modifier, @@ -818,6 +817,16 @@ static int add_metric(struct list_head *metric_list, const struct visited_metric *visited, const struct pmu_metrics_table *table); +static int metricgroup__find_metric_callback(const struct pmu_metric *pm, + const struct pmu_metrics_table *table __maybe_unused, + void *vdata) +{ + struct pmu_metric *copied_pm = vdata; + + memcpy(copied_pm, pm, sizeof(*pm)); + return 0; +} + /** * resolve_metric - Locate metrics within the root metric and recursively add * references to them. @@ -838,7 +847,7 @@ static int add_metric(struct list_head *metric_list, * architecture perf is running upon. */ static int resolve_metric(struct list_head *metric_list, - const char *pmu, + struct perf_pmu *pmu, const char *modifier, bool metric_no_group, bool metric_no_threshold, @@ -868,7 +877,9 @@ static int resolve_metric(struct list_head *metric_list, hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) { struct pmu_metric pm; - if (metricgroup__find_metric(pmu, cur->pkey, table, &pm)) { + if (pmu_metrics_table__find_metric(table, pmu, cur->pkey, + metricgroup__find_metric_callback, + &pm) != PMU_METRICS__NOT_FOUND) { pending = realloc(pending, (pending_cnt + 1) * sizeof(struct to_resolve)); if (!pending) @@ -1019,7 +1030,12 @@ static int __add_metric(struct list_head *metric_list, } if (!ret) { /* Resolve referenced metrics. 
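A metric expression can name other metrics; resolve_metric() looks each one up in the table and adds it recursively.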
*/ - const char *pmu = pm->pmu ?: "cpu"; + struct perf_pmu *pmu; + + if (pm->pmu && pm->pmu[0] != '\0') + pmu = perf_pmus__find(pm->pmu); + else + pmu = perf_pmus__scan_core(/*pmu=*/ NULL); ret = resolve_metric(metric_list, pmu, modifier, metric_no_group, metric_no_threshold, user_requested_cpu_list, @@ -1036,44 +1052,6 @@ static int __add_metric(struct list_head *metric_list, return ret; } -struct metricgroup__find_metric_data { - const char *pmu; - const char *metric; - struct pmu_metric *pm; -}; - -static int metricgroup__find_metric_callback(const struct pmu_metric *pm, - const struct pmu_metrics_table *table __maybe_unused, - void *vdata) -{ - struct metricgroup__find_metric_data *data = vdata; - const char *pm_pmu = pm->pmu ?: "cpu"; - - if (strcmp(data->pmu, "all") && strcmp(pm_pmu, data->pmu)) - return 0; - - if (!match_metric(pm->metric_name, data->metric)) - return 0; - - memcpy(data->pm, pm, sizeof(*pm)); - return 1; -} - -static bool metricgroup__find_metric(const char *pmu, - const char *metric, - const struct pmu_metrics_table *table, - struct pmu_metric *pm) -{ - struct metricgroup__find_metric_data data = { - .pmu = pmu, - .metric = metric, - .pm = pm, - }; - - return pmu_metrics_table__for_each_metric(table, metricgroup__find_metric_callback, &data) - ? true : false; -} - static int add_metric(struct list_head *metric_list, const struct pmu_metric *pm, const char *modifier, @@ -1119,7 +1097,7 @@ static int metricgroup__add_metric_sys_event_iter(const struct pmu_metric *pm, struct metricgroup_add_iter_data *d = data; int ret; - if (!match_pm_metric(pm, d->pmu, d->metric_name)) + if (!match_pm_metric_or_groups(pm, d->pmu, d->metric_name)) return 0; ret = add_metric(d->metric_list, pm, d->modifier, d->metric_no_group, @@ -1200,9 +1178,9 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm, struct metricgroup__add_metric_data *data = vdata; int ret = 0; - if (pm->metric_expr && match_pm_metric(pm, data->pmu, data->metric_name)) { + if (pm->metric_expr && match_pm_metric_or_groups(pm, data->pmu, data->metric_name)) { bool metric_no_group = data->metric_no_group || - match_metric(pm->metricgroup_no_group, data->metric_name); + match_metric_or_groups(pm->metricgroup_no_group, data->metric_name); data->has_match = true; ret = add_metric(data->list, pm, data->modifier, metric_no_group, @@ -1723,29 +1701,32 @@ int metricgroup__parse_groups_test(struct evlist *evlist, struct metricgroup__has_metric_data { const char *pmu; - const char *metric; + const char *metric_or_groups; }; -static int metricgroup__has_metric_callback(const struct pmu_metric *pm, - const struct pmu_metrics_table *table __maybe_unused, - void *vdata) +static int metricgroup__has_metric_or_groups_callback(const struct pmu_metric *pm, + const struct pmu_metrics_table *table + __maybe_unused, + void *vdata) { struct metricgroup__has_metric_data *data = vdata; - return match_pm_metric(pm, data->pmu, data->metric) ? 1 : 0; + return match_pm_metric_or_groups(pm, data->pmu, data->metric_or_groups) ? 
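/* non-zero also ends the metrics table walk early */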
1 : 0; } -bool metricgroup__has_metric(const char *pmu, const char *metric) +bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_groups) { const struct pmu_metrics_table *table = pmu_metrics_table__find(); struct metricgroup__has_metric_data data = { .pmu = pmu, - .metric = metric, + .metric_or_groups = metric_or_groups, }; if (!table) return false; - return pmu_metrics_table__for_each_metric(table, metricgroup__has_metric_callback, &data) + return pmu_metrics_table__for_each_metric(table, + metricgroup__has_metric_or_groups_callback, + &data) ? true : false; } diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 779f6ede1b51..a04ac1afa6cc 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -85,7 +85,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, struct rblist *metric_events); void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); -bool metricgroup__has_metric(const char *pmu, const char *metric); +bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_groups); unsigned int metricgroups__topdown_max_level(void); int arch_get_runtimeparam(const struct pmu_metric *pm); void metricgroup__rblist_exit(struct rblist *metric_events); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 43b02293f1d2..a34726219af3 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -244,9 +244,8 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask) { int idx, nr_cpus; struct perf_cpu cpu; - const struct perf_cpu_map *cpu_map = NULL; + struct perf_cpu_map *cpu_map = cpu_map__online(); - cpu_map = cpu_map__online(); if (!cpu_map) return; @@ -256,6 +255,7 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask) if (cpu__get_node(cpu) == node) __set_bit(cpu.cpu, mask->bits); } + perf_cpu_map__put(cpu_map); } static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp) @@ -356,14 +356,3 @@ int perf_mmap__push(struct mmap *md, void *to, out: return rc; } - -int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, struct mmap_cpu_mask *clone) -{ - clone->nbits = original->nbits; - clone->bits = bitmap_zalloc(original->nbits); - if (!clone->bits) - return -ENOMEM; - - memcpy(clone->bits, original->bits, MMAP_CPU_MASK_BYTES(original)); - return 0; -} diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 0df6e1621c7e..4d72c5fa5084 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -61,7 +61,4 @@ size_t mmap__mmap_len(struct mmap *map); void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag); -int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, - struct mmap_cpu_mask *clone); - #endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h index 40661120cacc..38458f00846f 100644 --- a/tools/perf/util/mutex.h +++ b/tools/perf/util/mutex.h @@ -33,10 +33,22 @@ /* Documents if a type is a lockable type. */ #define LOCKABLE __attribute__((lockable)) +/* Documents a function that expects a lock not to be held prior to entry. */ +#define LOCKS_EXCLUDED(...) __attribute__((locks_excluded(__VA_ARGS__))) + +/* Documents a function that returns a lock. */ +#define LOCK_RETURNED(x) __attribute__((lock_returned(x))) + /* Documents functions that acquire a lock in the body of a function, and do not release it. */ #define EXCLUSIVE_LOCK_FUNCTION(...) 
__attribute__((exclusive_lock_function(__VA_ARGS__))) /* + * Documents functions that acquire a shared (reader) lock in the body of a + * function, and do not release it. + */ +#define SHARED_LOCK_FUNCTION(...) __attribute__((shared_lock_function(__VA_ARGS__))) + +/* * Documents functions that expect a lock to be held on entry to the function, * and release it in the body of the function. */ @@ -49,6 +61,9 @@ /* Documents a function that expects a mutex to be held prior to entry. */ #define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__((exclusive_locks_required(__VA_ARGS__))) +/* Documents a function that expects a shared (reader) lock to be held prior to entry. */ +#define SHARED_LOCKS_REQUIRED(...) __attribute__((shared_locks_required(__VA_ARGS__))) + /* Turns off thread safety checking within the body of a particular function. */ #define NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) @@ -57,10 +72,14 @@ #define GUARDED_BY(x) #define PT_GUARDED_BY(x) #define LOCKABLE +#define LOCKS_EXCLUDED(...) +#define LOCK_RETURNED(x) #define EXCLUSIVE_LOCK_FUNCTION(...) +#define SHARED_LOCK_FUNCTION(...) #define UNLOCK_FUNCTION(...) #define EXCLUSIVE_TRYLOCK_FUNCTION(...) #define EXCLUSIVE_LOCKS_REQUIRED(...) +#define SHARED_LOCKS_REQUIRED(...) #define NO_THREAD_SAFETY_ANALYSIS #endif diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h index 2dd67c60f211..64bf763ddf50 100644 --- a/tools/perf/util/off_cpu.h +++ b/tools/perf/util/off_cpu.h @@ -13,9 +13,10 @@ struct record_opts; #define OFFCPU_SAMPLE_TYPES (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | \ PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | \ - PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_PERIOD | PERF_SAMPLE_RAW | \ PERF_SAMPLE_CGROUP) +#define OFFCPU_THRESH 500000000ULL #ifdef HAVE_BPF_SKEL int off_cpu_prepare(struct evlist *evlist, struct target *target, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 1e23faa364b1..2380de56a207 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -7,6 +7,7 @@ #include <errno.h> #include <sys/ioctl.h> #include <sys/param.h> +#include "cpumap.h" #include "term.h" #include "env.h" #include "evlist.h" @@ -17,6 +18,7 @@ #include "strbuf.h" #include "debug.h" #include <api/fs/tracing_path.h> +#include <api/io_dir.h> #include <perf/cpumap.h> #include <util/parse-events-bison.h> #include <util/parse-events-flex.h> @@ -27,6 +29,7 @@ #include "util/evsel_config.h" #include "util/event.h" #include "util/bpf-filter.h" +#include "util/stat.h" #include "util/util.h" #include "tracepoint.h" @@ -178,6 +181,26 @@ static char *get_config_name(const struct parse_events_terms *head_terms) return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME); } +static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head_terms) +{ + struct parse_events_term *term; + struct perf_cpu_map *cpus = NULL; + + if (!head_terms) + return NULL; + + list_for_each_entry(term, &head_terms->terms, list) { + if (term->type_term == PARSE_EVENTS__TERM_TYPE_CPU) { + struct perf_cpu_map *cpu = perf_cpu_map__new_int(term->val.num); + + perf_cpu_map__merge(&cpus, cpu); + perf_cpu_map__put(cpu); + } + } + + return cpus; +} + /** * fix_raw - For each raw term see if there is an event (aka alias) in pmu that * matches the raw's string value. 
If the string value matches an @@ -227,25 +250,55 @@ __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, bool init_attr, const char *name, const char *metric_id, struct perf_pmu *pmu, - struct list_head *config_terms, bool auto_merge_stats, + struct list_head *config_terms, struct evsel *first_wildcard_match, struct perf_cpu_map *cpu_list, u64 alternate_hw_config) { struct evsel *evsel; - struct perf_cpu_map *cpus = perf_cpu_map__is_empty(cpu_list) && pmu ? pmu->cpus : cpu_list; + bool is_pmu_core; + struct perf_cpu_map *cpus; - cpus = perf_cpu_map__get(cpus); - if (pmu) - perf_pmu__warn_invalid_formats(pmu); + /* + * Ensure the first_wildcard_match's PMU matches that of the new event + * being added. Otherwise try to match with another event further down + * the evlist. + */ + if (first_wildcard_match) { + struct evsel *pos = list_prev_entry(first_wildcard_match, core.node); + + first_wildcard_match = NULL; + list_for_each_entry_continue(pos, list, core.node) { + if (perf_pmu__name_no_suffix_match(pos->pmu, pmu->name)) { + first_wildcard_match = pos; + break; + } + if (pos->pmu->is_core && (!pmu || pmu->is_core)) { + first_wildcard_match = pos; + break; + } + } + } - if (pmu && (attr->type == PERF_TYPE_RAW || attr->type >= PERF_TYPE_MAX)) { - perf_pmu__warn_invalid_config(pmu, attr->config, name, - PERF_PMU_FORMAT_VALUE_CONFIG, "config"); - perf_pmu__warn_invalid_config(pmu, attr->config1, name, - PERF_PMU_FORMAT_VALUE_CONFIG1, "config1"); - perf_pmu__warn_invalid_config(pmu, attr->config2, name, - PERF_PMU_FORMAT_VALUE_CONFIG2, "config2"); - perf_pmu__warn_invalid_config(pmu, attr->config3, name, - PERF_PMU_FORMAT_VALUE_CONFIG3, "config3"); + if (pmu) { + is_pmu_core = pmu->is_core; + cpus = perf_cpu_map__get(perf_cpu_map__is_empty(cpu_list) ? pmu->cpus : cpu_list); + perf_pmu__warn_invalid_formats(pmu); + if (attr->type == PERF_TYPE_RAW || attr->type >= PERF_TYPE_MAX) { + perf_pmu__warn_invalid_config(pmu, attr->config, name, + PERF_PMU_FORMAT_VALUE_CONFIG, "config"); + perf_pmu__warn_invalid_config(pmu, attr->config1, name, + PERF_PMU_FORMAT_VALUE_CONFIG1, "config1"); + perf_pmu__warn_invalid_config(pmu, attr->config2, name, + PERF_PMU_FORMAT_VALUE_CONFIG2, "config2"); + perf_pmu__warn_invalid_config(pmu, attr->config3, name, + PERF_PMU_FORMAT_VALUE_CONFIG3, "config3"); + } + } else { + is_pmu_core = (attr->type == PERF_TYPE_HARDWARE || + attr->type == PERF_TYPE_HW_CACHE); + if (perf_cpu_map__is_empty(cpu_list)) + cpus = is_pmu_core ? perf_cpu_map__new_online_cpus() : NULL; + else + cpus = perf_cpu_map__get(cpu_list); } if (init_attr) event_attr_init(attr); @@ -260,10 +313,10 @@ __add_event(struct list_head *list, int *idx, evsel->core.cpus = cpus; evsel->core.own_cpus = perf_cpu_map__get(cpus); evsel->core.requires_cpu = pmu ? pmu->is_uncore : false; - evsel->core.is_pmu_core = pmu ? 
pmu->is_core : false; - evsel->auto_merge_stats = auto_merge_stats; + evsel->core.is_pmu_core = is_pmu_core; evsel->pmu = pmu; evsel->alternate_hw_config = alternate_hw_config; + evsel->first_wildcard_match = first_wildcard_match; if (name) evsel->name = strdup(name); @@ -286,7 +339,7 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, { return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name, metric_id, pmu, /*config_terms=*/NULL, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL, + /*first_wildcard_match=*/NULL, /*cpu_list=*/NULL, /*alternate_hw_config=*/PERF_COUNT_HW_MAX); } @@ -297,7 +350,7 @@ static int add_event(struct list_head *list, int *idx, { return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id, /*pmu=*/NULL, config_terms, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL, + /*first_wildcard_match=*/NULL, /*cpu_list=*/NULL, alternate_hw_config) ? 0 : -ENOMEM; } @@ -422,7 +475,7 @@ bool parse_events__filter_pmu(const struct parse_events_state *parse_state, static int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, const struct parse_events_terms *const_parsed_terms, - bool auto_merge_stats, u64 alternate_hw_config); + struct evsel *first_wildcard_match, u64 alternate_hw_config); int parse_events_add_cache(struct list_head *list, int *idx, const char *name, struct parse_events_state *parse_state, @@ -432,11 +485,13 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, bool found_supported = false; const char *config_name = get_config_name(parsed_terms); const char *metric_id = get_config_metric_id(parsed_terms); + struct perf_cpu_map *cpus = get_config_cpu(parsed_terms); + int ret = 0; + struct evsel *first_wildcard_match = NULL; while ((pmu = perf_pmus__scan(pmu)) != NULL) { LIST_HEAD(config_terms); struct perf_event_attr attr; - int ret; if (parse_events__filter_pmu(parse_state, pmu)) continue; @@ -448,10 +503,13 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, */ ret = parse_events_add_pmu(parse_state, list, pmu, parsed_terms, - perf_pmu__auto_merge_stats(pmu), + first_wildcard_match, /*alternate_hw_config=*/PERF_COUNT_HW_MAX); if (ret) - return ret; + goto out_err; + if (first_wildcard_match == NULL) + first_wildcard_match = + container_of(list->prev, struct evsel, core.node); continue; } @@ -471,21 +529,29 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, if (parsed_terms) { if (config_attr(&attr, parsed_terms, parse_state->error, - config_term_common)) - return -EINVAL; - - if (get_config_terms(parsed_terms, &config_terms)) - return -ENOMEM; + config_term_common)) { + ret = -EINVAL; + goto out_err; + } + if (get_config_terms(parsed_terms, &config_terms)) { + ret = -ENOMEM; + goto out_err; + } } if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name, - metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL, - /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL) - return -ENOMEM; + metric_id, pmu, &config_terms, first_wildcard_match, + cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL) + ret = -ENOMEM; + if (first_wildcard_match == NULL) + first_wildcard_match = container_of(list->prev, struct evsel, core.node); free_config_terms(&config_terms); + if (ret) + goto out_err; } +out_err: + perf_cpu_map__put(cpus); return found_supported ? 
0 : -EINVAL; } @@ -554,8 +620,8 @@ static int add_tracepoint_multi_event(struct parse_events_state *parse_state, struct parse_events_terms *head_config, YYLTYPE *loc) { char *evt_path; - struct dirent *evt_ent; - DIR *evt_dir; + struct io_dirent64 *evt_ent; + struct io_dir evt_dir; int ret = 0, found = 0; evt_path = get_events_file(sys_name); @@ -563,14 +629,14 @@ static int add_tracepoint_multi_event(struct parse_events_state *parse_state, tracepoint_error(err, errno, sys_name, evt_name, loc->first_column); return -1; } - evt_dir = opendir(evt_path); - if (!evt_dir) { + io_dir__init(&evt_dir, open(evt_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (evt_dir.dirfd < 0) { put_events_file(evt_path); tracepoint_error(err, errno, sys_name, evt_name, loc->first_column); return -1; } - while (!ret && (evt_ent = readdir(evt_dir))) { + while (!ret && (evt_ent = io_dir__readdir(&evt_dir))) { if (!strcmp(evt_ent->d_name, ".") || !strcmp(evt_ent->d_name, "..") || !strcmp(evt_ent->d_name, "enable") @@ -592,7 +658,7 @@ static int add_tracepoint_multi_event(struct parse_events_state *parse_state, } put_events_file(evt_path); - closedir(evt_dir); + close(evt_dir.dirfd); return ret; } @@ -615,17 +681,23 @@ static int add_tracepoint_multi_sys(struct parse_events_state *parse_state, struct parse_events_error *err, struct parse_events_terms *head_config, YYLTYPE *loc) { - struct dirent *events_ent; - DIR *events_dir; + struct io_dirent64 *events_ent; + struct io_dir events_dir; int ret = 0; + char *events_dir_path = get_tracing_file("events"); - events_dir = tracing_events__opendir(); - if (!events_dir) { + if (!events_dir_path) { + tracepoint_error(err, errno, sys_name, evt_name, loc->first_column); + return -1; + } + io_dir__init(&events_dir, open(events_dir_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + put_events_file(events_dir_path); + if (events_dir.dirfd < 0) { tracepoint_error(err, errno, sys_name, evt_name, loc->first_column); return -1; } - while (!ret && (events_ent = readdir(events_dir))) { + while (!ret && (events_ent = io_dir__readdir(&events_dir))) { if (!strcmp(events_ent->d_name, ".") || !strcmp(events_ent->d_name, "..") || !strcmp(events_ent->d_name, "enable") @@ -639,8 +711,7 @@ static int add_tracepoint_multi_sys(struct parse_events_state *parse_state, ret = add_tracepoint_event(parse_state, list, events_ent->d_name, evt_name, err, head_config, loc); } - - closedir(events_dir); + close(events_dir.dirfd); return ret; } @@ -799,6 +870,7 @@ const char *parse_events__term_type_str(enum parse_events__term_type term_type) [PARSE_EVENTS__TERM_TYPE_RAW] = "raw", [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache", [PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware", + [PARSE_EVENTS__TERM_TYPE_CPU] = "cpu", }; if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR) return "unknown term"; @@ -828,6 +900,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: case PARSE_EVENTS__TERM_TYPE_PERCORE: + case PARSE_EVENTS__TERM_TYPE_CPU: return true; case PARSE_EVENTS__TERM_TYPE_USER: case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: @@ -975,6 +1048,15 @@ do { \ return -EINVAL; } break; + case PARSE_EVENTS__TERM_TYPE_CPU: + CHECK_TYPE_VAL(NUM); + if (term->val.num >= (u64)cpu__max_present_cpu().cpu) { + parse_events_error__handle(err, term->err_val, + strdup("too big"), + NULL); + return -EINVAL; + } + break; case PARSE_EVENTS__TERM_TYPE_DRV_CFG: case PARSE_EVENTS__TERM_TYPE_USER: case 
PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: @@ -1102,6 +1184,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_RAW: case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: case PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_CPU: default: if (err) { parse_events_error__handle(err, term->err_term, @@ -1236,6 +1319,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_RAW: case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: case PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_CPU: default: break; } @@ -1290,6 +1374,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head case PARSE_EVENTS__TERM_TYPE_RAW: case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: case PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_CPU: default: break; } @@ -1329,11 +1414,13 @@ int parse_events_add_tracepoint(struct parse_events_state *parse_state, static int __parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, u32 type, u32 extended_type, - u64 config, const struct parse_events_terms *head_config) + u64 config, const struct parse_events_terms *head_config, + struct evsel *first_wildcard_match) { struct perf_event_attr attr; LIST_HEAD(config_terms); const char *name, *metric_id; + struct perf_cpu_map *cpus; int ret; memset(&attr, 0, sizeof(attr)); @@ -1355,10 +1442,11 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state, name = get_config_name(head_config); metric_id = get_config_metric_id(head_config); + cpus = get_config_cpu(head_config); ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name, - metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL, /*alternate_hw_config=*/PERF_COUNT_HW_MAX - ) == NULL ? -ENOMEM : 0; + metric_id, pmu, &config_terms, first_wildcard_match, + cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) ? 0 : -ENOMEM; + perf_cpu_map__put(cpus); free_config_terms(&config_terms); return ret; } @@ -1374,6 +1462,7 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, /* Wildcards on numeric values are only supported by core PMUs. 
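/*
 * Usage note for the cpu term validated in the config_term_common hunk
 * above (CPU counts illustrative): with CPUs 0-7 present,
 *        perf stat -e instructions/cpu=0/,cycles -a sleep 1
 * opens instructions on CPU 0 only while cycles keeps its default CPUs;
 * cpu=8 fails the cpu__max_present_cpu() bound with the "too big" error.
 */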
*/ if (wildcard && perf_pmus__supports_extended_type()) { + struct evsel *first_wildcard_match = NULL; while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { int ret; @@ -1383,15 +1472,20 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, ret = __parse_events_add_numeric(parse_state, list, pmu, type, pmu->type, - config, head_config); + config, head_config, + first_wildcard_match); if (ret) return ret; + if (first_wildcard_match == NULL) + first_wildcard_match = + container_of(list->prev, struct evsel, core.node); } if (found_supported) return 0; } return __parse_events_add_numeric(parse_state, list, perf_pmus__find_by_type(type), - type, /*extended_type=*/0, config, head_config); + type, /*extended_type=*/0, config, head_config, + /*first_wildcard_match=*/NULL); } static bool config_term_percore(struct list_head *config_terms) @@ -1409,7 +1503,7 @@ static bool config_term_percore(struct list_head *config_terms) static int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, const struct parse_events_terms *const_parsed_terms, - bool auto_merge_stats, u64 alternate_hw_config) + struct evsel *first_wildcard_match, u64 alternate_hw_config) { struct perf_event_attr attr; struct perf_pmu_info info; @@ -1418,6 +1512,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, LIST_HEAD(config_terms); struct parse_events_terms parsed_terms; bool alias_rewrote_terms = false; + struct perf_cpu_map *term_cpu = NULL; if (verbose > 1) { struct strbuf sb; @@ -1445,7 +1540,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, /*name=*/NULL, /*metric_id=*/NULL, pmu, - /*config_terms=*/NULL, auto_merge_stats, + /*config_terms=*/NULL, first_wildcard_match, /*cpu_list=*/NULL, alternate_hw_config); return evsel ? 
0 : -ENOMEM; } @@ -1512,11 +1607,12 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, return -EINVAL; } + term_cpu = get_config_cpu(&parsed_terms); evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, get_config_name(&parsed_terms), get_config_metric_id(&parsed_terms), pmu, - &config_terms, auto_merge_stats, /*cpu_list=*/NULL, - alternate_hw_config); + &config_terms, first_wildcard_match, term_cpu, alternate_hw_config); + perf_cpu_map__put(term_cpu); if (!evsel) { parse_events_terms__exit(&parsed_terms); return -ENOMEM; @@ -1533,6 +1629,10 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; + evsel->retirement_latency.mean = info.retirement_latency_mean; + evsel->retirement_latency.min = info.retirement_latency_min; + evsel->retirement_latency.max = info.retirement_latency_max; + return 0; } @@ -1548,6 +1648,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, int ok = 0; const char *config; struct parse_events_terms parsed_terms; + struct evsel *first_wildcard_match = NULL; *listp = NULL; @@ -1580,17 +1681,14 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, INIT_LIST_HEAD(list); while ((pmu = perf_pmus__scan(pmu)) != NULL) { - bool auto_merge_stats; - if (parse_events__filter_pmu(parse_state, pmu)) continue; if (!perf_pmu__have_event(pmu, event_name)) continue; - auto_merge_stats = perf_pmu__auto_merge_stats(pmu); if (!parse_events_add_pmu(parse_state, list, pmu, - &parsed_terms, auto_merge_stats, hw_config)) { + &parsed_terms, first_wildcard_match, hw_config)) { struct strbuf sb; strbuf_init(&sb, /*hint=*/ 0); @@ -1599,11 +1697,13 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, strbuf_release(&sb); ok++; } + if (first_wildcard_match == NULL) + first_wildcard_match = container_of(list->prev, struct evsel, core.node); } if (parse_state->fake_pmu) { if (!parse_events_add_pmu(parse_state, list, perf_pmus__fake_pmu(), &parsed_terms, - /*auto_merge_stats=*/true, hw_config)) { + first_wildcard_match, hw_config)) { struct strbuf sb; strbuf_init(&sb, /*hint=*/ 0); @@ -1634,6 +1734,7 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state struct perf_pmu *pmu; int ok = 0; char *help; + struct evsel *first_wildcard_match = NULL; *listp = malloc(sizeof(**listp)); if (!*listp) @@ -1644,14 +1745,14 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state /* Attempt to add to list assuming event_or_pmu is a PMU name. */ pmu = perf_pmus__find(event_or_pmu); if (pmu && !parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms, - /*auto_merge_stats=*/false, + first_wildcard_match, /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) return 0; if (parse_state->fake_pmu) { if (!parse_events_add_pmu(parse_state, *listp, perf_pmus__fake_pmu(), const_parsed_terms, - /*auto_merge_stats=*/false, + first_wildcard_match, /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) return 0; } @@ -1660,16 +1761,17 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state /* Failed to add, try wildcard expansion of event_or_pmu as a PMU name. 
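/*
 * The recurring pattern in the hunks above, condensed: the first evsel
 * created by a wildcard expansion is remembered and handed to later
 * __add_event() calls so their stats merge into it, replacing the old
 * auto_merge_stats flag. Sketch, names as in this file:
 */
struct evsel *first_wildcard_match = NULL;

while ((pmu = perf_pmus__scan(pmu)) != NULL) {
        if (!parse_events_add_pmu(parse_state, list, pmu, &parsed_terms,
                                  first_wildcard_match, hw_config))
                ok++;
        if (first_wildcard_match == NULL)
                first_wildcard_match = container_of(list->prev,
                                                    struct evsel, core.node);
}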
*/ while ((pmu = perf_pmus__scan(pmu)) != NULL) { if (!parse_events__filter_pmu(parse_state, pmu) && - perf_pmu__match(pmu, event_or_pmu)) { - bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu); - + perf_pmu__wildcard_match(pmu, event_or_pmu)) { if (!parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms, - auto_merge_stats, + first_wildcard_match, /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) { ok++; parse_state->wild_card_pmus = true; } + if (first_wildcard_match == NULL) + first_wildcard_match = + container_of((*listp)->prev, struct evsel, core.node); } } if (ok) @@ -1974,48 +2076,55 @@ static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct li int *force_grouped_idx = _fg_idx; int lhs_sort_idx, rhs_sort_idx, ret; const char *lhs_pmu_name, *rhs_pmu_name; - bool lhs_has_group, rhs_has_group; /* - * First sort by grouping/leader. Read the leader idx only if the evsel - * is part of a group, by default ungrouped events will be sorted - * relative to grouped events based on where the first ungrouped event - * occurs. If both events don't have a group we want to fall-through to - * the arch specific sorting, that can reorder and fix things like - * Intel's topdown events. + * Get the indexes of the 2 events to sort. If the events are + * in groups then the leader's index is used otherwise the + * event's index is used. An index may be forced for events that + * must be in the same group, namely Intel topdown events. */ - if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) { - lhs_has_group = true; - lhs_sort_idx = lhs_core->leader->idx; + if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)) { + lhs_sort_idx = *force_grouped_idx; } else { - lhs_has_group = false; - lhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs) - ? *force_grouped_idx - : lhs_core->idx; - } - if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) { - rhs_has_group = true; - rhs_sort_idx = rhs_core->leader->idx; + bool lhs_has_group = lhs_core->leader != lhs_core || lhs_core->nr_members > 1; + + lhs_sort_idx = lhs_has_group ? lhs_core->leader->idx : lhs_core->idx; + } + if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)) { + rhs_sort_idx = *force_grouped_idx; } else { - rhs_has_group = false; - rhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs) - ? *force_grouped_idx - : rhs_core->idx; + bool rhs_has_group = rhs_core->leader != rhs_core || rhs_core->nr_members > 1; + + rhs_sort_idx = rhs_has_group ? rhs_core->leader->idx : rhs_core->idx; } + /* If the indices differ then respect the insertion order. */ if (lhs_sort_idx != rhs_sort_idx) return lhs_sort_idx - rhs_sort_idx; - /* Group by PMU if there is a group. Groups can't span PMUs. */ - if (lhs_has_group && rhs_has_group) { - lhs_pmu_name = lhs->group_pmu_name; - rhs_pmu_name = rhs->group_pmu_name; - ret = strcmp(lhs_pmu_name, rhs_pmu_name); - if (ret) - return ret; - } + /* + * Ignoring forcing, lhs_sort_idx == rhs_sort_idx so lhs and rhs should + * be in the same group. Events in the same group need to be ordered by + * their grouping PMU name as the group will be broken to ensure only + * events on the same PMU are programmed together. + * + * With forcing the lhs_sort_idx == rhs_sort_idx shows that one or both + * events are being forced to be at force_group_index. If only one event + * is being forced then the other event is the group leader of the group + * we're trying to force the event into. 
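/*
 * The two index computations above share one shape; condensed as a
 * helper (sketch only, not a function in this file):
 */
static int sort_idx(struct evsel *evsel, int force_grouped_idx)
{
        struct perf_evsel *core = &evsel->core;
        bool has_group = core->leader != core || core->nr_members > 1;

        if (force_grouped_idx != -1 && arch_evsel__must_be_in_group(evsel))
                return force_grouped_idx;
        return has_group ? core->leader->idx : core->idx;
}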
Ensure for the force grouped + * case that the PMU name ordering is also respected. + */ + lhs_pmu_name = lhs->group_pmu_name; + rhs_pmu_name = rhs->group_pmu_name; + ret = strcmp(lhs_pmu_name, rhs_pmu_name); + if (ret) + return ret; - /* Architecture specific sorting. */ + /* + * Architecture specific sorting, by default sort events in the same + * group with the same PMU by their insertion index. On Intel topdown + * constraints must be adhered to - slots first, etc. + */ return arch_evlist__cmp(lhs, rhs); } @@ -2024,9 +2133,11 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list) int idx = 0, force_grouped_idx = -1; struct evsel *pos, *cur_leader = NULL; struct perf_evsel *cur_leaders_grp = NULL; - bool idx_changed = false, cur_leader_force_grouped = false; + bool idx_changed = false; int orig_num_leaders = 0, num_leaders = 0; int ret; + struct evsel *force_grouped_leader = NULL; + bool last_event_was_forced_leader = false; /* * Compute index to insert ungrouped events at. Place them where the @@ -2049,10 +2160,13 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list) */ pos->core.idx = idx++; - /* Remember an index to sort all forced grouped events together to. */ - if (force_grouped_idx == -1 && pos == pos_leader && pos->core.nr_members < 2 && - arch_evsel__must_be_in_group(pos)) - force_grouped_idx = pos->core.idx; + /* + * Remember an index to sort all forced grouped events + * together to. Use the group leader as some events + * must appear first within the group. + */ + if (force_grouped_idx == -1 && arch_evsel__must_be_in_group(pos)) + force_grouped_idx = pos_leader->core.idx; } /* Sort events. */ @@ -2080,31 +2194,66 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list) * Set the group leader respecting the given groupings and that * groups can't span PMUs. */ - if (!cur_leader) + if (!cur_leader) { cur_leader = pos; + cur_leaders_grp = &pos->core; + if (pos_force_grouped) + force_grouped_leader = pos; + } cur_leader_pmu_name = cur_leader->group_pmu_name; - if ((cur_leaders_grp != pos->core.leader && - (!pos_force_grouped || !cur_leader_force_grouped)) || - strcmp(cur_leader_pmu_name, pos_pmu_name)) { - /* Event is for a different group/PMU than last. */ + if (strcmp(cur_leader_pmu_name, pos_pmu_name)) { + /* PMU changed so the group/leader must change. */ cur_leader = pos; - /* - * Remember the leader's group before it is overwritten, - * so that later events match as being in the same - * group. - */ cur_leaders_grp = pos->core.leader; + if (pos_force_grouped && force_grouped_leader == NULL) + force_grouped_leader = pos; + } else if (cur_leaders_grp != pos->core.leader) { + bool split_even_if_last_leader_was_forced = true; + /* - * Avoid forcing events into groups with events that - * don't need to be in the group. + * Event is for a different group. If the last event was + * the forced group leader then subsequent group events + * and forced events should be in the same group. If + * there are no other forced group events then the + * forced group leader wasn't really being forced into a + * group, it just set arch_evsel__must_be_in_group, and + * we don't want the group to split here. */ - cur_leader_force_grouped = pos_force_grouped; + if (force_grouped_idx != -1 && last_event_was_forced_leader) { + struct evsel *pos2 = pos; + /* + * Search the whole list as the group leaders + * aren't currently valid. 
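/*
 * Worked example (assuming an Intel-style PMU where slots/topdown events
 * set arch_evsel__must_be_in_group()): for "slots,cycles" the slots
 * event becomes a forced leader, but the search just below finds no
 * other must-be-grouped event sharing cycles' group, so
 * split_even_if_last_leader_was_forced stays true and cycles keeps its
 * own leader. A genuinely forced event such as topdown-retiring would
 * instead take the pos_force_grouped path and be re-parented onto
 * force_grouped_leader.
 */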
+ */ + list_for_each_entry_continue(pos2, list, core.node) { + if (pos->core.leader == pos2->core.leader && + arch_evsel__must_be_in_group(pos2)) { + split_even_if_last_leader_was_forced = false; + break; + } + } + } + if (!last_event_was_forced_leader || split_even_if_last_leader_was_forced) { + if (pos_force_grouped) { + if (force_grouped_leader) { + cur_leader = force_grouped_leader; + cur_leaders_grp = force_grouped_leader->core.leader; + } else { + cur_leader = force_grouped_leader = pos; + cur_leaders_grp = &pos->core; + } + } else { + cur_leader = pos; + cur_leaders_grp = pos->core.leader; + } + } } if (pos_leader != cur_leader) { /* The leader changed so update it. */ evsel__set_leader(pos, cur_leader); } + last_event_was_forced_leader = (force_grouped_leader == pos); } list_for_each_entry(pos, list, core.node) { struct evsel *pos_leader = evsel__leader(pos); @@ -2143,14 +2292,23 @@ int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filte if (ret2 < 0) return ret; - if (ret2 && warn_if_reordered && !parse_state.wild_card_pmus) - pr_warning("WARNING: events were regrouped to match PMUs\n"); - /* * Add list to the evlist even with errors to allow callers to clean up. */ evlist__splice_list_tail(evlist, &parse_state.list); + if (ret2 && warn_if_reordered && !parse_state.wild_card_pmus) { + pr_warning("WARNING: events were regrouped to match PMUs\n"); + + if (verbose > 0) { + struct strbuf sb = STRBUF_INIT; + + evlist__uniquify_evsel_names(evlist, &stat_config); + evlist__format_evsels(evlist, &sb, 2048); + pr_debug("evlist after sorting/fixing: '%s'\n", sb.buf); + strbuf_release(&sb); + } + } if (!ret) { struct evsel *last; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index e176a34ab088..ab242f671031 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -80,7 +80,8 @@ enum parse_events__term_type { PARSE_EVENTS__TERM_TYPE_RAW, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, PARSE_EVENTS__TERM_TYPE_HARDWARE, -#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_HARDWARE + 1) + PARSE_EVENTS__TERM_TYPE_CPU, +#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_CPU + 1) }; struct parse_events_term { diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index bf7f73548605..4af7b9c1f44d 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -53,21 +53,25 @@ static int str(yyscan_t scanner, int token) YYSTYPE *yylval = parse_events_get_lval(scanner); char *text = parse_events_get_text(scanner); - if (text[0] != '\'') { - yylval->str = strdup(text); - } else { - /* - * If a text tag specified on the command line - * contains opening single quite ' then it is - * expected that the tag ends with single quote - * as well, like this: - * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\' - * quotes need to be escaped to bypass shell - * processing. - */ - yylval->str = strndup(&text[1], strlen(text) - 2); - } + yylval->str = strdup(text); + return token; +} + +static int quoted_str(yyscan_t scanner, int token) +{ + YYSTYPE *yylval = parse_events_get_lval(scanner); + char *text = parse_events_get_text(scanner); + /* + * If a text tag specified on the command line + * contains opening single quite ' then it is + * expected that the tag ends with single quote + * as well, like this: + * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\' + * quotes need to be escaped to bypass shell + * processing. 
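/*
 * Usage sketch for the str()/quoted_str() split (event name from the
 * comment above; the period term is hypothetical):
 *        perf stat -e cpu/name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\',period=100000/
 * The strndup() just below drops the surrounding quotes, so the evsel is
 * named CPU_CLK_UNHALTED.THREAD:cmask=1.
 */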
+ */ + yylval->str = strndup(&text[1], strlen(text) - 2); return token; } @@ -235,9 +239,16 @@ event [^,{}/]+ num_dec [0-9]+ num_hex 0x[a-fA-F0-9]{1,16} num_raw_hex [a-fA-F0-9]{1,16} -name [a-zA-Z0-9_*?\[\]][a-zA-Z0-9_*?.\[\]!\-]* -name_tag [\'][a-zA-Z0-9_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\'] -name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* +/* Regular pattern to match the token PE_NAME. */ +name_start [a-zA-Z0-9_*?\[\]] +name {name_start}[a-zA-Z0-9_*?.\[\]!\-]* +/* PE_NAME token when inside a config term list, allows ':'. */ +term_name {name_start}[a-zA-Z0-9_*?.\[\]!\-:]* +/* + * PE_NAME token when quoted, allows ':,.='. + * Matches the RHS of terms like: name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'. + */ +quoted_name [\']{name_start}[a-zA-Z0-9_*?.\[\]!\-:,\.=]*[\'] drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? /* * If you add a modifier you need to update check_modifier(). @@ -324,6 +335,7 @@ aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } aux-action { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); } +cpu { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CPU); } cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); } stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } @@ -341,7 +353,9 @@ r0x{num_raw_hex} { return str(yyscanner, PE_RAW); } {lc_type} { return lc_str(yyscanner, _parse_state); } {lc_type}-{lc_op_result} { return lc_str(yyscanner, _parse_state); } {lc_type}-{lc_op_result}-{lc_op_result} { return lc_str(yyscanner, _parse_state); } -{name_minus} { return str(yyscanner, PE_NAME); } +{num_dec} { return value(_parse_state, yyscanner, 10); } +{num_hex} { return value(_parse_state, yyscanner, 16); } +{term_name} { return str(yyscanner, PE_NAME); } @{drv_cfg_term} { return drv_str(yyscanner, PE_DRV_CFG_TERM); } } @@ -410,7 +424,7 @@ r{num_raw_hex} { return str(yyscanner, PE_RAW); } {modifier_event} { return modifiers(_parse_state, yyscanner); } {name} { return str(yyscanner, PE_NAME); } -{name_tag} { return str(yyscanner, PE_NAME); } +{quoted_name} { return quoted_str(yyscanner, PE_NAME); } "/" { BEGIN(config); return '/'; } , { BEGIN(event); return ','; } : { return ':'; } diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index c7f3543b9921..66b666d9ce64 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -79,24 +79,22 @@ static void __p_read_format(char *buf, size_t size, u64 value) #define ENUM_ID_TO_STR_CASE(x) case x: return (#x); static const char *stringify_perf_type_id(struct perf_pmu *pmu, u32 type) { - if (pmu) - return pmu->name; - switch (type) { ENUM_ID_TO_STR_CASE(PERF_TYPE_HARDWARE) ENUM_ID_TO_STR_CASE(PERF_TYPE_SOFTWARE) ENUM_ID_TO_STR_CASE(PERF_TYPE_TRACEPOINT) ENUM_ID_TO_STR_CASE(PERF_TYPE_HW_CACHE) - ENUM_ID_TO_STR_CASE(PERF_TYPE_RAW) ENUM_ID_TO_STR_CASE(PERF_TYPE_BREAKPOINT) + case PERF_TYPE_RAW: + return pmu ? pmu->name : "PERF_TYPE_RAW"; default: - return NULL; + return pmu ? 
pmu->name : NULL; } } static const char *stringify_perf_hw_id(u64 value) { - switch (value) { + switch (value & PERF_HW_EVENT_MASK) { ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CPU_CYCLES) ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_INSTRUCTIONS) ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_REFERENCES) @@ -169,79 +167,100 @@ static const char *stringify_perf_sw_id(u64 value) } #undef ENUM_ID_TO_STR_CASE -#define PRINT_ID(_s, _f) \ -do { \ - const char *__s = _s; \ - if (__s == NULL) \ - snprintf(buf, size, _f, value); \ - else \ - snprintf(buf, size, _f" (%s)", value, __s); \ -} while (0) -#define print_id_unsigned(_s) PRINT_ID(_s, "%"PRIu64) -#define print_id_hex(_s) PRINT_ID(_s, "%#"PRIx64) +static void print_id_unsigned(char *buf, size_t size, u64 value, const char *s) +{ + if (s == NULL) + snprintf(buf, size, "%"PRIu64, value); + else + snprintf(buf, size, "%"PRIu64" (%s)", value, s); +} + +static void print_id_hex(char *buf, size_t size, u64 value, const char *s) +{ + if (s == NULL) + snprintf(buf, size, "%#"PRIx64, value); + else + snprintf(buf, size, "%#"PRIx64" (%s)", value, s); +} -static void __p_type_id(struct perf_pmu *pmu, char *buf, size_t size, u64 value) +static void __p_type_id(char *buf, size_t size, struct perf_pmu *pmu, u32 type) { - print_id_unsigned(stringify_perf_type_id(pmu, value)); + print_id_unsigned(buf, size, type, stringify_perf_type_id(pmu, type)); } -static void __p_config_hw_id(char *buf, size_t size, u64 value) +static void __p_config_hw_id(char *buf, size_t size, struct perf_pmu *pmu, u64 config) { - print_id_hex(stringify_perf_hw_id(value)); + const char *name = stringify_perf_hw_id(config); + + if (name == NULL) { + if (pmu == NULL) { + snprintf(buf, size, "%#"PRIx64, config); + } else { + snprintf(buf, size, "%#"PRIx64" (%s/config=%#"PRIx64"/)", config, pmu->name, + config); + } + } else { + if (pmu == NULL) + snprintf(buf, size, "%#"PRIx64" (%s)", config, name); + else + snprintf(buf, size, "%#"PRIx64" (%s/%s/)", config, pmu->name, name); + } } -static void __p_config_sw_id(char *buf, size_t size, u64 value) +static void __p_config_sw_id(char *buf, size_t size, u64 id) { - print_id_hex(stringify_perf_sw_id(value)); + print_id_hex(buf, size, id, stringify_perf_sw_id(id)); } -static void __p_config_hw_cache_id(char *buf, size_t size, u64 value) +static void __p_config_hw_cache_id(char *buf, size_t size, struct perf_pmu *pmu, u64 config) { - const char *hw_cache_str = stringify_perf_hw_cache_id(value & 0xff); + const char *hw_cache_str = stringify_perf_hw_cache_id(config & 0xff); const char *hw_cache_op_str = - stringify_perf_hw_cache_op_id((value & 0xff00) >> 8); + stringify_perf_hw_cache_op_id((config & 0xff00) >> 8); const char *hw_cache_op_result_str = - stringify_perf_hw_cache_op_result_id((value & 0xff0000) >> 16); - - if (hw_cache_str == NULL || hw_cache_op_str == NULL || - hw_cache_op_result_str == NULL) { - snprintf(buf, size, "%#"PRIx64, value); + stringify_perf_hw_cache_op_result_id((config & 0xff0000) >> 16); + + if (hw_cache_str == NULL || hw_cache_op_str == NULL || hw_cache_op_result_str == NULL) { + if (pmu == NULL) { + snprintf(buf, size, "%#"PRIx64, config); + } else { + snprintf(buf, size, "%#"PRIx64" (%s/config=%#"PRIx64"/)", config, pmu->name, + config); + } } else { - snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", value, - hw_cache_op_result_str, hw_cache_op_str, hw_cache_str); + if (pmu == NULL) { + snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", config, + hw_cache_op_result_str, hw_cache_op_str, hw_cache_str); + } else { + snprintf(buf, size, 
"%#"PRIx64" (%s/%s | %s | %s/)", config, pmu->name, + hw_cache_op_result_str, hw_cache_op_str, hw_cache_str); + } } } -static void __p_config_tracepoint_id(char *buf, size_t size, u64 value) +static void __p_config_tracepoint_id(char *buf, size_t size, u64 id) { - char *str = tracepoint_id_to_name(value); + char *str = tracepoint_id_to_name(id); - print_id_hex(str); + print_id_hex(buf, size, id, str); free(str); } -static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 value) +static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 config) { - const char *name = perf_pmu__name_from_config(pmu, value); - - if (name) { - print_id_hex(name); - return; - } switch (type) { case PERF_TYPE_HARDWARE: - return __p_config_hw_id(buf, size, value); + return __p_config_hw_id(buf, size, pmu, config); case PERF_TYPE_SOFTWARE: - return __p_config_sw_id(buf, size, value); + return __p_config_sw_id(buf, size, config); case PERF_TYPE_HW_CACHE: - return __p_config_hw_cache_id(buf, size, value); + return __p_config_hw_cache_id(buf, size, pmu, config); case PERF_TYPE_TRACEPOINT: - return __p_config_tracepoint_id(buf, size, value); + return __p_config_tracepoint_id(buf, size, config); case PERF_TYPE_RAW: case PERF_TYPE_BREAKPOINT: default: - snprintf(buf, size, "%#"PRIx64, value); - return; + return print_id_hex(buf, size, config, perf_pmu__name_from_config(pmu, config)); } } @@ -253,7 +272,7 @@ static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type #define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) #define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val) #define p_read_format(val) __p_read_format(buf, BUF_SIZE, val) -#define p_type_id(val) __p_type_id(pmu, buf, BUF_SIZE, val) +#define p_type_id(val) __p_type_id(buf, BUF_SIZE, pmu, val) #define p_config_id(val) __p_config_id(pmu, buf, BUF_SIZE, attr->type, val) #define PRINT_ATTRn(_n, _f, _p, _a) \ @@ -273,6 +292,13 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, char buf[BUF_SIZE]; int ret = 0; + if (!pmu && (attr->type == PERF_TYPE_HARDWARE || attr->type == PERF_TYPE_HW_CACHE)) { + u32 extended_type = attr->config >> PERF_PMU_TYPE_SHIFT; + + if (extended_type) + pmu = perf_pmus__find_by_type(extended_type); + } + PRINT_ATTRn("type", type, p_type_id, true); PRINT_ATTRf(size, p_unsigned); PRINT_ATTRn("config", config, p_config_id, true); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 6206c8fe2bf9..609828513f6c 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -13,6 +13,7 @@ #include <dirent.h> #include <api/fs/fs.h> #include <api/io.h> +#include <api/io_dir.h> #include <locale.h> #include <fnmatch.h> #include <math.h> @@ -26,6 +27,7 @@ #include <util/pmu-flex.h> #include "parse-events.h" #include "print-events.h" +#include "hashmap.h" #include "header.h" #include "string2.h" #include "strbuf.h" @@ -36,12 +38,12 @@ #define UNIT_MAX_LEN 31 /* max length for event unit name */ enum event_source { - /* An event loaded from /sys/devices/<pmu>/events. */ + /* An event loaded from /sys/bus/event_source/devices/<pmu>/events. */ EVENT_SRC_SYSFS, /* An event loaded from a CPUID matched json file. */ EVENT_SRC_CPU_JSON, /* - * An event loaded from a /sys/devices/<pmu>/identifier matched json + * An event loaded from a /sys/bus/event_source/devices/<pmu>/identifier matched json * file. 
*/ EVENT_SRC_SYS_JSON, @@ -65,8 +67,6 @@ struct perf_pmu_alias { char *topic; /** @terms: Owned list of the original parsed parameters. */ struct parse_events_terms terms; - /** @list: List element of struct perf_pmu aliases. */ - struct list_head list; /** * @pmu_name: The name copied from the json struct pmu_event. This can * differ from the PMU name as it won't have suffixes. @@ -76,6 +76,12 @@ struct perf_pmu_alias { char unit[UNIT_MAX_LEN+1]; /** @scale: Value to scale read counter values by. */ double scale; + /** @retirement_latency_mean: Value to be given for unsampled retirement latency mean. */ + double retirement_latency_mean; + /** @retirement_latency_min: Value to be given for unsampled retirement latency min. */ + double retirement_latency_min; + /** @retirement_latency_max: Value to be given for unsampled retirement latency max. */ + double retirement_latency_max; /** * @per_pkg: Does the file * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.per-pkg or @@ -195,19 +201,17 @@ static void perf_pmu_format__load(const struct perf_pmu *pmu, struct perf_pmu_fo */ static int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load) { - struct dirent *evt_ent; - DIR *format_dir; + struct io_dirent64 *evt_ent; + struct io_dir format_dir; int ret = 0; - format_dir = fdopendir(dirfd); - if (!format_dir) - return -EINVAL; + io_dir__init(&format_dir, dirfd); - while ((evt_ent = readdir(format_dir)) != NULL) { + while ((evt_ent = io_dir__readdir(&format_dir)) != NULL) { struct perf_pmu_format *format; char *name = evt_ent->d_name; - if (!strcmp(name, ".") || !strcmp(name, "..")) + if (io_dir__is_dir(&format_dir, evt_ent)) continue; format = perf_pmu__new_format(&pmu->format, name); @@ -234,7 +238,7 @@ static int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_lo } } - closedir(format_dir); + close(format_dir.dirfd); return ret; } @@ -258,7 +262,7 @@ static int pmu_format(struct perf_pmu *pmu, int dirfd, const char *name, bool ea return 0; } -int perf_pmu__convert_scale(const char *scale, char **end, double *sval) +static int parse_double(const char *scale, char **end, double *sval) { char *lc; int ret = 0; @@ -295,6 +299,11 @@ out: return ret; } +int perf_pmu__convert_scale(const char *scale, char **end, double *sval) +{ + return parse_double(scale, end, sval); +} + static int perf_pmu__parse_scale(struct perf_pmu *pmu, struct perf_pmu_alias *alias) { struct stat st; @@ -408,25 +417,33 @@ static void perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias } /* Delete an alias entry. 
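/*
 * parse_double(), factored out of perf_pmu__convert_scale() above, keeps
 * the locale-forcing behaviour so json strings such as the new
 * retirement_latency_* fields parse identically everywhere. Sketch
 * (value hypothetical, assert() only for exposition):
 */
double mean;

if (parse_double("2.5", /*end=*/NULL, &mean) == 0)
        assert(mean == 2.5);        /* unaffected by e.g. LC_NUMERIC=de_DE */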
*/ -static void perf_pmu_free_alias(struct perf_pmu_alias *newalias) +static void perf_pmu_free_alias(struct perf_pmu_alias *alias) { - zfree(&newalias->name); - zfree(&newalias->desc); - zfree(&newalias->long_desc); - zfree(&newalias->topic); - zfree(&newalias->pmu_name); - parse_events_terms__exit(&newalias->terms); - free(newalias); + if (!alias) + return; + + zfree(&alias->name); + zfree(&alias->desc); + zfree(&alias->long_desc); + zfree(&alias->topic); + zfree(&alias->pmu_name); + parse_events_terms__exit(&alias->terms); + free(alias); } static void perf_pmu__del_aliases(struct perf_pmu *pmu) { - struct perf_pmu_alias *alias, *tmp; + struct hashmap_entry *entry; + size_t bkt; - list_for_each_entry_safe(alias, tmp, &pmu->aliases, list) { - list_del(&alias->list); - perf_pmu_free_alias(alias); - } + if (!pmu->aliases) + return; + + hashmap__for_each_entry(pmu->aliases, entry, bkt) + perf_pmu_free_alias(entry->pvalue); + + hashmap__free(pmu->aliases); + pmu->aliases = NULL; } static struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu, @@ -434,35 +451,37 @@ static struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu, bool load) { struct perf_pmu_alias *alias; + bool has_sysfs_event; + char event_file_name[FILENAME_MAX + 8]; - if (load && !pmu->sysfs_aliases_loaded) { - bool has_sysfs_event; - char event_file_name[FILENAME_MAX + 8]; + if (hashmap__find(pmu->aliases, name, &alias)) + return alias; - /* - * Test if alias/event 'name' exists in the PMU's sysfs/events - * directory. If not skip parsing the sysfs aliases. Sysfs event - * name must be all lower or all upper case. - */ - scnprintf(event_file_name, sizeof(event_file_name), "events/%s", name); - for (size_t i = 7, n = 7 + strlen(name); i < n; i++) - event_file_name[i] = tolower(event_file_name[i]); + if (!load || pmu->sysfs_aliases_loaded) + return NULL; - has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); - if (!has_sysfs_event) { - for (size_t i = 7, n = 7 + strlen(name); i < n; i++) - event_file_name[i] = toupper(event_file_name[i]); + /* + * Test if alias/event 'name' exists in the PMU's sysfs/events + * directory. If not skip parsing the sysfs aliases. Sysfs event + * name must be all lower or all upper case. 
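/*
 * With the list-to-hashmap conversion above, alias lookup is a single
 * case-insensitive probe rather than a list walk. Illustration (event
 * name hypothetical):
 */
struct perf_pmu_alias *alias;

if (hashmap__find(pmu->aliases, "inst_retired.any", &alias))
        pr_debug("found alias %s\n", alias->name);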
+ */ + scnprintf(event_file_name, sizeof(event_file_name), "events/%s", name); + for (size_t i = 7, n = 7 + strlen(name); i < n; i++) + event_file_name[i] = tolower(event_file_name[i]); - has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); - } - if (has_sysfs_event) - pmu_aliases_parse(pmu); + has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); + if (!has_sysfs_event) { + for (size_t i = 7, n = 7 + strlen(name); i < n; i++) + event_file_name[i] = toupper(event_file_name[i]); + has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); } - list_for_each_entry(alias, &pmu->aliases, list) { - if (!strcasecmp(alias->name, name)) + if (has_sysfs_event) { + pmu_aliases_parse(pmu); + if (hashmap__find(pmu->aliases, name, &alias)) return alias; } + return NULL; } @@ -526,6 +545,18 @@ static int update_alias(const struct pmu_event *pe, if (!ret) snprintf(data->alias->unit, sizeof(data->alias->unit), "%s", unit); } + if (!ret && pe->retirement_latency_mean) { + ret = parse_double(pe->retirement_latency_mean, NULL, + &data->alias->retirement_latency_mean); + } + if (!ret && pe->retirement_latency_min) { + ret = parse_double(pe->retirement_latency_min, NULL, + &data->alias->retirement_latency_min); + } + if (!ret && pe->retirement_latency_max) { + ret = parse_double(pe->retirement_latency_max, NULL, + &data->alias->retirement_latency_max); + } return ret; } @@ -533,8 +564,8 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, const char *desc, const char *val, FILE *val_fd, const struct pmu_event *pe, enum event_source src) { - struct perf_pmu_alias *alias; - int ret; + struct perf_pmu_alias *alias, *old_alias; + int ret = 0; const char *long_desc = NULL, *topic = NULL, *unit = NULL, *pmu_name = NULL; bool deprecated = false, perpkg = false; @@ -563,6 +594,24 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, alias->per_pkg = perpkg; alias->snapshot = false; alias->deprecated = deprecated; + alias->retirement_latency_mean = 0.0; + alias->retirement_latency_min = 0.0; + alias->retirement_latency_max = 0.0; + + if (!ret && pe && pe->retirement_latency_mean) { + ret = parse_double(pe->retirement_latency_mean, NULL, + &alias->retirement_latency_mean); + } + if (!ret && pe && pe->retirement_latency_min) { + ret = parse_double(pe->retirement_latency_min, NULL, + &alias->retirement_latency_min); + } + if (!ret && pe && pe->retirement_latency_max) { + ret = parse_double(pe->retirement_latency_max, NULL, + &alias->retirement_latency_max); + } + if (ret) + return ret; ret = parse_events_terms(&alias->terms, val, val_fd); if (ret) { @@ -596,7 +645,7 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, }; if (pmu_events_table__find_event(pmu->events_table, pmu, name, update_alias, &data) == 0) - pmu->cpu_json_aliases++; + pmu->cpu_common_json_aliases++; } pmu->sysfs_aliases++; break; @@ -608,7 +657,8 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, break; } - list_add_tail(&alias->list, &pmu->aliases); + hashmap__set(pmu->aliases, alias->name, alias, /*old_key=*/ NULL, &old_alias); + perf_pmu_free_alias(old_alias); return 0; } @@ -635,14 +685,12 @@ static inline bool pmu_alias_info_file(const char *name) */ static int __pmu_aliases_parse(struct perf_pmu *pmu, int events_dir_fd) { - struct dirent *evt_ent; - DIR *event_dir; + struct io_dirent64 *evt_ent; + struct io_dir event_dir; - event_dir = fdopendir(events_dir_fd); - if (!event_dir) - return -EINVAL; + io_dir__init(&event_dir, events_dir_fd); - 
while ((evt_ent = readdir(event_dir))) { + while ((evt_ent = io_dir__readdir(&event_dir))) { char *name = evt_ent->d_name; int fd; FILE *file; @@ -674,7 +722,6 @@ static int __pmu_aliases_parse(struct perf_pmu *pmu, int events_dir_fd) fclose(file); } - closedir(event_dir); pmu->sysfs_aliases_loaded = true; return 0; } @@ -783,7 +830,7 @@ static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *pmu_name, bool is } /* Nothing found, for core PMUs assume this means all CPUs. */ - return is_core ? perf_cpu_map__get(cpu_map__online()) : NULL; + return is_core ? cpu_map__online() : NULL; } static bool pmu_is_uncore(int dirfd, const char *name) @@ -847,21 +894,23 @@ static size_t pmu_deduped_name_len(const struct perf_pmu *pmu, const char *name, } /** - * perf_pmu__match_ignoring_suffix - Does the pmu_name match tok ignoring any - * trailing suffix? The Suffix must be in form - * tok_{digits}, or tok{digits}. + * perf_pmu__match_wildcard - Does the pmu_name start with tok and is then only + * followed by nothing or a suffix? tok may contain + * part of a suffix. * @pmu_name: The pmu_name with possible suffix. - * @tok: The possible match to pmu_name without suffix. + * @tok: The wildcard argument to match. */ -static bool perf_pmu__match_ignoring_suffix(const char *pmu_name, const char *tok) +static bool perf_pmu__match_wildcard(const char *pmu_name, const char *tok) { const char *p, *suffix; bool has_hex = false; + size_t tok_len = strlen(tok); - if (strncmp(pmu_name, tok, strlen(tok))) + /* Check start of pmu_name for equality. */ + if (strncmp(pmu_name, tok, tok_len)) return false; - suffix = p = pmu_name + strlen(tok); + suffix = p = pmu_name + tok_len; if (*p == 0) return true; @@ -887,60 +936,84 @@ static bool perf_pmu__match_ignoring_suffix(const char *pmu_name, const char *to } /** - * pmu_uncore_alias_match - does name match the PMU name? - * @pmu_name: the json struct pmu_event name. This may lack a suffix (which + * perf_pmu__match_ignoring_suffix_uncore - Does the pmu_name match tok ignoring + * any trailing suffix on pmu_name and + * tok? The Suffix must be in form + * tok_{digits}, or tok{digits}. + * @pmu_name: The pmu_name with possible suffix. + * @tok: The possible match to pmu_name. + */ +static bool perf_pmu__match_ignoring_suffix_uncore(const char *pmu_name, const char *tok) +{ + size_t pmu_name_len, tok_len; + + /* For robustness, check for NULL. */ + if (pmu_name == NULL) + return tok == NULL; + + /* uncore_ prefixes are ignored. */ + if (!strncmp(pmu_name, "uncore_", 7)) + pmu_name += 7; + if (!strncmp(tok, "uncore_", 7)) + tok += 7; + + pmu_name_len = pmu_name_len_no_suffix(pmu_name); + tok_len = pmu_name_len_no_suffix(tok); + if (pmu_name_len != tok_len) + return false; + + return strncmp(pmu_name, tok, pmu_name_len) == 0; +} + + +/** + * perf_pmu__match_wildcard_uncore - does to_match match the PMU's name? + * @pmu_name: The pmu->name or pmu->alias to match against. + * @to_match: the json struct pmu_event name. This may lack a suffix (which * matches) or be of the form "socket,pmuname" which will match * "socketX_pmunameY". - * @name: a real full PMU name as from sysfs. 
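/*
 * Behaviour sketch for the rewritten uncore matchers in this hunk
 * (names and results illustrative; assert() only for exposition):
 */
assert(perf_pmu__match_wildcard_uncore("uncore_cbox_0", "cbox"));
assert(perf_pmu__match_wildcard_uncore("socket0_pmuname1", "socket,pmuname"));
assert(!perf_pmu__match_wildcard_uncore("uncore_cha_1", "cbox"));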
*/ -static bool pmu_uncore_alias_match(const char *pmu_name, const char *name) +static bool perf_pmu__match_wildcard_uncore(const char *pmu_name, const char *to_match) { - char *tmp = NULL, *tok, *str; - bool res; - - if (strchr(pmu_name, ',') == NULL) - return perf_pmu__match_ignoring_suffix(name, pmu_name); + char *mutable_to_match, *tok, *tmp; - str = strdup(pmu_name); - if (!str) + if (!pmu_name) return false; - /* - * uncore alias may be from different PMU with common prefix - */ - tok = strtok_r(str, ",", &tmp); - if (strncmp(pmu_name, tok, strlen(tok))) { - res = false; - goto out; - } + /* uncore_ prefixes are ignored. */ + if (!strncmp(pmu_name, "uncore_", 7)) + pmu_name += 7; + if (!strncmp(to_match, "uncore_", 7)) + to_match += 7; - /* - * Match more complex aliases where the alias name is a comma-delimited - * list of tokens, orderly contained in the matching PMU name. - * - * Example: For alias "socket,pmuname" and PMU "socketX_pmunameY", we - * match "socket" in "socketX_pmunameY" and then "pmuname" in - * "pmunameY". - */ - while (1) { - char *next_tok = strtok_r(NULL, ",", &tmp); + if (strchr(to_match, ',') == NULL) + return perf_pmu__match_wildcard(pmu_name, to_match); - name = strstr(name, tok); - if (!name || - (!next_tok && !perf_pmu__match_ignoring_suffix(name, tok))) { - res = false; - goto out; + /* Process comma separated list of PMU name components. */ + mutable_to_match = strdup(to_match); + if (!mutable_to_match) + return false; + + tok = strtok_r(mutable_to_match, ",", &tmp); + while (tok) { + size_t tok_len = strlen(tok); + + if (strncmp(pmu_name, tok, tok_len)) { + /* Mismatch between part of pmu_name and tok. */ + free(mutable_to_match); + return false; } - if (!next_tok) - break; - tok = next_tok; - name += strlen(tok); + /* Move pmu_name forward over tok and suffix. */ + pmu_name += tok_len; + while (*pmu_name != '\0' && isdigit(*pmu_name)) + pmu_name++; + if (*pmu_name == '_') + pmu_name++; + + tok = strtok_r(NULL, ",", &tmp); } - - res = true; -out: - free(str); - return res; + free(mutable_to_match); + return *pmu_name == '\0'; } bool pmu_uncore_identifier_match(const char *compat, const char *id) @@ -1003,11 +1076,19 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe, { struct perf_pmu *pmu = vdata; - if (!pe->compat || !pe->pmu) + if (!pe->compat || !pe->pmu) { + /* No data to match. */ return 0; + } + + if (!perf_pmu__match_wildcard_uncore(pmu->name, pe->pmu) && + !perf_pmu__match_wildcard_uncore(pmu->alias_name, pe->pmu)) { + /* PMU name/alias_name don't match. */ + return 0; + } - if (pmu_uncore_alias_match(pe->pmu, pmu->name) && - pmu_uncore_identifier_match(pe->compat, pmu->id)) { + if (pmu_uncore_identifier_match(pe->compat, pmu->id)) { + /* Id matched. */ perf_pmu__new_alias(pmu, pe->name, pe->desc, @@ -1016,7 +1097,6 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe, pe, EVENT_SRC_SYS_JSON); } - return 0; } @@ -1066,43 +1146,77 @@ perf_pmu__arch_init(struct perf_pmu *pmu) pmu->mem_events = perf_mem_events; } +/* Variant of str_hash that does tolower on each character. 
*/ +static size_t aliases__hash(long key, void *ctx __maybe_unused) +{ + const char *s = (const char *)key; + size_t h = 0; + + while (*s) { + h = h * 31 + tolower(*s); + s++; + } + return h; +} + +static bool aliases__equal(long key1, long key2, void *ctx __maybe_unused) +{ + return strcasecmp((const char *)key1, (const char *)key2) == 0; +} + +int perf_pmu__init(struct perf_pmu *pmu, __u32 type, const char *name) +{ + pmu->type = type; + INIT_LIST_HEAD(&pmu->format); + INIT_LIST_HEAD(&pmu->caps); + + pmu->name = strdup(name); + if (!pmu->name) + return -ENOMEM; + + pmu->aliases = hashmap__new(aliases__hash, aliases__equal, /*ctx=*/ NULL); + if (!pmu->aliases) + return -ENOMEM; + + return 0; +} + struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *name, bool eager_load) { struct perf_pmu *pmu; - __u32 type; pmu = zalloc(sizeof(*pmu)); if (!pmu) return NULL; - pmu->name = strdup(name); - if (!pmu->name) - goto err; + if (perf_pmu__init(pmu, PERF_PMU_TYPE_FAKE, name) != 0) { + perf_pmu__delete(pmu); + return NULL; + } /* * Read type early to fail fast if a lookup name isn't a PMU. Ensure * that type value is successfully assigned (return 1). */ - if (perf_pmu__scan_file_at(pmu, dirfd, "type", "%u", &type) != 1) - goto err; - - INIT_LIST_HEAD(&pmu->format); - INIT_LIST_HEAD(&pmu->aliases); - INIT_LIST_HEAD(&pmu->caps); + if (perf_pmu__scan_file_at(pmu, dirfd, "type", "%u", &pmu->type) != 1) { + perf_pmu__delete(pmu); + return NULL; + } /* * The pmu data we store & need consists of the pmu * type value and format definitions. Load both right * now. */ - if (pmu_format(pmu, dirfd, name, eager_load)) - goto err; + if (pmu_format(pmu, dirfd, name, eager_load)) { + perf_pmu__delete(pmu); + return NULL; + } pmu->is_core = is_pmu_core(name); pmu->cpus = pmu_cpumask(dirfd, name, pmu->is_core); - pmu->type = type; pmu->is_uncore = pmu_is_uncore(dirfd, name); if (pmu->is_uncore) pmu->id = pmu_id(name); @@ -1124,10 +1238,6 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char pmu_aliases_parse_eager(pmu, dirfd); return pmu; -err: - zfree(&pmu->name); - free(pmu); - return NULL; } /* Creates the PMU when sysfs scanning fails. */ @@ -1149,7 +1259,7 @@ struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pm pmu->cpus = cpu_map__online(); INIT_LIST_HEAD(&pmu->format); - INIT_LIST_HEAD(&pmu->aliases); + pmu->aliases = hashmap__new(aliases__hash, aliases__equal, /*ctx=*/ NULL); INIT_LIST_HEAD(&pmu->caps); list_add_tail(&pmu->list, core_pmus); return pmu; @@ -1400,7 +1510,7 @@ static int pmu_config_term(const struct perf_pmu *pmu, break; case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */ return -EINVAL; - case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_CPU: /* Skip non-config terms. 
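/*
 * The aliases__hash()/aliases__equal() pair earlier in this hunk must
 * agree on case-insensitivity for the hashmap to work. Illustration
 * (assert() only for exposition):
 */
assert(aliases__hash((long)"Cycles", /*ctx=*/NULL) ==
       aliases__hash((long)"cycles", /*ctx=*/NULL));
assert(aliases__equal((long)"Cycles", (long)"cycles", /*ctx=*/NULL));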
*/ break; default: @@ -1649,6 +1759,9 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ info->unit = NULL; info->scale = 0.0; info->snapshot = false; + info->retirement_latency_mean = 0.0; + info->retirement_latency_min = 0.0; + info->retirement_latency_max = 0.0; if (perf_pmu__is_hwmon(pmu)) { ret = hwmon_pmu__check_alias(head_terms, info, err); @@ -1682,6 +1795,10 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ if (term->alternate_hw_config) *alternate_hw_config = term->val.num; + info->retirement_latency_mean = alias->retirement_latency_mean; + info->retirement_latency_min = alias->retirement_latency_min; + info->retirement_latency_max = alias->retirement_latency_max; + list_del_init(&term->list); parse_events_term__delete(term); } @@ -1775,6 +1892,7 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call "aux-output", "aux-action=(pause|resume|start-paused)", "aux-sample-size=number", + "cpu=number", }; struct perf_pmu_format *format; int ret; @@ -1851,9 +1969,10 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu) if (pmu->cpu_aliases_added) nr += pmu->cpu_json_aliases; else if (pmu->events_table) - nr += pmu_events_table__num_events(pmu->events_table, pmu) - pmu->cpu_json_aliases; + nr += pmu_events_table__num_events(pmu->events_table, pmu) - + pmu->cpu_common_json_aliases; else - assert(pmu->cpu_json_aliases == 0); + assert(pmu->cpu_json_aliases == 0 && pmu->cpu_common_json_aliases == 0); if (perf_pmu__is_tool(pmu)) nr -= tool_pmu__num_skip_events(); @@ -1900,13 +2019,14 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, void *state, pmu_event_callback cb) { char buf[1024]; - struct perf_pmu_alias *event; struct pmu_event_info info = { .pmu = pmu, .event_type_desc = "Kernel PMU event", }; int ret = 0; struct strbuf sb; + struct hashmap_entry *entry; + size_t bkt; if (perf_pmu__is_hwmon(pmu)) return hwmon_pmu__for_each_event(pmu, state, cb); @@ -1914,7 +2034,8 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, strbuf_init(&sb, /*hint=*/ 0); pmu_aliases_parse(pmu); pmu_add_cpu_aliases(pmu); - list_for_each_entry(event, &pmu->aliases, list) { + hashmap__for_each_entry(pmu->aliases, entry, bkt) { + struct perf_pmu_alias *event = entry->pvalue; size_t buf_used, pmu_name_len; if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(event->name)) @@ -1974,15 +2095,85 @@ out: return ret; } -bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name) +static bool perf_pmu___name_match(const struct perf_pmu *pmu, const char *to_match, bool wildcard) { - return !strcmp(pmu->name, pmu_name) || - (pmu->is_uncore && pmu_uncore_alias_match(pmu_name, pmu->name)) || + const char *names[2] = { + pmu->name, + pmu->alias_name, + }; + if (pmu->is_core) { + for (size_t i = 0; i < ARRAY_SIZE(names); i++) { + const char *name = names[i]; + + if (!name) + continue; + + if (!strcmp(name, to_match)) { + /* Exact name match. */ + return true; + } + } + if (!strcmp(to_match, "default_core")) { + /* + * jevents and tests use default_core as a marker for any core + * PMU as the PMU name varies across architectures. + */ + return true; + } + return false; + } + if (!pmu->is_uncore) { /* - * jevents and tests use default_core as a marker for any core - * PMU as the PMU name varies across architectures. + * PMU isn't core or uncore, some kind of broken CPU mask + * situation. Only match exact name. 
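/*
 * Worked example for the perf_pmu__num_events() change above (numbers
 * hypothetical): with 10 sysfs events and a json table of 50 events of
 * which 4 duplicate sysfs ones, the table path reports
 * 10 + (50 - 4) = 56; the 4 overlaps are tracked in the new
 * cpu_common_json_aliases counter so they no longer deflate
 * cpu_json_aliases, which counts json events actually added.
 */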
*/ - (pmu->is_core && !strcmp(pmu_name, "default_core")); + for (size_t i = 0; i < ARRAY_SIZE(names); i++) { + const char *name = names[i]; + + if (!name) + continue; + + if (!strcmp(name, to_match)) { + /* Exact name match. */ + return true; + } + } + return false; + } + for (size_t i = 0; i < ARRAY_SIZE(names); i++) { + const char *name = names[i]; + + if (!name) + continue; + + if (wildcard && perf_pmu__match_wildcard_uncore(name, to_match)) + return true; + if (!wildcard && perf_pmu__match_ignoring_suffix_uncore(name, to_match)) + return true; + } + return false; +} + +/** + * perf_pmu__name_wildcard_match - Called by the jevents generated code to see + * if pmu matches the json to_match string. + * @pmu: The pmu whose name/alias to match. + * @to_match: The possible match to pmu_name. + */ +bool perf_pmu__name_wildcard_match(const struct perf_pmu *pmu, const char *to_match) +{ + return perf_pmu___name_match(pmu, to_match, /*wildcard=*/true); +} + +/** + * perf_pmu__name_no_suffix_match - Does pmu's name match to_match ignoring any + * trailing suffix on the pmu_name and/or tok? + * @pmu: The pmu whose name/alias to match. + * @to_match: The possible match to pmu_name. + */ +bool perf_pmu__name_no_suffix_match(const struct perf_pmu *pmu, const char *to_match) +{ + return perf_pmu___name_match(pmu, to_match, /*wildcard=*/false); } bool perf_pmu__is_software(const struct perf_pmu *pmu) @@ -2114,6 +2305,17 @@ static void perf_pmu__del_caps(struct perf_pmu *pmu) } } +struct perf_pmu_caps *perf_pmu__get_cap(struct perf_pmu *pmu, const char *name) +{ + struct perf_pmu_caps *caps; + + list_for_each_entry(caps, &pmu->caps, list) { + if (!strcmp(caps->name, name)) + return caps; + } + return NULL; +} + /* * Reading/parsing the given pmu capabilities, which should be located at: * /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes. 
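/*
 * Usage sketch for the new perf_pmu__get_cap() above (cap name
 * illustrative; "branches" is exposed by x86 core PMUs):
 */
struct perf_pmu_caps *caps = perf_pmu__get_cap(pmu, "branches");

if (caps)
        pr_debug("LBR depth: %s\n", caps->value);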
@@ -2121,10 +2323,9 @@ static void perf_pmu__del_caps(struct perf_pmu *pmu) */ int perf_pmu__caps_parse(struct perf_pmu *pmu) { - struct stat st; char caps_path[PATH_MAX]; - DIR *caps_dir; - struct dirent *evt_ent; + struct io_dir caps_dir; + struct io_dirent64 *evt_ent; int caps_fd; if (pmu->caps_initialized) @@ -2135,24 +2336,21 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) if (!perf_pmu__pathname_scnprintf(caps_path, sizeof(caps_path), pmu->name, "caps")) return -1; - if (stat(caps_path, &st) < 0) { + caps_fd = open(caps_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY); + if (caps_fd == -1) { pmu->caps_initialized = true; return 0; /* no error if caps does not exist */ } - caps_dir = opendir(caps_path); - if (!caps_dir) - return -EINVAL; + io_dir__init(&caps_dir, caps_fd); - caps_fd = dirfd(caps_dir); - - while ((evt_ent = readdir(caps_dir)) != NULL) { + while ((evt_ent = io_dir__readdir(&caps_dir)) != NULL) { char *name = evt_ent->d_name; char value[128]; FILE *file; int fd; - if (!strcmp(name, ".") || !strcmp(name, "..")) + if (io_dir__is_dir(&caps_dir, evt_ent)) continue; fd = openat(caps_fd, name, O_RDONLY); @@ -2174,7 +2372,7 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) fclose(file); } - closedir(caps_dir); + close(caps_fd); pmu->caps_initialized = true; return pmu->nr_caps; @@ -2229,29 +2427,31 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, name ?: "N/A", buf, config_name, config); } -bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok) +bool perf_pmu__wildcard_match(const struct perf_pmu *pmu, const char *wildcard_to_match) { - const char *name = pmu->name; - bool need_fnmatch = strisglob(tok); + const char *names[2] = { + pmu->name, + pmu->alias_name, + }; + bool need_fnmatch = strisglob(wildcard_to_match); - if (!strncmp(tok, "uncore_", 7)) - tok += 7; - if (!strncmp(name, "uncore_", 7)) - name += 7; + if (!strncmp(wildcard_to_match, "uncore_", 7)) + wildcard_to_match += 7; - if (perf_pmu__match_ignoring_suffix(name, tok) || - (need_fnmatch && !fnmatch(tok, name, 0))) - return true; + for (size_t i = 0; i < ARRAY_SIZE(names); i++) { + const char *pmu_name = names[i]; - name = pmu->alias_name; - if (!name) - return false; + if (!pmu_name) + continue; - if (!strncmp(name, "uncore_", 7)) - name += 7; + if (!strncmp(pmu_name, "uncore_", 7)) + pmu_name += 7; - return perf_pmu__match_ignoring_suffix(name, tok) || - (need_fnmatch && !fnmatch(tok, name, 0)); + if (perf_pmu__match_wildcard(pmu_name, wildcard_to_match) || + (need_fnmatch && !fnmatch(wildcard_to_match, pmu_name, 0))) + return true; + } + return false; } int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size) @@ -2306,6 +2506,9 @@ int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, void perf_pmu__delete(struct perf_pmu *pmu) { + if (!pmu) + return; + if (perf_pmu__is_hwmon(pmu)) hwmon_pmu__exit(pmu); @@ -2323,14 +2526,16 @@ void perf_pmu__delete(struct perf_pmu *pmu) const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config) { - struct perf_pmu_alias *event; + struct hashmap_entry *entry; + size_t bkt; if (!pmu) return NULL; pmu_aliases_parse(pmu); pmu_add_cpu_aliases(pmu); - list_for_each_entry(event, &pmu->aliases, list) { + hashmap__for_each_entry(pmu->aliases, entry, bkt) { + struct perf_pmu_alias *event = entry->pvalue; struct perf_event_attr attr = {.config = 0,}; int ret = perf_pmu__config(pmu, &attr, &event->terms, /*apply_hardcoded=*/true, diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 
dbed6c243a5e..71b8636fd07d 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -14,6 +14,7 @@ #include "mem-events.h" struct evsel_config_term; +struct hashmap; struct perf_cpu_map; struct print_callbacks; @@ -37,6 +38,8 @@ struct perf_pmu_caps { }; enum { + PERF_PMU_TYPE_PE_START = 0, + PERF_PMU_TYPE_PE_END = 0xFFFEFFFF, PERF_PMU_TYPE_HWMON_START = 0xFFFF0000, PERF_PMU_TYPE_HWMON_END = 0xFFFFFFFD, PERF_PMU_TYPE_TOOL = 0xFFFFFFFE, @@ -123,7 +126,7 @@ struct perf_pmu { * event read from <sysfs>/bus/event_source/devices/<name>/events/ or * from json events in pmu-events.c. */ - struct list_head aliases; + struct hashmap *aliases; /** * @events_table: The events table for json events in pmu-events.c. */ @@ -134,6 +137,11 @@ struct perf_pmu { uint32_t cpu_json_aliases; /** @sys_json_aliases: Number of json event aliases loaded matching the PMU's identifier. */ uint32_t sys_json_aliases; + /** + * @cpu_common_json_aliases: Number of json events that overlapped with sysfs when + * loading all sysfs events. + */ + uint32_t cpu_common_json_aliases; /** @sysfs_aliases_loaded: Are sysfs aliases loaded from disk? */ bool sysfs_aliases_loaded; /** @@ -187,6 +195,9 @@ struct perf_pmu { struct perf_pmu_info { const char *unit; double scale; + double retirement_latency_mean; + double retirement_latency_min; + double retirement_latency_max; bool per_pkg; bool snapshot; }; @@ -238,7 +249,8 @@ bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name); size_t perf_pmu__num_events(struct perf_pmu *pmu); int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, void *state, pmu_event_callback cb); -bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name); +bool perf_pmu__name_wildcard_match(const struct perf_pmu *pmu, const char *to_match); +bool perf_pmu__name_no_suffix_match(const struct perf_pmu *pmu, const char *to_match); /** * perf_pmu_is_software - is the PMU a software PMU as in it uses the @@ -266,6 +278,8 @@ bool pmu_uncore_identifier_match(const char *compat, const char *id); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); +struct perf_pmu_caps *perf_pmu__get_cap(struct perf_pmu *pmu, const char *name); + int perf_pmu__caps_parse(struct perf_pmu *pmu); void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, @@ -273,7 +287,7 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, const char *config_name); void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu); -bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok); +bool perf_pmu__wildcard_match(const struct perf_pmu *pmu, const char *wildcard_to_match); int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size); int perf_pmu__pathname_scnprintf(char *buf, size_t size, @@ -281,6 +295,7 @@ int perf_pmu__pathname_scnprintf(char *buf, size_t size, int perf_pmu__event_source_devices_fd(void); int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, int flags); +int perf_pmu__init(struct perf_pmu *pmu, __u32 type, const char *name); struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name, bool eager_load); struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus); diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index b493da0d22ef..3bbd26fec78a 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -3,10 +3,10 @@ #include <linux/list_sort.h> #include <linux/string.h> #include <linux/zalloc.h> +#include 
<api/io_dir.h> #include <subcmd/pager.h> #include <sys/types.h> #include <ctype.h> -#include <dirent.h> #include <pthread.h> #include <string.h> #include <unistd.h> @@ -37,10 +37,25 @@ */ static LIST_HEAD(core_pmus); static LIST_HEAD(other_pmus); -static bool read_sysfs_core_pmus; -static bool read_sysfs_all_pmus; +enum perf_tool_pmu_type { + PERF_TOOL_PMU_TYPE_PE_CORE, + PERF_TOOL_PMU_TYPE_PE_OTHER, + PERF_TOOL_PMU_TYPE_TOOL, + PERF_TOOL_PMU_TYPE_HWMON, + +#define PERF_TOOL_PMU_TYPE_PE_CORE_MASK (1 << PERF_TOOL_PMU_TYPE_PE_CORE) +#define PERF_TOOL_PMU_TYPE_PE_OTHER_MASK (1 << PERF_TOOL_PMU_TYPE_PE_OTHER) +#define PERF_TOOL_PMU_TYPE_TOOL_MASK (1 << PERF_TOOL_PMU_TYPE_TOOL) +#define PERF_TOOL_PMU_TYPE_HWMON_MASK (1 << PERF_TOOL_PMU_TYPE_HWMON) + +#define PERF_TOOL_PMU_TYPE_ALL_MASK (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | \ + PERF_TOOL_PMU_TYPE_PE_OTHER_MASK | \ + PERF_TOOL_PMU_TYPE_TOOL_MASK | \ + PERF_TOOL_PMU_TYPE_HWMON_MASK) +}; +static unsigned int read_pmu_types; -static void pmu_read_sysfs(bool core_only); +static void pmu_read_sysfs(unsigned int to_read_pmus); size_t pmu_name_len_no_suffix(const char *str) { @@ -102,8 +117,7 @@ void perf_pmus__destroy(void) perf_pmu__delete(pmu); } - read_sysfs_core_pmus = false; - read_sysfs_all_pmus = false; + read_pmu_types = 0; } static struct perf_pmu *pmu_find(const char *name) @@ -129,6 +143,7 @@ struct perf_pmu *perf_pmus__find(const char *name) struct perf_pmu *pmu; int dirfd; bool core_pmu; + unsigned int to_read_pmus = 0; /* * Once PMU is loaded it stays in the list, @@ -139,11 +154,11 @@ struct perf_pmu *perf_pmus__find(const char *name) if (pmu) return pmu; - if (read_sysfs_all_pmus) + if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK) return NULL; core_pmu = is_pmu_core(name); - if (core_pmu && read_sysfs_core_pmus) + if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK)) return NULL; dirfd = perf_pmu__event_source_devices_fd(); @@ -151,15 +166,27 @@ struct perf_pmu *perf_pmus__find(const char *name) /*eager_load=*/false); close(dirfd); - if (!pmu) { - /* - * Looking up an inidividual PMU failed. This may mean name is - * an alias, so read the PMUs from sysfs and try to find again. - */ - pmu_read_sysfs(core_pmu); + if (pmu) + return pmu; + + /* Looking up an individual perf event PMU failed, check if a tool PMU should be read. */ + if (!strncmp(name, "hwmon_", 6)) + to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK; + else if (!strcmp(name, "tool")) + to_read_pmus |= PERF_TOOL_PMU_TYPE_TOOL_MASK; + + if (to_read_pmus) { + pmu_read_sysfs(to_read_pmus); pmu = pmu_find(name); + if (pmu) + return pmu; } - return pmu; + /* Read all necessary PMUs from sysfs and see if the PMU is found. */ + to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK; + if (!core_pmu) + to_read_pmus |= PERF_TOOL_PMU_TYPE_PE_OTHER_MASK; + pmu_read_sysfs(to_read_pmus); + return pmu_find(name); } static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name) @@ -176,11 +203,11 @@ static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name) if (pmu) return pmu; - if (read_sysfs_all_pmus) + if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK) return NULL; core_pmu = is_pmu_core(name); - if (core_pmu && read_sysfs_core_pmus) + if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK)) return NULL; return perf_pmu__lookup(core_pmu ? 
&core_pmus : &other_pmus, dirfd, name, @@ -197,52 +224,57 @@ static int pmus_cmp(void *priv __maybe_unused, } /* Add all pmus in sysfs to pmu list: */ -static void pmu_read_sysfs(bool core_only) +static void pmu_read_sysfs(unsigned int to_read_types) { - int fd; - DIR *dir; - struct dirent *dent; struct perf_pmu *tool_pmu; - if (read_sysfs_all_pmus || (core_only && read_sysfs_core_pmus)) + if ((read_pmu_types & to_read_types) == to_read_types) { + /* All requested PMU types have been read. */ return; + } - fd = perf_pmu__event_source_devices_fd(); - if (fd < 0) - return; + if (to_read_types & (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | PERF_TOOL_PMU_TYPE_PE_OTHER_MASK)) { + int fd = perf_pmu__event_source_devices_fd(); + struct io_dir dir; + struct io_dirent64 *dent; + bool core_only = (to_read_types & PERF_TOOL_PMU_TYPE_PE_OTHER_MASK) == 0; - dir = fdopendir(fd); - if (!dir) { - close(fd); - return; - } + if (fd < 0) + goto skip_pe_pmus; - while ((dent = readdir(dir))) { - if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) - continue; - if (core_only && !is_pmu_core(dent->d_name)) - continue; - /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */ - perf_pmu__find2(fd, dent->d_name); - } + io_dir__init(&dir, fd); - closedir(dir); - if (list_empty(&core_pmus)) { + while ((dent = io_dir__readdir(&dir)) != NULL) { + if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) + continue; + if (core_only && !is_pmu_core(dent->d_name)) + continue; + /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */ + perf_pmu__find2(fd, dent->d_name); + } + + close(fd); + } +skip_pe_pmus: + if ((to_read_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK) && list_empty(&core_pmus)) { if (!perf_pmu__create_placeholder_core_pmu(&core_pmus)) pr_err("Failure to set up any core PMUs\n"); } list_sort(NULL, &core_pmus, pmus_cmp); - if (!core_only) { - tool_pmu = perf_pmus__tool_pmu(); - list_add_tail(&tool_pmu->list, &other_pmus); - perf_pmus__read_hwmon_pmus(&other_pmus); + + if ((to_read_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) != 0 && + (read_pmu_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) == 0) { + tool_pmu = tool_pmu__new(); + if (tool_pmu) + list_add_tail(&tool_pmu->list, &other_pmus); } + if ((to_read_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) != 0 && + (read_pmu_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) == 0) + perf_pmus__read_hwmon_pmus(&other_pmus); + list_sort(NULL, &other_pmus, pmus_cmp); - if (!list_empty(&core_pmus)) { - read_sysfs_core_pmus = true; - if (!core_only) - read_sysfs_all_pmus = true; - } + + read_pmu_types |= to_read_types; } static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type) @@ -263,12 +295,21 @@ static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type) struct perf_pmu *perf_pmus__find_by_type(unsigned int type) { + unsigned int to_read_pmus; struct perf_pmu *pmu = __perf_pmus__find_by_type(type); - if (pmu || read_sysfs_all_pmus) + if (pmu || (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK)) return pmu; - pmu_read_sysfs(/*core_only=*/false); + if (type >= PERF_PMU_TYPE_PE_START && type <= PERF_PMU_TYPE_PE_END) { + to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK | + PERF_TOOL_PMU_TYPE_PE_OTHER_MASK; + } else if (type >= PERF_PMU_TYPE_HWMON_START && type <= PERF_PMU_TYPE_HWMON_END) { + to_read_pmus = PERF_TOOL_PMU_TYPE_HWMON_MASK; + } else { + to_read_pmus = PERF_TOOL_PMU_TYPE_TOOL_MASK; + } + pmu_read_sysfs(to_read_pmus); pmu = __perf_pmus__find_by_type(type); return pmu; } @@ -282,7 +323,7 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu) bool 
use_core_pmus = !pmu || pmu->is_core; if (!pmu) { - pmu_read_sysfs(/*core_only=*/false); + pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK); pmu = list_prepare_entry(pmu, &core_pmus, list); } if (use_core_pmus) { @@ -300,7 +341,7 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu) struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu) { if (!pmu) { - pmu_read_sysfs(/*core_only=*/true); + pmu_read_sysfs(PERF_TOOL_PMU_TYPE_PE_CORE_MASK); return list_first_entry_or_null(&core_pmus, typeof(*pmu), list); } list_for_each_entry_continue(pmu, &core_pmus, list) @@ -316,7 +357,7 @@ static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu) const char *last_pmu_name = (pmu && pmu->name) ? pmu->name : ""; if (!pmu) { - pmu_read_sysfs(/*core_only=*/false); + pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK); pmu = list_prepare_entry(pmu, &core_pmus, list); } else last_pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: ""); @@ -674,47 +715,35 @@ bool perf_pmus__supports_extended_type(void) return perf_pmus__do_support_extended_type; } -char *perf_pmus__default_pmu_name(void) +struct perf_pmu *evsel__find_pmu(const struct evsel *evsel) { - int fd; - DIR *dir; - struct dirent *dent; - char *result = NULL; + struct perf_pmu *pmu = evsel->pmu; + bool legacy_core_type; - if (!list_empty(&core_pmus)) - return strdup(list_first_entry(&core_pmus, struct perf_pmu, list)->name); + if (pmu) + return pmu; - fd = perf_pmu__event_source_devices_fd(); - if (fd < 0) - return strdup("cpu"); + pmu = perf_pmus__find_by_type(evsel->core.attr.type); + legacy_core_type = + evsel->core.attr.type == PERF_TYPE_HARDWARE || + evsel->core.attr.type == PERF_TYPE_HW_CACHE; + if (!pmu && legacy_core_type && perf_pmus__supports_extended_type()) { + u32 type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT; - dir = fdopendir(fd); - if (!dir) { - close(fd); - return strdup("cpu"); + pmu = perf_pmus__find_by_type(type); } - - while ((dent = readdir(dir))) { - if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) - continue; - if (is_pmu_core(dent->d_name)) { - result = strdup(dent->d_name); - break; - } - } - - closedir(dir); - return result ?: strdup("cpu"); -} - -struct perf_pmu *evsel__find_pmu(const struct evsel *evsel) -{ - struct perf_pmu *pmu = evsel->pmu; - - if (!pmu) { - pmu = perf_pmus__find_by_type(evsel->core.attr.type); - ((struct evsel *)evsel)->pmu = pmu; + if (!pmu && (legacy_core_type || evsel->core.attr.type == PERF_TYPE_RAW)) { + /* + * For legacy events, if there was no extended type info then + * assume the PMU is the first core PMU. + * + * On architectures like ARM there is no sysfs PMU with type + * PERF_TYPE_RAW, assume the RAW events are going to be handled + * by the first core PMU. 
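+ * For example, on a non-hybrid x86 machine a PERF_TYPE_HARDWARE + * cycles event without extended type bits resolves to the "cpu" PMU.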
+ */ + pmu = perf_pmus__find_core_pmu(); } + ((struct evsel *)evsel)->pmu = pmu; return pmu; } diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h index a0cb0eb2ff97..8def20e615ad 100644 --- a/tools/perf/util/pmus.h +++ b/tools/perf/util/pmus.h @@ -27,7 +27,6 @@ void perf_pmus__print_raw_pmu_events(const struct print_callbacks *print_cb, voi bool perf_pmus__have_event(const char *pname, const char *name); int perf_pmus__num_core_pmus(void); bool perf_pmus__supports_extended_type(void); -char *perf_pmus__default_pmu_name(void); struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name); struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir, diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index a786cbfb0ff5..83aaf7cda635 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -268,6 +268,7 @@ bool is_event_supported(u8 type, u64 config) ret = evsel__open(evsel, NULL, tmap) >= 0; } + evsel__close(evsel); evsel__delete(evsel); } diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index 445efa1636c1..8f19c2bea64a 100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -25,7 +25,8 @@ struct print_callbacks { const char *long_desc, const char *expr, const char *threshold, - const char *unit); + const char *unit, + const char *pmu_name); bool (*skip_duplicate_pmus)(void *print_state); }; diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 1e769b68da37..3cc7c40f5097 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -973,6 +973,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die), (unsigned long)dwarf_dieoffset(sp_die)); pf->fname = fname; + pf->abstrace_dieoffset = dwarf_dieoffset(sp_die); if (pp->line) { /* Function relative line */ dwarf_decl_line(sp_die, &pf->lno); pf->lno += pp->line; @@ -1179,6 +1180,8 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data) struct local_vars_finder *vf = data; struct probe_finder *pf = vf->pf; int tag; + Dwarf_Attribute attr; + Dwarf_Die var_die; tag = dwarf_tag(die_mem); if (tag == DW_TAG_formal_parameter || @@ -1196,10 +1199,22 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data) } } - if (dwarf_haspc(die_mem, vf->pf->addr)) + if (dwarf_haspc(die_mem, vf->pf->addr)) { + /* + * When DW_AT_entry_pc contains an instruction address, also check + * that the DW_AT_abstract_origin of die_mem points to the correct + * DIE, so variables are not copied from an unrelated scope that + * covers the same address.
+ */ + if (dwarf_attr(die_mem, DW_AT_abstract_origin, &attr)) { + dwarf_formref_die(&attr, &var_die); + if (pf->abstrace_dieoffset != dwarf_dieoffset(&var_die)) + goto out; + } return DIE_FIND_CB_CONTINUE; - else - return DIE_FIND_CB_SIBLING; + } + +out: + return DIE_FIND_CB_SIBLING; } static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf, diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index dcf6cc1e1cbe..ecd6d937c592 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -63,6 +63,7 @@ struct probe_finder { const char *fname; /* Real file name */ Dwarf_Die cu_die; /* Current CU */ Dwarf_Die sp_die; + Dwarf_Off abstrace_dieoffset; struct intlist *lcache; /* Line cache for lazy match */ /* For variable searching */ diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c index a1d1e4ef6257..141ffa129c69 100644 --- a/tools/perf/util/pstack.c +++ b/tools/perf/util/pstack.c @@ -63,20 +63,6 @@ void pstack__push(struct pstack *pstack, void *key) pstack->entries[pstack->top++] = key; } -void *pstack__pop(struct pstack *pstack) -{ - void *ret; - - if (pstack->top == 0) { - pr_err("%s: underflow!\n", __func__); - return NULL; - } - - ret = pstack->entries[--pstack->top]; - pstack->entries[pstack->top] = NULL; - return ret; -} - void *pstack__peek(struct pstack *pstack) { if (pstack->top == 0) diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h index 8729b8be061d..712051b8130f 100644 --- a/tools/perf/util/pstack.h +++ b/tools/perf/util/pstack.h @@ -10,7 +10,6 @@ void pstack__delete(struct pstack *pstack); bool pstack__empty(const struct pstack *pstack); void pstack__remove(struct pstack *pstack, void *key); void pstack__push(struct pstack *pstack, void *key); -void *pstack__pop(struct pstack *pstack); void *pstack__peek(struct pstack *pstack); #endif /* _PERF_PSTACK_ */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index b4bc57859f73..321c333877fa 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -9,10 +9,12 @@ #include <event-parse.h> #endif #include <perf/mmap.h> +#include "callchain.h" #include "evlist.h" #include "evsel.h" #include "event.h" #include "print_binary.h" +#include "record.h" #include "strbuf.h" #include "thread_map.h" #include "trace-event.h" @@ -20,13 +22,6 @@ #include "util/sample.h" #include <internal/lib.h> -#define _PyUnicode_FromString(arg) \ - PyUnicode_FromString(arg) -#define _PyUnicode_FromFormat(...) 
\ - PyUnicode_FromFormat(__VA_ARGS__) -#define _PyLong_FromLong(arg) \ - PyLong_FromLong(arg) - PyMODINIT_FUNC PyInit_perf(void); #define member_def(type, member, ptype, help) \ @@ -47,7 +42,7 @@ struct pyrf_event { }; #define sample_members \ - sample_member_def(sample_ip, ip, T_ULONGLONG, "event type"), \ + sample_member_def(sample_ip, ip, T_ULONGLONG, "event ip"), \ sample_member_def(sample_pid, pid, T_INT, "event pid"), \ sample_member_def(sample_tid, tid, T_INT, "event tid"), \ sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"), \ @@ -270,6 +265,12 @@ static PyMemberDef pyrf_sample_event__members[] = { { .name = NULL, }, }; +static void pyrf_sample_event__delete(struct pyrf_event *pevent) +{ + perf_sample__exit(&pevent->sample); + Py_TYPE(pevent)->tp_free((PyObject*)pevent); +} + static PyObject *pyrf_sample_event__repr(const struct pyrf_event *pevent) { PyObject *ret; @@ -336,23 +337,14 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) { const char *str = _PyUnicode_AsString(PyObject_Str(attr_name)); struct evsel *evsel = pevent->evsel; + struct tep_event *tp_format = evsel__tp_format(evsel); struct tep_format_field *field; - if (!evsel->tp_format) { - struct tep_event *tp_format; - - tp_format = trace_event__tp_format_id(evsel->core.attr.config); - if (IS_ERR_OR_NULL(tp_format)) - return NULL; - - evsel->tp_format = tp_format; - } - - field = tep_find_any_field(evsel->tp_format, str); - if (!field) + if (IS_ERR_OR_NULL(tp_format)) return NULL; - return tracepoint_field(pevent, field); + field = tep_find_any_field(tp_format, str); + return field ? tracepoint_field(pevent, field) : NULL; } #endif /* HAVE_LIBTRACEEVENT */ @@ -428,6 +420,9 @@ static int pyrf_event__setup_types(void) pyrf_sample_event__type.tp_new = pyrf_context_switch_event__type.tp_new = pyrf_throttle_event__type.tp_new = PyType_GenericNew; + + pyrf_sample_event__type.tp_dealloc = (destructor)pyrf_sample_event__delete; + err = PyType_Ready(&pyrf_mmap_event__type); if (err < 0) goto out; @@ -481,6 +476,11 @@ static PyObject *pyrf_event__new(const union perf_event *event) event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)) return NULL; + // FIXME this had better be dynamic or we need to parse everything + // before calling perf_mmap__consume(), including tracepoint fields.
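+ // The union perf_event member of struct pyrf_event is a fixed-size + // buffer, so the size check below drops larger events rather than + // overflowing the copy into pevent->event.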
+ if (sizeof(pevent->event) < event->header.size) + return NULL; + ptype = pyrf_event__type[event->header.type]; pevent = PyObject_New(struct pyrf_event, ptype); if (pevent != NULL) @@ -626,6 +626,92 @@ static int pyrf_thread_map__setup_types(void) return PyType_Ready(&pyrf_thread_map__type); } +struct pyrf_counts_values { + PyObject_HEAD + + struct perf_counts_values values; +}; + +static const char pyrf_counts_values__doc[] = PyDoc_STR("perf counts values object."); + +static void pyrf_counts_values__delete(struct pyrf_counts_values *pcounts_values) +{ + Py_TYPE(pcounts_values)->tp_free((PyObject *)pcounts_values); +} + +#define counts_values_member_def(member, ptype, help) \ + { #member, ptype, \ + offsetof(struct pyrf_counts_values, values.member), \ + 0, help } + +static PyMemberDef pyrf_counts_values_members[] = { + counts_values_member_def(val, T_ULONG, "Value of event"), + counts_values_member_def(ena, T_ULONG, "Time for which enabled"), + counts_values_member_def(run, T_ULONG, "Time for which running"), + counts_values_member_def(id, T_ULONG, "Unique ID for an event"), + counts_values_member_def(lost, T_ULONG, "Num of lost samples"), + { .name = NULL, }, +}; + +static PyObject *pyrf_counts_values_get_values(struct pyrf_counts_values *self, void *closure) +{ + PyObject *vals = PyList_New(5); + + if (!vals) + return NULL; + for (int i = 0; i < 5; i++) + PyList_SetItem(vals, i, PyLong_FromLong(self->values.values[i])); + + return vals; +} + +static int pyrf_counts_values_set_values(struct pyrf_counts_values *self, PyObject *list, + void *closure) +{ + Py_ssize_t size; + PyObject *item = NULL; + + if (!PyList_Check(list)) { + PyErr_SetString(PyExc_TypeError, "Value assigned must be a list"); + return -1; + } + + size = PyList_Size(list); + for (Py_ssize_t i = 0; i < size; i++) { + item = PyList_GetItem(list, i); + if (!PyLong_Check(item)) { + PyErr_SetString(PyExc_TypeError, "List members should be numbers"); + return -1; + } + self->values.values[i] = PyLong_AsLong(item); + } + + return 0; +} + +static PyGetSetDef pyrf_counts_values_getset[] = { + {"values", (getter)pyrf_counts_values_get_values, (setter)pyrf_counts_values_set_values, + "Counter values list: [val, ena, run, id, lost]", NULL}, + { .name = NULL, }, +}; + +static PyTypeObject pyrf_counts_values__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "perf.counts_values", + .tp_basicsize = sizeof(struct pyrf_counts_values), + .tp_dealloc = (destructor)pyrf_counts_values__delete, + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + .tp_doc = pyrf_counts_values__doc, + .tp_members = pyrf_counts_values_members, + .tp_getset = pyrf_counts_values_getset, +}; + +static int pyrf_counts_values__setup_types(void) +{ + pyrf_counts_values__type.tp_new = PyType_GenericNew; + return PyType_Ready(&pyrf_counts_values__type); } + struct pyrf_evsel { PyObject_HEAD @@ -781,6 +867,58 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel, return Py_None; } +static PyObject *pyrf_evsel__cpus(struct pyrf_evsel *pevsel) +{ + struct pyrf_cpu_map *pcpu_map = PyObject_New(struct pyrf_cpu_map, &pyrf_cpu_map__type); + + if (pcpu_map) + pcpu_map->cpus = perf_cpu_map__get(pevsel->evsel.core.cpus); + + return (PyObject *)pcpu_map; +} + +static PyObject *pyrf_evsel__threads(struct pyrf_evsel *pevsel) +{ + struct pyrf_thread_map *pthread_map = + PyObject_New(struct pyrf_thread_map, &pyrf_thread_map__type); + + if (pthread_map) + pthread_map->threads = perf_thread_map__get(pevsel->evsel.core.threads); + + return (PyObject *)pthread_map; +} + +static PyObject
*pyrf_evsel__read(struct pyrf_evsel *pevsel, + PyObject *args, PyObject *kwargs) +{ + struct evsel *evsel = &pevsel->evsel; + int cpu = 0, cpu_idx, thread = 0, thread_idx; + struct perf_counts_values counts; + struct pyrf_counts_values *count_values = PyObject_New(struct pyrf_counts_values, + &pyrf_counts_values__type); + + if (!count_values) + return NULL; + + if (!PyArg_ParseTuple(args, "ii", &cpu, &thread)) + return NULL; + + cpu_idx = perf_cpu_map__idx(evsel->core.cpus, (struct perf_cpu){.cpu = cpu}); + if (cpu_idx < 0) { + PyErr_Format(PyExc_TypeError, "CPU %d is not part of evsel's CPUs", cpu); + return NULL; + } + thread_idx = perf_thread_map__idx(evsel->core.threads, thread); + if (thread_idx < 0) { + PyErr_Format(PyExc_TypeError, "Thread %d is not part of evsel's threads", + thread); + return NULL; + } + perf_evsel__read(&(evsel->core), cpu_idx, thread_idx, &counts); + count_values->values = counts; + return (PyObject *)count_values; +} + static PyObject *pyrf_evsel__str(PyObject *self) { struct pyrf_evsel *pevsel = (void *)self; @@ -799,9 +937,49 @@ static PyMethodDef pyrf_evsel__methods[] = { .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = PyDoc_STR("open the event selector file descriptor table.") }, + { + .ml_name = "cpus", + .ml_meth = (PyCFunction)pyrf_evsel__cpus, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("CPUs the event is to be used with.") + }, + { + .ml_name = "threads", + .ml_meth = (PyCFunction)pyrf_evsel__threads, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("threads the event is to be used with.") + }, + { + .ml_name = "read", + .ml_meth = (PyCFunction)pyrf_evsel__read, + .ml_flags = METH_VARARGS | METH_KEYWORDS, + .ml_doc = PyDoc_STR("read counters") + }, { .ml_name = NULL, } }; +#define evsel_member_def(member, ptype, help) \ + { #member, ptype, \ + offsetof(struct pyrf_evsel, evsel.member), \ + 0, help } + +#define evsel_attr_member_def(member, ptype, help) \ + { #member, ptype, \ + offsetof(struct pyrf_evsel, evsel.core.attr.member), \ + 0, help } + +static PyMemberDef pyrf_evsel__members[] = { + evsel_member_def(tracking, T_BOOL, "tracking event."), + evsel_attr_member_def(type, T_UINT, "attribute type."), + evsel_attr_member_def(size, T_UINT, "attribute size."), + evsel_attr_member_def(config, T_ULONGLONG, "attribute config."), + evsel_attr_member_def(sample_period, T_ULONGLONG, "attribute sample_period."), + evsel_attr_member_def(sample_type, T_ULONGLONG, "attribute sample_type."), + evsel_attr_member_def(read_format, T_ULONGLONG, "attribute read_format."), + evsel_attr_member_def(wakeup_events, T_UINT, "attribute wakeup_events."), + { .name = NULL, }, +}; + static const char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object."); static PyTypeObject pyrf_evsel__type = { @@ -811,6 +989,7 @@ static PyTypeObject pyrf_evsel__type = { .tp_dealloc = (destructor)pyrf_evsel__delete, .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, .tp_doc = pyrf_evsel__doc, + .tp_members = pyrf_evsel__members, .tp_methods = pyrf_evsel__methods, .tp_init = (initproc)pyrf_evsel__init, .tp_str = pyrf_evsel__str, @@ -851,6 +1030,16 @@ static void pyrf_evlist__delete(struct pyrf_evlist *pevlist) Py_TYPE(pevlist)->tp_free((PyObject*)pevlist); } +static PyObject *pyrf_evlist__all_cpus(struct pyrf_evlist *pevlist) +{ + struct pyrf_cpu_map *pcpu_map = PyObject_New(struct pyrf_cpu_map, &pyrf_cpu_map__type); + + if (pcpu_map) + pcpu_map->cpus = perf_cpu_map__get(pevlist->evlist.core.all_cpus); + + return (PyObject *)pcpu_map; +} + static PyObject
*pyrf_evlist__mmap(struct pyrf_evlist *pevlist, PyObject *args, PyObject *kwargs) { @@ -984,20 +1173,22 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, evsel = evlist__event2evsel(evlist, event); if (!evsel) { + Py_DECREF(pyevent); Py_INCREF(Py_None); return Py_None; } pevent->evsel = evsel; - err = evsel__parse_sample(evsel, event, &pevent->sample); - - /* Consume the even only after we parsed it out. */ perf_mmap__consume(&md->core); - if (err) + err = evsel__parse_sample(evsel, &pevent->event, &pevent->sample); + if (err) { + Py_DECREF(pyevent); return PyErr_Format(PyExc_OSError, "perf: can't parse sample, err=%d", err); + } + return pyevent; } end: @@ -1019,8 +1210,63 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist, return Py_None; } +static PyObject *pyrf_evlist__close(struct pyrf_evlist *pevlist) +{ + struct evlist *evlist = &pevlist->evlist; + + evlist__close(evlist); + + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject *pyrf_evlist__config(struct pyrf_evlist *pevlist) +{ + struct record_opts opts = { + .sample_time = true, + .mmap_pages = UINT_MAX, + .user_freq = UINT_MAX, + .user_interval = ULLONG_MAX, + .freq = 4000, + .target = { + .uses_mmap = true, + .default_per_cpu = true, + }, + .nr_threads_synthesize = 1, + .ctl_fd = -1, + .ctl_fd_ack = -1, + .no_buffering = true, + .no_inherit = true, + }; + struct evlist *evlist = &pevlist->evlist; + + evlist__config(evlist, &opts, &callchain_param); + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject *pyrf_evlist__disable(struct pyrf_evlist *pevlist) +{ + evlist__disable(&pevlist->evlist); + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject *pyrf_evlist__enable(struct pyrf_evlist *pevlist) +{ + evlist__enable(&pevlist->evlist); + Py_INCREF(Py_None); + return Py_None; +} + static PyMethodDef pyrf_evlist__methods[] = { { + .ml_name = "all_cpus", + .ml_meth = (PyCFunction)pyrf_evlist__all_cpus, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("CPU map union of all evsel CPU maps.") + }, + { .ml_name = "mmap", .ml_meth = (PyCFunction)pyrf_evlist__mmap, .ml_flags = METH_VARARGS | METH_KEYWORDS, @@ -1033,6 +1279,12 @@ static PyMethodDef pyrf_evlist__methods[] = { .ml_doc = PyDoc_STR("open the file descriptors.") }, { + .ml_name = "close", + .ml_meth = (PyCFunction)pyrf_evlist__close, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("close the file descriptors.") + }, + { .ml_name = "poll", .ml_meth = (PyCFunction)pyrf_evlist__poll, .ml_flags = METH_VARARGS | METH_KEYWORDS, @@ -1056,6 +1308,24 @@ static PyMethodDef pyrf_evlist__methods[] = { .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = PyDoc_STR("reads an event.") }, + { + .ml_name = "config", + .ml_meth = (PyCFunction)pyrf_evlist__config, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("Apply default record options to the evlist.") + }, + { + .ml_name = "disable", + .ml_meth = (PyCFunction)pyrf_evlist__disable, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("Disable the evsels in the evlist.") + }, + { + .ml_name = "enable", + .ml_meth = (PyCFunction)pyrf_evlist__enable, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("Enable the evsels in the evlist.") + }, { .ml_name = NULL, } }; @@ -1254,6 +1524,8 @@ static PyObject *pyrf_evsel__from_evsel(struct evsel *evsel) evsel__init(&pevsel->evsel, &evsel->core.attr, evsel->core.idx); evsel__clone(&pevsel->evsel, evsel); + if (evsel__is_group_leader(evsel)) + evsel__set_leader(&pevsel->evsel, &pevsel->evsel); return (PyObject *)pevsel; } @@ -1281,12 +1553,18 @@ 
static PyObject *pyrf__parse_events(PyObject *self, PyObject *args) struct evlist evlist = {}; struct parse_events_error err; PyObject *result; + PyObject *pcpus = NULL, *pthreads = NULL; + struct perf_cpu_map *cpus; + struct perf_thread_map *threads; - if (!PyArg_ParseTuple(args, "s", &input)) + if (!PyArg_ParseTuple(args, "s|OO", &input, &pcpus, &pthreads)) return NULL; + threads = pthreads ? ((struct pyrf_thread_map *)pthreads)->threads : NULL; + cpus = pcpus ? ((struct pyrf_cpu_map *)pcpus)->cpus : NULL; + parse_events_error__init(&err); - evlist__init(&evlist, NULL, NULL); + evlist__init(&evlist, cpus, threads); if (parse_events(&evlist, input, &err)) { parse_events_error__print(&err, input); PyErr_SetFromErrno(PyExc_OSError); @@ -1336,7 +1614,8 @@ PyMODINIT_FUNC PyInit_perf(void) pyrf_evlist__setup_types() < 0 || pyrf_evsel__setup_types() < 0 || pyrf_thread_map__setup_types() < 0 || - pyrf_cpu_map__setup_types() < 0) + pyrf_cpu_map__setup_types() < 0 || + pyrf_counts_values__setup_types() < 0) return module; /* The page_size is placed in util object. */ @@ -1381,6 +1660,9 @@ PyMODINIT_FUNC PyInit_perf(void) Py_INCREF(&pyrf_cpu_map__type); PyModule_AddObject(module, "cpu_map", (PyObject*)&pyrf_cpu_map__type); + Py_INCREF(&pyrf_counts_values__type); + PyModule_AddObject(module, "counts_values", (PyObject *)&pyrf_counts_values__type); + dict = PyModule_GetDict(module); if (dict == NULL) goto error; diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h deleted file mode 100644 index d927a0d25052..000000000000 --- a/tools/perf/util/rb_resort.h +++ /dev/null @@ -1,146 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PERF_RESORT_RB_H_ -#define _PERF_RESORT_RB_H_ -/* - * Template for creating a class to resort an existing rb_tree according to - * a new sort criteria, that must be present in the entries of the source - * rb_tree. - * - * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Quick example, resorting threads by its shortname: - * - * First define the prefix (threads) to be used for the functions and data - * structures created, and provide an expression for the sorting, then the - * fields to be present in each of the entries in the new, sorted, rb_tree. - * - * The body of the init function should collect the fields, maybe - * pre-calculating them from multiple entries in the original 'entry' from - * the rb_tree used as a source for the entries to be sorted: - -DEFINE_RB_RESORT_RB(threads, strcmp(a->thread->shortname, - b->thread->shortname) < 0, - struct thread *thread; -) -{ - entry->thread = rb_entry(nd, struct thread, rb_node); -} - - * After this it is just a matter of instantiating it and iterating it, - * for a few data structures with existing rb_trees, such as 'struct machine', - * helpers are available to get the rb_root and the nr_entries: - - DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr); - - * This will instantiate the new rb_tree and a cursor for it, that can be used as: - - struct rb_node *nd; - - resort_rb__for_each_entry(nd, threads) { - struct thread *t = threads_entry; - printf("%s: %d\n", t->shortname, t->tid); - } - - * Then delete it: - - resort_rb__delete(threads); - - * The name of the data structures and functions will have a _sorted suffix - * right before the method names, i.e. will look like: - * - * struct threads_sorted_entry {} - * threads_sorted__insert() - */ - -#define DEFINE_RESORT_RB(__name, __comp, ...) 
\ -struct __name##_sorted_entry { \ - struct rb_node rb_node; \ - __VA_ARGS__ \ -}; \ -static void __name##_sorted__init_entry(struct rb_node *nd, \ - struct __name##_sorted_entry *entry); \ - \ -static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb) \ -{ \ - struct __name##_sorted_entry *a, *b; \ - a = rb_entry(nda, struct __name##_sorted_entry, rb_node); \ - b = rb_entry(ndb, struct __name##_sorted_entry, rb_node); \ - return __comp; \ -} \ - \ -struct __name##_sorted { \ - struct rb_root entries; \ - struct __name##_sorted_entry nd[0]; \ -}; \ - \ -static void __name##_sorted__insert(struct __name##_sorted *sorted, \ - struct rb_node *sorted_nd) \ -{ \ - struct rb_node **p = &sorted->entries.rb_node, *parent = NULL; \ - while (*p != NULL) { \ - parent = *p; \ - if (__name##_sorted__cmp(sorted_nd, parent)) \ - p = &(*p)->rb_left; \ - else \ - p = &(*p)->rb_right; \ - } \ - rb_link_node(sorted_nd, parent, p); \ - rb_insert_color(sorted_nd, &sorted->entries); \ -} \ - \ -static void __name##_sorted__sort(struct __name##_sorted *sorted, \ - struct rb_root *entries) \ -{ \ - struct rb_node *nd; \ - unsigned int i = 0; \ - for (nd = rb_first(entries); nd; nd = rb_next(nd)) { \ - struct __name##_sorted_entry *snd = &sorted->nd[i++]; \ - __name##_sorted__init_entry(nd, snd); \ - __name##_sorted__insert(sorted, &snd->rb_node); \ - } \ -} \ - \ -static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries, \ - int nr_entries) \ -{ \ - struct __name##_sorted *sorted; \ - sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries); \ - if (sorted) { \ - sorted->entries = RB_ROOT; \ - __name##_sorted__sort(sorted, entries); \ - } \ - return sorted; \ -} \ - \ -static void __name##_sorted__delete(struct __name##_sorted *sorted) \ -{ \ - free(sorted); \ -} \ - \ -static void __name##_sorted__init_entry(struct rb_node *nd, \ - struct __name##_sorted_entry *entry) - -#define DECLARE_RESORT_RB(__name) \ -struct __name##_sorted_entry *__name##_entry; \ -struct __name##_sorted *__name = __name##_sorted__new - -#define resort_rb__for_each_entry(__nd, __name) \ - for (__nd = rb_first(&__name->entries); \ - __name##_entry = rb_entry(__nd, struct __name##_sorted_entry, \ - rb_node), __nd; \ - __nd = rb_next(__nd)) - -#define resort_rb__delete(__name) \ - __name##_sorted__delete(__name), __name = NULL - -/* - * Helpers for other classes that contains both an rbtree and the - * number of entries in it: - */ - -/* For 'struct intlist' */ -#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \ - DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root, \ - __ilist->rblist.nr_entries) - -#endif /* _PERF_RESORT_RB_H_ */ diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index a6566134e09e..ea3a6c4657ee 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -28,6 +28,7 @@ struct record_opts { bool sample_time_set; bool sample_cpu; bool sample_identifier; + bool sample_data_src; bool period; bool period_set; bool running_time; @@ -79,6 +80,7 @@ struct record_opts { int synth; int threads_spec; const char *threads_user_spec; + u64 off_cpu_thresh_ns; }; extern const char * const *record_usage; diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c index 5109167f27f7..9d26832398db 100644 --- a/tools/perf/util/rwsem.c +++ b/tools/perf/util/rwsem.c @@ -27,6 +27,7 @@ int exit_rwsem(struct rw_semaphore *sem) } int down_read(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { #if RWS_ERRORCHECK mutex_lock(&sem->mtx); @@ -37,6 +38,7 @@ int 
down_read(struct rw_semaphore *sem) } int up_read(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { #if RWS_ERRORCHECK mutex_unlock(&sem->mtx); @@ -47,6 +49,7 @@ int up_read(struct rw_semaphore *sem) } int down_write(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { #if RWS_ERRORCHECK mutex_lock(&sem->mtx); @@ -57,6 +60,7 @@ int down_write(struct rw_semaphore *sem) } int up_write(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { #if RWS_ERRORCHECK mutex_unlock(&sem->mtx); diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h index ef5cbc31d967..b102d8143181 100644 --- a/tools/perf/util/rwsem.h +++ b/tools/perf/util/rwsem.h @@ -10,7 +10,7 @@ */ #define RWS_ERRORCHECK 0 -struct rw_semaphore { +struct LOCKABLE rw_semaphore { #if RWS_ERRORCHECK struct mutex mtx; #else @@ -21,10 +21,10 @@ struct rw_semaphore { int init_rwsem(struct rw_semaphore *sem); int exit_rwsem(struct rw_semaphore *sem); -int down_read(struct rw_semaphore *sem); -int up_read(struct rw_semaphore *sem); +int down_read(struct rw_semaphore *sem) SHARED_LOCK_FUNCTION(sem); +int up_read(struct rw_semaphore *sem) UNLOCK_FUNCTION(sem); -int down_write(struct rw_semaphore *sem); -int up_write(struct rw_semaphore *sem); +int down_write(struct rw_semaphore *sem) EXCLUSIVE_LOCK_FUNCTION(sem); +int up_write(struct rw_semaphore *sem) UNLOCK_FUNCTION(sem); #endif /* _PERF_RWSEM_H */ diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 30638653ad2d..0ce52f0280b8 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -513,6 +513,7 @@ static bool s390_cpumsf_make_event(size_t pos, .period = 1 }; union perf_event event; + int ret; memset(&event, 0, sizeof(event)); if (basic->CL == 1) /* Native LPAR mode */ @@ -536,8 +537,9 @@ static bool s390_cpumsf_make_event(size_t pos, pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n", __func__, pos, sample.ip, basic->P, basic->CL, sample.pid, sample.tid, sample.cpumode, sample.cpu); - if (perf_session__deliver_synth_event(sfq->sf->session, &event, - &sample)) { + ret = perf_session__deliver_synth_event(sfq->sf->session, &event, &sample); + perf_sample__exit(&sample); + if (ret) { pr_err("s390 Auxiliary Trace: failed to deliver event\n"); return false; } diff --git a/tools/perf/util/sample.c b/tools/perf/util/sample.c new file mode 100644 index 000000000000..605fee971f55 --- /dev/null +++ b/tools/perf/util/sample.c @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "sample.h" +#include "debug.h" +#include <linux/zalloc.h> +#include <stdlib.h> +#include <string.h> + +void perf_sample__init(struct perf_sample *sample, bool all) +{ + if (all) { + memset(sample, 0, sizeof(*sample)); + } else { + sample->user_regs = NULL; + sample->intr_regs = NULL; + } +} + +void perf_sample__exit(struct perf_sample *sample) +{ + free(sample->user_regs); + free(sample->intr_regs); +} + +struct regs_dump *perf_sample__user_regs(struct perf_sample *sample) +{ + if (!sample->user_regs) { + sample->user_regs = zalloc(sizeof(*sample->user_regs)); + if (!sample->user_regs) + pr_err("Failure to allocate sample user_regs"); + } + return sample->user_regs; +} + + +struct regs_dump *perf_sample__intr_regs(struct perf_sample *sample) +{ + if (!sample->intr_regs) { + sample->intr_regs = zalloc(sizeof(*sample->intr_regs)); + if (!sample->intr_regs) + pr_err("Failure to allocate sample intr_regs"); + } + return sample->intr_regs; +} diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h index 
70b2c3135555..0e96240052e9 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -67,7 +67,7 @@ struct aux_sample { }; struct simd_flags { - u64 arch:1, /* architecture (isa) */ + u8 arch:1, /* architecture (isa) */ pred:2; /* predication */ }; @@ -114,14 +114,19 @@ struct perf_sample { struct ip_callchain *callchain; struct branch_stack *branch_stack; u64 *branch_stack_cntr; - struct regs_dump user_regs; - struct regs_dump intr_regs; + struct regs_dump *user_regs; + struct regs_dump *intr_regs; struct stack_dump user_stack; struct sample_read read; struct aux_sample aux_sample; struct simd_flags simd_flags; }; +void perf_sample__init(struct perf_sample *sample, bool all); +void perf_sample__exit(struct perf_sample *sample); +struct regs_dump *perf_sample__user_regs(struct perf_sample *sample); +struct regs_dump *perf_sample__intr_regs(struct perf_sample *sample); + /* * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get * 8-byte alignment. diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index b1b5e94537e4..520729e78965 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -745,19 +745,30 @@ static int set_regs_in_dict(PyObject *dict, const char *arch = perf_env__arch(evsel__env(evsel)); int size = (__sw_hweight64(attr->sample_regs_intr) * MAX_REG_SIZE) + 1; - char *bf = malloc(size); - if (!bf) - return -1; + char *bf = NULL; - regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, size); + if (sample->intr_regs) { + bf = malloc(size); + if (!bf) + return -1; - pydict_set_item_string_decref(dict, "iregs", - _PyUnicode_FromString(bf)); + regs_map(sample->intr_regs, attr->sample_regs_intr, arch, bf, size); - regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, size); + pydict_set_item_string_decref(dict, "iregs", + _PyUnicode_FromString(bf)); + } - pydict_set_item_string_decref(dict, "uregs", - _PyUnicode_FromString(bf)); + if (sample->user_regs) { + if (!bf) { + bf = malloc(size); + if (!bf) + return -1; + } + regs_map(sample->user_regs, attr->sample_regs_user, arch, bf, size); + + pydict_set_item_string_decref(dict, "uregs", + _PyUnicode_FromString(bf)); + } free(bf); return 0; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index c06e3020a976..a320672c264e 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -950,7 +950,12 @@ static void regs__printf(const char *type, struct regs_dump *regs, const char *a static void regs_user__printf(struct perf_sample *sample, const char *arch) { - struct regs_dump *user_regs = &sample->user_regs; + struct regs_dump *user_regs; + + if (!sample->user_regs) + return; + + user_regs = perf_sample__user_regs(sample); if (user_regs->regs) regs__printf("user", user_regs, arch); @@ -958,7 +963,12 @@ static void regs_user__printf(struct perf_sample *sample, const char *arch) static void regs_intr__printf(struct perf_sample *sample, const char *arch) { - struct regs_dump *intr_regs = &sample->intr_regs; + struct regs_dump *intr_regs; + + if (!sample->intr_regs) + return; + + intr_regs = perf_sample__intr_regs(sample); if (intr_regs->regs) regs__printf("intr", intr_regs, arch); @@ -1351,25 +1361,30 @@ static int perf_session__deliver_event(struct perf_session *session, const char *file_path) { struct perf_sample sample; - int ret = evlist__parse_sample(session->evlist, event, &sample); + int ret; + 
perf_sample__init(&sample, /*all=*/false); + ret = evlist__parse_sample(session->evlist, event, &sample); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); - return ret; + goto out; } ret = auxtrace__process_event(session, event, &sample, tool); if (ret < 0) - return ret; - if (ret > 0) - return 0; + goto out; + if (ret > 0) { + ret = 0; + goto out; + } ret = machines__deliver_event(&session->machines, session->evlist, event, &sample, tool, file_offset, file_path); if (dump_trace && sample.aux_sample.size) auxtrace__dump_auxtrace_sample(session, &sample); - +out: + perf_sample__exit(&sample); return ret; } @@ -1380,11 +1395,14 @@ static s64 perf_session__process_user_event(struct perf_session *session, { struct ordered_events *oe = &session->ordered_events; const struct perf_tool *tool = session->tool; - struct perf_sample sample = { .time = 0, }; + struct perf_sample sample; int fd = perf_data__fd(session->data); int err; - if (event->header.type != PERF_RECORD_COMPRESSED || perf_tool__compressed_is_stub(tool)) + perf_sample__init(&sample, /*all=*/true); + if ((event->header.type != PERF_RECORD_COMPRESSED && + event->header.type != PERF_RECORD_COMPRESSED2) || + perf_tool__compressed_is_stub(tool)) dump_event(session->evlist, event, file_offset, &sample, file_path); /* These events are processed right away */ @@ -1395,15 +1413,17 @@ static s64 perf_session__process_user_event(struct perf_session *session, perf_session__set_id_hdr_size(session); perf_session__set_comm_exec(session); } - return err; + break; case PERF_RECORD_EVENT_UPDATE: - return tool->event_update(tool, event, &session->evlist); + err = tool->event_update(tool, event, &session->evlist); + break; case PERF_RECORD_HEADER_EVENT_TYPE: /* * Deprecated, but we need to handle it for sake * of old data files create in pipe mode. 
*/ - return 0; + err = 0; + break; case PERF_RECORD_HEADER_TRACING_DATA: /* * Setup for reading amidst mmap, but only when we @@ -1412,15 +1432,20 @@ static s64 perf_session__process_user_event(struct perf_session *session, */ if (!perf_data__is_pipe(session->data)) lseek(fd, file_offset, SEEK_SET); - return tool->tracing_data(session, event); + err = tool->tracing_data(session, event); + break; case PERF_RECORD_HEADER_BUILD_ID: - return tool->build_id(session, event); + err = tool->build_id(session, event); + break; case PERF_RECORD_FINISHED_ROUND: - return tool->finished_round(tool, event, oe); + err = tool->finished_round(tool, event, oe); + break; case PERF_RECORD_ID_INDEX: - return tool->id_index(session, event); + err = tool->id_index(session, event); + break; case PERF_RECORD_AUXTRACE_INFO: - return tool->auxtrace_info(session, event); + err = tool->auxtrace_info(session, event); + break; case PERF_RECORD_AUXTRACE: /* * Setup for reading amidst mmap, but only when we @@ -1429,35 +1454,49 @@ static s64 perf_session__process_user_event(struct perf_session *session, */ if (!perf_data__is_pipe(session->data)) lseek(fd, file_offset + event->header.size, SEEK_SET); - return tool->auxtrace(session, event); + err = tool->auxtrace(session, event); + break; case PERF_RECORD_AUXTRACE_ERROR: perf_session__auxtrace_error_inc(session, event); - return tool->auxtrace_error(session, event); + err = tool->auxtrace_error(session, event); + break; case PERF_RECORD_THREAD_MAP: - return tool->thread_map(session, event); + err = tool->thread_map(session, event); + break; case PERF_RECORD_CPU_MAP: - return tool->cpu_map(session, event); + err = tool->cpu_map(session, event); + break; case PERF_RECORD_STAT_CONFIG: - return tool->stat_config(session, event); + err = tool->stat_config(session, event); + break; case PERF_RECORD_STAT: - return tool->stat(session, event); + err = tool->stat(session, event); + break; case PERF_RECORD_STAT_ROUND: - return tool->stat_round(session, event); + err = tool->stat_round(session, event); + break; case PERF_RECORD_TIME_CONV: session->time_conv = event->time_conv; - return tool->time_conv(session, event); + err = tool->time_conv(session, event); + break; case PERF_RECORD_HEADER_FEATURE: - return tool->feature(session, event); + err = tool->feature(session, event); + break; case PERF_RECORD_COMPRESSED: + case PERF_RECORD_COMPRESSED2: err = tool->compressed(session, event, file_offset, file_path); if (err) dump_event(session->evlist, event, file_offset, &sample, file_path); - return err; + break; case PERF_RECORD_FINISHED_INIT: - return tool->finished_init(session, event); + err = tool->finished_init(session, event); + break; default: - return -EINVAL; + err = -EINVAL; + break; } + perf_sample__exit(&sample); + return err; } int perf_session__deliver_synth_event(struct perf_session *session, @@ -1603,8 +1642,17 @@ static s64 perf_session__process_event(struct perf_session *session, if (session->header.needs_swap) event_swap(event, evlist__sample_id_all(evlist)); - if (event->header.type >= PERF_RECORD_HEADER_MAX) - return -EINVAL; + if (event->header.type >= PERF_RECORD_HEADER_MAX) { + /* perf should not support unaligned event, stop here. */ + if (event->header.size % sizeof(u64)) + return -EINVAL; + + /* This perf is outdated and does not support the latest event type. */ + ui__warning("Unsupported header type %u, please consider updating perf.\n", + event->header.type); + /* Skip unsupported event by returning its size. 
*/ + return event->header.size; + } events_stats__inc(&evlist->stats, event->header.type); @@ -2403,6 +2451,18 @@ bool perf_session__has_traces(struct perf_session *session, const char *msg) return false; } +bool perf_session__has_switch_events(struct perf_session *session) +{ + struct evsel *evsel; + + evlist__for_each_entry(session->evlist, evsel) { + if (evsel->core.attr.context_switch) + return true; + } + + return false; +} + int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr) { char *bracket; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index bcf1bcf06959..db1c120a9e67 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -141,6 +141,7 @@ int perf_session__resolve_callchain(struct perf_session *session, struct symbol **parent); bool perf_session__has_traces(struct perf_session *session, const char *msg); +bool perf_session__has_switch_events(struct perf_session *session); void perf_event__attr_swap(struct perf_event_attr *attr); diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 649550e9b7aa..dd289d15acfd 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -3,6 +3,7 @@ from subprocess import Popen, PIPE from re import sub cc = getenv("CC") +assert cc, "Environment variable CC not set" # Check if CC has options, as is the case in yocto, where it uses CC="cc --sysroot..." cc_tokens = cc.split() @@ -12,8 +13,13 @@ if len(cc_tokens) > 1: else: cc_options = "" +# Ignore mypy's complaint that the optional stderr could be None: it is set to PIPE above to avoid that. +# mypy: disable-error-code="union-attr" cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline() -src_feature_tests = getenv('srctree') + '/tools/build/feature' + +srctree = getenv('srctree') +assert srctree, "Environment variable srctree, for the Linux sources, not set" +src_feature_tests = f'{srctree}/tools/build/feature' def clang_has_option(option): cc_output = Popen([cc, cc_options + option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines() @@ -71,7 +77,7 @@ else: # The python headers have mixed code with declarations (decls after asserts, for instance) cflags += [ "-Wno-declaration-after-statement" ] -src_perf = getenv('srctree') + '/tools/perf' +src_perf = f'{srctree}/tools/perf' build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 3dd33721823f..45e654653960 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -141,6 +141,43 @@ struct sort_entry sort_thread = { .se_width_idx = HISTC_THREAD, }; +/* --sort tgid */ + +static int64_t +sort__tgid_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return thread__pid(right->thread) - thread__pid(left->thread); +} + +static int hist_entry__tgid_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + int tgid = thread__pid(he->thread); + const char *comm = NULL; + + /* display comm of the thread-group leader */ + if (thread__pid(he->thread) == thread__tid(he->thread)) { + comm = thread__comm_str(he->thread); + } else { + struct maps *maps = thread__maps(he->thread); + struct thread *leader = machine__find_thread(maps__machine(maps), + tgid, tgid); + if (leader) { + comm = thread__comm_str(leader); + thread__put(leader); + } + } + width = max(7U, width) - 8; + return repsep_snprintf(bf, size, "%7d:%-*.*s", tgid, width, width, comm ?: ""); +} + +struct sort_entry sort_tgid = { + .se_header = " 
Tgid:Command", + .se_cmp = sort__tgid_cmp, + .se_snprintf = hist_entry__tgid_snprintf, + .se_width_idx = HISTC_TGID, +}; + /* --sort simd */ static int64_t @@ -892,6 +929,38 @@ struct sort_entry sort_cpu = { .se_width_idx = HISTC_CPU, }; +/* --sort parallelism */ + +static int64_t +sort__parallelism_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->parallelism - left->parallelism; +} + +static int hist_entry__parallelism_filter(struct hist_entry *he, int type, const void *arg) +{ + const unsigned long *parallelism_filter = arg; + + if (type != HIST_FILTER__PARALLELISM) + return -1; + + return test_bit(he->parallelism, parallelism_filter); +} + +static int hist_entry__parallelism_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%*d", width, he->parallelism); +} + +struct sort_entry sort_parallelism = { + .se_header = "Parallelism", + .se_cmp = sort__parallelism_cmp, + .se_filter = hist_entry__parallelism_filter, + .se_snprintf = hist_entry__parallelism_snprintf, + .se_width_idx = HISTC_PARALLELISM, +}; + /* --sort cgroup_id */ static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev) @@ -2371,44 +2440,19 @@ sort__typeoff_sort(struct hist_entry *left, struct hist_entry *right) return left->mem_type_off - right->mem_type_off; } -static void fill_member_name(char *buf, size_t sz, struct annotated_member *m, - int offset, bool first) -{ - struct annotated_member *child; - - if (list_empty(&m->children)) - return; - - list_for_each_entry(child, &m->children, node) { - if (child->offset <= offset && offset < child->offset + child->size) { - int len = 0; - - /* It can have anonymous struct/union members */ - if (child->var_name) { - len = scnprintf(buf, sz, "%s%s", - first ? 
"" : ".", child->var_name); - first = false; - } - - fill_member_name(buf + len, sz - len, child, offset, first); - return; - } - } -} - static int hist_entry__typeoff_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width __maybe_unused) { struct annotated_data_type *he_type = he->mem_type; char buf[4096]; - buf[0] = '\0'; - if (list_empty(&he_type->self.children)) - snprintf(buf, sizeof(buf), "no field"); - else - fill_member_name(buf, sizeof(buf), &he_type->self, - he->mem_type_off, true); - buf[4095] = '\0'; + if (he_type == &unknown_type || he_type == &stackop_type || + he_type == &canary_type) + return repsep_snprintf(bf, size, "%s", he_type->self.type_name); + + if (!annotated_data_type__get_member_name(he_type, buf, sizeof(buf), + he->mem_type_off)) + scnprintf(buf, sizeof(buf), "no field"); return repsep_snprintf(bf, size, "%s +%#x (%s)", he_type->self.type_name, he->mem_type_off, buf); @@ -2501,6 +2545,7 @@ static void sort_dimension_add_dynamic_header(struct sort_dimension *sd) static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_PID, "pid", sort_thread), + DIM(SORT_TGID, "tgid", sort_tgid), DIM(SORT_COMM, "comm", sort_comm), DIM(SORT_DSO, "dso", sort_dso), DIM(SORT_SYM, "symbol", sort_sym), @@ -2534,6 +2579,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_ANNOTATE_DATA_TYPE_OFFSET, "typeoff", sort_type_offset), DIM(SORT_SYM_OFFSET, "symoff", sort_sym_offset), DIM(SORT_ANNOTATE_DATA_TYPE_CACHELINE, "typecln", sort_type_cacheline), + DIM(SORT_PARALLELISM, "parallelism", sort_parallelism), }; #undef DIM @@ -2589,17 +2635,22 @@ struct hpp_dimension { const char *name; struct perf_hpp_fmt *fmt; int taken; + int was_taken; + int mem_mode; }; #define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], } +#define DIM_MEM(d, n) { .name = n, .fmt = &perf_hpp__format[d], .mem_mode = 1, } static struct hpp_dimension hpp_sort_dimensions[] = { DIM(PERF_HPP__OVERHEAD, "overhead"), + DIM(PERF_HPP__LATENCY, "latency"), DIM(PERF_HPP__OVERHEAD_SYS, "overhead_sys"), DIM(PERF_HPP__OVERHEAD_US, "overhead_us"), DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"), DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"), DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"), + DIM(PERF_HPP__LATENCY_ACC, "latency_children"), DIM(PERF_HPP__SAMPLES, "sample"), DIM(PERF_HPP__PERIOD, "period"), DIM(PERF_HPP__WEIGHT1, "weight1"), @@ -2609,8 +2660,15 @@ static struct hpp_dimension hpp_sort_dimensions[] = { DIM(PERF_HPP__WEIGHT2, "ins_lat"), DIM(PERF_HPP__WEIGHT3, "retire_lat"), DIM(PERF_HPP__WEIGHT3, "p_stage_cyc"), + /* used for output only when SORT_MODE__MEM */ + DIM_MEM(PERF_HPP__MEM_STAT_OP, "op"), + DIM_MEM(PERF_HPP__MEM_STAT_CACHE, "cache"), + DIM_MEM(PERF_HPP__MEM_STAT_MEMORY, "memory"), + DIM_MEM(PERF_HPP__MEM_STAT_SNOOP, "snoop"), + DIM_MEM(PERF_HPP__MEM_STAT_DTLB, "dtlb"), }; +#undef DIM_MEM #undef DIM struct hpp_sort_entry { @@ -2630,18 +2688,22 @@ void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists) } static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hists *hists, int line __maybe_unused, + struct hists *hists, int line, int *span __maybe_unused) { struct hpp_sort_entry *hse; size_t len = fmt->user_len; + const char *hdr = ""; + + if (line == hists->hpp_list->nr_header_lines - 1) + hdr = fmt->name; hse = container_of(fmt, struct hpp_sort_entry, hpp); if (!len) len = hists__col_len(hists, hse->se->se_width_idx); - return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, fmt->name); 
+ return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, hdr); } static int __sort__hpp_width(struct perf_hpp_fmt *fmt, @@ -2735,6 +2797,7 @@ MK_SORT_ENTRY_CHK(thread) MK_SORT_ENTRY_CHK(comm) MK_SORT_ENTRY_CHK(dso) MK_SORT_ENTRY_CHK(sym) +MK_SORT_ENTRY_CHK(parallelism) static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) @@ -2872,9 +2935,10 @@ static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd, } static int __sort_dimension__add_hpp_output(struct sort_dimension *sd, - struct perf_hpp_list *list) + struct perf_hpp_list *list, + int level) { - struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0); + struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level); if (hse == NULL) return -1; @@ -3477,17 +3541,19 @@ static int __hpp_dimension__add(struct hpp_dimension *hd, return -1; hd->taken = 1; + hd->was_taken = 1; perf_hpp_list__register_sort_field(list, fmt); return 0; } static int __sort_dimension__add_output(struct perf_hpp_list *list, - struct sort_dimension *sd) + struct sort_dimension *sd, + int level) { if (sd->taken) return 0; - if (__sort_dimension__add_hpp_output(sd, list) < 0) + if (__sort_dimension__add_hpp_output(sd, list, level) < 0) return -1; sd->taken = 1; @@ -3495,14 +3561,15 @@ static int __sort_dimension__add_output(struct perf_hpp_list *list, } static int __hpp_dimension__add_output(struct perf_hpp_list *list, - struct hpp_dimension *hd) + struct hpp_dimension *hd, + int level) { struct perf_hpp_fmt *fmt; if (hd->taken) return 0; - fmt = __hpp_dimension__alloc_hpp(hd, 0); + fmt = __hpp_dimension__alloc_hpp(hd, level); if (!fmt) return -1; @@ -3511,10 +3578,15 @@ static int __hpp_dimension__add_output(struct perf_hpp_list *list, return 0; } -int hpp_dimension__add_output(unsigned col) +int hpp_dimension__add_output(unsigned col, bool implicit) { + struct hpp_dimension *hd; + BUG_ON(col >= PERF_HPP__MAX_INDEX); - return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]); + hd = &hpp_sort_dimensions[col]; + if (implicit && !hd->was_taken) + return 0; + return __hpp_dimension__add_output(&perf_hpp_list, hd, /*level=*/0); } int sort_dimension__add(struct perf_hpp_list *list, const char *tok, @@ -3583,15 +3655,6 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, return __sort_dimension__add(sd, list, level); } - for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { - struct hpp_dimension *hd = &hpp_sort_dimensions[i]; - - if (strncasecmp(tok, hd->name, strlen(tok))) - continue; - - return __hpp_dimension__add(hd, list, level); - } - for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { struct sort_dimension *sd = &bstack_sort_dimensions[i]; @@ -3633,12 +3696,49 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, return 0; } + for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { + struct hpp_dimension *hd = &hpp_sort_dimensions[i]; + + if (strncasecmp(tok, hd->name, strlen(tok))) + continue; + + return __hpp_dimension__add(hd, list, level); + } + if (!add_dynamic_entry(evlist, tok, level)) return 0; return -ESRCH; } +/* This should match with sort_dimension__add() above */ +static bool is_hpp_sort_key(const char *key) +{ + unsigned i; + + for (i = 0; i < ARRAY_SIZE(arch_specific_sort_keys); i++) { + if (!strcmp(arch_specific_sort_keys[i], key) && + !arch_support_sort_key(key)) { + return false; + } + } + + for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { + struct sort_dimension *sd = &common_sort_dimensions[i]; + + if (sd->name && 
!strncasecmp(key, sd->name, strlen(key))) + return false; + } + + for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { + struct hpp_dimension *hd = &hpp_sort_dimensions[i]; + + if (!strncasecmp(key, hd->name, strlen(key))) + return true; + } + return false; +} + static int setup_sort_list(struct perf_hpp_list *list, char *str, struct evlist *evlist) { @@ -3646,7 +3746,9 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str, int ret = 0; int level = 0; int next_level = 1; + int prev_level = 0; bool in_group = false; + bool prev_was_hpp = false; do { tok = str; @@ -3667,6 +3769,19 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str, } if (*tok) { + if (is_hpp_sort_key(tok)) { + /* keep output (hpp) sort keys in the same level */ + if (prev_was_hpp) { + bool next_same = (level == next_level); + + level = prev_level; + next_level = next_same ? level : level+1; + } + prev_was_hpp = true; + } else { + prev_was_hpp = false; + } + ret = sort_dimension__add(list, tok, evlist, level); if (ret == -EINVAL) { if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok))) @@ -3678,6 +3793,7 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str, ui__error("Unknown --sort key: `%s'", tok); break; } + prev_level = level; } level = next_level; @@ -3773,10 +3889,24 @@ static char *setup_overhead(char *keys) if (sort__mode == SORT_MODE__DIFF) return keys; - keys = prefix_if_not_in("overhead", keys); - - if (symbol_conf.cumulate_callchain) - keys = prefix_if_not_in("overhead_children", keys); + if (symbol_conf.prefer_latency) { + keys = prefix_if_not_in("overhead", keys); + keys = prefix_if_not_in("latency", keys); + if (symbol_conf.cumulate_callchain) { + keys = prefix_if_not_in("overhead_children", keys); + keys = prefix_if_not_in("latency_children", keys); + } + } else if (!keys || (!strstr(keys, "overhead") && + !strstr(keys, "latency"))) { + if (symbol_conf.enable_latency) + keys = prefix_if_not_in("latency", keys); + keys = prefix_if_not_in("overhead", keys); + if (symbol_conf.cumulate_callchain) { + if (symbol_conf.enable_latency) + keys = prefix_if_not_in("latency_children", keys); + keys = prefix_if_not_in("overhead_children", keys); + } + } return keys; } @@ -3924,7 +4054,7 @@ void sort__setup_elide(FILE *output) } } -int output_field_add(struct perf_hpp_list *list, const char *tok) +int output_field_add(struct perf_hpp_list *list, const char *tok, int *level) { unsigned int i; @@ -3937,16 +4067,25 @@ int output_field_add(struct perf_hpp_list *list, const char *tok) if (!strcasecmp(tok, "weight")) ui__warning("--fields weight shows the average value unlike in the --sort key.\n"); - return __hpp_dimension__add_output(list, hd); + if (hd->mem_mode && sort__mode != SORT_MODE__MEMORY) + continue; + + return __hpp_dimension__add_output(list, hd, *level); } + /* + * A non-output field will increase level so that it can be in a + * different hierarchy. 
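As a rough illustration of this bookkeeping (hypothetical direct calls, mirroring what setup_output_list() does per token):

	int level = 0;
	output_field_add(list, "overhead", &level);	// hpp field, level stays 0
	output_field_add(list, "latency", &level);	// hpp field, level stays 0
	output_field_add(list, "comm", &level);		// non-hpp field, added at level 1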
+ */ + (*level)++; + for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { struct sort_dimension *sd = &common_sort_dimensions[i]; if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; - return __sort_dimension__add_output(list, sd); + return __sort_dimension__add_output(list, sd, *level); } for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { @@ -3958,7 +4097,7 @@ int output_field_add(struct perf_hpp_list *list, const char *tok) if (sort__mode != SORT_MODE__BRANCH) return -EINVAL; - return __sort_dimension__add_output(list, sd); + return __sort_dimension__add_output(list, sd, *level); } for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) { @@ -3970,7 +4109,7 @@ int output_field_add(struct perf_hpp_list *list, const char *tok) if (sort__mode != SORT_MODE__MEMORY) return -EINVAL; - return __sort_dimension__add_output(list, sd); + return __sort_dimension__add_output(list, sd, *level); } return -ESRCH; @@ -3980,10 +4119,11 @@ static int setup_output_list(struct perf_hpp_list *list, char *str) { char *tmp, *tok; int ret = 0; + int level = 0; for (tok = strtok_r(str, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { - ret = output_field_add(list, tok); + ret = output_field_add(list, tok, &level); if (ret == -EINVAL) { ui__error("Invalid --fields key: `%s'", tok); break; @@ -4073,6 +4213,10 @@ int setup_sorting(struct evlist *evlist) if (err < 0) return err; + err = perf_hpp__alloc_mem_stats(&perf_hpp_list, evlist); + if (err < 0) + return err; + /* copy sort keys to output fields */ perf_hpp__setup_output_field(&perf_hpp_list); /* and then copy output fields to sort keys */ diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index a8572574e168..a742ab7f3c67 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -72,6 +72,8 @@ enum sort_type { SORT_ANNOTATE_DATA_TYPE_OFFSET, SORT_SYM_OFFSET, SORT_ANNOTATE_DATA_TYPE_CACHELINE, + SORT_PARALLELISM, + SORT_TGID, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, @@ -140,12 +142,12 @@ int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, i bool is_strict_order(const char *order); -int hpp_dimension__add_output(unsigned col); +int hpp_dimension__add_output(unsigned col, bool implicit); void reset_dimensions(void); int sort_dimension__add(struct perf_hpp_list *list, const char *tok, struct evlist *evlist, int level); -int output_field_add(struct perf_hpp_list *list, const char *tok); +int output_field_add(struct perf_hpp_list *list, const char *tok, int *level); int64_t sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right); int64_t diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c index 476e99896d5e..0f4907843ac1 100644 --- a/tools/perf/util/srccode.c +++ b/tools/perf/util/srccode.c @@ -16,7 +16,7 @@ #include "srccode.h" #include "debug.h" #include <internal/lib.h> // page_size -#include "fncache.h" +#include "hashmap.h" #define MAXSRCCACHE (32*1024*1024) #define MAXSRCFILES 64 @@ -92,7 +92,7 @@ static struct srcfile *find_srcfile(char *fn) struct srcfile *h; int fd; unsigned long sz; - unsigned hval = shash((unsigned char *)fn) % SRC_HTAB_SZ; + size_t hval = str_hash(fn) % SRC_HTAB_SZ; hlist_for_each_entry (h, &srcfile_htab[hval], hash_nd) { if (!strcmp(fn, h->fn)) { diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index ba79f73e1cf5..729ad5cd52cb 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -798,40 +798,28 @@ static void abs_printout(struct perf_stat_config *config, 
print_cgroup(config, os, evsel->cgrp); } -static bool is_mixed_hw_group(struct evsel *counter) -{ - struct evlist *evlist = counter->evlist; - u32 pmu_type = counter->core.attr.type; - struct evsel *pos; - - if (counter->core.nr_members < 2) - return false; - - evlist__for_each_entry(evlist, pos) { - /* software events can be part of any hardware group */ - if (pos->core.attr.type == PERF_TYPE_SOFTWARE) - continue; - if (pmu_type == PERF_TYPE_SOFTWARE) { - pmu_type = pos->core.attr.type; - continue; - } - if (pmu_type != pos->core.attr.type) - return true; - } - - return false; -} - -static bool evlist__has_hybrid(struct evlist *evlist) +static bool evlist__has_hybrid_pmus(struct evlist *evlist) { struct evsel *evsel; + struct perf_pmu *last_core_pmu = NULL; if (perf_pmus__num_core_pmus() == 1) return false; evlist__for_each_entry(evlist, evsel) { - if (evsel->core.is_pmu_core) + if (evsel->core.is_pmu_core) { + struct perf_pmu *pmu = evsel__find_pmu(evsel); + + if (pmu == last_core_pmu) + continue; + + if (last_core_pmu == NULL) { + last_core_pmu = pmu; + continue; + } + /* A distinct core PMU. */ return true; + } } return false; @@ -872,10 +860,8 @@ static void printout(struct perf_stat_config *config, struct outstate *os, ok = false; if (counter->supported) { - if (!evlist__has_hybrid(counter->evlist)) { + if (!evlist__has_hybrid_pmus(counter->evlist)) { config->print_free_counters_hint = 1; - if (is_mixed_hw_group(counter)) - config->print_mixed_hw_group_error = 1; } } } @@ -929,81 +915,6 @@ static void printout(struct perf_stat_config *config, struct outstate *os, } } -static void uniquify_event_name(struct evsel *counter) -{ - const char *name, *pmu_name; - char *new_name, *config; - int ret; - - /* The evsel was already uniquified. */ - if (counter->uniquified_name) - return; - - /* Avoid checking to uniquify twice. */ - counter->uniquified_name = true; - - /* The evsel has a "name=" config term or is from libpfm. */ - if (counter->use_config_name || counter->is_libpfm_event) - return; - - /* Legacy no PMU event, don't uniquify. */ - if (!counter->pmu || - (counter->pmu->type < PERF_TYPE_MAX && counter->pmu->type != PERF_TYPE_RAW)) - return; - - /* A sysfs or json event replacing a legacy event, don't uniquify. */ - if (counter->pmu->is_core && counter->alternate_hw_config != PERF_COUNT_HW_MAX) - return; - - name = evsel__name(counter); - pmu_name = counter->pmu->name; - /* Already prefixed by the PMU name. */ - if (!strncmp(name, pmu_name, strlen(pmu_name))) - return; - - config = strchr(name, '/'); - if (config) { - int len = config - name; - - if (config[1] == '/') { - /* case: event// */ - ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 2); - } else { - /* case: event/.../ */ - ret = asprintf(&new_name, "%s/%.*s,%s", pmu_name, len, name, config + 1); - } - } else { - config = strchr(name, ':'); - if (config) { - /* case: event:.. */ - int len = config - name; - - ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 1); - } else { - /* case: event */ - ret = asprintf(&new_name, "%s/%s/", pmu_name, name); - } - } - if (ret > 0) { - free(counter->name); - counter->name = new_name; - } else { - /* ENOMEM from asprintf. 
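(For reference, the format strings above turned, e.g., "cycles" into "cpu_core/cycles/" and "cycles:u" into "cpu_core/cycles/u" on a hybrid machine; that behaviour now lives behind the evlist__uniquify_evsel_names() helper called from evlist__print_counters() below.)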
*/ - counter->uniquified_name = false; - } -} - -static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config) -{ - return evsel__is_hybrid(evsel) && !config->hybrid_merge; -} - -static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter) -{ - if (config->aggr_mode == AGGR_NONE || hybrid_uniquify(counter, config)) - uniquify_event_name(counter); -} - /** * should_skip_zero_count() - Check if the event should print 0 values. * @config: The perf stat configuration (including aggregation mode). @@ -1042,8 +953,16 @@ static bool should_skip_zero_counter(struct perf_stat_config *config, return true; /* - * Many tool events are only gathered on the first index, skip other - * zero values. + * In per-thread mode the aggr_map and aggr_get_id functions may be + * NULL, assume all 0 values should be output in that case. + */ + if (!config->aggr_map || !config->aggr_get_id) + return false; + + /* + * Tool events may be gathered on all logical CPUs, for example + * system_time, but for many the first index is the only one used, for + * example num_cores. Don't skip for the first index. */ if (evsel__is_tool(counter)) { struct aggr_cpu_id own_id = @@ -1051,15 +970,12 @@ static bool should_skip_zero_counter(struct perf_stat_config *config, return !aggr_cpu_id__equal(id, &own_id); } - /* - * Skip value 0 when it's an uncore event and the given aggr id - * does not belong to the PMU cpumask. + * Skip value 0 when the counter's cpumask doesn't match the given aggr + * id. */ - if (!counter->pmu || !counter->pmu->is_uncore) - return false; - perf_cpu_map__for_each_cpu(cpu, idx, counter->pmu->cpus) { + perf_cpu_map__for_each_cpu(cpu, idx, counter->core.cpus) { struct aggr_cpu_id own_id = config->aggr_get_id(config, cpu); if (aggr_cpu_id__equal(id, &own_id)) @@ -1086,10 +1002,15 @@ static void print_counter_aggrdata(struct perf_stat_config *config, os->evsel = counter; /* Skip already merged uncore/hybrid events */ - if (counter->merged_stat) - return; - - uniquify_counter(config, counter); + if (config->aggr_mode != AGGR_NONE) { + if (evsel__is_hybrid(counter)) { + if (config->hybrid_merge && counter->first_wildcard_match != NULL) + return; + } else { + if (counter->first_wildcard_match != NULL) + return; + } + } val = aggr->counts.val; ena = aggr->counts.ena; @@ -1595,11 +1516,6 @@ static void print_footer(struct perf_stat_config *config) " echo 0 > /proc/sys/kernel/nmi_watchdog\n" " perf stat ...\n" " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); - - if (config->print_mixed_hw_group_error) - fprintf(output, - "The events in group usually have to be from " - "the same PMU. Try reorganizing the group.\n"); } static void print_percore(struct perf_stat_config *config, @@ -1670,31 +1586,6 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist print_metric_end(config, os); } -static void disable_uniquify(struct evlist *evlist) -{ - struct evsel *counter; - struct perf_pmu *last_pmu = NULL; - bool first = true; - - evlist__for_each_entry(evlist, counter) { - /* If PMUs vary then uniquify can be useful. */ - if (!first && counter->pmu != last_pmu) - return; - first = false; - if (counter->pmu) { - /* Allow uniquify for uncore PMUs. */ - if (!counter->pmu->is_core) - return; - /* Keep hybrid event names uniquified for clarity. 
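(The heuristic deleted here, i.e. leave names alone unless several PMUs are in play, is presumably subsumed by the shared helper as well; the caller below shrinks to a single line:

	evlist__uniquify_evsel_names(evlist, config);
)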
*/ - if (perf_pmus__num_core_pmus() > 1) - return; - } - } - evlist__for_each_entry_continue(evlist, counter) { - counter->uniquified_name = true; - } -} - void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, struct target *_target, struct timespec *ts, int argc, const char **argv) @@ -1706,7 +1597,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf .first = true, }; - disable_uniquify(evlist); + evlist__uniquify_evsel_names(evlist, config); if (config->iostat_run) evlist->selected = evlist__first(evlist); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index fa8b2a1048ff..d83bda5824d2 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -151,6 +151,7 @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type { struct evsel *cur; int evsel_ctx = evsel_context(evsel); + struct perf_pmu *evsel_pmu = evsel__find_pmu(evsel); evlist__for_each_entry(evsel->evlist, cur) { struct perf_stat_aggr *aggr; @@ -177,7 +178,7 @@ static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type * Except the SW CLOCK events, * ignore if not the PMU we're looking for. */ - if ((type != STAT_NSECS) && (evsel->pmu != cur->pmu)) + if ((type != STAT_NSECS) && (evsel_pmu != evsel__find_pmu(cur))) continue; aggr = &cur->stats->aggr[aggr_idx]; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 7c2ccdcc3fdb..355a7d5c8ab8 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -535,26 +535,6 @@ static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias) return 0; } -/* events should have the same name, scale, unit, cgroup but on different PMUs */ -static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b) -{ - if (strcmp(evsel__name(evsel_a), evsel__name(evsel_b))) - return false; - - if (evsel_a->scale != evsel_b->scale) - return false; - - if (evsel_a->cgrp != evsel_b->cgrp) - return false; - - if (strcmp(evsel_a->unit, evsel_b->unit)) - return false; - - if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b)) - return false; - - return evsel_a->pmu != evsel_b->pmu; -} static void evsel__merge_aliases(struct evsel *evsel) { @@ -563,10 +543,9 @@ static void evsel__merge_aliases(struct evsel *evsel) alias = list_prepare_entry(evsel, &(evlist->core.entries), core.node); list_for_each_entry_continue(alias, &evlist->core.entries, core.node) { - /* Merge the same events on different PMUs. */ - if (evsel__is_alias(evsel, alias)) { + if (alias->first_wildcard_match == evsel) { + /* Merge the same events on different PMUs. 
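An illustrative case: uncore_imc/data_reads/ expands into one evsel per uncore_imc_N PMU instance, and each copy's first_wildcard_match points back at the first evsel, so the check below folds every instance into a single printed count.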
*/ evsel__merge_aggr_counters(evsel, alias); - alias->merged_stat = true; } } } @@ -579,11 +558,7 @@ static bool evsel__should_merge_hybrid(const struct evsel *evsel, static void evsel__merge_stats(struct evsel *evsel, struct perf_stat_config *config) { - /* this evsel is already merged */ - if (evsel->merged_stat) - return; - - if (evsel->auto_merge_stats || evsel__should_merge_hybrid(evsel, config)) + if (!evsel->pmu || !evsel->pmu->is_core || evsel__should_merge_hybrid(evsel, config)) evsel__merge_aliases(evsel); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 2fda9acd7374..1bcd7634bf47 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -100,7 +100,6 @@ struct perf_stat_config { int times; int run_count; int print_free_counters_hint; - int print_mixed_hw_group_error; const char *csv_sep; struct stats *walltime_nsecs_stats; struct rusage ru_data; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 66fd1249660a..6d2c280a1730 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -7,15 +7,12 @@ #include <unistd.h> #include <inttypes.h> +#include "compress.h" #include "dso.h" #include "map.h" #include "maps.h" #include "symbol.h" #include "symsrc.h" -#include "demangle-cxx.h" -#include "demangle-ocaml.h" -#include "demangle-java.h" -#include "demangle-rust.h" #include "machine.h" #include "vdso.h" #include "debug.h" @@ -278,62 +275,6 @@ static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr) return -1; } -static bool want_demangle(bool is_kernel_sym) -{ - return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; -} - -/* - * Demangle C++ function signature, typically replaced by demangle-cxx.cpp - * version. - */ -#ifndef HAVE_CXA_DEMANGLE_SUPPORT -char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused, - bool modifiers __maybe_unused) -{ -#ifdef HAVE_LIBBFD_SUPPORT - int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); - - return bfd_demangle(NULL, str, flags); -#elif defined(HAVE_CPLUS_DEMANGLE_SUPPORT) - int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); - - return cplus_demangle(str, flags); -#else - return NULL; -#endif -} -#endif /* !HAVE_CXA_DEMANGLE_SUPPORT */ - -static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) -{ - char *demangled = NULL; - - /* - * We need to figure out if the object was created from C++ sources - * DWARF DW_compile_unit has this, but we don't always have access - * to it... - */ - if (!want_demangle(dso__kernel(dso) || kmodule)) - return demangled; - - demangled = cxx_demangle_sym(elf_name, verbose > 0, verbose > 0); - if (demangled == NULL) { - demangled = ocaml_demangle_sym(elf_name); - if (demangled == NULL) { - demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); - } - } - else if (rust_is_mangled(demangled)) - /* - * Input to Rust demangling is the BFD-demangled - * name which it Rust-demangles in place. 
- */ - rust_demangle_sym(demangled); - - return demangled; -} - struct rel_info { u32 nr_entries; u32 *sorted; @@ -619,7 +560,7 @@ static bool get_plt_got_name(GElf_Shdr *shdr, size_t i, /* Get the associated symbol */ gelf_getsym(di->dynsym_data, vr->sym_idx, &sym); sym_name = elf_sym__name(&sym, di->dynstr_data); - demangled = demangle_sym(di->dso, 0, sym_name); + demangled = dso__demangle_sym(di->dso, /*kmodule=*/0, sym_name); if (demangled != NULL) sym_name = demangled; @@ -817,7 +758,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) gelf_getsym(syms, get_rel_symidx(&ri, idx), &sym); elf_name = elf_sym__name(&sym, symstrs); - demangled = demangle_sym(dso, 0, elf_name); + demangled = dso__demangle_sym(dso, /*kmodule=*/0, elf_name); if (demangled) elf_name = demangled; if (*elf_name) @@ -846,11 +787,6 @@ out_elf_end: return 0; } -char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name) -{ - return demangle_sym(dso, kmodule, elf_name); -} - /* * Align offset to 4 bytes as needed for note name and descriptor data. */ @@ -1173,33 +1109,6 @@ out: #endif -static int dso__swap_init(struct dso *dso, unsigned char eidata) -{ - static unsigned int const endian = 1; - - dso__set_needs_swap(dso, DSO_SWAP__NO); - - switch (eidata) { - case ELFDATA2LSB: - /* We are big endian, DSO is little endian. */ - if (*(unsigned char const *)&endian != 1) - dso__set_needs_swap(dso, DSO_SWAP__YES); - break; - - case ELFDATA2MSB: - /* We are little endian, DSO is big endian. */ - if (*(unsigned char const *)&endian != 0) - dso__set_needs_swap(dso, DSO_SWAP__YES); - break; - - default: - pr_err("unrecognized DSO data encoding %d\n", eidata); - return -EINVAL; - } - - return 0; -} - bool symsrc__possibly_runtime(struct symsrc *ss) { return ss->dynsym || ss->opdsec; @@ -1228,6 +1137,81 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) ehdr.e_type == ET_DYN; } +static Elf *read_gnu_debugdata(struct dso *dso, Elf *elf, const char *name, int *fd_ret) +{ + Elf *elf_embedded; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Scn *scn; + Elf_Data *scn_data; + FILE *wrapped; + size_t shndx; + char temp_filename[] = "/tmp/perf.gnu_debugdata.elf.XXXXXX"; + int ret, temp_fd; + + if (gelf_getehdr(elf, &ehdr) == NULL) { + pr_debug("%s: cannot read %s ELF file.\n", __func__, name); + *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; + return NULL; + } + + scn = elf_section_by_name(elf, &ehdr, &shdr, ".gnu_debugdata", &shndx); + if (!scn) { + *dso__load_errno(dso) = -ENOENT; + return NULL; + } + + if (shdr.sh_type == SHT_NOBITS) { + pr_debug("%s: .gnu_debugdata of ELF file %s has no data.\n", __func__, name); + *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; + return NULL; + } + + scn_data = elf_rawdata(scn, NULL); + if (!scn_data) { + pr_debug("%s: error reading .gnu_debugdata of %s: %s\n", __func__, + name, elf_errmsg(-1)); + *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; + return NULL; + } + + wrapped = fmemopen(scn_data->d_buf, scn_data->d_size, "r"); + if (!wrapped) { + pr_debug("%s: fmemopen: %s\n", __func__, strerror(errno)); + *dso__load_errno(dso) = -errno; + return NULL; + } + + temp_fd = mkstemp(temp_filename); + if (temp_fd < 0) { + pr_debug("%s: mkstemp: %s\n", __func__, strerror(errno)); + *dso__load_errno(dso) = -errno; + fclose(wrapped); + return NULL; + } + unlink(temp_filename); + + ret = lzma_decompress_stream_to_file(wrapped, temp_fd); + fclose(wrapped); + if (ret < 0) { + *dso__load_errno(dso) = -errno; + close(temp_fd); + return NULL; + } + + elf_embedded = 
elf_begin(temp_fd, PERF_ELF_C_READ_MMAP, NULL); + if (!elf_embedded) { + pr_debug("%s: error reading .gnu_debugdata of %s: %s\n", __func__, + name, elf_errmsg(-1)); + *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; + close(temp_fd); + return NULL; + } + pr_debug("%s: using .gnu_debugdata of %s\n", __func__, name); + *fd_ret = temp_fd; + return elf_embedded; +} + int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, enum dso_binary_type type) { @@ -1256,6 +1240,19 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, goto out_close; } + if (type == DSO_BINARY_TYPE__GNU_DEBUGDATA) { + int new_fd; + Elf *embedded = read_gnu_debugdata(dso, elf, name, &new_fd); + + if (!embedded) + goto out_close; + + elf_end(elf); + close(fd); + fd = new_fd; + elf = embedded; + } + if (gelf_getehdr(elf, &ehdr) == NULL) { *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; pr_debug("%s: cannot get elf header.\n", __func__); @@ -1671,6 +1668,12 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, continue; } + /* Reject RISCV ELF "mapping symbols" */ + if (ehdr.e_machine == EM_RISCV) { + if (elf_name[0] == '$' && strchr("dx", elf_name[1])) + continue; + } + if (runtime_ss->opdsec && sym.st_shndx == runtime_ss->opdidx) { u32 offset = sym.st_value - syms_ss->opdshdr.sh_addr; u64 *opd = opddata->d_buf + offset; @@ -1778,7 +1781,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, } } - demangled = demangle_sym(dso, kmodule, elf_name); + demangled = dso__demangle_sym(dso, kmodule, elf_name); if (demangled != NULL) elf_name = demangled; @@ -1854,10 +1857,23 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, kmodule, 1); if (err < 0) return err; - err += nr; + nr += err; } - return err; + /* + * The .gnu_debugdata is a special situation: it contains a symbol + * table, but the runtime file may also contain dynsym entries which are + * not present there. We need to load both. + */ + if (syms_ss->type == DSO_BINARY_TYPE__GNU_DEBUGDATA && runtime_ss->dynsym) { + err = dso__load_sym_internal(dso, map, runtime_ss, runtime_ss, + kmodule, 1); + if (err < 0) + return err; + nr += err; + } + + return nr; } static int elf_read_maps(Elf *elf, bool exe, mapfn_t mapfn, void *data) diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index c6f369b5d893..c73fe2e09fe9 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -90,11 +90,23 @@ int filename__read_build_id(const char *filename, struct build_id *bid) { FILE *fp; int ret = -1; - bool need_swap = false; + bool need_swap = false, elf32; u8 e_ident[EI_NIDENT]; - size_t buf_size; - void *buf; int i; + union { + struct { + Elf32_Ehdr ehdr32; + Elf32_Phdr *phdr32; + }; + struct { + Elf64_Ehdr ehdr64; + Elf64_Phdr *phdr64; + }; + } hdrs; + void *phdr; + size_t phdr_size; + void *buf = NULL; + size_t buf_size = 0; fp = fopen(filename, "r"); if (fp == NULL) @@ -108,117 +120,79 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out; need_swap = check_need_swap(e_ident[EI_DATA]); + elf32 = e_ident[EI_CLASS] == ELFCLASS32; - /* for simplicity */ - fseek(fp, 0, SEEK_SET); - - if (e_ident[EI_CLASS] == ELFCLASS32) { - Elf32_Ehdr ehdr; - Elf32_Phdr *phdr; - - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) - goto out; + if (fread(elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64, + elf32 ? 
sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64), + 1, fp) != 1) + goto out; - if (need_swap) { - ehdr.e_phoff = bswap_32(ehdr.e_phoff); - ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); - ehdr.e_phnum = bswap_16(ehdr.e_phnum); + if (need_swap) { + if (elf32) { + hdrs.ehdr32.e_phoff = bswap_32(hdrs.ehdr32.e_phoff); + hdrs.ehdr32.e_phentsize = bswap_16(hdrs.ehdr32.e_phentsize); + hdrs.ehdr32.e_phnum = bswap_16(hdrs.ehdr32.e_phnum); + } else { + hdrs.ehdr64.e_phoff = bswap_64(hdrs.ehdr64.e_phoff); + hdrs.ehdr64.e_phentsize = bswap_16(hdrs.ehdr64.e_phentsize); + hdrs.ehdr64.e_phnum = bswap_16(hdrs.ehdr64.e_phnum); } + } + phdr_size = elf32 ? hdrs.ehdr32.e_phentsize * hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phentsize * hdrs.ehdr64.e_phnum; + phdr = malloc(phdr_size); + if (phdr == NULL) + goto out; - buf_size = ehdr.e_phentsize * ehdr.e_phnum; - buf = malloc(buf_size); - if (buf == NULL) - goto out; - - fseek(fp, ehdr.e_phoff, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; - - for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { - void *tmp; - long offset; - - if (need_swap) { - phdr->p_type = bswap_32(phdr->p_type); - phdr->p_offset = bswap_32(phdr->p_offset); - phdr->p_filesz = bswap_32(phdr->p_filesz); - } - - if (phdr->p_type != PT_NOTE) - continue; - - buf_size = phdr->p_filesz; - offset = phdr->p_offset; - tmp = realloc(buf, buf_size); - if (tmp == NULL) - goto out_free; - - buf = tmp; - fseek(fp, offset, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; + fseek(fp, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET); + if (fread(phdr, phdr_size, 1, fp) != 1) + goto out_free; - ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) { - ret = bid->size; - break; - } - } - } else { - Elf64_Ehdr ehdr; - Elf64_Phdr *phdr; + if (elf32) + hdrs.phdr32 = phdr; + else + hdrs.phdr64 = phdr; - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) - goto out; + for (i = 0; i < (elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum); i++) { + size_t p_filesz; if (need_swap) { - ehdr.e_phoff = bswap_64(ehdr.e_phoff); - ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); - ehdr.e_phnum = bswap_16(ehdr.e_phnum); + if (elf32) { + hdrs.phdr32[i].p_type = bswap_32(hdrs.phdr32[i].p_type); + hdrs.phdr32[i].p_offset = bswap_32(hdrs.phdr32[i].p_offset); + hdrs.phdr32[i].p_filesz = bswap_32(hdrs.phdr32[i].p_filesz); + } else { + hdrs.phdr64[i].p_type = bswap_32(hdrs.phdr64[i].p_type); + hdrs.phdr64[i].p_offset = bswap_64(hdrs.phdr64[i].p_offset); + hdrs.phdr64[i].p_filesz = bswap_64(hdrs.phdr64[i].p_filesz); + } } + if ((elf32 ? hdrs.phdr32[i].p_type : hdrs.phdr64[i].p_type) != PT_NOTE) + continue; - buf_size = ehdr.e_phentsize * ehdr.e_phnum; - buf = malloc(buf_size); - if (buf == NULL) - goto out; - - fseek(fp, ehdr.e_phoff, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; - - for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { + p_filesz = elf32 ? hdrs.phdr32[i].p_filesz : hdrs.phdr64[i].p_filesz; + if (p_filesz > buf_size) { void *tmp; - long offset; - - if (need_swap) { - phdr->p_type = bswap_32(phdr->p_type); - phdr->p_offset = bswap_64(phdr->p_offset); - phdr->p_filesz = bswap_64(phdr->p_filesz); - } - - if (phdr->p_type != PT_NOTE) - continue; - buf_size = phdr->p_filesz; - offset = phdr->p_offset; + buf_size = p_filesz; tmp = realloc(buf, buf_size); if (tmp == NULL) goto out_free; - buf = tmp; - fseek(fp, offset, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; + } fseek(fp, elf32 ?
hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET); + if (fread(buf, p_filesz, 1, fp) != 1) + goto out_free; - ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) { - ret = bid->size; - break; - } + ret = read_build_id(buf, p_filesz, bid, need_swap); + if (ret == 0) { + ret = bid->size; + break; } } out_free: free(buf); + free(phdr); out: fclose(fp); return ret; @@ -381,13 +355,6 @@ void symbol__elf_init(void) { } -char *dso__demangle_sym(struct dso *dso __maybe_unused, - int kmodule __maybe_unused, - const char *elf_name __maybe_unused) -{ - return NULL; -} - bool filename__has_section(const char *filename __maybe_unused, const char *sec __maybe_unused) { return false; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 49b08adc6ee3..8b30c6f16a9e 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -18,6 +18,12 @@ #include "annotate.h" #include "build-id.h" #include "cap.h" +#include "cpumap.h" +#include "debug.h" +#include "demangle-cxx.h" +#include "demangle-java.h" +#include "demangle-ocaml.h" +#include "demangle-rust-v0.h" #include "dso.h" #include "util.h" // lsdir() #include "debug.h" @@ -35,6 +41,7 @@ #include "header.h" #include "path.h" #include <linux/ctype.h> +#include <linux/log2.h> #include <linux/zalloc.h> #include <elf.h> @@ -84,6 +91,7 @@ static enum dso_binary_type binary_type_symtab[] = { DSO_BINARY_TYPE__FEDORA_DEBUGINFO, DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + DSO_BINARY_TYPE__GNU_DEBUGDATA, DSO_BINARY_TYPE__SYSTEM_PATH_DSO, DSO_BINARY_TYPE__GUEST_KMODULE, DSO_BINARY_TYPE__GUEST_KMODULE_COMP, @@ -96,10 +104,12 @@ static enum dso_binary_type binary_type_symtab[] = { #define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab) -static bool symbol_type__filter(char symbol_type) +static bool symbol_type__filter(char __symbol_type) { - symbol_type = toupper(symbol_type); - return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B'; + // Since 'U' == undefined and 'u' == unique global symbol, we can't use toupper there + char symbol_type = toupper(__symbol_type); + return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B' || + __symbol_type == 'u' || __symbol_type == 'l'; } static int prefix_underscores_count(const char *str) @@ -1716,6 +1726,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: + case DSO_BINARY_TYPE__GNU_DEBUGDATA: return !kmod && dso__kernel(dso) == DSO_SPACE__USER; case DSO_BINARY_TYPE__KALLSYMS: @@ -2471,6 +2482,36 @@ int symbol__annotation_init(void) return 0; } +static int setup_parallelism_bitmap(void) +{ + struct perf_cpu_map *map; + struct perf_cpu cpu; + int i, err = -1; + + if (symbol_conf.parallelism_list_str == NULL) + return 0; + + map = perf_cpu_map__new(symbol_conf.parallelism_list_str); + if (map == NULL) { + pr_err("failed to parse parallelism filter list\n"); + return -1; + } + + bitmap_fill(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1); + perf_cpu_map__for_each_cpu(cpu, i, map) { + if (cpu.cpu <= 0 || cpu.cpu > MAX_NR_CPUS) { + pr_err("Requested parallelism level %d is invalid.\n", cpu.cpu); + goto out_delete_map; + } + __clear_bit(cpu.cpu, symbol_conf.parallelism_filter); + } + + err = 0; +out_delete_map: + perf_cpu_map__put(map); + return err; +} + int symbol__init(struct perf_env *env) { const char *symfs; @@ 
-2490,6 +2531,9 @@ int symbol__init(struct perf_env *env) return -1; } + if (setup_parallelism_bitmap()) + return -1; + if (setup_list(&symbol_conf.dso_list, symbol_conf.dso_list_str, "dso") < 0) return -1; @@ -2610,3 +2654,79 @@ int symbol__validate_sym_arguments(void) } return 0; } + +static bool want_demangle(bool is_kernel_sym) +{ + return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; +} + +/* + * Demangle C++ function signature, typically replaced by demangle-cxx.cpp + * version. + */ +#ifndef HAVE_CXA_DEMANGLE_SUPPORT +char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused, + bool modifiers __maybe_unused) +{ +#ifdef HAVE_LIBBFD_SUPPORT + int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); + + return bfd_demangle(NULL, str, flags); +#elif defined(HAVE_CPLUS_DEMANGLE_SUPPORT) + int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); + + return cplus_demangle(str, flags); +#else + return NULL; +#endif +} +#endif /* !HAVE_CXA_DEMANGLE_SUPPORT */ + +char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name) +{ + struct demangle rust_demangle = { + .style = DemangleStyleUnknown, + }; + char *demangled = NULL; + + /* + * We need to figure out if the object was created from C++ sources + * DWARF DW_compile_unit has this, but we don't always have access + * to it... + */ + if (!want_demangle((dso && dso__kernel(dso)) || kmodule)) + return demangled; + + rust_demangle_demangle(elf_name, &rust_demangle); + if (rust_demangle_is_known(&rust_demangle)) { + /* A rust mangled name. */ + if (rust_demangle.mangled_len == 0) + return demangled; + + for (size_t buf_len = roundup_pow_of_two(rust_demangle.mangled_len * 2); + buf_len < 1024 * 1024; buf_len += 32) { + char *tmp = realloc(demangled, buf_len); + + if (!tmp) { + /* Failure to grow output buffer, return what is there. */ + return demangled; + } + demangled = tmp; + if (rust_demangle_display_demangle(&rust_demangle, demangled, buf_len, + /*alternate=*/true) == OverflowOk) + return demangled; + } + /* Buffer exceeded sensible bounds, return what is there. 
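Whichever branch returns, call sites treat the result uniformly, e.g. (the pattern used by the ELF symbol loader above):

	char *demangled = dso__demangle_sym(dso, kmodule, elf_name);
	if (demangled != NULL)
		elf_name = demangled;	// caller frees it once consumed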
*/ + return demangled; + } + + demangled = cxx_demangle_sym(elf_name, verbose > 0, verbose > 0); + if (demangled) + return demangled; + + demangled = ocaml_demangle_sym(elf_name); + if (demangled) + return demangled; + + return java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); +} diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index a9c51acc722f..cd9aa82c7d5a 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -3,6 +3,8 @@ #define __PERF_SYMBOL_CONF 1 #include <stdbool.h> +#include <linux/bitmap.h> +#include "perf.h" struct strlist; struct intlist; @@ -47,7 +49,9 @@ struct symbol_conf { keep_exited_threads, annotate_data_member, annotate_data_sample, - skip_empty; + skip_empty, + enable_latency, + prefer_latency; const char *vmlinux_name, *kallsyms_name, *source_prefix, @@ -62,6 +66,7 @@ struct symbol_conf { *pid_list_str, *tid_list_str, *sym_list_str, + *parallelism_list_str, *col_width_list_str, *bt_stop_list_str; const char *addr2line_path; @@ -82,6 +87,7 @@ struct symbol_conf { int pad_output_len_dso; int group_sort_idx; int addr_range; + DECLARE_BITMAP(parallelism_filter, MAX_NR_CPUS + 1); }; extern struct symbol_conf symbol_conf; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 6923b0d5efed..2fc4d0537840 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -38,6 +38,7 @@ #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ #include <api/fs/fs.h> #include <api/io.h> +#include <api/io_dir.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> @@ -767,10 +768,10 @@ static int __event__synthesize_thread(union perf_event *comm_event, bool needs_mmap, bool mmap_data) { char filename[PATH_MAX]; - struct dirent **dirent; + struct io_dir iod; + struct io_dirent64 *dent; pid_t tgid, ppid; int rc = 0; - int i, n; /* special case: only send one comm event using passed in pid */ if (!full) { @@ -802,16 +803,19 @@ static int __event__synthesize_thread(union perf_event *comm_event, snprintf(filename, sizeof(filename), "%s/proc/%d/task", machine->root_dir, pid); - n = scandir(filename, &dirent, filter_task, NULL); - if (n < 0) - return n; + io_dir__init(&iod, open(filename, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (iod.dirfd < 0) + return -1; - for (i = 0; i < n; i++) { + while ((dent = io_dir__readdir(&iod)) != NULL) { char *end; pid_t _pid; bool kernel_thread = false; - _pid = strtol(dirent[i]->d_name, &end, 10); + if (!isdigit(dent->d_name[0])) + continue; + + _pid = strtol(dent->d_name, &end, 10); if (*end) continue; @@ -845,9 +849,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, } } - for (i = 0; i < n; i++) - zfree(&dirent[i]); - free(dirent); + close(iod.dirfd); return rc; } @@ -1508,9 +1510,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, } if (type & PERF_SAMPLE_REGS_USER) { - if (sample->user_regs.abi) { + if (sample->user_regs && sample->user_regs->abi) { result += sizeof(u64); - sz = hweight64(sample->user_regs.mask) * sizeof(u64); + sz = hweight64(sample->user_regs->mask) * sizeof(u64); result += sz; } else { result += sizeof(u64); @@ -1536,9 +1538,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, result += sizeof(u64); if (type & PERF_SAMPLE_REGS_INTR) { - if (sample->intr_regs.abi) { + if (sample->intr_regs && sample->intr_regs->abi) { result += sizeof(u64); - sz = hweight64(sample->intr_regs.mask) * 
sizeof(u64); + sz = hweight64(sample->intr_regs->mask) * sizeof(u64); result += sz; } else { result += sizeof(u64); @@ -1707,10 +1709,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo } if (type & PERF_SAMPLE_REGS_USER) { - if (sample->user_regs.abi) { - *array++ = sample->user_regs.abi; - sz = hweight64(sample->user_regs.mask) * sizeof(u64); - memcpy(array, sample->user_regs.regs, sz); + if (sample->user_regs && sample->user_regs->abi) { + *array++ = sample->user_regs->abi; + sz = hweight64(sample->user_regs->mask) * sizeof(u64); + memcpy(array, sample->user_regs->regs, sz); array = (void *)array + sz; } else { *array++ = 0; @@ -1743,10 +1745,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo } if (type & PERF_SAMPLE_REGS_INTR) { - if (sample->intr_regs.abi) { - *array++ = sample->intr_regs.abi; - sz = hweight64(sample->intr_regs.mask) * sizeof(u64); - memcpy(array, sample->intr_regs.regs, sz); + if (sample->intr_regs && sample->intr_regs->abi) { + *array++ = sample->intr_regs->abi; + sz = hweight64(sample->intr_regs->mask) * sizeof(u64); + memcpy(array, sample->intr_regs->regs, sz); array = (void *)array + sz; } else { *array++ = 0; diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 928aca4cd6e9..67a8ec10e9e4 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -7,119 +7,127 @@ #include "syscalltbl.h" #include <stdlib.h> +#include <asm/bitsperlong.h> #include <linux/compiler.h> +#include <linux/kernel.h> #include <linux/zalloc.h> #include <string.h> #include "string2.h" -#include <syscall_table.h> -const int syscalltbl_native_max_id = SYSCALLTBL_MAX_ID; -static const char *const *syscalltbl_native = syscalltbl; +#include "trace/beauty/generated/syscalltbl.c" -struct syscall { - int id; - const char *name; -}; - -static int syscallcmpname(const void *vkey, const void *ventry) -{ - const char *key = vkey; - const struct syscall *entry = ventry; - - return strcmp(key, entry->name); -} - -static int syscallcmp(const void *va, const void *vb) +static const struct syscalltbl *find_table(int e_machine) { - const struct syscall *a = va, *b = vb; + static const struct syscalltbl *last_table; + static int last_table_machine = EM_NONE; - return strcmp(a->name, b->name); -} + /* Tables only exist for EM_SPARC. 
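Hence 64-bit SPARC is folded onto the 32-bit table just below. Typical lookups through the reworked e_machine-keyed API (machine constants from elf.h):

	int id = syscalltbl__id(EM_X86_64, "openat");
	const char *name = syscalltbl__name(EM_X86_64, id);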
*/ + if (e_machine == EM_SPARCV9) + e_machine = EM_SPARC; -static int syscalltbl__init_native(struct syscalltbl *tbl) -{ - int nr_entries = 0, i, j; - struct syscall *entries; + if (last_table_machine == e_machine && last_table != NULL) + return last_table; - for (i = 0; i <= syscalltbl_native_max_id; ++i) - if (syscalltbl_native[i]) - ++nr_entries; + for (size_t i = 0; i < ARRAY_SIZE(syscalltbls); i++) { + const struct syscalltbl *entry = &syscalltbls[i]; - entries = tbl->syscalls.entries = malloc(sizeof(struct syscall) * nr_entries); - if (tbl->syscalls.entries == NULL) - return -1; + if (entry->e_machine != e_machine && entry->e_machine != EM_NONE) + continue; - for (i = 0, j = 0; i <= syscalltbl_native_max_id; ++i) { - if (syscalltbl_native[i]) { - entries[j].name = syscalltbl_native[i]; - entries[j].id = i; - ++j; - } + last_table = entry; + last_table_machine = e_machine; + return entry; } - - qsort(tbl->syscalls.entries, nr_entries, sizeof(struct syscall), syscallcmp); - tbl->syscalls.nr_entries = nr_entries; - tbl->syscalls.max_id = syscalltbl_native_max_id; - return 0; + return NULL; } -struct syscalltbl *syscalltbl__new(void) +const char *syscalltbl__name(int e_machine, int id) { - struct syscalltbl *tbl = malloc(sizeof(*tbl)); - if (tbl) { - if (syscalltbl__init_native(tbl)) { - free(tbl); - return NULL; - } + const struct syscalltbl *table = find_table(e_machine); + + if (e_machine == EM_MIPS && id > 1000) { + /* + * MIPS may encode the N32/64/O32 type in the high part of + * syscall number. Mask this off if present. See the values of + * __NR_N32_Linux, __NR_64_Linux, __NR_O32_Linux and __NR_Linux. + */ + id = id % 1000; } - return tbl; + if (table && id >= 0 && id < table->num_to_name_len) + return table->num_to_name[id]; + return NULL; } -void syscalltbl__delete(struct syscalltbl *tbl) +struct syscall_cmp_key { + const char *name; + const char *const *tbl; +}; + +static int syscallcmpname(const void *vkey, const void *ventry) { - zfree(&tbl->syscalls.entries); - free(tbl); + const struct syscall_cmp_key *key = vkey; + const uint16_t *entry = ventry; + + return strcmp(key->name, key->tbl[*entry]); } -const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id) +int syscalltbl__id(int e_machine, const char *name) { - return id <= syscalltbl_native_max_id ? syscalltbl_native[id]: NULL; + const struct syscalltbl *table = find_table(e_machine); + struct syscall_cmp_key key; + const uint16_t *id; + + if (!table) + return -1; + + key.name = name; + key.tbl = table->num_to_name; + id = bsearch(&key, table->sorted_names, table->sorted_names_len, + sizeof(table->sorted_names[0]), syscallcmpname); + + return id ? *id : -1; } -int syscalltbl__id(struct syscalltbl *tbl, const char *name) +int syscalltbl__num_idx(int e_machine) { - struct syscall *sc = bsearch(name, tbl->syscalls.entries, - tbl->syscalls.nr_entries, sizeof(*sc), - syscallcmpname); + const struct syscalltbl *table = find_table(e_machine); + + if (!table) + return 0; - return sc ? sc->id : -1; + return table->sorted_names_len; } -int syscalltbl__id_at_idx(struct syscalltbl *tbl, int idx) +int syscalltbl__id_at_idx(int e_machine, int idx) { - struct syscall *syscalls = tbl->syscalls.entries; + const struct syscalltbl *table = find_table(e_machine); - return idx < tbl->syscalls.nr_entries ? 
syscalls[idx].id : -1; + if (!table) + return -1; + + assert(idx >= 0 && idx < table->sorted_names_len); + return table->sorted_names[idx]; } -int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx) +int syscalltbl__strglobmatch_next(int e_machine, const char *syscall_glob, int *idx) { - int i; - struct syscall *syscalls = tbl->syscalls.entries; + const struct syscalltbl *table = find_table(e_machine); + + for (int i = *idx + 1; table && i < table->sorted_names_len; ++i) { + const char *name = table->num_to_name[table->sorted_names[i]]; - for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) { - if (strglobmatch(syscalls[i].name, syscall_glob)) { + if (strglobmatch(name, syscall_glob)) { *idx = i; - return syscalls[i].id; + return table->sorted_names[i]; } } return -1; } -int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx) +int syscalltbl__strglobmatch_first(int e_machine, const char *syscall_glob, int *idx) { *idx = -1; - return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); + return syscalltbl__strglobmatch_next(e_machine, syscall_glob, idx); } diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h index 362411a6d849..2bb628eff367 100644 --- a/tools/perf/util/syscalltbl.h +++ b/tools/perf/util/syscalltbl.h @@ -2,22 +2,12 @@ #ifndef __PERF_SYSCALLTBL_H #define __PERF_SYSCALLTBL_H -struct syscalltbl { - struct { - int max_id; - int nr_entries; - void *entries; - } syscalls; -}; +const char *syscalltbl__name(int e_machine, int id); +int syscalltbl__id(int e_machine, const char *name); +int syscalltbl__num_idx(int e_machine); +int syscalltbl__id_at_idx(int e_machine, int idx); -struct syscalltbl *syscalltbl__new(void); -void syscalltbl__delete(struct syscalltbl *tbl); - -const char *syscalltbl__name(const struct syscalltbl *tbl, int id); -int syscalltbl__id(struct syscalltbl *tbl, const char *name); -int syscalltbl__id_at_idx(struct syscalltbl *tbl, int idx); - -int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx); -int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx); +int syscalltbl__strglobmatch_first(int e_machine, const char *syscall_glob, int *idx); +int syscalltbl__strglobmatch_next(int e_machine, const char *syscall_glob, int *idx); #endif /* __PERF_SYSCALLTBL_H */ diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 0ffdd52d86d7..ffb48cc2103f 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#include <elf.h> #include <errno.h> +#include <fcntl.h> #include <stdlib.h> #include <stdio.h> #include <string.h> @@ -16,6 +18,7 @@ #include "symbol.h" #include "unwind.h" #include "callchain.h" +#include "dwarf-regs.h" #include <api/fs/fs.h> @@ -51,6 +54,7 @@ struct thread *thread__new(pid_t pid, pid_t tid) thread__set_ppid(thread, -1); thread__set_cpu(thread, -1); thread__set_guest_cpu(thread, -1); + thread__set_e_machine(thread, EM_NONE); thread__set_lbr_stitch_enable(thread, false); INIT_LIST_HEAD(thread__namespaces_list(thread)); INIT_LIST_HEAD(thread__comm_list(thread)); @@ -406,7 +410,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bo } void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, - struct addr_location *al) + bool symbols, struct addr_location *al) { size_t i; const u8 cpumodes[] = { @@ -417,12 +421,93 @@ void 
thread__find_cpumode_addr_location(struct thread *thread, u64 addr, }; for (i = 0; i < ARRAY_SIZE(cpumodes); i++) { - thread__find_symbol(thread, cpumodes[i], addr, al); + if (symbols) + thread__find_symbol(thread, cpumodes[i], addr, al); + else + thread__find_map(thread, cpumodes[i], addr, al); + if (al->map) break; } } +static uint16_t read_proc_e_machine_for_pid(pid_t pid) +{ + char path[6 /* "/proc/" */ + 11 /* max length of pid */ + 5 /* "/exe\0" */]; + int fd; + uint16_t e_machine = EM_NONE; + + snprintf(path, sizeof(path), "/proc/%d/exe", pid); + fd = open(path, O_RDONLY); + if (fd >= 0) { + _Static_assert(offsetof(Elf32_Ehdr, e_machine) == 18, "Unexpected offset"); + _Static_assert(offsetof(Elf64_Ehdr, e_machine) == 18, "Unexpected offset"); + if (pread(fd, &e_machine, sizeof(e_machine), 18) != sizeof(e_machine)) + e_machine = EM_NONE; + close(fd); + } + return e_machine; +} + +static int thread__e_machine_callback(struct map *map, void *machine) +{ + struct dso *dso = map__dso(map); + + _Static_assert(0 == EM_NONE, "Unexpected EM_NONE"); + if (!dso) + return EM_NONE; + + return dso__e_machine(dso, machine); +} + +uint16_t thread__e_machine(struct thread *thread, struct machine *machine) +{ + pid_t tid, pid; + uint16_t e_machine = RC_CHK_ACCESS(thread)->e_machine; + + if (e_machine != EM_NONE) + return e_machine; + + tid = thread__tid(thread); + pid = thread__pid(thread); + if (pid != tid) { + struct thread *parent = machine__findnew_thread(machine, pid, pid); + + if (parent) { + e_machine = thread__e_machine(parent, machine); + thread__put(parent); + thread__set_e_machine(thread, e_machine); + return e_machine; + } + /* Something went wrong, fallback. */ + } + /* Reading on the PID thread. First try to find from the maps. */ + e_machine = maps__for_each_map(thread__maps(thread), + thread__e_machine_callback, + machine); + if (e_machine == EM_NONE) { + /* Maps failed, perhaps we're live with map events disabled. */ + bool is_live = machine->machines == NULL; + + if (!is_live) { + /* Check if the session has a data file. */ + struct perf_session *session = container_of(machine->machines, + struct perf_session, + machines); + + is_live = !!session->data; + } + /* Read from /proc/pid/exe if live. */ + if (is_live) + e_machine = read_proc_e_machine_for_pid(pid); + } + if (e_machine != EM_NONE) + thread__set_e_machine(thread, e_machine); + else + e_machine = EM_HOST; + return e_machine; +} + struct thread *thread__main_thread(struct machine *machine, struct thread *thread) { if (thread__pid(thread) == thread__tid(thread)) diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 6cbf6eb2812e..2b90bbed7a61 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -60,7 +60,11 @@ DECLARE_RC_STRUCT(thread) { struct srccode_state srccode_state; bool filter; int filter_entry_depth; - + /** + * @e_machine: The ELF EM_* associated with the thread. EM_NONE if not + * computed. 
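Computed lazily by thread__e_machine(): taken from the thread-group leader when this thread is not the leader, otherwise from the thread's maps, with a final fallback to /proc/<pid>/exe on live sessions.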
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 6cbf6eb2812e..2b90bbed7a61 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -60,7 +60,11 @@ DECLARE_RC_STRUCT(thread) {
 	struct srccode_state	srccode_state;
 	bool			filter;
 	int			filter_entry_depth;
-
+	/**
+	 * @e_machine: The ELF EM_* associated with the thread. EM_NONE if not
+	 * computed.
+	 */
+	uint16_t		e_machine;
 	/* LBR call stack stitch */
 	bool			lbr_stitch_enable;
 	struct lbr_stitch	*lbr_stitch;
@@ -122,7 +126,7 @@ struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
 				      u64 addr, struct addr_location *al);
 
 void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
-					struct addr_location *al);
+					bool symbols, struct addr_location *al);
 
 int thread__memcpy(struct thread *thread, struct machine *machine,
 		   void *buf, u64 ip, int len, bool *is64bit);
@@ -302,6 +306,14 @@ static inline void thread__set_filter_entry_depth(struct thread *thread, int dep
 	RC_CHK_ACCESS(thread)->filter_entry_depth = depth;
 }
 
+uint16_t thread__e_machine(struct thread *thread, struct machine *machine);
+
+static inline void thread__set_e_machine(struct thread *thread, uint16_t e_machine)
+{
+	RC_CHK_ACCESS(thread)->e_machine = e_machine;
+}
+
+
 static inline bool thread__lbr_stitch_enable(const struct thread *thread)
 {
 	return RC_CHK_ACCESS(thread)->lbr_stitch_enable;
diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c
index 3b7f390f26eb..37bd8ac63b01 100644
--- a/tools/perf/util/tool.c
+++ b/tools/perf/util/tool.c
@@ -43,8 +43,15 @@ static int perf_session__process_compressed_event(struct perf_session *session,
 		decomp->size = decomp_last_rem;
 	}
 
-	src = (void *)event + sizeof(struct perf_record_compressed);
-	src_size = event->pack.header.size - sizeof(struct perf_record_compressed);
+	if (event->header.type == PERF_RECORD_COMPRESSED) {
+		src = (void *)event + sizeof(struct perf_record_compressed);
+		src_size = event->pack.header.size - sizeof(struct perf_record_compressed);
+	} else if (event->header.type == PERF_RECORD_COMPRESSED2) {
+		src = (void *)event + sizeof(struct perf_record_compressed2);
+		src_size = event->pack2.data_size;
+	} else {
+		return -1;
+	}
 
 	decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size,
 				&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
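The cached field plus the accessor pair above makes "which architecture is this thread?" a one-call question. A hypothetical caller sketch; the EM_386 test is chosen only as an example, and thread__e_machine() caches its answer via thread__set_e_machine(), so repeated queries stay cheap:

#include <elf.h>
#include <stdbool.h>
#include "util/machine.h"
#include "util/thread.h"

/* Hypothetical helper: is this thread running 32-bit x86 code? */
static bool thread_is_ia32(struct thread *thread, struct machine *machine)
{
	return thread__e_machine(thread, machine) == EM_386;
}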
diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c
index 4fb097578479..4630b8cc8e52 100644
--- a/tools/perf/util/tool_pmu.c
+++ b/tools/perf/util/tool_pmu.c
@@ -62,7 +62,8 @@ int tool_pmu__num_skip_events(void)
 
 const char *tool_pmu__event_to_str(enum tool_pmu_event ev)
 {
-	if (ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX)
+	if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) &&
+	    !tool_pmu__skip_event(tool_pmu__event_names[ev]))
 		return tool_pmu__event_names[ev];
 
 	return NULL;
@@ -354,6 +355,7 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result)
 
 		if (online) {
 			*result = perf_cpu_map__nr(online);
+			perf_cpu_map__put(online);
 			return true;
 		}
 		return false;
@@ -484,22 +486,28 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
 		delta_start *= 1000000000 / ticks_per_sec;
 	}
 	count->val = delta_start;
-	count->ena = count->run = delta_start;
 	count->lost = 0;
+	/*
+	 * The values of enabled and running must make a ratio of 100%. The
+	 * exact values don't matter as long as they are non-zero to avoid
+	 * issues with evsel__count_has_error.
+	 */
+	count->ena++;
+	count->run++;
 	return 0;
 }
 
-struct perf_pmu *perf_pmus__tool_pmu(void)
+struct perf_pmu *tool_pmu__new(void)
 {
-	static struct perf_pmu tool = {
-		.name = "tool",
-		.type = PERF_PMU_TYPE_TOOL,
-		.aliases = LIST_HEAD_INIT(tool.aliases),
-		.caps = LIST_HEAD_INIT(tool.caps),
-		.format = LIST_HEAD_INIT(tool.format),
-	};
-	if (!tool.events_table)
-		tool.events_table = find_core_events_table("common", "common");
-
-	return &tool;
+	struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));
+
+	if (!tool)
+		return NULL;
+
+	if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) {
+		perf_pmu__delete(tool);
+		return NULL;
+	}
+	tool->events_table = find_core_events_table("common", "common");
+	return tool;
 }
diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h
index a60184859080..c6ad1dd90a56 100644
--- a/tools/perf/util/tool_pmu.h
+++ b/tools/perf/util/tool_pmu.h
@@ -51,6 +51,6 @@ int evsel__tool_pmu_open(struct evsel *evsel,
 			 int start_cpu_map_idx, int end_cpu_map_idx);
 int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread);
 
-struct perf_pmu *perf_pmus__tool_pmu(void);
+struct perf_pmu *tool_pmu__new(void);
 
 #endif /* __TOOL_PMU_H */
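The enabled/running comment above leans on how perf scales counts: the reported value is val * enabled / running, so only the ena/run ratio matters, and an equal non-zero pair reads as "counted 100% of the time". A sketch of that arithmetic; the helper name and double-precision rounding are illustrative, not perf's exact implementation:

#include <stdint.h>

/* Scale a raw count by the fraction of time the event was running. */
static uint64_t scale_count(uint64_t val, uint64_t ena, uint64_t run)
{
	return run ? (uint64_t)((double)val * ena / run) : val;
}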
"/%s" : ",%s", + branch_events[i].name); + if (ret < 0) + return ret; + pos += ret; + ev_idx++; } - return NULL; + /* Add an end character '/' for events */ + if (ev_idx) { + ret = snprintf(str + pos, size - pos, "/"); + if (ret < 0) + return ret; + pos += ret; + } + + if (!xf) + return pos; + + snprintf(xs, sizeof(xs), "(%s%s%s)", + flags & PERF_IP_FLAG_IN_TX ? "x" : "", + flags & PERF_IP_FLAG_INTR_DISABLE ? "D" : "", + flags & PERF_IP_FLAG_INTR_TOGGLE ? "t" : ""); + + /* Right align the string if its length is less than the limit */ + if ((pos + strlen(xs)) < SAMPLE_FLAGS_STR_ALIGNED_SIZE) + ret = snprintf(str + pos, size - pos, "%*s", + (int)(SAMPLE_FLAGS_STR_ALIGNED_SIZE - ret), xs); + else + ret = snprintf(str + pos, size - pos, " %s", xs); + if (ret < 0) + return ret; + + return pos + ret; } int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz) { - u32 xf = PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_INTR_DISABLE | - PERF_IP_FLAG_INTR_TOGGLE; const char *chars = PERF_IP_FLAG_CHARS; const size_t n = strlen(PERF_IP_FLAG_CHARS); - const char *name = NULL; size_t i, pos = 0; - char xs[16] = {0}; - - if (flags & xf) - snprintf(xs, sizeof(xs), "(%s%s%s)", - flags & PERF_IP_FLAG_IN_TX ? "x" : "", - flags & PERF_IP_FLAG_INTR_DISABLE ? "D" : "", - flags & PERF_IP_FLAG_INTR_TOGGLE ? "t" : ""); - - name = sample_flags_to_name(flags & ~xf); - if (name) - return snprintf(str, sz, "%-15s%6s", name, xs); - - if (flags & PERF_IP_FLAG_TRACE_BEGIN) { - name = sample_flags_to_name(flags & ~(xf | PERF_IP_FLAG_TRACE_BEGIN)); - if (name) - return snprintf(str, sz, "tr strt %-7s%6s", name, xs); - } + int ret; - if (flags & PERF_IP_FLAG_TRACE_END) { - name = sample_flags_to_name(flags & ~(xf | PERF_IP_FLAG_TRACE_END)); - if (name) - return snprintf(str, sz, "tr end %-7s%6s", name, xs); - } + ret = sample_flags_to_name(flags, str, sz); + if (ret > 0) + return ret; for (i = 0; i < n; i++, flags >>= 1) { if ((flags & 1) && pos < sz) diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index ac9fde2f980c..71e680bc3d4b 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -145,6 +145,8 @@ int common_flags(struct scripting_context *context); int common_lock_depth(struct scripting_context *context); #define SAMPLE_FLAGS_BUF_SIZE 64 +#define SAMPLE_FLAGS_STR_ALIGNED_SIZE 21 + int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz); #if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) diff --git a/tools/perf/util/trace.h b/tools/perf/util/trace.h new file mode 100644 index 000000000000..fa8d480527a2 --- /dev/null +++ b/tools/perf/util/trace.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef UTIL_TRACE_H +#define UTIL_TRACE_H + +#include <stdio.h> /* for FILE */ + +enum trace_summary_mode { + SUMMARY__NONE = 0, + SUMMARY__BY_TOTAL, + SUMMARY__BY_THREAD, + SUMMARY__BY_CGROUP, +}; + +#ifdef HAVE_BPF_SKEL + +int trace_prepare_bpf_summary(enum trace_summary_mode mode); +void trace_start_bpf_summary(void); +void trace_end_bpf_summary(void); +int trace_print_bpf_summary(FILE *fp); +void trace_cleanup_bpf_summary(void); + +#else /* !HAVE_BPF_SKEL */ + +static inline int trace_prepare_bpf_summary(enum trace_summary_mode mode __maybe_unused) +{ + return -1; +} +static inline void trace_start_bpf_summary(void) {} +static inline void trace_end_bpf_summary(void) {} +static inline int trace_print_bpf_summary(FILE *fp __maybe_unused) +{ + return 0; +} +static inline void trace_cleanup_bpf_summary(void) {} + 
diff --git a/tools/perf/util/trace.h b/tools/perf/util/trace.h
new file mode 100644
index 000000000000..fa8d480527a2
--- /dev/null
+++ b/tools/perf/util/trace.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef UTIL_TRACE_H
+#define UTIL_TRACE_H
+
+#include <stdio.h> /* for FILE */
+
+enum trace_summary_mode {
+	SUMMARY__NONE = 0,
+	SUMMARY__BY_TOTAL,
+	SUMMARY__BY_THREAD,
+	SUMMARY__BY_CGROUP,
+};
+
+#ifdef HAVE_BPF_SKEL
+
+int trace_prepare_bpf_summary(enum trace_summary_mode mode);
+void trace_start_bpf_summary(void);
+void trace_end_bpf_summary(void);
+int trace_print_bpf_summary(FILE *fp);
+void trace_cleanup_bpf_summary(void);
+
+#else /* !HAVE_BPF_SKEL */
+
+static inline int trace_prepare_bpf_summary(enum trace_summary_mode mode __maybe_unused)
+{
+	return -1;
+}
+static inline void trace_start_bpf_summary(void) {}
+static inline void trace_end_bpf_summary(void) {}
+static inline int trace_print_bpf_summary(FILE *fp __maybe_unused)
+{
+	return 0;
+}
+static inline void trace_cleanup_bpf_summary(void) {}
+
+#endif /* HAVE_BPF_SKEL */
+
+#endif /* UTIL_TRACE_H */
diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
index 32c39cfe209b..4c6a86e1cb54 100644
--- a/tools/perf/util/units.c
+++ b/tools/perf/util/units.c
@@ -64,7 +64,7 @@ unsigned long convert_unit(unsigned long value, char *unit)
 
 int unit_number__scnprintf(char *buf, size_t size, u64 n)
 {
-	char unit[4] = "BKMG";
+	char unit[] = "BKMG";
 	int i = 0;
 
 	while (((n / 1024) > 1) && (i < 3)) {
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index bde216e630d2..793d11832694 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -190,7 +190,10 @@ static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *
 	int offset;
 	int ret;
 
-	ret = perf_reg_value(&start, &ui->sample->user_regs,
+	if (!ui->sample->user_regs)
+		return false;
+
+	ret = perf_reg_value(&start, ui->sample->user_regs,
 			     perf_arch_reg_sp(arch));
 	if (ret)
 		return false;
@@ -273,7 +276,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 	Dwarf_Word ip;
 	int err = -EINVAL, i;
 
-	if (!data->user_regs.regs)
+	if (!data->user_regs || !data->user_regs->regs)
 		return -EINVAL;
 
 	ui = zalloc(sizeof(ui_buf) + sizeof(ui_buf.entries[0]) * max_stack);
@@ -286,7 +289,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 	if (!ui->dwfl)
 		goto out;
 
-	err = perf_reg_value(&ip, &data->user_regs, perf_arch_reg_ip(arch));
+	err = perf_reg_value(&ip, data->user_regs, perf_arch_reg_ip(arch));
 	if (err)
 		goto out;
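The units.c hunk above is a string-termination fix: a 4-byte array initialized from a 4-character literal drops the trailing NUL, while an unsized array keeps it. A standalone illustration of the difference, with sizes the C standard guarantees:

#include <stdio.h>

int main(void)
{
	char exact[4] = "BKMG";	/* exactly 4 bytes, no room for '\0' */
	char sized[]  = "BKMG";	/* 5 bytes, '\0' included */

	printf("%zu %zu\n", sizeof(exact), sizeof(sized));	/* prints "4 5" */
	return 0;
}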
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 16c2b03831f3..0b037e7389a0 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -330,8 +330,7 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui,
 	int ret, fd;
 
 	if (dso__data(dso)->eh_frame_hdr_offset == 0) {
-		fd = dso__data_get_fd(dso, ui->machine);
-		if (fd < 0)
+		if (!dso__data_get_fd(dso, ui->machine, &fd))
 			return -EINVAL;
 
 		/* Check the .eh_frame section for unwinding info */
@@ -372,8 +371,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
 	 * has to be pointed by symsrc_filename
 	 */
 	if (ofs == 0) {
-		fd = dso__data_get_fd(dso, machine);
-		if (fd >= 0) {
+		if (dso__data_get_fd(dso, machine, &fd)) {
 			ofs = elf_section_offset(fd, ".debug_frame");
 			dso__data_put_fd(dso);
 		}
@@ -485,14 +483,16 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
 
 	/* Check the .debug_frame section for unwinding info */
 	if (ret < 0 &&
 	    !read_unwind_spec_debug_frame(dso, ui->machine, &segbase)) {
-		int fd = dso__data_get_fd(dso, ui->machine);
-		int is_exec = elf_is_exec(fd, dso__name(dso));
+		int fd;
 		u64 start = map__start(map);
-		unw_word_t base = is_exec ? 0 : start;
+		unw_word_t base = start;
 		const char *symfile;
 
-		if (fd >= 0)
+		if (dso__data_get_fd(dso, ui->machine, &fd)) {
+			if (elf_is_exec(fd, dso__name(dso)))
+				base = 0;
 			dso__data_put_fd(dso);
+		}
 
 		symfile = dso__symsrc_filename(dso) ?: dso__name(dso);
 
@@ -579,12 +579,12 @@ static int access_mem(unw_addr_space_t __maybe_unused as,
 	int ret;
 
 	/* Don't support write, probably not needed. */
-	if (__write || !stack || !ui->sample->user_regs.regs) {
+	if (__write || !stack || !ui->sample->user_regs || !ui->sample->user_regs->regs) {
 		*valp = 0;
 		return 0;
 	}
 
-	ret = perf_reg_value(&start, &ui->sample->user_regs,
+	ret = perf_reg_value(&start, perf_sample__user_regs(ui->sample),
 			     perf_arch_reg_sp(arch));
 	if (ret)
 		return ret;
@@ -628,7 +628,7 @@ static int access_reg(unw_addr_space_t __maybe_unused as,
 		return 0;
 	}
 
-	if (!ui->sample->user_regs.regs) {
+	if (!ui->sample->user_regs || !ui->sample->user_regs->regs) {
 		*valp = 0;
 		return 0;
 	}
@@ -637,7 +637,7 @@ static int access_reg(unw_addr_space_t __maybe_unused as,
 	if (id < 0)
 		return -EINVAL;
 
-	ret = perf_reg_value(&val, &ui->sample->user_regs, id);
+	ret = perf_reg_value(&val, perf_sample__user_regs(ui->sample), id);
 	if (ret) {
 		if (!ui->best_effort)
 			pr_err("unwind: can't read reg %d\n", regnum);
@@ -741,7 +741,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
 	unw_cursor_t c;
 	int ret, i = 0;
 
-	ret = perf_reg_value(&val, &ui->sample->user_regs,
+	ret = perf_reg_value(&val, perf_sample__user_regs(ui->sample),
 			     perf_arch_reg_ip(arch));
 	if (ret)
 		return ret;
@@ -808,7 +808,7 @@ static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 		.best_effort = best_effort
 	};
 
-	if (!data->user_regs.regs)
+	if (!data->user_regs || !data->user_regs->regs)
 		return -EINVAL;
 
 	if (max_stack <= 0)