Diffstat (limited to 'tools/perf/util')
349 files changed, 44968 insertions, 21491 deletions
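The Build hunk that opens the diff renames every object list from perf-y to perf-util-y, moves the DWARF objects from CONFIG_DWARF to CONFIG_LIBDW, and adds new objects such as addr2line.o, capstone.o and disasm.o. All of these rely on the kbuild convention that a feature variable expanding to 'y' steers an object into the built list, while an empty variable sends it to a name the build never reads. A minimal, self-contained sketch of that pattern (the CONFIG_DEMO/CONFIG_MISSING variables and the .o names here are invented for illustration, not taken from the tree):

# demo.mk: minimal sketch of the kbuild-style list pattern used by util/Build.
CONFIG_DEMO := y

# Unconditional object: always appended to the built list.
perf-util-y += always.o
# CONFIG_DEMO expands to 'y', so demo.o also joins the perf-util-y list.
perf-util-$(CONFIG_DEMO) += demo.o
# CONFIG_MISSING is undefined, so this appends to the never-read
# variable 'perf-util-' and skipped.o is not built.
perf-util-$(CONFIG_MISSING) += skipped.o

all:
	@echo 'perf-util-y = $(perf-util-y)'

Running 'make -f demo.mk' prints 'perf-util-y = always.o demo.o', which is exactly how the perf-util-$(CONFIG_LIBDW) and perf-util-$(CONFIG_PERF_BPF_SKEL) lines below include or drop objects based on feature detection.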
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 96f4ea1d45c5..1c2a43e1dc68 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,174 +1,201 @@ -perf-y += arm64-frame-pointer-unwind-support.o -perf-y += addr_location.o -perf-y += annotate.o -perf-y += block-info.o -perf-y += block-range.o -perf-y += build-id.o -perf-y += cacheline.o -perf-y += config.o -perf-y += copyfile.o -perf-y += ctype.o -perf-y += db-export.o -perf-y += env.o -perf-y += event.o -perf-y += evlist.o -perf-y += sideband_evlist.o -perf-y += evsel.o -perf-y += evsel_fprintf.o -perf-y += perf_event_attr_fprintf.o -perf-y += evswitch.o -perf-y += find_bit.o -perf-y += get_current_dir_name.o -perf-y += levenshtein.o -perf-y += llvm-utils.o -perf-y += mmap.o -perf-y += memswap.o -perf-y += parse-events.o -perf-y += print-events.o -perf-y += tracepoint.o -perf-y += perf_regs.o -perf-y += path.o -perf-y += print_binary.o -perf-y += rlimit.o -perf-y += argv_split.o -perf-y += rbtree.o -perf-y += libstring.o -perf-y += bitmap.o -perf-y += hweight.o -perf-y += smt.o -perf-y += strbuf.o -perf-y += string.o -perf-y += strlist.o -perf-y += strfilter.o -perf-y += top.o -perf-y += usage.o -perf-y += dso.o -perf-y += dsos.o -perf-y += symbol.o -perf-y += symbol_fprintf.o -perf-y += color.o -perf-y += color_config.o -perf-y += metricgroup.o -perf-y += header.o -perf-y += callchain.o -perf-y += values.o -perf-y += debug.o -perf-y += fncache.o -perf-y += machine.o -perf-y += map.o -perf-y += maps.o -perf-y += pstack.o -perf-y += session.o -perf-y += sample-raw.o -perf-y += s390-sample-raw.o -perf-y += amd-sample-raw.o -perf-$(CONFIG_TRACE) += syscalltbl.o -perf-y += ordered-events.o -perf-y += namespaces.o -perf-y += comm.o -perf-y += thread.o -perf-y += thread_map.o -perf-y += parse-events-flex.o -perf-y += parse-events-bison.o -perf-y += pmu.o -perf-y += pmus.o -perf-y += pmu-flex.o -perf-y += pmu-bison.o -perf-y += svghelper.o -perf-$(CONFIG_LIBTRACEEVENT) += trace-event-info.o -perf-y += trace-event-scripting.o -perf-$(CONFIG_LIBTRACEEVENT) += trace-event.o -perf-$(CONFIG_LIBTRACEEVENT) += trace-event-parse.o -perf-$(CONFIG_LIBTRACEEVENT) += trace-event-read.o -perf-y += sort.o -perf-y += hist.o -perf-y += util.o -perf-y += cpumap.o -perf-y += affinity.o -perf-y += cputopo.o -perf-y += cgroup.o -perf-y += target.o -perf-y += rblist.o -perf-y += intlist.o -perf-y += vdso.o -perf-y += counts.o -perf-y += stat.o -perf-y += stat-shadow.o -perf-y += stat-display.o -perf-y += perf_api_probe.o -perf-y += record.o -perf-y += srcline.o -perf-y += srccode.o -perf-y += synthetic-events.o -perf-y += data.o -perf-y += tsc.o -perf-y += cloexec.o -perf-y += call-path.o -perf-y += rwsem.o -perf-y += thread-stack.o -perf-y += spark.o -perf-y += topdown.o -perf-y += iostat.o -perf-y += stream.o -perf-$(CONFIG_AUXTRACE) += auxtrace.o -perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ -perf-$(CONFIG_AUXTRACE) += intel-pt.o -perf-$(CONFIG_AUXTRACE) += intel-bts.o -perf-$(CONFIG_AUXTRACE) += arm-spe.o -perf-$(CONFIG_AUXTRACE) += arm-spe-decoder/ -perf-$(CONFIG_AUXTRACE) += hisi-ptt.o -perf-$(CONFIG_AUXTRACE) += hisi-ptt-decoder/ -perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o +include $(srctree)/tools/scripts/Makefile.include +include $(srctree)/tools/scripts/utilities.mak + +perf-util-y += arm64-frame-pointer-unwind-support.o +perf-util-y += addr2line.o +perf-util-y += addr_location.o +perf-util-y += annotate.o +perf-util-y += block-info.o +perf-util-y += block-range.o +perf-util-y += build-id.o +perf-util-y += cacheline.o +perf-util-y 
+= capstone.o +perf-util-y += config.o +perf-util-y += copyfile.o +perf-util-y += ctype.o +perf-util-y += db-export.o +perf-util-y += disasm.o +perf-util-y += env.o +perf-util-y += event.o +perf-util-y += evlist.o +perf-util-y += sideband_evlist.o +perf-util-y += evsel.o +perf-util-y += evsel_fprintf.o +perf-util-y += perf_event_attr_fprintf.o +perf-util-y += evswitch.o +perf-util-y += find_bit.o +perf-util-y += levenshtein.o +perf-util-$(CONFIG_LIBBFD) += libbfd.o +perf-util-y += llvm.o +perf-util-y += mmap.o +perf-util-y += memswap.o +perf-util-y += parse-events.o +perf-util-y += print-events.o +perf-util-y += tracepoint.o +perf-util-y += perf_regs.o +perf-util-y += perf-regs-arch/ +perf-util-y += path.o +perf-util-y += print_binary.o +perf-util-y += print_insn.o +perf-util-y += rlimit.o +perf-util-y += argv_split.o +perf-util-y += rbtree.o +perf-util-y += libstring.o +perf-util-y += bitmap.o +perf-util-y += hweight.o +perf-util-y += sha1.o +perf-util-y += smt.o +perf-util-y += strbuf.o +perf-util-y += string.o +perf-util-y += strlist.o +perf-util-y += strfilter.o +perf-util-y += top.o +perf-util-y += usage.o +perf-util-y += dso.o +perf-util-y += dsos.o +perf-util-y += symbol.o +perf-util-y += symbol_fprintf.o +perf-util-y += map_symbol.o +perf-util-y += color.o +perf-util-y += color_config.o +perf-util-y += metricgroup.o +perf-util-y += header.o +perf-util-y += callchain.o +perf-util-y += values.o +perf-util-y += debug.o +perf-util-y += fncache.o +perf-util-y += machine.o +perf-util-y += map.o +perf-util-y += maps.o +perf-util-y += pstack.o +perf-util-y += session.o +perf-util-y += tool.o +perf-util-y += sample.o +perf-util-y += sample-raw.o +perf-util-y += s390-sample-raw.o +perf-util-y += amd-sample-raw.o +perf-util-$(CONFIG_TRACE) += syscalltbl.o +perf-util-y += ordered-events.o +perf-util-y += namespaces.o +perf-util-y += comm.o +perf-util-y += thread.o +perf-util-y += threads.o +perf-util-y += thread_map.o +perf-util-y += parse-events-flex.o +perf-util-y += parse-events-bison.o +perf-util-y += pmu.o +perf-util-y += pmus.o +perf-util-y += pmu-flex.o +perf-util-y += pmu-bison.o +perf-util-y += drm_pmu.o +perf-util-y += hwmon_pmu.o +perf-util-y += tool_pmu.o +perf-util-y += tp_pmu.o +perf-util-y += svghelper.o +perf-util-y += trace-event-info.o +perf-util-y += trace-event-scripting.o +perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event.o +perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-parse.o +perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-read.o +perf-util-y += sort.o +perf-util-y += hist.o +perf-util-y += util.o +perf-util-y += cpumap.o +perf-util-y += affinity.o +perf-util-y += cputopo.o +perf-util-y += cgroup.o +perf-util-y += target.o +perf-util-y += rblist.o +perf-util-y += intlist.o +perf-util-y += vdso.o +perf-util-y += counts.o +perf-util-y += stat.o +perf-util-y += stat-shadow.o +perf-util-y += stat-display.o +perf-util-y += perf_api_probe.o +perf-util-y += record.o +perf-util-y += srcline.o +perf-util-y += srccode.o +perf-util-y += synthetic-events.o +perf-util-y += data.o +perf-util-y += tsc.o +perf-util-y += cloexec.o +perf-util-y += call-path.o +perf-util-y += rwsem.o +perf-util-y += thread-stack.o +perf-util-y += spark.o +perf-util-y += topdown.o +perf-util-y += iostat.o +perf-util-y += stream.o +perf-util-y += kvm-stat.o +perf-util-y += lock-contention.o +perf-util-y += auxtrace.o +perf-util-y += intel-pt-decoder/ +perf-util-y += intel-pt.o +perf-util-y += intel-bts.o +perf-util-y += arm-spe.o +perf-util-y += arm-spe-decoder/ +perf-util-y += hisi-ptt.o +perf-util-y += 
hisi-ptt-decoder/ +perf-util-y += s390-cpumsf.o +perf-util-y += powerpc-vpadtl.o ifdef CONFIG_LIBOPENCSD -perf-$(CONFIG_AUXTRACE) += cs-etm.o -perf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ +perf-util-y += cs-etm.o +perf-util-y += cs-etm-decoder/ +endif +perf-util-y += cs-etm-base.o + +perf-util-y += parse-branch-options.o +perf-util-y += dump-insn.o +perf-util-y += parse-regs-options.o +perf-util-y += parse-sublevel-options.o +perf-util-y += term.o +perf-util-y += help-unknown-cmd.o +perf-util-y += dlfilter.o +perf-util-y += mem-events.o +perf-util-y += mem-info.o +perf-util-y += vsprintf.o +perf-util-y += units.o +perf-util-y += time-utils.o +perf-util-y += expr-flex.o +perf-util-y += expr-bison.o +perf-util-y += expr.o +perf-util-y += branch.o +perf-util-y += mem2node.o +perf-util-y += clockid.o +perf-util-y += list_sort.o +perf-util-y += mutex.o +perf-util-y += sharded_mutex.o +perf-util-y += intel-tpebs.o + +perf-util-$(CONFIG_LIBBPF) += bpf_map.o +perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o +perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o +perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o +perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o +perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter.o +perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-flex.o +perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-bison.o +perf-util-$(CONFIG_PERF_BPF_SKEL) += btf.o + +ifeq ($(CONFIG_TRACE),y) + perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-trace-summary.o + perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_trace_augment.o endif -perf-$(CONFIG_AUXTRACE) += cs-etm-base.o - -perf-y += parse-branch-options.o -perf-y += dump-insn.o -perf-y += parse-regs-options.o -perf-y += parse-sublevel-options.o -perf-y += term.o -perf-y += help-unknown-cmd.o -perf-y += dlfilter.o -perf-y += mem-events.o -perf-y += vsprintf.o -perf-y += units.o -perf-y += time-utils.o -perf-y += expr-flex.o -perf-y += expr-bison.o -perf-y += expr.o -perf-y += branch.o -perf-y += mem2node.o -perf-y += clockid.o -perf-y += list_sort.o -perf-y += mutex.o -perf-y += sharded_mutex.o - -perf-$(CONFIG_LIBBPF) += bpf-loader.o -perf-$(CONFIG_LIBBPF) += bpf_map.o -perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o -perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o -perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o -perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o -perf-$(CONFIG_PERF_BPF_SKEL) += bpf-filter.o -perf-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-flex.o -perf-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-bison.o ifeq ($(CONFIG_LIBTRACEEVENT),y) - perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o + perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o endif ifeq ($(CONFIG_LIBTRACEEVENT),y) - perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o + perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o + perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork_top.o endif -perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o -perf-$(CONFIG_LIBELF) += symbol-elf.o -perf-$(CONFIG_LIBELF) += probe-file.o -perf-$(CONFIG_LIBELF) += probe-event.o +perf-util-$(CONFIG_LIBELF) += symbol-elf.o +perf-util-$(CONFIG_LIBELF) += probe-file.o +perf-util-$(CONFIG_LIBELF) += probe-event.o ifdef CONFIG_LIBBPF_DYNAMIC hashmap := 1 @@ -178,63 +205,69 @@ ifndef CONFIG_LIBBPF endif ifdef hashmap -perf-y += hashmap.o +perf-util-y += hashmap.o endif ifndef CONFIG_LIBELF -perf-y += symbol-minimal.o +perf-util-y += symbol-minimal.o endif ifndef CONFIG_SETNS -perf-y += setns.o +perf-util-y += setns.o endif -perf-$(CONFIG_DWARF) += probe-finder.o -perf-$(CONFIG_DWARF) += dwarf-aux.o -perf-$(CONFIG_DWARF) += 
dwarf-regs.o - -perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o -perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o -perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o -perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o -perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o +perf-util-$(CONFIG_LIBDW) += probe-finder.o +perf-util-$(CONFIG_LIBDW) += dwarf-aux.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs-csky.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs-powerpc.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs-x86.o +perf-util-$(CONFIG_LIBDW) += debuginfo.o +perf-util-$(CONFIG_LIBDW) += annotate-data.o + +perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o +perf-util-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +perf-util-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o +perf-util-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o ifeq ($(CONFIG_LIBTRACEEVENT),y) - perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o + perf-util-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o endif -perf-y += data-convert-json.o +perf-util-y += data-convert-json.o + +perf-util-y += scripting-engines/ -perf-y += scripting-engines/ +perf-util-$(CONFIG_ZLIB) += zlib.o +perf-util-$(CONFIG_LZMA) += lzma.o +perf-util-$(CONFIG_ZSTD) += zstd.o -perf-$(CONFIG_ZLIB) += zlib.o -perf-$(CONFIG_LZMA) += lzma.o -perf-$(CONFIG_ZSTD) += zstd.o +perf-util-y += cap.o -perf-$(CONFIG_LIBCAP) += cap.o +perf-util-$(CONFIG_CXX_DEMANGLE) += demangle-cxx.o +perf-util-y += demangle-ocaml.o +perf-util-y += demangle-java.o +perf-util-y += demangle-rust-v0.o +perf-util-$(CONFIG_LIBLLVM) += llvm-c-helpers.o -perf-$(CONFIG_CXX_DEMANGLE) += demangle-cxx.o -perf-y += demangle-ocaml.o -perf-y += demangle-java.o -perf-y += demangle-rust.o +CFLAGS_demangle-rust-v0.o += -Wno-shadow -Wno-declaration-after-statement \ + -Wno-switch-default -Wno-switch-enum -Wno-missing-field-initializers ifdef CONFIG_JITDUMP -perf-$(CONFIG_LIBELF) += jitdump.o -perf-$(CONFIG_LIBELF) += genelf.o -perf-$(CONFIG_DWARF) += genelf_debug.o +perf-util-$(CONFIG_LIBELF) += jitdump.o +perf-util-$(CONFIG_LIBELF) += genelf.o +perf-util-$(CONFIG_LIBDW) += genelf_debug.o endif -perf-y += perf-hooks.o +perf-util-y += perf-hooks.o -perf-$(CONFIG_LIBBPF) += bpf-event.o -perf-$(CONFIG_LIBBPF) += bpf-utils.o +perf-util-$(CONFIG_LIBBPF) += bpf-event.o +perf-util-$(CONFIG_LIBBPF) += bpf-utils.o -perf-$(CONFIG_CXX) += c++/ - -perf-$(CONFIG_LIBPFM4) += pfm.o +perf-util-$(CONFIG_LIBPFM4) += pfm.o CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" -CFLAGS_llvm-utils.o += -DLIBBPF_INCLUDE_DIR="BUILD_STR($(libbpf_include_dir_SQ))" # avoid compiler warnings in 32-bit mode CFLAGS_genelf_debug.o += -Wno-packed @@ -246,7 +279,7 @@ $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-flex.h: util/parse- $(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/parse-events-bison.h: util/parse-events.y $(call rule_mkdir) - $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \ + $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) $(BISON_FALLBACK_FLAGS) \ -o $(OUTPUT)util/parse-events-bison.c -p parse_events_ $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-flex.h: util/expr.l $(OUTPUT)util/expr-bison.c @@ -269,38 +302,68 @@ $(OUTPUT)util/pmu-bison.c $(OUTPUT)util/pmu-bison.h: util/pmu.y $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \ -o $(OUTPUT)util/pmu-bison.c -p 
perf_pmu_ -$(OUTPUT)util/bpf-filter-flex.c $(OUTPUT)util/bpf-filter-flex.h: util/bpf-filter.l $(OUTPUT)util/bpf-filter-bison.c +$(OUTPUT)util/bpf-filter-flex.c $(OUTPUT)util/bpf-filter-flex.h: util/bpf-filter.l $(OUTPUT)util/bpf-filter-bison.c util/bpf-filter.h util/bpf_skel/sample-filter.h $(call rule_mkdir) $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/bpf-filter-flex.c \ --header-file=$(OUTPUT)util/bpf-filter-flex.h $(PARSER_DEBUG_FLEX) $< -$(OUTPUT)util/bpf-filter-bison.c $(OUTPUT)util/bpf-filter-bison.h: util/bpf-filter.y +$(OUTPUT)util/bpf-filter-bison.c $(OUTPUT)util/bpf-filter-bison.h: util/bpf-filter.y util/bpf-filter.h util/bpf_skel/sample-filter.h $(call rule_mkdir) $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \ -o $(OUTPUT)util/bpf-filter-bison.c -p perf_bpf_filter_ -FLEX_GE_26 := $(shell expr $(shell $(FLEX) --version | sed -e 's/flex \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 26) -ifeq ($(FLEX_GE_26),1) - flex_flags := -Wno-switch-enum -Wno-switch-default -Wno-unused-function -Wno-redundant-decls -Wno-sign-compare -Wno-unused-parameter -Wno-missing-prototypes -Wno-missing-declarations - CC_HASNT_MISLEADING_INDENTATION := $(shell echo "int main(void) { return 0 }" | $(CC) -Werror -Wno-misleading-indentation -o /dev/null -xc - 2>&1 | grep -q -- -Wno-misleading-indentation ; echo $$?) - ifeq ($(CC_HASNT_MISLEADING_INDENTATION), 1) - flex_flags += -Wno-misleading-indentation +FLEX_VERSION := $(shell $(FLEX) --version | cut -d' ' -f2) + +FLEX_GE_260 := $(call version-ge3,$(FLEX_VERSION),2.6.0) +ifeq ($(FLEX_GE_260),1) + flex_flags := -Wno-redundant-decls -Wno-switch-default -Wno-unused-function -Wno-misleading-indentation + + # Some newer clang and gcc version complain about this + # util/parse-events-bison.c:1317:9: error: variable 'parse_events_nerrs' set but not used [-Werror,-Wunused-but-set-variable] + # int yynerrs = 0; + + flex_flags += -Wno-unused-but-set-variable + + FLEX_LT_262 := $(call version-lt3,$(FLEX_VERSION),2.6.2) + ifeq ($(FLEX_LT_262),1) + flex_flags += -Wno-sign-compare endif else flex_flags := -w endif -CFLAGS_parse-events-flex.o += $(flex_flags) -CFLAGS_pmu-flex.o += $(flex_flags) -CFLAGS_expr-flex.o += $(flex_flags) -CFLAGS_bpf-filter-flex.o += $(flex_flags) -bison_flags := -DYYENABLE_NLS=0 -BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35) -ifeq ($(BISON_GE_35),1) - bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum -Wno-unused-but-set-variable -Wno-unknown-warning-option +# Some newer clang and gcc version complain about this +# util/parse-events-bison.c:1317:9: error: variable 'parse_events_nerrs' set but not used [-Werror,-Wunused-but-set-variable] +# int yynerrs = 0; + +bison_flags := -DYYENABLE_NLS=0 -Wno-unused-but-set-variable + +# Old clangs don't grok -Wno-unused-but-set-variable, remove it +ifeq ($(CC_NO_CLANG), 0) + CLANG_VERSION := $(shell $(CLANG) --version | head -1 | sed 's/.*clang version \([[:digit:]]\+.[[:digit:]]\+.[[:digit:]]\+\).*/\1/g') + ifeq ($(call version-lt3,$(CLANG_VERSION),13.0.0),1) + bison_flags := $(subst -Wno-unused-but-set-variable,,$(bison_flags)) + flex_flags := $(subst -Wno-unused-but-set-variable,,$(flex_flags)) + endif +endif + +BISON_GE_382 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \>\= 382) +ifeq ($(BISON_GE_382),1) + bison_flags += -Wno-switch-enum else 
bison_flags += -w endif + +BISON_LT_381 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \< 381) +ifeq ($(BISON_LT_381),1) + bison_flags += -DYYNOMEM=YYABORT +endif + +CFLAGS_parse-events-flex.o += $(flex_flags) -Wno-unused-label +CFLAGS_pmu-flex.o += $(flex_flags) +CFLAGS_expr-flex.o += $(flex_flags) +CFLAGS_bpf-filter-flex.o += $(flex_flags) + CFLAGS_parse-events-bison.o += $(bison_flags) CFLAGS_pmu-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags) CFLAGS_expr-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags) @@ -316,10 +379,8 @@ CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" -CFLAGS_parse-events.o += -Wno-redundant-decls -CFLAGS_expr.o += -Wno-redundant-decls CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE -CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/ +CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/ -I$(OUTPUT)arch/arm64/include/generated/ $(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE $(call rule_mkdir) @@ -356,3 +417,42 @@ $(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE $(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) + +ifdef SHELLCHECK + SHELL_TESTS := generate-cmdlist.sh + SHELL_TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log) +else + SHELL_TESTS := + SHELL_TEST_LOGS := +endif + +$(OUTPUT)%.shellcheck_log: % + $(call rule_mkdir) + $(Q)$(call echo-cmd,test)$(SHELLCHECK) "$<" > $@ || (cat $@ && rm $@ && false) + +perf-util-y += $(SHELL_TEST_LOGS) + +PY_TESTS := setup.py +ifdef MYPY + MYPY_TEST_LOGS := $(PY_TESTS:%=%.mypy_log) +else + MYPY_TEST_LOGS := +endif + +$(OUTPUT)%.mypy_log: % + $(call rule_mkdir) + $(Q)$(call echo-cmd,test)mypy "$<" > $@ || (cat $@ && rm $@ && false) + +perf-util-y += $(MYPY_TEST_LOGS) + +ifdef PYLINT + PYLINT_TEST_LOGS := $(PY_TESTS:%=%.pylint_log) +else + PYLINT_TEST_LOGS := +endif + +$(OUTPUT)%.pylint_log: % + $(call rule_mkdir) + $(Q)$(call echo-cmd,test)pylint "$<" > $@ || (cat $@ && rm $@ && false) + +perf-util-y += $(PYLINT_TEST_LOGS) diff --git a/tools/perf/util/addr2line.c b/tools/perf/util/addr2line.c new file mode 100644 index 000000000000..f2d94a3272d7 --- /dev/null +++ b/tools/perf/util/addr2line.c @@ -0,0 +1,439 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "addr2line.h" +#include "debug.h" +#include "dso.h" +#include "string2.h" +#include "srcline.h" +#include "symbol.h" +#include "symbol_conf.h" + +#include <api/io.h> +#include <linux/zalloc.h> +#include <subcmd/run-command.h> + +#include <inttypes.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> + +#define MAX_INLINE_NEST 1024 + +/* If addr2line doesn't return data for 1 second then timeout. 
*/ +int addr2line_timeout_ms = 1 * 1000; + +static int filename_split(char *filename, unsigned int *line_nr) +{ + char *sep; + + sep = strchr(filename, '\n'); + if (sep) + *sep = '\0'; + + if (!strcmp(filename, "??:0")) + return 0; + + sep = strchr(filename, ':'); + if (sep) { + *sep++ = '\0'; + *line_nr = strtoul(sep, NULL, 0); + return 1; + } + pr_debug("addr2line missing ':' in filename split\n"); + return 0; +} + +static void addr2line_subprocess_cleanup(struct child_process *a2l) +{ + if (a2l->pid != -1) { + kill(a2l->pid, SIGKILL); + finish_command(a2l); /* ignore result, we don't care */ + a2l->pid = -1; + close(a2l->in); + close(a2l->out); + } + + free(a2l); +} + +static struct child_process *addr2line_subprocess_init(const char *addr2line_path, + const char *binary_path) +{ + const char *argv[] = { + addr2line_path ?: "addr2line", + "-e", binary_path, + "-a", "-i", "-f", NULL + }; + struct child_process *a2l = zalloc(sizeof(*a2l)); + int start_command_status = 0; + + if (a2l == NULL) { + pr_err("Failed to allocate memory for addr2line"); + return NULL; + } + + a2l->pid = -1; + a2l->in = -1; + a2l->out = -1; + a2l->no_stderr = 1; + + a2l->argv = argv; + start_command_status = start_command(a2l); + a2l->argv = NULL; /* it's not used after start_command; avoid dangling pointers */ + + if (start_command_status != 0) { + pr_warning("could not start addr2line (%s) for %s: start_command return code %d\n", + addr2line_path, binary_path, start_command_status); + addr2line_subprocess_cleanup(a2l); + return NULL; + } + + return a2l; +} + +enum a2l_style { + BROKEN, + GNU_BINUTILS, + LLVM, +}; + +static enum a2l_style addr2line_configure(struct child_process *a2l, const char *dso_name) +{ + static bool cached; + static enum a2l_style style; + + if (!cached) { + char buf[128]; + struct io io; + int ch; + int lines; + + if (write(a2l->in, ",\n", 2) != 2) + return BROKEN; + + io__init(&io, a2l->out, buf, sizeof(buf)); + ch = io__get_char(&io); + if (ch == ',') { + style = LLVM; + cached = true; + lines = 1; + pr_debug3("Detected LLVM addr2line style\n"); + } else if (ch == '0') { + style = GNU_BINUTILS; + cached = true; + lines = 3; + pr_debug3("Detected binutils addr2line style\n"); + } else { + if (!symbol_conf.disable_add2line_warn) { + char *output = NULL; + size_t output_len; + + io__getline(&io, &output, &output_len); + pr_warning("%s %s: addr2line configuration failed\n", + __func__, dso_name); + pr_warning("\t%c%s", ch, output); + } + pr_debug("Unknown/broken addr2line style\n"); + return BROKEN; + } + while (lines) { + ch = io__get_char(&io); + if (ch <= 0) + break; + if (ch == '\n') + lines--; + } + /* Ignore SIGPIPE in the event addr2line exits. */ + signal(SIGPIPE, SIG_IGN); + } + return style; +} + +static int read_addr2line_record(struct io *io, + enum a2l_style style, + const char *dso_name, + u64 addr, + bool first, + char **function, + char **filename, + unsigned int *line_nr) +{ + /* + * Returns: + * -1 ==> error + * 0 ==> sentinel (or other ill-formed) record read + * 1 ==> a genuine record read + */ + char *line = NULL; + size_t line_len = 0; + unsigned int dummy_line_nr = 0; + int ret = -1; + + if (function != NULL) + zfree(function); + + if (filename != NULL) + zfree(filename); + + if (line_nr != NULL) + *line_nr = 0; + + /* + * Read the first line. 
Without an error this will be: + * - for the first line an address like 0x1234, + * - the binutils sentinel 0x0000000000000000, + * - the llvm-addr2line the sentinel ',' character, + * - the function name line for an inlined function. + */ + if (io__getline(io, &line, &line_len) < 0 || !line_len) + goto error; + + pr_debug3("%s %s: addr2line read address for sentinel: %s", __func__, dso_name, line); + if (style == LLVM && line_len == 2 && line[0] == ',') { + /* Found the llvm-addr2line sentinel character. */ + zfree(&line); + return 0; + } else if (style == GNU_BINUTILS && (!first || addr != 0)) { + int zero_count = 0, non_zero_count = 0; + /* + * Check for binutils sentinel ignoring it for the case the + * requested address is 0. + */ + + /* A given address should always start 0x. */ + if (line_len >= 2 || line[0] != '0' || line[1] != 'x') { + for (size_t i = 2; i < line_len; i++) { + if (line[i] == '0') + zero_count++; + else if (line[i] != '\n') + non_zero_count++; + } + if (!non_zero_count) { + int ch; + + if (first && !zero_count) { + /* Line was erroneous just '0x'. */ + goto error; + } + /* + * Line was 0x0..0, the sentinel for binutils. Remove + * the function and filename lines. + */ + zfree(&line); + do { + ch = io__get_char(io); + } while (ch > 0 && ch != '\n'); + do { + ch = io__get_char(io); + } while (ch > 0 && ch != '\n'); + return 0; + } + } + } + /* Read the second function name line (if inline data then this is the first line). */ + if (first && (io__getline(io, &line, &line_len) < 0 || !line_len)) + goto error; + + pr_debug3("%s %s: addr2line read line: %s", __func__, dso_name, line); + if (function != NULL) + *function = strdup(strim(line)); + + zfree(&line); + line_len = 0; + + /* Read the third filename and line number line. */ + if (io__getline(io, &line, &line_len) < 0 || !line_len) + goto error; + + pr_debug3("%s %s: addr2line filename:number : %s", __func__, dso_name, line); + if (filename_split(line, line_nr == NULL ? 
&dummy_line_nr : line_nr) == 0 && + style == GNU_BINUTILS) { + ret = 0; + goto error; + } + + if (filename != NULL) + *filename = strdup(line); + + zfree(&line); + line_len = 0; + + return 1; + +error: + free(line); + if (function != NULL) + zfree(function); + if (filename != NULL) + zfree(filename); + return ret; +} + +static int inline_list__append_record(struct dso *dso, + struct inline_node *node, + struct symbol *sym, + const char *function, + const char *filename, + unsigned int line_nr) +{ + struct symbol *inline_sym = new_inline_sym(dso, sym, function); + + return inline_list__append(inline_sym, srcline_from_fileline(filename, line_nr), node); +} + +int cmd__addr2line(const char *dso_name, u64 addr, + char **file, unsigned int *line_nr, + struct dso *dso, + bool unwind_inlines, + struct inline_node *node, + struct symbol *sym __maybe_unused) +{ + struct child_process *a2l = dso__a2l(dso); + char *record_function = NULL; + char *record_filename = NULL; + unsigned int record_line_nr = 0; + int record_status = -1; + int ret = 0; + size_t inline_count = 0; + int len; + char buf[128]; + ssize_t written; + struct io io = { .eof = false }; + enum a2l_style a2l_style; + + if (!a2l) { + if (!filename__has_section(dso_name, ".debug_line")) + goto out; + + dso__set_a2l(dso, + addr2line_subprocess_init(symbol_conf.addr2line_path, dso_name)); + a2l = dso__a2l(dso); + } + + if (a2l == NULL) { + if (!symbol_conf.disable_add2line_warn) + pr_warning("%s %s: addr2line_subprocess_init failed\n", __func__, dso_name); + goto out; + } + a2l_style = addr2line_configure(a2l, dso_name); + if (a2l_style == BROKEN) + goto out; + + /* + * Send our request and then *deliberately* send something that can't be + * interpreted as a valid address to ask addr2line about (namely, + * ","). This causes addr2line to first write out the answer to our + * request, in an unbounded/unknown number of records, and then to write + * out the lines "0x0...0", "??" and "??:0", for GNU binutils, or "," + * for llvm-addr2line, so that we can detect when it has finished giving + * us anything useful. + */ + len = snprintf(buf, sizeof(buf), "%016"PRIx64"\n,\n", addr); + written = len > 0 ? write(a2l->in, buf, len) : -1; + if (written != len) { + if (!symbol_conf.disable_add2line_warn) + pr_warning("%s %s: could not send request\n", __func__, dso_name); + goto out; + } + io__init(&io, a2l->out, buf, sizeof(buf)); + io.timeout_ms = addr2line_timeout_ms; + switch (read_addr2line_record(&io, a2l_style, dso_name, addr, /*first=*/true, + &record_function, &record_filename, &record_line_nr)) { + case -1: + if (!symbol_conf.disable_add2line_warn) + pr_warning("%s %s: could not read first record\n", __func__, dso_name); + goto out; + case 0: + /* + * The first record was invalid, so return failure, but first + * read another record, since we sent a sentinel ',' for the + * sake of detected the last inlined function. Treat this as the + * first of a record as the ',' generates a new start with GNU + * binutils, also force a non-zero address as we're no longer + * reading that record. + */ + switch (read_addr2line_record(&io, a2l_style, dso_name, + /*addr=*/1, /*first=*/true, + NULL, NULL, NULL)) { + case -1: + if (!symbol_conf.disable_add2line_warn) + pr_warning("%s %s: could not read sentinel record\n", + __func__, dso_name); + break; + case 0: + /* The sentinel as expected. 
*/ + break; + default: + if (!symbol_conf.disable_add2line_warn) + pr_warning("%s %s: unexpected record instead of sentinel", + __func__, dso_name); + break; + } + goto out; + default: + /* First record as expected. */ + break; + } + + if (file) { + *file = strdup(record_filename); + ret = 1; + } + if (line_nr) + *line_nr = record_line_nr; + + if (unwind_inlines) { + if (node && inline_list__append_record(dso, node, sym, + record_function, + record_filename, + record_line_nr)) { + ret = 0; + goto out; + } + } + + /* + * We have to read the records even if we don't care about the inline + * info. This isn't the first record and force the address to non-zero + * as we're reading records beyond the first. + */ + while ((record_status = read_addr2line_record(&io, + a2l_style, + dso_name, + /*addr=*/1, + /*first=*/false, + &record_function, + &record_filename, + &record_line_nr)) == 1) { + if (unwind_inlines && node && inline_count++ < MAX_INLINE_NEST) { + if (inline_list__append_record(dso, node, sym, + record_function, + record_filename, + record_line_nr)) { + ret = 0; + goto out; + } + ret = 1; /* found at least one inline frame */ + } + } + +out: + free(record_function); + free(record_filename); + if (io.eof) { + dso__set_a2l(dso, NULL); + addr2line_subprocess_cleanup(a2l); + } + return ret; +} + +void dso__free_a2l(struct dso *dso) +{ + struct child_process *a2l = dso__a2l(dso); + + if (!a2l) + return; + + addr2line_subprocess_cleanup(a2l); + + dso__set_a2l(dso, NULL); +} diff --git a/tools/perf/util/addr2line.h b/tools/perf/util/addr2line.h new file mode 100644 index 000000000000..d35a47ba8dab --- /dev/null +++ b/tools/perf/util/addr2line.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ADDR2LINE_H +#define __PERF_ADDR2LINE_H + +#include <linux/types.h> + +struct dso; +struct inline_node; +struct symbol; + +extern int addr2line_timeout_ms; + +int cmd__addr2line(const char *dso_name, u64 addr, + char **file, unsigned int *line_nr, + struct dso *dso, + bool unwind_inlines, + struct inline_node *node, + struct symbol *sym); + +#endif /* __PERF_ADDR2LINE_H */ diff --git a/tools/perf/util/addr_location.c b/tools/perf/util/addr_location.c index 51825ef8c0ab..007a2f5df9a6 100644 --- a/tools/perf/util/addr_location.c +++ b/tools/perf/util/addr_location.c @@ -17,6 +17,7 @@ void addr_location__init(struct addr_location *al) al->cpumode = 0; al->cpu = 0; al->socket = 0; + al->parallelism = 1; } /* diff --git a/tools/perf/util/addr_location.h b/tools/perf/util/addr_location.h index d8ac0428dff2..64b551025216 100644 --- a/tools/perf/util/addr_location.h +++ b/tools/perf/util/addr_location.h @@ -17,10 +17,14 @@ struct addr_location { const char *srcline; u64 addr; char level; - u8 filtered; u8 cpumode; + u16 filtered; s32 cpu; s32 socket; + /* Same as machine.parallelism but within [1, nr_cpus]. */ + int parallelism; + /* See he_stat.latency. 
*/ + u64 latency; }; void addr_location__init(struct addr_location *al); diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c index 38dc4524b7e8..4fe851334296 100644 --- a/tools/perf/util/affinity.c +++ b/tools/perf/util/affinity.c @@ -5,6 +5,7 @@ #include <stdlib.h> #include <linux/bitmap.h> #include <linux/zalloc.h> +#include <perf/cpumap.h> #include "perf.h" #include "cpumap.h" #include "affinity.h" @@ -83,3 +84,20 @@ void affinity__cleanup(struct affinity *a) if (a != NULL) __affinity__cleanup(a); } + +void cpu_map__set_affinity(const struct perf_cpu_map *cpumap) +{ + int cpu_set_size = get_cpu_set_size(); + unsigned long *cpuset = bitmap_zalloc(cpu_set_size * 8); + struct perf_cpu cpu; + int idx; + + if (!cpuset) + return; + + perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpumap) + __set_bit(cpu.cpu, cpuset); + + sched_setaffinity(0, cpu_set_size, (cpu_set_t *)cpuset); + zfree(&cpuset); +} diff --git a/tools/perf/util/affinity.h b/tools/perf/util/affinity.h index 0ad6a18ef20c..7341194b2298 100644 --- a/tools/perf/util/affinity.h +++ b/tools/perf/util/affinity.h @@ -4,6 +4,7 @@ #include <stdbool.h> +struct perf_cpu_map; struct affinity { unsigned long *orig_cpus; unsigned long *sched_cpus; @@ -13,5 +14,6 @@ struct affinity { void affinity__cleanup(struct affinity *a); void affinity__set(struct affinity *a, int cpu); int affinity__setup(struct affinity *a); +void cpu_map__set_affinity(const struct perf_cpu_map *cpumap); #endif // PERF_AFFINITY_H diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c index 6a6ddba76c75..b084dee76b1a 100644 --- a/tools/perf/util/amd-sample-raw.c +++ b/tools/perf/util/amd-sample-raw.c @@ -9,17 +9,18 @@ #include <inttypes.h> #include <linux/string.h> -#include "../../arch/x86/include/asm/amd-ibs.h" +#include "../../arch/x86/include/asm/amd/ibs.h" #include "debug.h" #include "session.h" #include "evlist.h" #include "sample-raw.h" -#include "pmu-events/pmu-events.h" #include "util/sample.h" static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; static bool zen4_ibs_extensions; +static bool ldlat_cap; +static bool dtlb_pgsize_cap; static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) { @@ -79,14 +80,20 @@ static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg) static void pr_ibs_op_ctl(union ibs_op_ctl reg) { char l3_miss_only[sizeof(" L3MissOnly _")] = ""; + char ldlat[sizeof(" LdLatThrsh __ LdLatEn _")] = ""; if (zen4_ibs_extensions) snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only); - printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d\n", + if (ldlat_cap) { + snprintf(ldlat, sizeof(ldlat), " LdLatThrsh %2d LdLatEn %d", + reg.ldlat_thrsh, reg.ldlat_en); + } + + printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d%s\n", reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only, reg.op_en, reg.op_val, reg.cnt_ctl, - reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt); + reg.cnt_ctl ? 
"uOps" : "cycles", reg.opcurcnt, ldlat); } static void pr_ibs_op_data(union ibs_op_data reg) @@ -155,9 +162,20 @@ static void pr_ibs_op_data2(union ibs_op_data2 reg) static void pr_ibs_op_data3(union ibs_op_data3 reg) { - char l2_miss_str[sizeof(" L2Miss _")] = ""; - char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = ""; + static const char * const dc_page_sizes[] = { + " 4K", + " 2M", + " 1G", + " ??", + }; char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = ""; + char dc_l1_l2tlb_miss_str[sizeof(" DcL1TlbMiss _ DcL2TlbMiss _")] = ""; + char dc_l1tlb_hit_str[sizeof(" DcL1TlbHit2M _ DcL1TlbHit1G _")] = ""; + char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = ""; + char dc_l2tlb_hit_2m_str[sizeof(" DcL2TlbHit2M _")] = ""; + char dc_l2tlb_hit_1g_str[sizeof(" DcL2TlbHit1G _")] = ""; + char dc_page_size_str[sizeof(" DcPageSize ____")] = ""; + char l2_miss_str[sizeof(" L2Miss _")] = ""; /* * Erratum #1293 @@ -173,16 +191,40 @@ static void pr_ibs_op_data3(union ibs_op_data3 reg) snprintf(op_mem_width_str, sizeof(op_mem_width_str), " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1)); - printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d " - "DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d " - "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d " - "DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n", - reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss, - reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss, - reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op, - reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, - reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str, - op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat); + if (dtlb_pgsize_cap) { + if (reg.dc_phy_addr_valid) { + int idx = (reg.dc_l1tlb_hit_1g << 1) | reg.dc_l1tlb_hit_2m; + + snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str), + " DcL1TlbMiss %d DcL2TlbMiss %d", + reg.dc_l1tlb_miss, reg.dc_l2tlb_miss); + snprintf(dc_page_size_str, sizeof(dc_page_size_str), + " DcPageSize %4s", dc_page_sizes[idx]); + } + } else { + snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str), + " DcL1TlbMiss %d DcL2TlbMiss %d", + reg.dc_l1tlb_miss, reg.dc_l2tlb_miss); + snprintf(dc_l1tlb_hit_str, sizeof(dc_l1tlb_hit_str), + " DcL1TlbHit2M %d DcL1TlbHit1G %d", + reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g); + snprintf(dc_l2tlb_hit_2m_str, sizeof(dc_l2tlb_hit_2m_str), + " DcL2TlbHit2M %d", reg.dc_l2tlb_hit_2m); + snprintf(dc_l2tlb_hit_1g_str, sizeof(dc_l2tlb_hit_1g_str), + " DcL2TlbHit1G %d", reg.dc_l2_tlb_hit_1g); + } + + printf("ibs_op_data3:\t%016llx LdOp %d StOp %d%s%s%s DcMiss %d DcMisAcc %d " + "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d " + "DcLinAddrValid %d DcPhyAddrValid %d%s%s SwPf %d%s%s " + "DcMissLat %5d TlbRefillLat %5d\n", + reg.val, reg.ld_op, reg.st_op, dc_l1_l2tlb_miss_str, + dtlb_pgsize_cap ? 
dc_page_size_str : dc_l1tlb_hit_str, + dc_l2tlb_hit_2m_str, reg.dc_miss, reg.dc_mis_acc, reg.dc_wc_mem_acc, + reg.dc_uc_mem_acc, reg.dc_locked_op, reg.dc_miss_no_mab_alloc, + reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, dc_l2tlb_hit_1g_str, + l2_miss_str, reg.sw_pf, op_mem_width_str, op_dc_miss_open_mem_reqs_str, + reg.dc_miss_lat, reg.tlb_refill_lat); } /* @@ -312,7 +354,7 @@ static void parse_cpuid(struct perf_env *env) */ bool evlist__has_amd_ibs(struct evlist *evlist) { - struct perf_env *env = evlist->env; + struct perf_env *env = perf_session__env(evlist->session); int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env); const char *pmu_mapping = perf_env__pmu_mappings(env); char name[sizeof("ibs_fetch")]; @@ -332,6 +374,12 @@ bool evlist__has_amd_ibs(struct evlist *evlist) if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions")) zen4_ibs_extensions = 1; + if (perf_env__find_pmu_cap(env, "ibs_op", "ldlat")) + ldlat_cap = 1; + + if (perf_env__find_pmu_cap(env, "ibs_op", "dtlb_pgsize")) + dtlb_pgsize_cap = 1; + if (ibs_fetch_type || ibs_op_type) { if (!cpu_family) parse_cpuid(env); diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c new file mode 100644 index 000000000000..07cf9c334be0 --- /dev/null +++ b/tools/perf/util/annotate-data.c @@ -0,0 +1,1932 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Convert sample address to data type using DWARF debug info. + * + * Written by Namhyung Kim <namhyung@kernel.org> + */ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <inttypes.h> +#include <linux/zalloc.h> + +#include "annotate.h" +#include "annotate-data.h" +#include "debuginfo.h" +#include "debug.h" +#include "dso.h" +#include "dwarf-regs.h" +#include "evsel.h" +#include "evlist.h" +#include "map.h" +#include "map_symbol.h" +#include "sort.h" +#include "strbuf.h" +#include "symbol.h" +#include "symbol_conf.h" +#include "thread.h" + +/* register number of the stack pointer */ +#define X86_REG_SP 7 + +static void delete_var_types(struct die_var_type *var_types); + +#define pr_debug_dtp(fmt, ...) 
\ +do { \ + if (debug_type_profile) \ + pr_info(fmt, ##__VA_ARGS__); \ + else \ + pr_debug3(fmt, ##__VA_ARGS__); \ +} while (0) + +void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind) +{ + struct strbuf sb; + char *str; + Dwarf_Word size = 0; + + if (!debug_type_profile && verbose < 3) + return; + + switch (kind) { + case TSR_KIND_INVALID: + pr_info("\n"); + return; + case TSR_KIND_PERCPU_BASE: + pr_info(" percpu base\n"); + return; + case TSR_KIND_CONST: + pr_info(" constant\n"); + return; + case TSR_KIND_PERCPU_POINTER: + pr_info(" percpu pointer"); + /* it also prints the type info */ + break; + case TSR_KIND_POINTER: + pr_info(" pointer"); + /* it also prints the type info */ + break; + case TSR_KIND_CANARY: + pr_info(" stack canary\n"); + return; + case TSR_KIND_TYPE: + default: + break; + } + + dwarf_aggregate_size(die, &size); + + strbuf_init(&sb, 32); + die_get_typename_from_type(die, &sb); + str = strbuf_detach(&sb, NULL); + pr_info(" type='%s' size=%#lx (die:%#lx)\n", + str, (long)size, (long)dwarf_dieoffset(die)); + free(str); +} + +static void pr_debug_location(Dwarf_Die *die, u64 pc, int reg) +{ + ptrdiff_t off = 0; + Dwarf_Attribute attr; + Dwarf_Addr base, start, end; + Dwarf_Op *ops; + size_t nops; + + if (!debug_type_profile && verbose < 3) + return; + + if (dwarf_attr(die, DW_AT_location, &attr) == NULL) + return; + + while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) { + if (reg != DWARF_REG_PC && end <= pc) + continue; + if (reg != DWARF_REG_PC && start > pc) + break; + + pr_info(" variable location: "); + switch (ops->atom) { + case DW_OP_reg0 ...DW_OP_reg31: + pr_info("reg%d\n", ops->atom - DW_OP_reg0); + break; + case DW_OP_breg0 ...DW_OP_breg31: + pr_info("base=reg%d, offset=%#lx\n", + ops->atom - DW_OP_breg0, (long)ops->number); + break; + case DW_OP_regx: + pr_info("reg%ld\n", (long)ops->number); + break; + case DW_OP_bregx: + pr_info("base=reg%ld, offset=%#lx\n", + (long)ops->number, (long)ops->number2); + break; + case DW_OP_fbreg: + pr_info("use frame base, offset=%#lx\n", (long)ops->number); + break; + case DW_OP_addr: + pr_info("address=%#lx\n", (long)ops->number); + break; + default: + pr_info("unknown: code=%#x, number=%#lx\n", + ops->atom, (long)ops->number); + break; + } + break; + } +} + +static void pr_debug_scope(Dwarf_Die *scope_die) +{ + int tag; + + if (!debug_type_profile && verbose < 3) + return; + + pr_info("(die:%lx) ", (long)dwarf_dieoffset(scope_die)); + + tag = dwarf_tag(scope_die); + if (tag == DW_TAG_subprogram) + pr_info("[function] %s\n", dwarf_diename(scope_die)); + else if (tag == DW_TAG_inlined_subroutine) + pr_info("[inlined] %s\n", dwarf_diename(scope_die)); + else if (tag == DW_TAG_lexical_block) + pr_info("[block]\n"); + else + pr_info("[unknown] tag=%x\n", tag); +} + +bool has_reg_type(struct type_state *state, int reg) +{ + return (unsigned)reg < ARRAY_SIZE(state->regs); +} + +static void init_type_state(struct type_state *state, struct arch *arch) +{ + memset(state, 0, sizeof(*state)); + INIT_LIST_HEAD(&state->stack_vars); + + if (arch__is(arch, "x86")) { + state->regs[0].caller_saved = true; + state->regs[1].caller_saved = true; + state->regs[2].caller_saved = true; + state->regs[4].caller_saved = true; + state->regs[5].caller_saved = true; + state->regs[8].caller_saved = true; + state->regs[9].caller_saved = true; + state->regs[10].caller_saved = true; + state->regs[11].caller_saved = true; + state->ret_reg = 0; + state->stack_reg = X86_REG_SP; + } +} + +static void 
exit_type_state(struct type_state *state) +{ + struct type_state_stack *stack, *tmp; + + list_for_each_entry_safe(stack, tmp, &state->stack_vars, list) { + list_del(&stack->list); + free(stack); + } +} + +/* + * Compare type name and size to maintain them in a tree. + * I'm not sure if DWARF would have information of a single type in many + * different places (compilation units). If not, it could compare the + * offset of the type entry in the .debug_info section. + */ +static int data_type_cmp(const void *_key, const struct rb_node *node) +{ + const struct annotated_data_type *key = _key; + struct annotated_data_type *type; + + type = rb_entry(node, struct annotated_data_type, node); + + if (key->self.size != type->self.size) + return key->self.size - type->self.size; + return strcmp(key->self.type_name, type->self.type_name); +} + +static bool data_type_less(struct rb_node *node_a, const struct rb_node *node_b) +{ + struct annotated_data_type *a, *b; + + a = rb_entry(node_a, struct annotated_data_type, node); + b = rb_entry(node_b, struct annotated_data_type, node); + + if (a->self.size != b->self.size) + return a->self.size < b->self.size; + return strcmp(a->self.type_name, b->self.type_name) < 0; +} + +/* Recursively add new members for struct/union */ +static int __add_member_cb(Dwarf_Die *die, void *arg) +{ + struct annotated_member *parent = arg; + struct annotated_member *member; + Dwarf_Die member_type, die_mem; + Dwarf_Word size, loc, bit_size = 0; + Dwarf_Attribute attr; + struct strbuf sb; + int tag; + + if (dwarf_tag(die) != DW_TAG_member) + return DIE_FIND_CB_SIBLING; + + member = zalloc(sizeof(*member)); + if (member == NULL) + return DIE_FIND_CB_END; + + strbuf_init(&sb, 32); + die_get_typename(die, &sb); + + __die_get_real_type(die, &member_type); + if (dwarf_tag(&member_type) == DW_TAG_typedef) + die_get_real_type(&member_type, &die_mem); + else + die_mem = member_type; + + if (dwarf_aggregate_size(&die_mem, &size) < 0) + size = 0; + + if (dwarf_attr_integrate(die, DW_AT_data_member_location, &attr)) + dwarf_formudata(&attr, &loc); + else { + /* bitfield member */ + if (dwarf_attr_integrate(die, DW_AT_data_bit_offset, &attr) && + dwarf_formudata(&attr, &loc) == 0) + loc /= 8; + else + loc = 0; + + if (dwarf_attr_integrate(die, DW_AT_bit_size, &attr) && + dwarf_formudata(&attr, &bit_size) == 0) + size = (bit_size + 7) / 8; + } + + member->type_name = strbuf_detach(&sb, NULL); + /* member->var_name can be NULL */ + if (dwarf_diename(die)) { + if (bit_size) { + if (asprintf(&member->var_name, "%s:%ld", + dwarf_diename(die), (long)bit_size) < 0) + member->var_name = NULL; + } else { + member->var_name = strdup(dwarf_diename(die)); + } + + if (member->var_name == NULL) { + free(member); + return DIE_FIND_CB_END; + } + } + member->size = size; + member->offset = loc + parent->offset; + INIT_LIST_HEAD(&member->children); + list_add_tail(&member->node, &parent->children); + + tag = dwarf_tag(&die_mem); + switch (tag) { + case DW_TAG_structure_type: + case DW_TAG_union_type: + die_find_child(&die_mem, __add_member_cb, member, &die_mem); + break; + default: + break; + } + return DIE_FIND_CB_SIBLING; +} + +static void add_member_types(struct annotated_data_type *parent, Dwarf_Die *type) +{ + Dwarf_Die die_mem; + + die_find_child(type, __add_member_cb, &parent->self, &die_mem); +} + +static void delete_members(struct annotated_member *member) +{ + struct annotated_member *child, *tmp; + + list_for_each_entry_safe(child, tmp, &member->children, node) { + list_del(&child->node); + 
delete_members(child); + zfree(&child->type_name); + zfree(&child->var_name); + free(child); + } +} + +static int fill_member_name(char *buf, size_t sz, struct annotated_member *m, + int offset, bool first) +{ + struct annotated_member *child; + + if (list_empty(&m->children)) + return 0; + + list_for_each_entry(child, &m->children, node) { + int len; + + if (offset < child->offset || offset >= child->offset + child->size) + continue; + + /* It can have anonymous struct/union members */ + if (child->var_name) { + len = scnprintf(buf, sz, "%s%s", + first ? "" : ".", child->var_name); + first = false; + } else { + len = 0; + } + + return fill_member_name(buf + len, sz - len, child, offset, first) + len; + } + return 0; +} + +int annotated_data_type__get_member_name(struct annotated_data_type *adt, + char *buf, size_t sz, int member_offset) +{ + return fill_member_name(buf, sz, &adt->self, member_offset, /*first=*/true); +} + +static struct annotated_data_type *dso__findnew_data_type(struct dso *dso, + Dwarf_Die *type_die) +{ + struct annotated_data_type *result = NULL; + struct annotated_data_type key; + struct rb_node *node; + struct strbuf sb; + char *type_name; + Dwarf_Word size; + + strbuf_init(&sb, 32); + if (die_get_typename_from_type(type_die, &sb) < 0) + strbuf_add(&sb, "(unknown type)", 14); + type_name = strbuf_detach(&sb, NULL); + + if (dwarf_tag(type_die) == DW_TAG_typedef) + die_get_real_type(type_die, type_die); + + dwarf_aggregate_size(type_die, &size); + + /* Check existing nodes in dso->data_types tree */ + key.self.type_name = type_name; + key.self.size = size; + node = rb_find(&key, dso__data_types(dso), data_type_cmp); + if (node) { + result = rb_entry(node, struct annotated_data_type, node); + free(type_name); + return result; + } + + /* If not, add a new one */ + result = zalloc(sizeof(*result)); + if (result == NULL) { + free(type_name); + return NULL; + } + + result->self.type_name = type_name; + result->self.size = size; + INIT_LIST_HEAD(&result->self.children); + + if (symbol_conf.annotate_data_member) + add_member_types(result, type_die); + + rb_add(&result->node, dso__data_types(dso), data_type_less); + return result; +} + +static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die) +{ + Dwarf_Off off, next_off; + size_t header_size; + + if (dwarf_addrdie(di->dbg, pc, cu_die) != NULL) + return cu_die; + + /* + * There are some kernels don't have full aranges and contain only a few + * aranges entries. Fallback to iterate all CU entries in .debug_info + * in case it's missing. 
+ */ + off = 0; + while (dwarf_nextcu(di->dbg, off, &next_off, &header_size, + NULL, NULL, NULL) == 0) { + if (dwarf_offdie(di->dbg, off + header_size, cu_die) && + dwarf_haspc(cu_die, pc)) + return true; + + off = next_off; + } + return false; +} + +enum type_match_result { + PERF_TMR_UNKNOWN = 0, + PERF_TMR_OK, + PERF_TMR_NO_TYPE, + PERF_TMR_NO_POINTER, + PERF_TMR_NO_SIZE, + PERF_TMR_BAD_OFFSET, + PERF_TMR_BAIL_OUT, +}; + +static const char *match_result_str(enum type_match_result tmr) +{ + switch (tmr) { + case PERF_TMR_OK: + return "Good!"; + case PERF_TMR_NO_TYPE: + return "no type information"; + case PERF_TMR_NO_POINTER: + return "no/void pointer"; + case PERF_TMR_NO_SIZE: + return "type size is unknown"; + case PERF_TMR_BAD_OFFSET: + return "offset bigger than size"; + case PERF_TMR_UNKNOWN: + case PERF_TMR_BAIL_OUT: + default: + return "invalid state"; + } +} + +static bool is_pointer_type(Dwarf_Die *type_die) +{ + int tag = dwarf_tag(type_die); + + return tag == DW_TAG_pointer_type || tag == DW_TAG_array_type; +} + +static bool is_compound_type(Dwarf_Die *type_die) +{ + int tag = dwarf_tag(type_die); + + return tag == DW_TAG_structure_type || tag == DW_TAG_union_type; +} + +/* returns if Type B has better information than Type A */ +static bool is_better_type(Dwarf_Die *type_a, Dwarf_Die *type_b) +{ + Dwarf_Word size_a, size_b; + Dwarf_Die die_a, die_b; + + /* pointer type is preferred */ + if (is_pointer_type(type_a) != is_pointer_type(type_b)) + return is_pointer_type(type_b); + + if (is_pointer_type(type_b)) { + /* + * We want to compare the target type, but 'void *' can fail to + * get the target type. + */ + if (die_get_real_type(type_a, &die_a) == NULL) + return true; + if (die_get_real_type(type_b, &die_b) == NULL) + return false; + + type_a = &die_a; + type_b = &die_b; + } + + /* bigger type is preferred */ + if (dwarf_aggregate_size(type_a, &size_a) < 0 || + dwarf_aggregate_size(type_b, &size_b) < 0) + return false; + + if (size_a != size_b) + return size_a < size_b; + + /* struct or union is preferred */ + if (is_compound_type(type_a) != is_compound_type(type_b)) + return is_compound_type(type_b); + + /* typedef is preferred */ + if (dwarf_tag(type_b) == DW_TAG_typedef) + return true; + + return false; +} + +/* The type info will be saved in @type_die */ +static enum type_match_result check_variable(struct data_loc_info *dloc, + Dwarf_Die *var_die, + Dwarf_Die *type_die, int reg, + int offset, bool is_fbreg) +{ + Dwarf_Word size; + bool needs_pointer = true; + Dwarf_Die sized_type; + + if (reg == DWARF_REG_PC) + needs_pointer = false; + else if (reg == dloc->fbreg || is_fbreg) + needs_pointer = false; + else if (arch__is(dloc->arch, "x86") && reg == X86_REG_SP) + needs_pointer = false; + + /* Get the type of the variable */ + if (__die_get_real_type(var_die, type_die) == NULL) + return PERF_TMR_NO_TYPE; + + /* + * Usually it expects a pointer type for a memory access. + * Convert to a real type it points to. But global variables + * and local variables are accessed directly without a pointer. 
+ */ + if (needs_pointer) { + if (!is_pointer_type(type_die) || + __die_get_real_type(type_die, type_die) == NULL) + return PERF_TMR_NO_POINTER; + } + + if (dwarf_tag(type_die) == DW_TAG_typedef) + die_get_real_type(type_die, &sized_type); + else + sized_type = *type_die; + + /* Get the size of the actual type */ + if (dwarf_aggregate_size(&sized_type, &size) < 0) + return PERF_TMR_NO_SIZE; + + /* Minimal sanity check */ + if ((unsigned)offset >= size) + return PERF_TMR_BAD_OFFSET; + + return PERF_TMR_OK; +} + +struct type_state_stack *find_stack_state(struct type_state *state, + int offset) +{ + struct type_state_stack *stack; + + list_for_each_entry(stack, &state->stack_vars, list) { + if (offset == stack->offset) + return stack; + + if (stack->compound && stack->offset < offset && + offset < stack->offset + stack->size) + return stack; + } + return NULL; +} + +void set_stack_state(struct type_state_stack *stack, int offset, u8 kind, + Dwarf_Die *type_die, int ptr_offset) +{ + int tag; + Dwarf_Word size; + + if (kind == TSR_KIND_POINTER) { + /* TODO: arch-dependent pointer size */ + size = sizeof(void *); + } + else if (dwarf_aggregate_size(type_die, &size) < 0) + size = 0; + + stack->type = *type_die; + stack->size = size; + stack->offset = offset; + stack->ptr_offset = ptr_offset; + stack->kind = kind; + + if (kind == TSR_KIND_POINTER) { + stack->compound = false; + return; + } + + tag = dwarf_tag(type_die); + + switch (tag) { + case DW_TAG_structure_type: + case DW_TAG_union_type: + stack->compound = (kind != TSR_KIND_PERCPU_POINTER); + break; + default: + stack->compound = false; + break; + } +} + +struct type_state_stack *findnew_stack_state(struct type_state *state, + int offset, u8 kind, + Dwarf_Die *type_die, + int ptr_offset) +{ + struct type_state_stack *stack = find_stack_state(state, offset); + + if (stack) { + set_stack_state(stack, offset, kind, type_die, ptr_offset); + return stack; + } + + stack = malloc(sizeof(*stack)); + if (stack) { + set_stack_state(stack, offset, kind, type_die, ptr_offset); + list_add(&stack->list, &state->stack_vars); + } + return stack; +} + +/* Maintain a cache for quick global variable lookup */ +struct global_var_entry { + struct rb_node node; + char *name; + u64 start; + u64 end; + u64 die_offset; +}; + +static int global_var_cmp(const void *_key, const struct rb_node *node) +{ + const u64 addr = (uintptr_t)_key; + struct global_var_entry *gvar; + + gvar = rb_entry(node, struct global_var_entry, node); + + if (gvar->start <= addr && addr < gvar->end) + return 0; + return gvar->start > addr ? 
-1 : 1; +} + +static bool global_var_less(struct rb_node *node_a, const struct rb_node *node_b) +{ + struct global_var_entry *gvar_a, *gvar_b; + + gvar_a = rb_entry(node_a, struct global_var_entry, node); + gvar_b = rb_entry(node_b, struct global_var_entry, node); + + return gvar_a->start < gvar_b->start; +} + +static struct global_var_entry *global_var__find(struct data_loc_info *dloc, u64 addr) +{ + struct dso *dso = map__dso(dloc->ms->map); + struct rb_node *node; + + node = rb_find((void *)(uintptr_t)addr, dso__global_vars(dso), global_var_cmp); + if (node == NULL) + return NULL; + + return rb_entry(node, struct global_var_entry, node); +} + +static bool global_var__add(struct data_loc_info *dloc, u64 addr, + const char *name, Dwarf_Die *type_die) +{ + struct dso *dso = map__dso(dloc->ms->map); + struct global_var_entry *gvar; + Dwarf_Word size; + + if (dwarf_aggregate_size(type_die, &size) < 0) + return false; + + gvar = malloc(sizeof(*gvar)); + if (gvar == NULL) + return false; + + gvar->name = name ? strdup(name) : NULL; + if (name && gvar->name == NULL) { + free(gvar); + return false; + } + + gvar->start = addr; + gvar->end = addr + size; + gvar->die_offset = dwarf_dieoffset(type_die); + + rb_add(&gvar->node, dso__global_vars(dso), global_var_less); + return true; +} + +void global_var_type__tree_delete(struct rb_root *root) +{ + struct global_var_entry *gvar; + + while (!RB_EMPTY_ROOT(root)) { + struct rb_node *node = rb_first(root); + + rb_erase(node, root); + gvar = rb_entry(node, struct global_var_entry, node); + zfree(&gvar->name); + free(gvar); + } +} + +bool get_global_var_info(struct data_loc_info *dloc, u64 addr, + const char **var_name, int *var_offset) +{ + struct addr_location al; + struct symbol *sym; + u64 mem_addr; + + /* Kernel symbols might be relocated */ + mem_addr = addr + map__reloc(dloc->ms->map); + + addr_location__init(&al); + sym = thread__find_symbol_fb(dloc->thread, dloc->cpumode, + mem_addr, &al); + if (sym) { + *var_name = sym->name; + /* Calculate type offset from the start of variable */ + *var_offset = mem_addr - map__unmap_ip(al.map, sym->start); + } else { + *var_name = NULL; + } + addr_location__exit(&al); + if (*var_name == NULL) + return false; + + return true; +} + +static void global_var__collect(struct data_loc_info *dloc) +{ + Dwarf *dwarf = dloc->di->dbg; + Dwarf_Off off, next_off; + Dwarf_Die cu_die, type_die; + size_t header_size; + + /* Iterate all CU and collect global variables that have no location in a register. 
*/ + off = 0; + while (dwarf_nextcu(dwarf, off, &next_off, &header_size, + NULL, NULL, NULL) == 0) { + struct die_var_type *var_types = NULL; + struct die_var_type *pos; + + if (dwarf_offdie(dwarf, off + header_size, &cu_die) == NULL) { + off = next_off; + continue; + } + + die_collect_global_vars(&cu_die, &var_types); + + for (pos = var_types; pos; pos = pos->next) { + const char *var_name = NULL; + int var_offset = 0; + + if (pos->reg != -1) + continue; + + if (!dwarf_offdie(dwarf, pos->die_off, &type_die)) + continue; + + if (!get_global_var_info(dloc, pos->addr, &var_name, + &var_offset)) + continue; + + if (var_offset != 0) + continue; + + global_var__add(dloc, pos->addr, var_name, &type_die); + } + + delete_var_types(var_types); + + off = next_off; + } +} + +bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc, + u64 ip, u64 var_addr, int *var_offset, + Dwarf_Die *type_die) +{ + u64 pc; + int offset; + const char *var_name = NULL; + struct global_var_entry *gvar; + struct dso *dso = map__dso(dloc->ms->map); + Dwarf_Die var_die; + + if (RB_EMPTY_ROOT(dso__global_vars(dso))) + global_var__collect(dloc); + + gvar = global_var__find(dloc, var_addr); + if (gvar) { + if (!dwarf_offdie(dloc->di->dbg, gvar->die_offset, type_die)) + return false; + + *var_offset = var_addr - gvar->start; + return true; + } + + /* Try to get the variable by address first */ + if (die_find_variable_by_addr(cu_die, var_addr, &var_die, &offset) && + check_variable(dloc, &var_die, type_die, DWARF_REG_PC, offset, + /*is_fbreg=*/false) == PERF_TMR_OK) { + var_name = dwarf_diename(&var_die); + *var_offset = offset; + goto ok; + } + + if (!get_global_var_info(dloc, var_addr, &var_name, var_offset)) + return false; + + pc = map__rip_2objdump(dloc->ms->map, ip); + + /* Try to get the name of global variable */ + if (die_find_variable_at(cu_die, var_name, pc, &var_die) && + check_variable(dloc, &var_die, type_die, DWARF_REG_PC, *var_offset, + /*is_fbreg=*/false) == PERF_TMR_OK) + goto ok; + + return false; + +ok: + /* The address should point to the start of the variable */ + global_var__add(dloc, var_addr - *var_offset, var_name, type_die); + return true; +} + +static bool die_is_same(Dwarf_Die *die_a, Dwarf_Die *die_b) +{ + return (die_a->cu == die_b->cu) && (die_a->addr == die_b->addr); +} + +/** + * update_var_state - Update type state using given variables + * @state: type state table + * @dloc: data location info + * @addr: instruction address to match with variable + * @insn_offset: instruction offset (for debug) + * @var_types: list of variables with type info + * + * This function fills the @state table using @var_types info. Each variable + * is used only at the given location and updates an entry in the table. 
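+ *
+ * For example (illustrative only): a DWARF entry saying that variable
+ * "foo" of type "struct foo *" lives in reg5 starting at this address
+ * sets regs[5] to TSR_KIND_TYPE with the pointed-to type "struct foo".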
+ */ +static void update_var_state(struct type_state *state, struct data_loc_info *dloc, + u64 addr, u64 insn_offset, struct die_var_type *var_types) +{ + Dwarf_Die mem_die; + struct die_var_type *var; + int fbreg = dloc->fbreg; + int fb_offset = 0; + + if (dloc->fb_cfa) { + if (die_get_cfa(dloc->di->dbg, addr, &fbreg, &fb_offset) < 0) + fbreg = -1; + } + + for (var = var_types; var != NULL; var = var->next) { + if (var->addr != addr) + continue; + /* Get the type DIE using the offset */ + if (!dwarf_offdie(dloc->di->dbg, var->die_off, &mem_die)) + continue; + + if (var->reg == DWARF_REG_FB || var->reg == fbreg || var->reg == state->stack_reg) { + int offset = var->offset; + struct type_state_stack *stack; + + /* If the reg location holds the pointer value, dereference the type */ + if (!var->is_reg_var_addr && is_pointer_type(&mem_die) && + __die_get_real_type(&mem_die, &mem_die) == NULL) + continue; + + if (var->reg != DWARF_REG_FB) + offset -= fb_offset; + + stack = find_stack_state(state, offset); + if (stack && stack->kind == TSR_KIND_TYPE && + !is_better_type(&stack->type, &mem_die)) + continue; + + findnew_stack_state(state, offset, TSR_KIND_TYPE, + &mem_die, /*ptr_offset=*/0); + + if (var->reg == state->stack_reg) { + pr_debug_dtp("var [%"PRIx64"] %#x(reg%d)", + insn_offset, offset, state->stack_reg); + } else { + pr_debug_dtp("var [%"PRIx64"] -%#x(stack)", + insn_offset, -offset); + } + pr_debug_type_name(&mem_die, TSR_KIND_TYPE); + } else if (has_reg_type(state, var->reg)) { + struct type_state_reg *reg; + Dwarf_Die orig_type; + + reg = &state->regs[var->reg]; + + if (reg->ok && reg->kind == TSR_KIND_TYPE && + (!is_better_type(®->type, &mem_die) || var->is_reg_var_addr)) + continue; + + /* Handle address registers with TSR_KIND_POINTER */ + if (var->is_reg_var_addr) { + if (reg->ok && reg->kind == TSR_KIND_POINTER && + !is_better_type(®->type, &mem_die)) + continue; + + reg->offset = -var->offset; + reg->type = mem_die; + reg->kind = TSR_KIND_POINTER; + reg->ok = true; + + pr_debug_dtp("var [%"PRIx64"] reg%d addr offset %x", + insn_offset, var->reg, var->offset); + pr_debug_type_name(&mem_die, TSR_KIND_POINTER); + continue; + } + + orig_type = reg->type; + /* + * var->offset + reg value is the beginning of the struct + * reg->offset is the offset the reg points + */ + reg->offset = -var->offset; + reg->type = mem_die; + reg->kind = TSR_KIND_TYPE; + reg->ok = true; + + pr_debug_dtp("var [%"PRIx64"] reg%d offset %x", + insn_offset, var->reg, var->offset); + pr_debug_type_name(&mem_die, TSR_KIND_TYPE); + + /* + * If this register is directly copied from another and it gets a + * better type, also update the type of the source register. This + * is usually the case of container_of() macro with offset of 0. 
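+			 * For example (illustrative): "q = container_of(p,
+			 * struct foo, link)" with "link" at offset 0 compiles
+			 * to a plain register copy, so when q's register later
+			 * gets the better type "struct foo *", p's register is
+			 * updated here as well.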
+			 */
+			if (has_reg_type(state, reg->copied_from)) {
+				struct type_state_reg *copy_reg;
+
+				copy_reg = &state->regs[reg->copied_from];
+
+				/* TODO: check if type is compatible or embedded */
+				if (!copy_reg->ok || (copy_reg->kind != TSR_KIND_TYPE) ||
+				    !die_is_same(&copy_reg->type, &orig_type) ||
+				    !is_better_type(&copy_reg->type, &mem_die))
+					continue;
+
+				copy_reg->type = mem_die;
+
+				pr_debug_dtp("var [%"PRIx64"] copyback reg%d",
+					     insn_offset, reg->copied_from);
+				pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
+			}
+		}
+	}
+}
+
+/**
+ * update_insn_state - Update type state for an instruction
+ * @state: type state table
+ * @dloc: data location info
+ * @cu_die: compile unit debug entry
+ * @dl: disasm line for the instruction
+ *
+ * This function updates the @state table for the target operand of the
+ * instruction at @dl if it transfers the type like MOV on x86. Since it
+ * tracks the type, it won't care about the values like in arithmetic
+ * instructions like ADD/SUB/MUL/DIV and INC/DEC.
+ *
+ * Note that ops->reg2 is only available when both mem_ref and multi_regs
+ * are true.
+ */
+static void update_insn_state(struct type_state *state, struct data_loc_info *dloc,
+			      Dwarf_Die *cu_die, struct disasm_line *dl)
+{
+	if (dloc->arch->update_insn_state)
+		dloc->arch->update_insn_state(state, dloc, cu_die, dl);
+}
+
+/*
+ * Prepend this_blocks (from the outer scope) to full_blocks, removing
+ * the duplicated disasm line at the boundary.
+ */
+static void prepend_basic_blocks(struct list_head *this_blocks,
+				 struct list_head *full_blocks)
+{
+	struct annotated_basic_block *first_bb, *last_bb;
+
+	last_bb = list_last_entry(this_blocks, typeof(*last_bb), list);
+	first_bb = list_first_entry(full_blocks, typeof(*first_bb), list);
+
+	if (list_empty(full_blocks))
+		goto out;
+
+	/* The last insn in this_blocks should be the same as the first insn in full_blocks */
+	if (last_bb->end != first_bb->begin) {
+		pr_debug("prepend basic blocks: mismatched disasm line %"PRIx64" -> %"PRIx64"\n",
+			 last_bb->end->al.offset, first_bb->begin->al.offset);
+		goto out;
+	}
+
+	/* Does the basic block have only one disasm_line? */
+	if (last_bb->begin == last_bb->end) {
+		list_del(&last_bb->list);
+		free(last_bb);
+		goto out;
+	}
+
+	/* Point to the insn before the last when adding this block to full_blocks */
+	last_bb->end = list_prev_entry(last_bb->end, al.node);
+
+out:
+	list_splice(this_blocks, full_blocks);
+}
+
+static void delete_basic_blocks(struct list_head *basic_blocks)
+{
+	struct annotated_basic_block *bb, *tmp;
+
+	list_for_each_entry_safe(bb, tmp, basic_blocks, list) {
+		list_del(&bb->list);
+		free(bb);
+	}
+}
+
+/* Make sure all variables have a valid start address */
+static void fixup_var_address(struct die_var_type *var_types, u64 addr)
+{
+	while (var_types) {
+		/*
+		 * Some variables have no address range, meaning they're
+		 * available in the whole scope. Let's adjust the start
+		 * address to the start of the scope.
+		 */
+		if (var_types->addr == 0)
+			var_types->addr = addr;
+
+		var_types = var_types->next;
+	}
+}
+
+static void delete_var_types(struct die_var_type *var_types)
+{
+	while (var_types) {
+		struct die_var_type *next = var_types->next;
+
+		free(var_types);
+		var_types = next;
+	}
+}
+
+/* should match is_stack_canary() in util/annotate.c */
+static void setup_stack_canary(struct data_loc_info *dloc)
+{
+	if (arch__is(dloc->arch, "x86")) {
+		dloc->op->segment = INSN_SEG_X86_GS;
+		dloc->op->imm = true;
+		dloc->op->offset = 40;
+	}
+}
+
+/*
+ * It's at the target address; check if it has a matching type.
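+ * For instance (illustrative): for "mov 0x10(%rax),%rbx" where rax is
+ * tracked as "struct task_struct *", the match resolves to the type
+ * "struct task_struct" with type_offset 0x10.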
+ * It returns PERF_TMR_BAIL_OUT when it looks up per-cpu variables which
+ * are similar to global variables and no additional info is needed.
+ */
+static enum type_match_result check_matching_type(struct type_state *state,
+						  struct data_loc_info *dloc,
+						  Dwarf_Die *cu_die,
+						  struct disasm_line *dl,
+						  Dwarf_Die *type_die)
+{
+	Dwarf_Word size;
+	u32 insn_offset = dl->al.offset;
+	int reg = dloc->op->reg1;
+	int offset = dloc->op->offset;
+	const char *offset_sign = "";
+	bool retry = true;
+
+	if (offset < 0) {
+		offset = -offset;
+		offset_sign = "-";
+	}
+
+again:
+	pr_debug_dtp("chk [%x] reg%d offset=%s%#x ok=%d kind=%d ",
+		     insn_offset, reg, offset_sign, offset,
+		     state->regs[reg].ok, state->regs[reg].kind);
+
+	if (!state->regs[reg].ok)
+		goto check_non_register;
+
+	if (state->regs[reg].kind == TSR_KIND_TYPE) {
+		Dwarf_Die sized_type;
+		struct strbuf sb;
+
+		strbuf_init(&sb, 32);
+		die_get_typename_from_type(&state->regs[reg].type, &sb);
+		pr_debug_dtp("(%s)", sb.buf);
+		strbuf_release(&sb);
+
+		/*
+		 * Normal registers should hold a pointer (or array) to
+		 * dereference a memory location.
+		 */
+		if (!is_pointer_type(&state->regs[reg].type)) {
+			if (dloc->op->offset < 0 && reg != state->stack_reg)
+				goto check_kernel;
+
+			return PERF_TMR_NO_POINTER;
+		}
+
+		/* Remove the pointer and get the target type */
+		if (__die_get_real_type(&state->regs[reg].type, type_die) == NULL)
+			return PERF_TMR_NO_POINTER;
+
+		dloc->type_offset = dloc->op->offset + state->regs[reg].offset;
+
+		if (dwarf_tag(type_die) == DW_TAG_typedef)
+			die_get_real_type(type_die, &sized_type);
+		else
+			sized_type = *type_die;
+
+		/* Get the size of the actual type */
+		if (dwarf_aggregate_size(&sized_type, &size) < 0 ||
+		    (unsigned)dloc->type_offset >= size)
+			return PERF_TMR_BAD_OFFSET;
+
+		return PERF_TMR_OK;
+	}
+
+	if (state->regs[reg].kind == TSR_KIND_POINTER) {
+		struct strbuf sb;
+
+		strbuf_init(&sb, 32);
+		die_get_typename_from_type(&state->regs[reg].type, &sb);
+		pr_debug_dtp("(ptr->%s)", sb.buf);
+		strbuf_release(&sb);
+
+		/*
+		 * Register holds a pointer (address) to the target variable.
+		 * The type is the type of the variable it points to.
+		 */
+		*type_die = state->regs[reg].type;
+
+		dloc->type_offset = dloc->op->offset + state->regs[reg].offset;
+
+		/* Get the size of the actual type */
+		if (dwarf_aggregate_size(type_die, &size) < 0 ||
+		    (unsigned)dloc->type_offset >= size)
+			return PERF_TMR_BAD_OFFSET;
+
+		return PERF_TMR_OK;
+	}
+
+	if (state->regs[reg].kind == TSR_KIND_PERCPU_POINTER) {
+		pr_debug_dtp("percpu ptr");
+
+		/*
+		 * It's actually a pointer, but the address was calculated using
+		 * some arithmetic. So it points to the actual type already.
+		 */
+		*type_die = state->regs[reg].type;
+
+		dloc->type_offset = dloc->op->offset;
+
+		/* Get the size of the actual type */
+		if (dwarf_aggregate_size(type_die, &size) < 0 ||
+		    (unsigned)dloc->type_offset >= size)
+			return PERF_TMR_BAIL_OUT;
+
+		return PERF_TMR_OK;
+	}
+
+	if (state->regs[reg].kind == TSR_KIND_CANARY) {
+		pr_debug_dtp("stack canary");
+
+		/*
+		 * This is a saved value of the stack canary which will be handled
+		 * in the outer logic when it returns failure here. Pretend it's
+		 * from the stack canary directly.
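+		 * (Illustration: on x86 the canary is typically read with
+		 * something like "mov %gs:0x28,%rax"; setup_stack_canary()
+		 * fakes that same %gs:40 location so the caller can treat
+		 * it uniformly.)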
+		 */
+		setup_stack_canary(dloc);
+
+		return PERF_TMR_BAIL_OUT;
+	}
+
+	if (state->regs[reg].kind == TSR_KIND_PERCPU_BASE) {
+		u64 var_addr = dloc->op->offset;
+		int var_offset;
+
+		pr_debug_dtp("percpu var");
+
+		if (dloc->op->multi_regs) {
+			int reg2 = dloc->op->reg2;
+
+			if (dloc->op->reg2 == reg)
+				reg2 = dloc->op->reg1;
+
+			if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
+			    state->regs[reg2].kind == TSR_KIND_CONST)
+				var_addr += state->regs[reg2].imm_value;
+		}
+
+		if (get_global_var_type(cu_die, dloc, dloc->ip, var_addr,
+					&var_offset, type_die)) {
+			dloc->type_offset = var_offset;
+			return PERF_TMR_OK;
+		}
+		/* No need to retry per-cpu (global) variables */
+		return PERF_TMR_BAIL_OUT;
+	}
+
+check_non_register:
+	if (reg == dloc->fbreg || reg == state->stack_reg) {
+		struct type_state_stack *stack;
+
+		pr_debug_dtp("%s", reg == dloc->fbreg ? "fbreg" : "stack");
+
+		stack = find_stack_state(state, dloc->type_offset);
+		if (stack == NULL) {
+			if (retry) {
+				pr_debug_dtp(" : retry\n");
+				retry = false;
+
+				/* update type info if it's the first store to the stack */
+				update_insn_state(state, dloc, cu_die, dl);
+				goto again;
+			}
+			return PERF_TMR_NO_TYPE;
+		}
+
+		if (stack->kind == TSR_KIND_CANARY) {
+			setup_stack_canary(dloc);
+			return PERF_TMR_BAIL_OUT;
+		}
+
+		if (stack->kind != TSR_KIND_TYPE)
+			return PERF_TMR_NO_TYPE;
+
+		*type_die = stack->type;
+		/* Update the type offset from the start of the slot */
+		dloc->type_offset -= stack->offset;
+
+		return PERF_TMR_OK;
+	}
+
+	if (dloc->fb_cfa) {
+		struct type_state_stack *stack;
+		u64 pc = map__rip_2objdump(dloc->ms->map, dloc->ip);
+		int fbreg, fboff;
+
+		pr_debug_dtp("cfa");
+
+		if (die_get_cfa(dloc->di->dbg, pc, &fbreg, &fboff) < 0)
+			fbreg = -1;
+
+		if (reg != fbreg)
+			return PERF_TMR_NO_TYPE;
+
+		stack = find_stack_state(state, dloc->type_offset - fboff);
+		if (stack == NULL) {
+			if (retry) {
+				pr_debug_dtp(" : retry\n");
+				retry = false;
+
+				/* update type info if it's the first store to the stack */
+				update_insn_state(state, dloc, cu_die, dl);
+				goto again;
+			}
+			return PERF_TMR_NO_TYPE;
+		}
+
+		if (stack->kind == TSR_KIND_CANARY) {
+			setup_stack_canary(dloc);
+			return PERF_TMR_BAIL_OUT;
+		}
+
+		if (stack->kind != TSR_KIND_TYPE)
+			return PERF_TMR_NO_TYPE;
+
+		*type_die = stack->type;
+		/* Update the type offset from the start of the slot */
+		dloc->type_offset -= fboff + stack->offset;
+
+		return PERF_TMR_OK;
+	}
+
+check_kernel:
+	if (dso__kernel(map__dso(dloc->ms->map))) {
+		u64 addr;
+
+		/* Direct this-cpu access like "%gs:0x34740" */
+		if (dloc->op->segment == INSN_SEG_X86_GS && dloc->op->imm &&
+		    arch__is(dloc->arch, "x86")) {
+			pr_debug_dtp("this-cpu var");
+
+			addr = dloc->op->offset;
+
+			if (get_global_var_type(cu_die, dloc, dloc->ip, addr,
+						&offset, type_die)) {
+				dloc->type_offset = offset;
+				return PERF_TMR_OK;
+			}
+			return PERF_TMR_BAIL_OUT;
+		}
+
+		/* Access to global variable like "-0x7dcf0500(,%rdx,8)" */
+		if (dloc->op->offset < 0 && reg != state->stack_reg) {
+			addr = (s64) dloc->op->offset;
+
+			if (get_global_var_type(cu_die, dloc, dloc->ip, addr,
+						&offset, type_die)) {
+				pr_debug_dtp("global var");
+
+				dloc->type_offset = offset;
+				return PERF_TMR_OK;
+			}
+			return PERF_TMR_BAIL_OUT;
+		}
+	}
+
+	return PERF_TMR_UNKNOWN;
+}
+
+/* Iterate instructions in basic blocks and update the type table */
+static enum type_match_result find_data_type_insn(struct data_loc_info *dloc,
+						  struct list_head *basic_blocks,
+						  struct die_var_type *var_types,
+						  Dwarf_Die *cu_die,
+						  Dwarf_Die *type_die)
+{
+	struct type_state state;
+	struct
symbol *sym = dloc->ms->sym; + struct annotation *notes = symbol__annotation(sym); + struct annotated_basic_block *bb; + enum type_match_result ret = PERF_TMR_UNKNOWN; + + init_type_state(&state, dloc->arch); + + list_for_each_entry(bb, basic_blocks, list) { + struct disasm_line *dl = bb->begin; + + BUG_ON(bb->begin->al.offset == -1 || bb->end->al.offset == -1); + + pr_debug_dtp("bb: [%"PRIx64" - %"PRIx64"]\n", + bb->begin->al.offset, bb->end->al.offset); + + list_for_each_entry_from(dl, ¬es->src->source, al.node) { + u64 this_ip = sym->start + dl->al.offset; + u64 addr = map__rip_2objdump(dloc->ms->map, this_ip); + + /* Skip comment or debug info lines */ + if (dl->al.offset == -1) + continue; + + /* Update variable type at this address */ + update_var_state(&state, dloc, addr, dl->al.offset, var_types); + + if (this_ip == dloc->ip) { + ret = check_matching_type(&state, dloc, + cu_die, dl, type_die); + pr_debug_dtp(" : %s\n", match_result_str(ret)); + goto out; + } + + /* Update type table after processing the instruction */ + update_insn_state(&state, dloc, cu_die, dl); + if (dl == bb->end) + break; + } + } + +out: + exit_type_state(&state); + return ret; +} + +static int arch_supports_insn_tracking(struct data_loc_info *dloc) +{ + if ((arch__is(dloc->arch, "x86")) || (arch__is(dloc->arch, "powerpc"))) + return 1; + return 0; +} + +/* + * Construct a list of basic blocks for each scope with variables and try to find + * the data type by updating a type state table through instructions. + */ +static enum type_match_result find_data_type_block(struct data_loc_info *dloc, + Dwarf_Die *cu_die, + Dwarf_Die *scopes, + int nr_scopes, + Dwarf_Die *type_die) +{ + LIST_HEAD(basic_blocks); + struct die_var_type *var_types = NULL; + u64 src_ip, dst_ip, prev_dst_ip; + enum type_match_result ret = PERF_TMR_UNKNOWN; + + /* TODO: other architecture support */ + if (!arch_supports_insn_tracking(dloc)) + return PERF_TMR_BAIL_OUT; + + prev_dst_ip = dst_ip = dloc->ip; + for (int i = nr_scopes - 1; i >= 0; i--) { + Dwarf_Addr base, start, end; + LIST_HEAD(this_blocks); + + if (dwarf_ranges(&scopes[i], 0, &base, &start, &end) < 0) + break; + + pr_debug_dtp("scope: [%d/%d] ", i + 1, nr_scopes); + pr_debug_scope(&scopes[i]); + + src_ip = map__objdump_2rip(dloc->ms->map, start); + +again: + /* Get basic blocks for this scope */ + if (annotate_get_basic_blocks(dloc->ms->sym, src_ip, dst_ip, + &this_blocks) < 0) { + /* Try previous block if they are not connected */ + if (prev_dst_ip != dst_ip) { + dst_ip = prev_dst_ip; + goto again; + } + + pr_debug_dtp("cannot find a basic block from %"PRIx64" to %"PRIx64"\n", + src_ip - dloc->ms->sym->start, + dst_ip - dloc->ms->sym->start); + continue; + } + prepend_basic_blocks(&this_blocks, &basic_blocks); + + /* Get variable info for this scope and add to var_types list */ + die_collect_vars(&scopes[i], &var_types); + fixup_var_address(var_types, start); + + /* Find from start of this scope to the target instruction */ + ret = find_data_type_insn(dloc, &basic_blocks, var_types, + cu_die, type_die); + if (ret == PERF_TMR_OK) { + char buf[64]; + int offset = dloc->op->offset; + const char *offset_sign = ""; + + if (offset < 0) { + offset = -offset; + offset_sign = "-"; + } + + if (dloc->op->multi_regs) + snprintf(buf, sizeof(buf), "reg%d, reg%d", + dloc->op->reg1, dloc->op->reg2); + else + snprintf(buf, sizeof(buf), "reg%d", dloc->op->reg1); + + pr_debug_dtp("found by insn track: %s%#x(%s) type-offset=%#x\n", + offset_sign, offset, buf, dloc->type_offset); + break; + } + + if 
(ret == PERF_TMR_BAIL_OUT) + break; + + /* Go up to the next scope and find blocks to the start */ + prev_dst_ip = dst_ip; + dst_ip = src_ip; + } + + delete_basic_blocks(&basic_blocks); + delete_var_types(var_types); + return ret; +} + +/* The result will be saved in @type_die */ +static int find_data_type_die(struct data_loc_info *dloc, Dwarf_Die *type_die) +{ + struct annotated_op_loc *loc = dloc->op; + Dwarf_Die cu_die, var_die; + Dwarf_Die *scopes = NULL; + int reg, offset = loc->offset; + int ret = -1; + int i, nr_scopes; + int fbreg = -1; + int fb_offset = 0; + bool is_fbreg = false; + bool found = false; + u64 pc; + char buf[64]; + enum type_match_result result = PERF_TMR_UNKNOWN; + const char *offset_sign = ""; + + if (dloc->op->multi_regs) + snprintf(buf, sizeof(buf), "reg%d, reg%d", dloc->op->reg1, dloc->op->reg2); + else if (dloc->op->reg1 == DWARF_REG_PC) + snprintf(buf, sizeof(buf), "PC"); + else + snprintf(buf, sizeof(buf), "reg%d", dloc->op->reg1); + + if (offset < 0) { + offset = -offset; + offset_sign = "-"; + } + + pr_debug_dtp("-----------------------------------------------------------\n"); + pr_debug_dtp("find data type for %s%#x(%s) at %s+%#"PRIx64"\n", + offset_sign, offset, buf, + dloc->ms->sym->name, dloc->ip - dloc->ms->sym->start); + + /* + * IP is a relative instruction address from the start of the map, as + * it can be randomized/relocated, it needs to translate to PC which is + * a file address for DWARF processing. + */ + pc = map__rip_2objdump(dloc->ms->map, dloc->ip); + + /* Get a compile_unit for this address */ + if (!find_cu_die(dloc->di, pc, &cu_die)) { + pr_debug_dtp("cannot find CU for address %"PRIx64"\n", pc); + ann_data_stat.no_cuinfo++; + return -1; + } + + reg = loc->reg1; + offset = loc->offset; + + pr_debug_dtp("CU for %s (die:%#lx)\n", + dwarf_diename(&cu_die), (long)dwarf_dieoffset(&cu_die)); + + if (reg == DWARF_REG_PC) { + if (get_global_var_type(&cu_die, dloc, dloc->ip, dloc->var_addr, + &offset, type_die)) { + dloc->type_offset = offset; + + pr_debug_dtp("found by addr=%#"PRIx64" type_offset=%#x\n", + dloc->var_addr, offset); + pr_debug_type_name(type_die, TSR_KIND_TYPE); + found = true; + goto out; + } + } + + /* Get a list of nested scopes - i.e. (inlined) functions and blocks. */ + nr_scopes = die_get_scopes(&cu_die, pc, &scopes); + + if (reg != DWARF_REG_PC && dwarf_hasattr(&scopes[0], DW_AT_frame_base)) { + Dwarf_Attribute attr; + Dwarf_Block block; + + /* Check if the 'reg' is assigned as frame base register */ + if (dwarf_attr(&scopes[0], DW_AT_frame_base, &attr) != NULL && + dwarf_formblock(&attr, &block) == 0 && block.length == 1) { + switch (*block.data) { + case DW_OP_reg0 ... 
DW_OP_reg31:
+				fbreg = dloc->fbreg = *block.data - DW_OP_reg0;
+				break;
+			case DW_OP_call_frame_cfa:
+				dloc->fb_cfa = true;
+				if (die_get_cfa(dloc->di->dbg, pc, &fbreg,
+						&fb_offset) < 0)
+					fbreg = -1;
+				break;
+			default:
+				break;
+			}
+
+			pr_debug_dtp("frame base: cfa=%d fbreg=%d\n",
+				     dloc->fb_cfa, fbreg);
+		}
+	}
+
+retry:
+	is_fbreg = (reg == fbreg);
+	if (is_fbreg)
+		offset = loc->offset - fb_offset;
+
+	/* Search from the inner-most scope to the outer */
+	for (i = nr_scopes - 1; i >= 0; i--) {
+		Dwarf_Die mem_die;
+		int type_offset = offset;
+
+		if (reg == DWARF_REG_PC) {
+			if (!die_find_variable_by_addr(&scopes[i], dloc->var_addr,
+						       &var_die, &type_offset))
+				continue;
+		} else {
+			/* Look up variables/parameters in this scope */
+			if (!die_find_variable_by_reg(&scopes[i], pc, reg,
+						      &type_offset, is_fbreg, &var_die))
+				continue;
+		}
+
+		pr_debug_dtp("found \"%s\" (die: %#lx) in scope=%d/%d (die: %#lx) ",
+			     dwarf_diename(&var_die), (long)dwarf_dieoffset(&var_die),
+			     i+1, nr_scopes, (long)dwarf_dieoffset(&scopes[i]));
+
+		/* Found a variable, see if it's correct */
+		result = check_variable(dloc, &var_die, &mem_die, reg, type_offset, is_fbreg);
+		if (result == PERF_TMR_OK) {
+			if (reg == DWARF_REG_PC) {
+				pr_debug_dtp("addr=%#"PRIx64" type_offset=%#x\n",
+					     dloc->var_addr, type_offset);
+			} else if (reg == DWARF_REG_FB || is_fbreg) {
+				pr_debug_dtp("stack_offset=%#x type_offset=%#x\n",
+					     fb_offset, type_offset);
+			} else {
+				pr_debug_dtp("type_offset=%#x\n", type_offset);
+			}
+
+			if (!found || is_better_type(type_die, &mem_die)) {
+				*type_die = mem_die;
+				dloc->type_offset = type_offset;
+				found = true;
+			}
+		} else {
+			pr_debug_dtp("failed: %s\n", match_result_str(result));
+		}
+
+		pr_debug_location(&var_die, pc, reg);
+		pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
+	}
+
+	if (!found && loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) {
+		reg = loc->reg2;
+		goto retry;
+	}
+
+	if (!found && reg != DWARF_REG_PC) {
+		result = find_data_type_block(dloc, &cu_die, scopes,
+					      nr_scopes, type_die);
+		if (result == PERF_TMR_OK) {
+			ann_data_stat.insn_track++;
+			found = true;
+		}
+	}
+
+out:
+	pr_debug_dtp("final result: ");
+	if (found) {
+		pr_debug_type_name(type_die, TSR_KIND_TYPE);
+		ret = 0;
+	} else {
+		switch (result) {
+		case PERF_TMR_NO_TYPE:
+		case PERF_TMR_NO_POINTER:
+			pr_debug_dtp("%s\n", match_result_str(result));
+			ann_data_stat.no_typeinfo++;
+			break;
+		case PERF_TMR_NO_SIZE:
+			pr_debug_dtp("%s\n", match_result_str(result));
+			ann_data_stat.invalid_size++;
+			break;
+		case PERF_TMR_BAD_OFFSET:
+			pr_debug_dtp("%s\n", match_result_str(result));
+			ann_data_stat.bad_offset++;
+			break;
+		case PERF_TMR_UNKNOWN:
+		case PERF_TMR_BAIL_OUT:
+		case PERF_TMR_OK:	/* should not reach here */
+		default:
+			pr_debug_dtp("no variable found\n");
+			ann_data_stat.no_var++;
+			break;
+		}
+		ret = -1;
+	}
+
+	free(scopes);
+	return ret;
+}
+
+/**
+ * find_data_type - Return a data type at the location
+ * @dloc: data location
+ *
+ * This function searches the debug information of the binary to get the data
+ * type it accesses. The exact location is expressed by (ip, reg, offset)
+ * for pointer variables or (ip, addr) for global variables. Note that global
+ * variables might update the @dloc->type_offset after finding the start of the
+ * variable. If it cannot find a global variable by address, it tries to find
+ * a declaration of the variable using var_name. In that case, @dloc->type_offset
+ * won't be updated.
+ *
+ * It returns %NULL if not found.
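+ *
+ * For example (illustrative), a sampled load "mov 0x18(%rax),%rcx" is
+ * looked up as (ip, rax, 0x18), while a PC-relative access to a global
+ * variable is looked up by its resolved address as (ip, addr).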
+ */
+struct annotated_data_type *find_data_type(struct data_loc_info *dloc)
+{
+	struct dso *dso = map__dso(dloc->ms->map);
+	Dwarf_Die type_die;
+
+	/*
+	 * The type offset is the same as the instruction offset by default.
+	 * But when finding a global variable, the offset won't be valid.
+	 */
+	dloc->type_offset = dloc->op->offset;
+
+	dloc->fbreg = -1;
+
+	if (find_data_type_die(dloc, &type_die) < 0)
+		return NULL;
+
+	return dso__findnew_data_type(dso, &type_die);
+}
+
+static int alloc_data_type_histograms(struct annotated_data_type *adt, int nr_entries)
+{
+	int i;
+	size_t sz = sizeof(struct type_hist);
+
+	sz += sizeof(struct type_hist_entry) * adt->self.size;
+
+	/* Allocate a table of pointers for each event */
+	adt->histograms = calloc(nr_entries, sizeof(*adt->histograms));
+	if (adt->histograms == NULL)
+		return -ENOMEM;
+
+	/*
+	 * Each histogram is allocated for the whole size of the type.
+	 * TODO: Probably we can move the histogram to members.
+	 */
+	for (i = 0; i < nr_entries; i++) {
+		adt->histograms[i] = zalloc(sz);
+		if (adt->histograms[i] == NULL)
+			goto err;
+	}
+
+	adt->nr_histograms = nr_entries;
+	return 0;
+
+err:
+	while (--i >= 0)
+		zfree(&(adt->histograms[i]));
+	zfree(&adt->histograms);
+	return -ENOMEM;
+}
+
+static void delete_data_type_histograms(struct annotated_data_type *adt)
+{
+	for (int i = 0; i < adt->nr_histograms; i++)
+		zfree(&(adt->histograms[i]));
+
+	zfree(&adt->histograms);
+	adt->nr_histograms = 0;
+}
+
+void annotated_data_type__tree_delete(struct rb_root *root)
+{
+	struct annotated_data_type *pos;
+
+	while (!RB_EMPTY_ROOT(root)) {
+		struct rb_node *node = rb_first(root);
+
+		rb_erase(node, root);
+		pos = rb_entry(node, struct annotated_data_type, node);
+		delete_members(&pos->self);
+		delete_data_type_histograms(pos);
+		zfree(&pos->self.type_name);
+		free(pos);
+	}
+}
+
+/**
+ * annotated_data_type__update_samples - Update histogram
+ * @adt: Data type to update
+ * @evsel: Event to update
+ * @offset: Offset in the type
+ * @nr_samples: Number of samples at this offset
+ * @period: Event count at this offset
+ *
+ * This function updates the type histogram at @offset for @evsel. Samples are
+ * aggregated before calling this function so it can be called with more
+ * than one sample at a certain offset.
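+ *
+ * For example (illustrative), ten samples that all hit offset 8 of the
+ * type can be applied in a single call with @nr_samples = 10 and @period
+ * set to their summed-up event counts.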
+ */ +int annotated_data_type__update_samples(struct annotated_data_type *adt, + struct evsel *evsel, int offset, + int nr_samples, u64 period) +{ + struct type_hist *h; + + if (adt == NULL) + return 0; + + if (adt->histograms == NULL) { + int nr = evsel->evlist->core.nr_entries; + + if (alloc_data_type_histograms(adt, nr) < 0) + return -1; + } + + if (offset < 0 || offset >= adt->self.size) + return -1; + + h = adt->histograms[evsel->core.idx]; + + h->nr_samples += nr_samples; + h->addr[offset].nr_samples += nr_samples; + h->period += period; + h->addr[offset].period += period; + return 0; +} + +static void print_annotated_data_header(struct hist_entry *he, struct evsel *evsel) +{ + struct dso *dso = map__dso(he->ms.map); + int nr_members = 1; + int nr_samples = he->stat.nr_events; + int width = 7; + const char *val_hdr = "Percent"; + + if (evsel__is_group_event(evsel)) { + struct hist_entry *pair; + + list_for_each_entry(pair, &he->pairs.head, pairs.node) + nr_samples += pair->stat.nr_events; + } + + printf("Annotate type: '%s' in %s (%d samples):\n", + he->mem_type->self.type_name, dso__name(dso), nr_samples); + + if (evsel__is_group_event(evsel)) { + struct evsel *pos; + int i = 0; + + nr_members = 0; + for_each_group_evsel(pos, evsel) { + if (symbol_conf.skip_empty && + evsel__hists(pos)->stats.nr_samples == 0) + continue; + + printf(" event[%d] = %s\n", i++, pos->name); + nr_members++; + } + } + + if (symbol_conf.show_total_period) { + width = 11; + val_hdr = "Period"; + } else if (symbol_conf.show_nr_samples) { + width = 7; + val_hdr = "Samples"; + } + + printf("============================================================================\n"); + printf("%*s %10s %10s %s\n", (width + 1) * nr_members, val_hdr, + "offset", "size", "field"); +} + +static void print_annotated_data_value(struct type_hist *h, u64 period, int nr_samples) +{ + double percent = h->period ? (100.0 * period / h->period) : 0; + const char *color = get_percent_color(percent); + + if (symbol_conf.show_total_period) + color_fprintf(stdout, color, " %11" PRIu64, period); + else if (symbol_conf.show_nr_samples) + color_fprintf(stdout, color, " %7d", nr_samples); + else + color_fprintf(stdout, color, " %7.2f", percent); +} + +static void print_annotated_data_type(struct annotated_data_type *mem_type, + struct annotated_member *member, + struct evsel *evsel, int indent) +{ + struct annotated_member *child; + struct type_hist *h = mem_type->histograms[evsel->core.idx]; + int i, nr_events = 0, samples = 0; + u64 period = 0; + int width = symbol_conf.show_total_period ? 
11 : 7;
+	struct evsel *pos;
+
+	for_each_group_evsel(pos, evsel) {
+		h = mem_type->histograms[pos->core.idx];
+
+		if (symbol_conf.skip_empty &&
+		    evsel__hists(pos)->stats.nr_samples == 0)
+			continue;
+
+		samples = 0;
+		period = 0;
+		for (i = 0; i < member->size; i++) {
+			samples += h->addr[member->offset + i].nr_samples;
+			period += h->addr[member->offset + i].period;
+		}
+		print_annotated_data_value(h, period, samples);
+		nr_events++;
+	}
+
+	printf(" %#10x %#10x %*s%s\t%s",
+	       member->offset, member->size, indent, "", member->type_name,
+	       member->var_name ?: "");
+
+	if (!list_empty(&member->children))
+		printf(" {\n");
+
+	list_for_each_entry(child, &member->children, node)
+		print_annotated_data_type(mem_type, child, evsel, indent + 4);
+
+	if (!list_empty(&member->children))
+		printf("%*s}", (width + 1) * nr_events + 24 + indent, "");
+	printf(";\n");
+}
+
+int hist_entry__annotate_data_tty(struct hist_entry *he, struct evsel *evsel)
+{
+	print_annotated_data_header(he, evsel);
+	print_annotated_data_type(he->mem_type, &he->mem_type->self, evsel, 0);
+	printf("\n");
+
+	/* move to the next entry */
+	return '>';
+}
diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h
new file mode 100644
index 000000000000..869307c7f130
--- /dev/null
+++ b/tools/perf/util/annotate-data.h
@@ -0,0 +1,319 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_ANNOTATE_DATA_H
+#define _PERF_ANNOTATE_DATA_H
+
+#include <errno.h>
+#include <linux/compiler.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+#include "dwarf-regs.h"
+#include "annotate.h"
+
+#ifdef HAVE_LIBDW_SUPPORT
+#include "debuginfo.h"
+#endif
+
+struct annotated_op_loc;
+struct debuginfo;
+struct evsel;
+struct hist_browser_timer;
+struct hist_entry;
+struct map_symbol;
+struct thread;
+
+#define pr_debug_dtp(fmt, ...)					\
+do {								\
+	if (debug_type_profile)					\
+		pr_info(fmt, ##__VA_ARGS__);			\
+	else							\
+		pr_debug3(fmt, ##__VA_ARGS__);			\
+} while (0)
+
+enum type_state_kind {
+	TSR_KIND_INVALID = 0,
+	TSR_KIND_TYPE,
+	TSR_KIND_PERCPU_BASE,
+	TSR_KIND_CONST,
+	TSR_KIND_PERCPU_POINTER,
+	TSR_KIND_POINTER,
+	TSR_KIND_CANARY,
+};
+
+/**
+ * struct annotated_member - Type of member field
+ * @node: List entry in the parent list
+ * @children: List head for child nodes
+ * @type_name: Name of the member type
+ * @var_name: Name of the member variable
+ * @offset: Offset from the outer data type
+ * @size: Size of the member field
+ *
+ * This represents a member type in a data type.
+ */
+struct annotated_member {
+	struct list_head node;
+	struct list_head children;
+	char *type_name;
+	char *var_name;
+	int offset;
+	int size;
+};
+
+/**
+ * struct type_hist_entry - Histogram entry per offset
+ * @nr_samples: Number of samples
+ * @period: Count of event
+ */
+struct type_hist_entry {
+	int nr_samples;
+	u64 period;
+};
+
+/**
+ * struct type_hist - Type histogram for each event
+ * @nr_samples: Total number of samples in this data type
+ * @period: Total count of the event in this data type
+ * @addr: Array of histogram entries
+ */
+struct type_hist {
+	u64 nr_samples;
+	u64 period;
+	struct type_hist_entry addr[];
+};
+
+/**
+ * struct annotated_data_type - Data type to profile
+ * @node: RB-tree node for dso->type_tree
+ * @self: Actual type information
+ * @nr_histograms: Number of histogram entries
+ * @histograms: An array of pointers to histograms
+ *
+ * This represents a data type accessed by samples in the profile data.
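+ *
+ * For example (illustrative), every sampled access that resolves to
+ * "struct task_struct" in a DSO shares a single node here, along with
+ * one histogram per event.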
+ */
+struct annotated_data_type {
+	struct rb_node node;
+	struct annotated_member self;
+	int nr_histograms;
+	struct type_hist **histograms;
+};
+
+extern struct annotated_data_type unknown_type;
+extern struct annotated_data_type stackop_type;
+extern struct annotated_data_type canary_type;
+
+/**
+ * struct data_loc_info - Data location information
+ * @arch: CPU architecture info
+ * @thread: Thread info
+ * @ms: Map and Symbol info
+ * @ip: Instruction address
+ * @var_addr: Data address (for global variables)
+ * @cpumode: CPU execution mode
+ * @op: Instruction operand location (regs and offset)
+ * @di: Debug info
+ * @fbreg: Frame base register
+ * @fb_cfa: Whether the frame needs to check CFA
+ * @type_offset: Final offset in the type
+ */
+struct data_loc_info {
+	/* These are input fields and should be filled by the caller */
+	struct arch *arch;
+	struct thread *thread;
+	struct map_symbol *ms;
+	u64 ip;
+	u64 var_addr;
+	u8 cpumode;
+	struct annotated_op_loc *op;
+	struct debuginfo *di;
+
+	/* These are used internally */
+	int fbreg;
+	bool fb_cfa;
+
+	/* This is for the result */
+	int type_offset;
+};
+
+/**
+ * struct annotated_data_stat - Debug statistics
+ * @total: Total number of entries
+ * @no_sym: No symbol or map found
+ * @no_insn: Failed to get disasm line
+ * @no_insn_ops: The instruction has no operands
+ * @no_mem_ops: The instruction has no memory operands
+ * @no_reg: Failed to extract a register from the operand
+ * @no_dbginfo: The binary has no debug information
+ * @no_cuinfo: Failed to find a compile_unit
+ * @no_var: Failed to find a matching variable
+ * @no_typeinfo: Failed to get a type info for the variable
+ * @invalid_size: Failed to get a size info of the type
+ * @bad_offset: The access offset is out of the type
+ * @insn_track: Found by instruction tracking
+ */
+struct annotated_data_stat {
+	int total;
+	int no_sym;
+	int no_insn;
+	int no_insn_ops;
+	int no_mem_ops;
+	int no_reg;
+	int no_dbginfo;
+	int no_cuinfo;
+	int no_var;
+	int no_typeinfo;
+	int invalid_size;
+	int bad_offset;
+	int insn_track;
+};
+extern struct annotated_data_stat ann_data_stat;
+
+#ifdef HAVE_LIBDW_SUPPORT
+/*
+ * Type information in a register, valid when @ok is true.
+ * The @caller_saved registers are invalidated after a function call.
+ */
+struct type_state_reg {
+	Dwarf_Die type;
+	u32 imm_value;
+	/*
+	 * The offset within the struct that the register points to.
+	 * A value of 0 means the register points to the beginning.
+	 * type_offset = op->offset + reg->offset
+	 */
+	s32 offset;
+	bool ok;
+	bool caller_saved;
+	u8 kind;
+	u8 copied_from;
+};
+
+/* Type information in a stack location, dynamically allocated */
+struct type_state_stack {
+	struct list_head list;
+	Dwarf_Die type;
+	int offset;
+	/* pointer offset, saves tsr->offset on the stack state */
+	int ptr_offset;
+	int size;
+	bool compound;
+	u8 kind;
+};
+
+/*
+ * Maximum number of registers tracked in type_state.
+ *
+ * This limit must cover all supported architectures, since perf
+ * may analyze perf.data files generated on systems with a different
+ * register set. Use 32 as a safe upper bound instead of relying on
+ * build-arch specific values.
+ */
+#define TYPE_STATE_MAX_REGS  32
+
+/*
+ * State table to maintain type info in each register and stack location.
+ * It'll be updated when a new variable is allocated or type info is moved
+ * to a new location (register or stack). As it'd be used with the
+ * shortest path of basic blocks, it only maintains a single table.
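+ *
+ * For example (illustrative), "mov %rax,%rbx" copies the tracked type
+ * from rax's slot to rbx's, and "mov %rax,-8(%rbp)" moves it into the
+ * stack_vars entry at offset -8.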
+ */ +struct type_state { + /* state of general purpose registers */ + struct type_state_reg regs[TYPE_STATE_MAX_REGS]; + /* state of stack location */ + struct list_head stack_vars; + /* return value register */ + int ret_reg; + /* stack pointer register */ + int stack_reg; +}; + +/* Returns data type at the location (ip, reg, offset) */ +struct annotated_data_type *find_data_type(struct data_loc_info *dloc); + +/* Update type access histogram at the given offset */ +int annotated_data_type__update_samples(struct annotated_data_type *adt, + struct evsel *evsel, int offset, + int nr_samples, u64 period); + +/* Release all data type information in the tree */ +void annotated_data_type__tree_delete(struct rb_root *root); + +/* Release all global variable information in the tree */ +void global_var_type__tree_delete(struct rb_root *root); + +/* Print data type annotation (including members) on stdout */ +int hist_entry__annotate_data_tty(struct hist_entry *he, struct evsel *evsel); + +/* Get name of member field at the given offset in the data type */ +int annotated_data_type__get_member_name(struct annotated_data_type *adt, + char *buf, size_t sz, int member_offset); + +bool has_reg_type(struct type_state *state, int reg); +struct type_state_stack *findnew_stack_state(struct type_state *state, + int offset, u8 kind, + Dwarf_Die *type_die, + int ptr_offset); +void set_stack_state(struct type_state_stack *stack, int offset, u8 kind, + Dwarf_Die *type_die, int ptr_offset); +struct type_state_stack *find_stack_state(struct type_state *state, + int offset); +bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc, + u64 ip, u64 var_addr, int *var_offset, + Dwarf_Die *type_die); +bool get_global_var_info(struct data_loc_info *dloc, u64 addr, + const char **var_name, int *var_offset); +void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind); + +#else /* HAVE_LIBDW_SUPPORT */ + +static inline struct annotated_data_type * +find_data_type(struct data_loc_info *dloc __maybe_unused) +{ + return NULL; +} + +static inline int +annotated_data_type__update_samples(struct annotated_data_type *adt __maybe_unused, + struct evsel *evsel __maybe_unused, + int offset __maybe_unused, + int nr_samples __maybe_unused, + u64 period __maybe_unused) +{ + return -1; +} + +static inline void annotated_data_type__tree_delete(struct rb_root *root __maybe_unused) +{ +} + +static inline void global_var_type__tree_delete(struct rb_root *root __maybe_unused) +{ +} + +static inline int hist_entry__annotate_data_tty(struct hist_entry *he __maybe_unused, + struct evsel *evsel __maybe_unused) +{ + return -1; +} + +static inline int annotated_data_type__get_member_name(struct annotated_data_type *adt __maybe_unused, + char *buf __maybe_unused, + size_t sz __maybe_unused, + int member_offset __maybe_unused) +{ + return -1; +} + +#endif /* HAVE_LIBDW_SUPPORT */ + +#ifdef HAVE_SLANG_SUPPORT +int hist_entry__annotate_data_tui(struct hist_entry *he, struct evsel *evsel, + struct hist_browser_timer *hbt); +#else +static inline int hist_entry__annotate_data_tui(struct hist_entry *he __maybe_unused, + struct evsel *evsel __maybe_unused, + struct hist_browser_timer *hbt __maybe_unused) +{ + return -1; +} +#endif /* HAVE_SLANG_SUPPORT */ + +#endif /* _PERF_ANNOTATE_DATA_H */ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ba988a13dacb..cc7764455faf 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -16,6 +16,7 @@ #include "build-id.h" #include "color.h" #include 
"config.h" +#include "disasm.h" #include "dso.h" #include "env.h" #include "map.h" @@ -24,17 +25,23 @@ #include "srcline.h" #include "units.h" #include "debug.h" +#include "debuginfo.h" #include "annotate.h" +#include "annotate-data.h" #include "evsel.h" #include "evlist.h" #include "bpf-event.h" #include "bpf-utils.h" #include "block-range.h" #include "string2.h" +#include "dwarf-regs.h" #include "util/event.h" #include "util/sharded_mutex.h" #include "arch/common.h" #include "namespaces.h" +#include "thread.h" +#include "hashmap.h" +#include "strbuf.h" #include <regex.h> #include <linux/bitops.h> #include <linux/kernel.h> @@ -42,6 +49,7 @@ #include <linux/zalloc.h> #include <subcmd/parse-options.h> #include <subcmd/run-command.h> +#include <math.h> /* FIXME: For the HE_COLORSET */ #include "ui/browser.h" @@ -57,742 +65,39 @@ #include <linux/ctype.h> -static regex_t file_lineno; - -static struct ins_ops *ins__find(struct arch *arch, const char *name); -static void ins__sort(struct arch *arch); -static int disasm_line__parse(char *line, const char **namep, char **rawp); -static int call__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name); -static int jump__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name); - -struct arch { - const char *name; - struct ins *instructions; - size_t nr_instructions; - size_t nr_instructions_allocated; - struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const char *name); - bool sorted_instructions; - bool initialized; - const char *insn_suffix; - void *priv; - unsigned int model; - unsigned int family; - int (*init)(struct arch *arch, char *cpuid); - bool (*ins_is_fused)(struct arch *arch, const char *ins1, - const char *ins2); - struct { - char comment_char; - char skip_functions_char; - } objdump; -}; - -static struct ins_ops call_ops; -static struct ins_ops dec_ops; -static struct ins_ops jump_ops; -static struct ins_ops mov_ops; -static struct ins_ops nop_ops; -static struct ins_ops lock_ops; -static struct ins_ops ret_ops; - -static int arch__grow_instructions(struct arch *arch) -{ - struct ins *new_instructions; - size_t new_nr_allocated; - - if (arch->nr_instructions_allocated == 0 && arch->instructions) - goto grow_from_non_allocated_table; - - new_nr_allocated = arch->nr_instructions_allocated + 128; - new_instructions = realloc(arch->instructions, new_nr_allocated * sizeof(struct ins)); - if (new_instructions == NULL) - return -1; - -out_update_instructions: - arch->instructions = new_instructions; - arch->nr_instructions_allocated = new_nr_allocated; - return 0; - -grow_from_non_allocated_table: - new_nr_allocated = arch->nr_instructions + 128; - new_instructions = calloc(new_nr_allocated, sizeof(struct ins)); - if (new_instructions == NULL) - return -1; - - memcpy(new_instructions, arch->instructions, arch->nr_instructions); - goto out_update_instructions; -} - -static int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops) -{ - struct ins *ins; - - if (arch->nr_instructions == arch->nr_instructions_allocated && - arch__grow_instructions(arch)) - return -1; - - ins = &arch->instructions[arch->nr_instructions]; - ins->name = strdup(name); - if (!ins->name) - return -1; +/* global annotation options */ +struct annotation_options annotate_opts; - ins->ops = ops; - arch->nr_instructions++; +/* Data type collection debug statistics */ +struct annotated_data_stat ann_data_stat; +LIST_HEAD(ann_insn_stat); - 
ins__sort(arch); - return 0; -} - -#include "arch/arc/annotate/instructions.c" -#include "arch/arm/annotate/instructions.c" -#include "arch/arm64/annotate/instructions.c" -#include "arch/csky/annotate/instructions.c" -#include "arch/loongarch/annotate/instructions.c" -#include "arch/mips/annotate/instructions.c" -#include "arch/x86/annotate/instructions.c" -#include "arch/powerpc/annotate/instructions.c" -#include "arch/riscv64/annotate/instructions.c" -#include "arch/s390/annotate/instructions.c" -#include "arch/sparc/annotate/instructions.c" - -static struct arch architectures[] = { - { - .name = "arc", - .init = arc__annotate_init, - }, - { - .name = "arm", - .init = arm__annotate_init, - }, - { - .name = "arm64", - .init = arm64__annotate_init, - }, - { - .name = "csky", - .init = csky__annotate_init, - }, - { - .name = "mips", - .init = mips__annotate_init, - .objdump = { - .comment_char = '#', - }, - }, - { - .name = "x86", - .init = x86__annotate_init, - .instructions = x86__instructions, - .nr_instructions = ARRAY_SIZE(x86__instructions), - .insn_suffix = "bwlq", - .objdump = { - .comment_char = '#', - }, +/* Pseudo data types */ +struct annotated_data_type stackop_type = { + .self = { + .type_name = (char *)"(stack operation)", + .children = LIST_HEAD_INIT(stackop_type.self.children), }, - { - .name = "powerpc", - .init = powerpc__annotate_init, - }, - { - .name = "riscv64", - .init = riscv64__annotate_init, - }, - { - .name = "s390", - .init = s390__annotate_init, - .objdump = { - .comment_char = '#', - }, - }, - { - .name = "sparc", - .init = sparc__annotate_init, - .objdump = { - .comment_char = '#', - }, - }, - { - .name = "loongarch", - .init = loongarch__annotate_init, - .objdump = { - .comment_char = '#', - }, - }, -}; - -static void ins__delete(struct ins_operands *ops) -{ - if (ops == NULL) - return; - zfree(&ops->source.raw); - zfree(&ops->source.name); - zfree(&ops->target.raw); - zfree(&ops->target.name); -} - -static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name) -{ - return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw); -} - -int ins__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name) -{ - if (ins->ops->scnprintf) - return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name); - - return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); -} - -bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2) -{ - if (!arch || !arch->ins_is_fused) - return false; - - return arch->ins_is_fused(arch, ins1, ins2); -} - -static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms) -{ - char *endptr, *tok, *name; - struct map *map = ms->map; - struct addr_map_symbol target = { - .ms = { .map = map, }, - }; - - ops->target.addr = strtoull(ops->raw, &endptr, 16); - - name = strchr(endptr, '<'); - if (name == NULL) - goto indirect_call; - - name++; - - if (arch->objdump.skip_functions_char && - strchr(name, arch->objdump.skip_functions_char)) - return -1; - - tok = strchr(name, '>'); - if (tok == NULL) - return -1; - - *tok = '\0'; - ops->target.name = strdup(name); - *tok = '>'; - - if (ops->target.name == NULL) - return -1; -find_target: - target.addr = map__objdump_2mem(map, ops->target.addr); - - if (maps__find_ams(ms->maps, &target) == 0 && - map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) - ops->target.sym = target.ms.sym; - - return 0; - -indirect_call: - 
tok = strchr(endptr, '*'); - if (tok != NULL) { - endptr++; - - /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx). - * Do not parse such instruction. */ - if (strstr(endptr, "(%r") == NULL) - ops->target.addr = strtoull(endptr, NULL, 16); - } - goto find_target; -} - -static int call__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name) -{ - if (ops->target.sym) - return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); - - if (ops->target.addr == 0) - return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); - - if (ops->target.name) - return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.name); - - return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops->target.addr); -} - -static struct ins_ops call_ops = { - .parse = call__parse, - .scnprintf = call__scnprintf, -}; - -bool ins__is_call(const struct ins *ins) -{ - return ins->ops == &call_ops || ins->ops == &s390_call_ops || ins->ops == &loongarch_call_ops; -} - -/* - * Prevents from matching commas in the comment section, e.g.: - * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast - * - * and skip comma as part of function arguments, e.g.: - * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc> - */ -static inline const char *validate_comma(const char *c, struct ins_operands *ops) -{ - if (ops->raw_comment && c > ops->raw_comment) - return NULL; - - if (ops->raw_func_start && c > ops->raw_func_start) - return NULL; - - return c; -} - -static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms) -{ - struct map *map = ms->map; - struct symbol *sym = ms->sym; - struct addr_map_symbol target = { - .ms = { .map = map, }, - }; - const char *c = strchr(ops->raw, ','); - u64 start, end; - - ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); - ops->raw_func_start = strchr(ops->raw, '<'); - - c = validate_comma(c, ops); - - /* - * Examples of lines to parse for the _cpp_lex_token@@Base - * function: - * - * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92> - * 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72> - * - * The first is a jump to an offset inside the same function, - * the second is to another function, i.e. that 0xa72 is an - * offset in the cpp_named_operator2name@@base function. - */ - /* - * skip over possible up to 2 operands to get to address, e.g.: - * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0> - */ - if (c++ != NULL) { - ops->target.addr = strtoull(c, NULL, 16); - if (!ops->target.addr) { - c = strchr(c, ','); - c = validate_comma(c, ops); - if (c++ != NULL) - ops->target.addr = strtoull(c, NULL, 16); - } - } else { - ops->target.addr = strtoull(ops->raw, NULL, 16); - } - - target.addr = map__objdump_2mem(map, ops->target.addr); - start = map__unmap_ip(map, sym->start); - end = map__unmap_ip(map, sym->end); - - ops->target.outside = target.addr < start || target.addr > end; - - /* - * FIXME: things like this in _cpp_lex_token (gcc's cc1 program): - - cpp_named_operator2name@@Base+0xa72 - - * Point to a place that is after the cpp_named_operator2name - * boundaries, i.e. in the ELF symbol table for cc1 - * cpp_named_operator2name is marked as being 32-bytes long, but it in - * fact is much larger than that, so we seem to need a symbols__find() - * routine that looks for >= current->start and < next_symbol->start, - * possibly just for C++ objects? 
- * - * For now lets just make some progress by marking jumps to outside the - * current function as call like. - * - * Actual navigation will come next, with further understanding of how - * the symbol searching and disassembly should be done. - */ - if (maps__find_ams(ms->maps, &target) == 0 && - map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) - ops->target.sym = target.ms.sym; - - if (!ops->target.outside) { - ops->target.offset = target.addr - start; - ops->target.offset_avail = true; - } else { - ops->target.offset_avail = false; - } - - return 0; -} - -static int jump__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name) -{ - const char *c; - - if (!ops->target.addr || ops->target.offset < 0) - return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); - - if (ops->target.outside && ops->target.sym != NULL) - return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); - - c = strchr(ops->raw, ','); - c = validate_comma(c, ops); - - if (c != NULL) { - const char *c2 = strchr(c + 1, ','); - - c2 = validate_comma(c2, ops); - /* check for 3-op insn */ - if (c2 != NULL) - c = c2; - c++; - - /* mirror arch objdump's space-after-comma style */ - if (*c == ' ') - c++; - } - - return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name, - ins->name, c ? c - ops->raw : 0, ops->raw, - ops->target.offset); -} - -static struct ins_ops jump_ops = { - .parse = jump__parse, - .scnprintf = jump__scnprintf, -}; - -bool ins__is_jump(const struct ins *ins) -{ - return ins->ops == &jump_ops || ins->ops == &loongarch_jump_ops; -} - -static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) -{ - char *endptr, *name, *t; - - if (strstr(raw, "(%rip)") == NULL) - return 0; - - *addrp = strtoull(comment, &endptr, 16); - if (endptr == comment) - return 0; - name = strchr(endptr, '<'); - if (name == NULL) - return -1; - - name++; - - t = strchr(name, '>'); - if (t == NULL) - return 0; - - *t = '\0'; - *namep = strdup(name); - *t = '>'; - - return 0; -} - -static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms) -{ - ops->locked.ops = zalloc(sizeof(*ops->locked.ops)); - if (ops->locked.ops == NULL) - return 0; - - if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0) - goto out_free_ops; - - ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name); - - if (ops->locked.ins.ops == NULL) - goto out_free_ops; - - if (ops->locked.ins.ops->parse && - ops->locked.ins.ops->parse(arch, ops->locked.ops, ms) < 0) - goto out_free_ops; - - return 0; - -out_free_ops: - zfree(&ops->locked.ops); - return 0; -} - -static int lock__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name) -{ - int printed; - - if (ops->locked.ins.ops == NULL) - return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); - - printed = scnprintf(bf, size, "%-*s ", max_ins_name, ins->name); - return printed + ins__scnprintf(&ops->locked.ins, bf + printed, - size - printed, ops->locked.ops, max_ins_name); -} - -static void lock__delete(struct ins_operands *ops) -{ - struct ins *ins = &ops->locked.ins; - - if (ins->ops && ins->ops->free) - ins->ops->free(ops->locked.ops); - else - ins__delete(ops->locked.ops); - - zfree(&ops->locked.ops); - zfree(&ops->target.raw); - zfree(&ops->target.name); -} - -static struct ins_ops lock_ops = { - .free = lock__delete, - .parse = lock__parse, - .scnprintf = 
lock__scnprintf, -}; - -static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused) -{ - char *s = strchr(ops->raw, ','), *target, *comment, prev; - - if (s == NULL) - return -1; - - *s = '\0'; - - /* - * x86 SIB addressing has something like 0x8(%rax, %rcx, 1) - * then it needs to have the closing parenthesis. - */ - if (strchr(ops->raw, '(')) { - *s = ','; - s = strchr(ops->raw, ')'); - if (s == NULL || s[1] != ',') - return -1; - *++s = '\0'; - } - - ops->source.raw = strdup(ops->raw); - *s = ','; - - if (ops->source.raw == NULL) - return -1; - - target = skip_spaces(++s); - comment = strchr(s, arch->objdump.comment_char); - - if (comment != NULL) - s = comment - 1; - else - s = strchr(s, '\0') - 1; - - while (s > target && isspace(s[0])) - --s; - s++; - prev = *s; - *s = '\0'; - - ops->target.raw = strdup(target); - *s = prev; - - if (ops->target.raw == NULL) - goto out_free_source; - - if (comment == NULL) - return 0; - - comment = skip_spaces(comment); - comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); - comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); - - return 0; - -out_free_source: - zfree(&ops->source.raw); - return -1; -} - -static int mov__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name) -{ - return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name, - ops->source.name ?: ops->source.raw, - ops->target.name ?: ops->target.raw); -} - -static struct ins_ops mov_ops = { - .parse = mov__parse, - .scnprintf = mov__scnprintf, -}; - -static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused) -{ - char *target, *comment, *s, prev; - - target = s = ops->raw; - - while (s[0] != '\0' && !isspace(s[0])) - ++s; - prev = *s; - *s = '\0'; - - ops->target.raw = strdup(target); - *s = prev; - - if (ops->target.raw == NULL) - return -1; - - comment = strchr(s, arch->objdump.comment_char); - if (comment == NULL) - return 0; - - comment = skip_spaces(comment); - comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); - - return 0; -} - -static int dec__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name) -{ - return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, - ops->target.name ?: ops->target.raw); -} - -static struct ins_ops dec_ops = { - .parse = dec__parse, - .scnprintf = dec__scnprintf, }; -static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size, - struct ins_operands *ops __maybe_unused, int max_ins_name) -{ - return scnprintf(bf, size, "%-*s", max_ins_name, "nop"); -} - -static struct ins_ops nop_ops = { - .scnprintf = nop__scnprintf, -}; - -static struct ins_ops ret_ops = { - .scnprintf = ins__raw_scnprintf, +struct annotated_data_type canary_type = { + .self = { + .type_name = (char *)"(stack canary)", + .children = LIST_HEAD_INIT(canary_type.self.children), + }, }; -bool ins__is_ret(const struct ins *ins) -{ - return ins->ops == &ret_ops; -} - -bool ins__is_lock(const struct ins *ins) -{ - return ins->ops == &lock_ops; -} - -static int ins__key_cmp(const void *name, const void *insp) -{ - const struct ins *ins = insp; - - return strcmp(name, ins->name); -} - -static int ins__cmp(const void *a, const void *b) -{ - const struct ins *ia = a; - const struct ins *ib = b; - - return strcmp(ia->name, ib->name); -} - -static void ins__sort(struct arch *arch) -{ 
- const int nmemb = arch->nr_instructions; - - qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp); -} - -static struct ins_ops *__ins__find(struct arch *arch, const char *name) -{ - struct ins *ins; - const int nmemb = arch->nr_instructions; - - if (!arch->sorted_instructions) { - ins__sort(arch); - arch->sorted_instructions = true; - } - - ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); - if (ins) - return ins->ops; - - if (arch->insn_suffix) { - char tmp[32]; - char suffix; - size_t len = strlen(name); - - if (len == 0 || len >= sizeof(tmp)) - return NULL; - - suffix = name[len - 1]; - if (strchr(arch->insn_suffix, suffix) == NULL) - return NULL; - - strcpy(tmp, name); - tmp[len - 1] = '\0'; /* remove the suffix and check again */ - - ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); - } - return ins ? ins->ops : NULL; -} - -static struct ins_ops *ins__find(struct arch *arch, const char *name) -{ - struct ins_ops *ops = __ins__find(arch, name); - - if (!ops && arch->associate_instruction_ops) - ops = arch->associate_instruction_ops(arch, name); - - return ops; -} +#define NO_TYPE ((struct annotated_data_type *)-1UL) -static int arch__key_cmp(const void *name, const void *archp) +/* symbol histogram: key = offset << 16 | evsel->core.idx */ +static size_t sym_hist_hash(long key, void *ctx __maybe_unused) { - const struct arch *arch = archp; - - return strcmp(name, arch->name); -} - -static int arch__cmp(const void *a, const void *b) -{ - const struct arch *aa = a; - const struct arch *ab = b; - - return strcmp(aa->name, ab->name); -} - -static void arch__sort(void) -{ - const int nmemb = ARRAY_SIZE(architectures); - - qsort(architectures, nmemb, sizeof(struct arch), arch__cmp); + return (key >> 16) + (key & 0xffff); } -static struct arch *arch__find(const char *name) +static bool sym_hist_equal(long key1, long key2, void *ctx __maybe_unused) { - const int nmemb = ARRAY_SIZE(architectures); - static bool sorted; - - if (!sorted) { - arch__sort(); - sorted = true; - } - - return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); + return key1 == key2; } static struct annotated_source *annotated_source__new(void) @@ -807,56 +112,37 @@ static struct annotated_source *annotated_source__new(void) static __maybe_unused void annotated_source__delete(struct annotated_source *src) { + struct hashmap_entry *cur; + size_t bkt; + if (src == NULL) return; + + if (src->samples) { + hashmap__for_each_entry(src->samples, cur, bkt) + zfree(&cur->pvalue); + hashmap__free(src->samples); + } zfree(&src->histograms); - zfree(&src->cycles_hist); free(src); } static int annotated_source__alloc_histograms(struct annotated_source *src, - size_t size, int nr_hists) + int nr_hists) { - size_t sizeof_sym_hist; - - /* - * Add buffer of one element for zero length symbol. - * When sample is taken from first instruction of - * zero length symbol, perf still resolves it and - * shows symbol name in perf report and allows to - * annotate it. 
- */ - if (size == 0) - size = 1; + src->nr_histograms = nr_hists; + src->histograms = calloc(nr_hists, sizeof(*src->histograms)); - /* Check for overflow when calculating sizeof_sym_hist */ - if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(struct sym_hist_entry)) + if (src->histograms == NULL) return -1; - sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(struct sym_hist_entry)); - - /* Check for overflow in zalloc argument */ - if (sizeof_sym_hist > SIZE_MAX / nr_hists) - return -1; + src->samples = hashmap__new(sym_hist_hash, sym_hist_equal, NULL); + if (src->samples == NULL) + zfree(&src->histograms); - src->sizeof_sym_hist = sizeof_sym_hist; - src->nr_histograms = nr_hists; - src->histograms = calloc(nr_hists, sizeof_sym_hist) ; return src->histograms ? 0 : -1; } -/* The cycles histogram is lazily allocated. */ -static int symbol__alloc_hist_cycles(struct symbol *sym) -{ - struct annotation *notes = symbol__annotation(sym); - const size_t size = symbol__size(sym); - - notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist)); - if (notes->src->cycles_hist == NULL) - return -1; - return 0; -} - void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); @@ -864,10 +150,12 @@ void symbol__annotate_zero_histograms(struct symbol *sym) annotation__lock(notes); if (notes->src != NULL) { memset(notes->src->histograms, 0, - notes->src->nr_histograms * notes->src->sizeof_sym_hist); - if (notes->src->cycles_hist) - memset(notes->src->cycles_hist, 0, - symbol__size(sym) * sizeof(struct cyc_hist)); + notes->src->nr_histograms * sizeof(*notes->src->histograms)); + hashmap__clear(notes->src->samples); + } + if (notes->branch && notes->branch->cycles_hist) { + memset(notes->branch->cycles_hist, 0, + symbol__size(sym) * sizeof(struct cyc_hist)); } annotation__unlock(notes); } @@ -923,12 +211,14 @@ static int __symbol__account_cycles(struct cyc_hist *ch, } static int __symbol__inc_addr_samples(struct map_symbol *ms, - struct annotated_source *src, int evidx, u64 addr, + struct annotated_source *src, struct evsel *evsel, u64 addr, struct perf_sample *sample) { struct symbol *sym = ms->sym; - unsigned offset; + long hash_key; + u64 offset; struct sym_hist *h; + struct sym_hist_entry *entry; pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map__unmap_ip(ms->map, addr)); @@ -940,41 +230,70 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms, } offset = addr - sym->start; - h = annotated_source__histogram(src, evidx); + h = annotated_source__histogram(src, evsel); if (h == NULL) { pr_debug("%s(%d): ENOMEM! 
sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 ", func: %d\n", __func__, __LINE__, sym->name, sym->start, addr, sym->end, sym->type == STT_FUNC); return -ENOMEM; } + + hash_key = offset << 16 | evsel->core.idx; + if (!hashmap__find(src->samples, hash_key, &entry)) { + entry = zalloc(sizeof(*entry)); + if (entry == NULL) + return -ENOMEM; + + if (hashmap__add(src->samples, hash_key, entry) < 0) + return -ENOMEM; + } + h->nr_samples++; - h->addr[offset].nr_samples++; h->period += sample->period; - h->addr[offset].period += sample->period; + entry->nr_samples++; + entry->period += sample->period; pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64 ", evidx=%d] => nr_samples: %" PRIu64 ", period: %" PRIu64 "\n", - sym->start, sym->name, addr, addr - sym->start, evidx, - h->addr[offset].nr_samples, h->addr[offset].period); + sym->start, sym->name, addr, addr - sym->start, evsel->core.idx, + entry->nr_samples, entry->period); return 0; } -static struct cyc_hist *symbol__cycles_hist(struct symbol *sym) +struct annotated_branch *annotation__get_branch(struct annotation *notes) +{ + if (notes == NULL) + return NULL; + + if (notes->branch == NULL) + notes->branch = zalloc(sizeof(*notes->branch)); + + return notes->branch; +} + +static struct annotated_branch *symbol__find_branch_hist(struct symbol *sym, + unsigned int br_cntr_nr) { struct annotation *notes = symbol__annotation(sym); + struct annotated_branch *branch; + const size_t size = symbol__size(sym); - if (notes->src == NULL) { - notes->src = annotated_source__new(); - if (notes->src == NULL) + branch = annotation__get_branch(notes); + if (branch == NULL) + return NULL; + + if (branch->cycles_hist == NULL) { + branch->cycles_hist = calloc(size, sizeof(struct cyc_hist)); + if (!branch->cycles_hist) return NULL; - goto alloc_cycles_hist; } - if (!notes->src->cycles_hist) { -alloc_cycles_hist: - symbol__alloc_hist_cycles(sym); + if (br_cntr_nr && branch->br_cntr == NULL) { + branch->br_cntr = calloc(br_cntr_nr * size, sizeof(u64)); + if (!branch->br_cntr) + return NULL; } - return notes->src->cycles_hist; + return branch; } struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists) @@ -990,8 +309,7 @@ struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists) if (notes->src->histograms == NULL) { alloc_histograms: - annotated_source__alloc_histograms(notes->src, symbol__size(sym), - nr_hists); + annotated_source__alloc_histograms(notes->src, nr_hists); } return notes->src; @@ -1007,19 +325,48 @@ static int symbol__inc_addr_samples(struct map_symbol *ms, if (sym == NULL) return 0; src = symbol__hists(sym, evsel->evlist->core.nr_entries); - return src ? __symbol__inc_addr_samples(ms, src, evsel->core.idx, addr, sample) : 0; + return src ? 
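/*
 * Illustration, not from the patch: the sample hashmap introduced above
 * packs the instruction offset and the event index into one long key
 * (offset << 16 | evsel->core.idx), so sym_hist_hash() can recombine the
 * two halves and the event index is effectively limited to 16 bits. A
 * sketch of the pack/unpack helpers this scheme implies (hypothetical
 * names, for illustration only):
 *
 *	static inline long sym_hist_key(u64 offset, int evsel_idx)
 *	{
 *		return (long)((offset << 16) | (evsel_idx & 0xffff));
 *	}
 *
 *	static inline u64 key_offset(long key) { return (u64)key >> 16; }
 *	static inline int key_evsel(long key)  { return key & 0xffff; }
 *
 * For example, offset 0x40 of event #2 packs to key 0x400002, which
 * sym_hist_hash() turns back into 0x40 + 2 = 0x42.
 */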
__symbol__inc_addr_samples(ms, src, evsel, addr, sample) : 0; +} + +static int symbol__account_br_cntr(struct annotated_branch *branch, + struct evsel *evsel, + unsigned offset, + u64 br_cntr) +{ + unsigned int br_cntr_nr = evsel__leader(evsel)->br_cntr_nr; + unsigned int base = evsel__leader(evsel)->br_cntr_idx; + unsigned int off = offset * evsel->evlist->nr_br_cntr; + u64 *branch_br_cntr = branch->br_cntr; + unsigned int i, mask, width; + + if (!br_cntr || !branch_br_cntr) + return 0; + + perf_env__find_br_cntr_info(evsel__env(evsel), NULL, &width); + mask = (1L << width) - 1; + for (i = 0; i < br_cntr_nr; i++) { + u64 cntr = (br_cntr >> i * width) & mask; + + branch_br_cntr[off + i + base] += cntr; + if (cntr == mask) + branch_br_cntr[off + i + base] |= ANNOTATION__BR_CNTR_SATURATED_FLAG; + } + + return 0; } -static int symbol__account_cycles(u64 addr, u64 start, - struct symbol *sym, unsigned cycles) +static int symbol__account_cycles(u64 addr, u64 start, struct symbol *sym, + unsigned cycles, struct evsel *evsel, + u64 br_cntr) { - struct cyc_hist *cycles_hist; + struct annotated_branch *branch; unsigned offset; + int ret; if (sym == NULL) return 0; - cycles_hist = symbol__cycles_hist(sym); - if (cycles_hist == NULL) + branch = symbol__find_branch_hist(sym, evsel->evlist->nr_br_cntr); + if (!branch) return -ENOMEM; if (addr < sym->start || addr >= sym->end) return -ERANGE; @@ -1031,15 +378,22 @@ static int symbol__account_cycles(u64 addr, u64 start, start = 0; } offset = addr - sym->start; - return __symbol__account_cycles(cycles_hist, + ret = __symbol__account_cycles(branch->cycles_hist, start ? start - sym->start : 0, offset, cycles, !!start); + + if (ret) + return ret; + + return symbol__account_br_cntr(branch, evsel, offset, br_cntr); } int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, struct addr_map_symbol *start, - unsigned cycles) + unsigned cycles, + struct evsel *evsel, + u64 br_cntr) { u64 saddr = 0; int err; @@ -1065,87 +419,164 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, start ? start->addr : 0, ams->ms.sym ? 
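/*
 * Illustration, not from the patch: symbol__account_br_cntr() above treats
 * the raw br_cntr word as an array of fixed-width fields, one per branch
 * counter: field i is (br_cntr >> i * width) & ((1 << width) - 1), and a
 * field that reads as the all-ones mask is flagged as saturated. A
 * standalone sketch of that unpacking, assuming a 2-bit counter width:
 *
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		uint64_t word = 0xb;	// two 2-bit fields: 3, then 2
 *		unsigned int width = 2, mask = (1u << width) - 1;
 *		unsigned int i;
 *
 *		for (i = 0; i < 2; i++) {
 *			uint64_t cntr = (word >> (i * width)) & mask;
 *
 *			printf("counter %u = %llu%s\n", i,
 *			       (unsigned long long)cntr,
 *			       cntr == mask ? " (saturated)" : "");
 *		}
 *		return 0;
 *	}
 *
 * The real width comes from perf_env__find_br_cntr_info(), as above.
 */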
ams->ms.sym->start + map__start(ams->ms.map) : 0, saddr); - err = symbol__account_cycles(ams->al_addr, saddr, ams->ms.sym, cycles); + err = symbol__account_cycles(ams->al_addr, saddr, ams->ms.sym, cycles, evsel, br_cntr); if (err) pr_debug2("account_cycles failed %d\n", err); return err; } +struct annotation_line *annotated_source__get_line(struct annotated_source *src, + s64 offset) +{ + struct annotation_line *al; + + list_for_each_entry(al, &src->source, node) { + if (al->offset == offset) + return al; + } + return NULL; +} + static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64 end) { + struct annotation_line *al; unsigned n_insn = 0; - u64 offset; - for (offset = start; offset <= end; offset++) { - if (notes->offsets[offset]) - n_insn++; + al = annotated_source__get_line(notes->src, start); + if (al == NULL) + return 0; + + list_for_each_entry_from(al, ¬es->src->source, node) { + if (al->offset == -1) + continue; + if ((u64)al->offset > end) + break; + n_insn++; } return n_insn; } +static void annotated_branch__delete(struct annotated_branch *branch) +{ + if (branch) { + zfree(&branch->cycles_hist); + free(branch->br_cntr); + free(branch); + } +} + static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch) { unsigned n_insn; unsigned int cover_insn = 0; - u64 offset; n_insn = annotation__count_insn(notes, start, end); if (n_insn && ch->num && ch->cycles) { + struct annotation_line *al; + struct annotated_branch *branch; float ipc = n_insn / ((double)ch->cycles / (double)ch->num); /* Hide data when there are too many overlaps. */ if (ch->reset >= 0x7fff) return; - for (offset = start; offset <= end; offset++) { - struct annotation_line *al = notes->offsets[offset]; + al = annotated_source__get_line(notes->src, start); + if (al == NULL) + return; - if (al && al->ipc == 0.0) { - al->ipc = ipc; + list_for_each_entry_from(al, ¬es->src->source, node) { + if (al->offset == -1) + continue; + if ((u64)al->offset > end) + break; + if (al->cycles && al->cycles->ipc == 0.0) { + al->cycles->ipc = ipc; cover_insn++; } } - if (cover_insn) { - notes->hit_cycles += ch->cycles; - notes->hit_insn += n_insn * ch->num; - notes->cover_insn += cover_insn; + branch = annotation__get_branch(notes); + if (cover_insn && branch) { + branch->hit_cycles += ch->cycles; + branch->hit_insn += n_insn * ch->num; + branch->cover_insn += cover_insn; } } } -void annotation__compute_ipc(struct annotation *notes, size_t size) +static int annotation__compute_ipc(struct annotation *notes, size_t size, + struct evsel *evsel) { + unsigned int br_cntr_nr = evsel->evlist->nr_br_cntr; + int err = 0; s64 offset; - if (!notes->src || !notes->src->cycles_hist) - return; + if (!notes->branch || !notes->branch->cycles_hist) + return 0; - notes->total_insn = annotation__count_insn(notes, 0, size - 1); - notes->hit_cycles = 0; - notes->hit_insn = 0; - notes->cover_insn = 0; + notes->branch->total_insn = annotation__count_insn(notes, 0, size - 1); + notes->branch->hit_cycles = 0; + notes->branch->hit_insn = 0; + notes->branch->cover_insn = 0; annotation__lock(notes); for (offset = size - 1; offset >= 0; --offset) { struct cyc_hist *ch; - ch = ¬es->src->cycles_hist[offset]; + ch = ¬es->branch->cycles_hist[offset]; if (ch && ch->cycles) { struct annotation_line *al; + al = annotated_source__get_line(notes->src, offset); + if (al && al->cycles == NULL) { + al->cycles = zalloc(sizeof(*al->cycles)); + if (al->cycles == NULL) { + err = ENOMEM; + break; + } + } if 
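/*
 * Illustration, not from the patch: the IPC that annotation__count_and_fill()
 * stamps on each line is instructions per cycle over the sampled block,
 * ipc = n_insn / (cycles / num). Worked example: a block of 4 instructions
 * whose cycle histogram accumulated 20 cycles over 10 traversals averages
 * 2 cycles per traversal, so ipc = 4 / 2 = 2.0. As a sketch:
 *
 *	static double block_ipc(unsigned int n_insn, uint64_t cycles,
 *				uint64_t num)
 *	{
 *		if (!num || !cycles)
 *			return 0.0;
 *		return n_insn / ((double)cycles / num);
 *	}
 */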
(ch->have_start) annotation__count_and_fill(notes, ch->start, offset, ch); - al = notes->offsets[offset]; if (al && ch->num_aggr) { - al->cycles = ch->cycles_aggr / ch->num_aggr; - al->cycles_max = ch->cycles_max; - al->cycles_min = ch->cycles_min; + al->cycles->avg = ch->cycles_aggr / ch->num_aggr; + al->cycles->max = ch->cycles_max; + al->cycles->min = ch->cycles_min; + } + if (al && notes->branch->br_cntr) { + if (!al->br_cntr) { + al->br_cntr = calloc(br_cntr_nr, sizeof(u64)); + if (!al->br_cntr) { + err = ENOMEM; + break; + } + } + al->num_aggr = ch->num_aggr; + al->br_cntr_nr = br_cntr_nr; + al->evsel = evsel; + memcpy(al->br_cntr, ¬es->branch->br_cntr[offset * br_cntr_nr], + br_cntr_nr * sizeof(u64)); + } + } + } + + if (err) { + while (++offset < (s64)size) { + struct cyc_hist *ch = ¬es->branch->cycles_hist[offset]; + + if (ch && ch->cycles) { + struct annotation_line *al; + + al = annotated_source__get_line(notes->src, offset); + if (al) { + zfree(&al->cycles); + zfree(&al->br_cntr); + } } - notes->have_cycles = true; } } + annotation__unlock(notes); + return 0; } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, @@ -1160,145 +591,11 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *samp return symbol__inc_addr_samples(&he->ms, evsel, ip, sample); } -static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms) -{ - dl->ins.ops = ins__find(arch, dl->ins.name); - - if (!dl->ins.ops) - return; - - if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms) < 0) - dl->ins.ops = NULL; -} - -static int disasm_line__parse(char *line, const char **namep, char **rawp) -{ - char tmp, *name = skip_spaces(line); - - if (name[0] == '\0') - return -1; - - *rawp = name + 1; - - while ((*rawp)[0] != '\0' && !isspace((*rawp)[0])) - ++*rawp; - - tmp = (*rawp)[0]; - (*rawp)[0] = '\0'; - *namep = strdup(name); - - if (*namep == NULL) - goto out; - - (*rawp)[0] = tmp; - *rawp = strim(*rawp); - - return 0; - -out: - return -1; -} - -struct annotate_args { - struct arch *arch; - struct map_symbol ms; - struct evsel *evsel; - struct annotation_options *options; - s64 offset; - char *line; - int line_nr; - char *fileloc; -}; - -static void annotation_line__init(struct annotation_line *al, - struct annotate_args *args, - int nr) -{ - al->offset = args->offset; - al->line = strdup(args->line); - al->line_nr = args->line_nr; - al->fileloc = args->fileloc; - al->data_nr = nr; -} - -static void annotation_line__exit(struct annotation_line *al) -{ - zfree_srcline(&al->path); - zfree(&al->line); -} - -static size_t disasm_line_size(int nr) -{ - struct annotation_line *al; - - return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr)); -} - -/* - * Allocating the disasm annotation line data with - * following structure: - * - * ------------------------------------------- - * struct disasm_line | struct annotation_line - * ------------------------------------------- - * - * We have 'struct annotation_line' member as last member - * of 'struct disasm_line' to have an easy access. 
- */ -static struct disasm_line *disasm_line__new(struct annotate_args *args) -{ - struct disasm_line *dl = NULL; - int nr = 1; - - if (evsel__is_group_event(args->evsel)) - nr = args->evsel->core.nr_members; - - dl = zalloc(disasm_line_size(nr)); - if (!dl) - return NULL; - - annotation_line__init(&dl->al, args, nr); - if (dl->al.line == NULL) - goto out_delete; - - if (args->offset != -1) { - if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) - goto out_free_line; - - disasm_line__init_ins(dl, args->arch, &args->ms); - } - - return dl; - -out_free_line: - zfree(&dl->al.line); -out_delete: - free(dl); - return NULL; -} - -void disasm_line__free(struct disasm_line *dl) -{ - if (dl->ins.ops && dl->ins.ops->free) - dl->ins.ops->free(&dl->ops); - else - ins__delete(&dl->ops); - zfree(&dl->ins.name); - annotation_line__exit(&dl->al); - free(dl); -} - -int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name) -{ - if (raw || !dl->ins.ops) - return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->ops.raw); - - return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); -} void annotation__exit(struct annotation *notes) { annotated_source__delete(notes->src); + annotated_branch__delete(notes->branch); } static struct sharded_mutex *sharded_mutex; @@ -1353,8 +650,7 @@ bool annotation__trylock(struct annotation *notes) return mutex_trylock(mutex); } - -static void annotation_line__add(struct annotation_line *al, struct list_head *head) +void annotation_line__add(struct annotation_line *al, struct list_head *head) { list_add_tail(&al->node, head); } @@ -1464,20 +760,38 @@ static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_wi return 0; } +static struct annotated_data_type * +__hist_entry__get_data_type(struct hist_entry *he, struct arch *arch, + struct debuginfo *dbg, struct disasm_line *dl, + int *type_offset); + +static bool needs_type_info(struct annotated_data_type *data_type) +{ + if (data_type == NULL || data_type == NO_TYPE) + return false; + + if (verbose) + return true; + + return (data_type != &stackop_type) && (data_type != &canary_type); +} + static int -annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, - struct evsel *evsel, u64 len, int min_pcnt, int printed, - int max_lines, struct annotation_line *queue, int addr_fmt_width, - int percent_type) +annotation_line__print(struct annotation_line *al, struct annotation_print_data *apd, + struct annotation_options *opts, int printed, + struct annotation_line *queue) { + struct symbol *sym = apd->he->ms.sym; struct disasm_line *dl = container_of(al, struct disasm_line, al); + struct annotation *notes = symbol__annotation(sym); static const char *prev_line; + int max_lines = opts->max_lines; + int percent_type = opts->percent_type; if (al->offset != -1) { double max_percent = 0.0; int i, nr_percent = 1; const char *color; - struct annotation *notes = symbol__annotation(sym); for (i = 0; i < al->data_nr; i++) { double percent; @@ -1492,19 +806,23 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start if (al->data_nr > nr_percent) nr_percent = al->data_nr; - if (max_percent < min_pcnt) + if (max_percent < opts->min_pcnt) return -1; if (max_lines && printed >= max_lines) return 1; if (queue != NULL) { + struct annotation_options queue_opts = { + .max_lines = 1, + .percent_type = percent_type, + }; + list_for_each_entry_from(queue, ¬es->src->source, node) { if (queue == al) 
break; - annotation_line__print(queue, sym, start, evsel, len, - 0, 0, 1, NULL, addr_fmt_width, - percent_type); + annotation_line__print(queue, apd, &queue_opts, + /*printed=*/0, /*queue=*/NULL); } } @@ -1529,7 +847,31 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start printf(" : "); - disasm_line__print(dl, start, addr_fmt_width); + disasm_line__print(dl, notes->src->start, apd->addr_fmt_width); + + if (opts->code_with_type && apd->dbg) { + struct annotated_data_type *data_type; + int offset = 0; + + data_type = __hist_entry__get_data_type(apd->he, apd->arch, + apd->dbg, dl, &offset); + if (needs_type_info(data_type)) { + char buf[4096]; + + printf("\t\t# data-type: %s", + data_type->self.type_name); + + if (data_type != &stackop_type && + data_type != &canary_type) + printf(" +%#x", offset); + + if (annotated_data_type__get_member_name(data_type, + buf, + sizeof(buf), + offset)) + printf(" (%s)", buf); + } + } /* * Also color the filename and line if needed, with @@ -1547,697 +889,39 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start } else if (max_lines && printed >= max_lines) return 1; else { - int width = symbol_conf.show_total_period ? 12 : 8; + int width = annotation__pcnt_width(notes); if (queue) return -1; - if (evsel__is_group_event(evsel)) - width *= evsel->core.nr_members; - if (!*al->line) printf(" %*s:\n", width, " "); else - printf(" %*s: %-*d %s\n", width, " ", addr_fmt_width, al->line_nr, al->line); - } - - return 0; -} - -/* - * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw) - * which looks like following - * - * 0000000000415500 <_init>: - * 415500: sub $0x8,%rsp - * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0x2f8> - * 41550b: test %rax,%rax - * 41550e: je 415515 <_init+0x15> - * 415510: callq 416e70 <__gmon_start__@plt> - * 415515: add $0x8,%rsp - * 415519: retq - * - * it will be parsed and saved into struct disasm_line as - * <offset> <name> <ops.raw> - * - * The offset will be a relative offset from the start of the symbol and -1 - * means that it's not a disassembly line so should be treated differently. - * The ops.raw part will be parsed further according to type of the instruction. - */ -static int symbol__parse_objdump_line(struct symbol *sym, - struct annotate_args *args, - char *parsed_line, int *line_nr, char **fileloc) -{ - struct map *map = args->ms.map; - struct annotation *notes = symbol__annotation(sym); - struct disasm_line *dl; - char *tmp; - s64 line_ip, offset = -1; - regmatch_t match[2]; - - /* /filename:linenr ? Save line number and ignore. */ - if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { - *line_nr = atoi(parsed_line + match[1].rm_so); - free(*fileloc); - *fileloc = strdup(parsed_line); - return 0; - } - - /* Process hex address followed by ':'. 
*/ - line_ip = strtoull(parsed_line, &tmp, 16); - if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') { - u64 start = map__rip_2objdump(map, sym->start), - end = map__rip_2objdump(map, sym->end); - - offset = line_ip - start; - if ((u64)line_ip < start || (u64)line_ip >= end) - offset = -1; - else - parsed_line = tmp + 1; - } - - args->offset = offset; - args->line = parsed_line; - args->line_nr = *line_nr; - args->fileloc = *fileloc; - args->ms.sym = sym; - - dl = disasm_line__new(args); - (*line_nr)++; - - if (dl == NULL) - return -1; - - if (!disasm_line__has_local_offset(dl)) { - dl->ops.target.offset = dl->ops.target.addr - - map__rip_2objdump(map, sym->start); - dl->ops.target.offset_avail = true; - } - - /* kcore has no symbols, so add the call target symbol */ - if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) { - struct addr_map_symbol target = { - .addr = dl->ops.target.addr, - .ms = { .map = map, }, - }; - - if (!maps__find_ams(args->ms.maps, &target) && - target.ms.sym->start == target.al_addr) - dl->ops.target.sym = target.ms.sym; - } - - annotation_line__add(&dl->al, ¬es->src->source); - return 0; -} - -static __attribute__((constructor)) void symbol__init_regexpr(void) -{ - regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED); -} - -static void delete_last_nop(struct symbol *sym) -{ - struct annotation *notes = symbol__annotation(sym); - struct list_head *list = ¬es->src->source; - struct disasm_line *dl; - - while (!list_empty(list)) { - dl = list_entry(list->prev, struct disasm_line, al.node); - - if (dl->ins.ops) { - if (dl->ins.ops != &nop_ops) - return; - } else { - if (!strstr(dl->al.line, " nop ") && - !strstr(dl->al.line, " nopl ") && - !strstr(dl->al.line, " nopw ")) - return; - } - - list_del_init(&dl->al.node); - disasm_line__free(dl); - } -} - -int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen) -{ - struct dso *dso = map__dso(ms->map); - - BUG_ON(buflen == 0); - - if (errnum >= 0) { - str_error_r(errnum, buf, buflen); - return 0; + printf(" %*s: %-*d %s\n", width, " ", apd->addr_fmt_width, + al->line_nr, al->line); } - switch (errnum) { - case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: { - char bf[SBUILD_ID_SIZE + 15] = " with build id "; - char *build_id_msg = NULL; - - if (dso->has_build_id) { - build_id__sprintf(&dso->bid, bf + 15); - build_id_msg = bf; - } - scnprintf(buf, buflen, - "No vmlinux file%s\nwas found in the path.\n\n" - "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n" - "Please use:\n\n" - " perf buildid-cache -vu vmlinux\n\n" - "or:\n\n" - " --vmlinux vmlinux\n", build_id_msg ?: ""); - } - break; - case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF: - scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation"); - break; - case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP: - scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions."); - break; - case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING: - scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization."); - break; - case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE: - scnprintf(buf, buflen, "Invalid BPF file: %s.", dso->long_name); - break; - case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF: - scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.", - dso->long_name); - break; - default: - scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum); - break; - } - - 
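/*
 * Illustration, not from the patch: the switch above maps perf's private
 * annotate error space. Non-negative values are ordinary errno codes
 * rendered via str_error_r(); the SYMBOL_ANNOTATE_ERRNO__* constants live
 * far below zero so the two ranges cannot collide. The pattern, sketched
 * with hypothetical names and values:
 *
 *	enum my_errno {
 *		MY_ERRNO__SUCCESS = 0,
 *		__MY_ERRNO__START = -10000,
 *		MY_ERRNO__NO_VMLINUX = __MY_ERRNO__START,
 *		MY_ERRNO__ARCH_INIT_REGEXP,
 *	};
 *
 *	static int my_strerror(int errnum, char *buf, size_t buflen)
 *	{
 *		if (errnum >= 0) {
 *			str_error_r(errnum, buf, buflen);
 *			return 0;
 *		}
 *		switch (errnum) {
 *		case MY_ERRNO__NO_VMLINUX:
 *			snprintf(buf, buflen, "no vmlinux found");
 *			break;
 *		default:
 *			snprintf(buf, buflen, "invalid error %d", errnum);
 *			break;
 *		}
 *		return 0;
 *	}
 */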
return 0; -} - -static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size) -{ - char linkname[PATH_MAX]; - char *build_id_filename; - char *build_id_path = NULL; - char *pos; - int len; - - if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS && - !dso__is_kcore(dso)) - return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; - - build_id_filename = dso__build_id_filename(dso, NULL, 0, false); - if (build_id_filename) { - __symbol__join_symfs(filename, filename_size, build_id_filename); - free(build_id_filename); - } else { - if (dso->has_build_id) - return ENOMEM; - goto fallback; - } - - build_id_path = strdup(filename); - if (!build_id_path) - return ENOMEM; - - /* - * old style build-id cache has name of XX/XXXXXXX.. while - * new style has XX/XXXXXXX../{elf,kallsyms,vdso}. - * extract the build-id part of dirname in the new style only. - */ - pos = strrchr(build_id_path, '/'); - if (pos && strlen(pos) < SBUILD_ID_SIZE - 2) - dirname(build_id_path); - - if (dso__is_kcore(dso)) - goto fallback; - - len = readlink(build_id_path, linkname, sizeof(linkname) - 1); - if (len < 0) - goto fallback; - - linkname[len] = '\0'; - if (strstr(linkname, DSO__NAME_KALLSYMS) || - access(filename, R_OK)) { -fallback: - /* - * If we don't have build-ids or the build-id file isn't in the - * cache, or is just a kallsyms file, well, lets hope that this - * DSO is the same as when 'perf record' ran. - */ - if (dso->kernel && dso->long_name[0] == '/') - snprintf(filename, filename_size, "%s", dso->long_name); - else - __symbol__join_symfs(filename, filename_size, dso->long_name); - - mutex_lock(&dso->lock); - if (access(filename, R_OK) && errno == ENOENT && dso->nsinfo) { - char *new_name = dso__filename_with_chroot(dso, filename); - if (new_name) { - strlcpy(filename, new_name, filename_size); - free(new_name); - } - } - mutex_unlock(&dso->lock); - } - - free(build_id_path); - return 0; -} - -#if defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) -#define PACKAGE "perf" -#include <bfd.h> -#include <dis-asm.h> -#include <bpf/bpf.h> -#include <bpf/btf.h> -#include <bpf/libbpf.h> -#include <linux/btf.h> -#include <tools/dis-asm-compat.h> - -static int symbol__disassemble_bpf(struct symbol *sym, - struct annotate_args *args) -{ - struct annotation *notes = symbol__annotation(sym); - struct annotation_options *opts = args->options; - struct bpf_prog_linfo *prog_linfo = NULL; - struct bpf_prog_info_node *info_node; - int len = sym->end - sym->start; - disassembler_ftype disassemble; - struct map *map = args->ms.map; - struct perf_bpil *info_linear; - struct disassemble_info info; - struct dso *dso = map__dso(map); - int pc = 0, count, sub_id; - struct btf *btf = NULL; - char tpath[PATH_MAX]; - size_t buf_size; - int nr_skip = 0; - char *buf; - bfd *bfdf; - int ret; - FILE *s; - - if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO) - return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE; - - pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__, - sym->name, sym->start, sym->end - sym->start); - - memset(tpath, 0, sizeof(tpath)); - perf_exe(tpath, sizeof(tpath)); - - bfdf = bfd_openr(tpath, NULL); - assert(bfdf); - assert(bfd_check_format(bfdf, bfd_object)); - - s = open_memstream(&buf, &buf_size); - if (!s) { - ret = errno; - goto out; - } - init_disassemble_info_compat(&info, s, - (fprintf_ftype) fprintf, - fprintf_styled); - info.arch = bfd_get_arch(bfdf); - info.mach = bfd_get_mach(bfdf); - - info_node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, - 
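/*
 * Illustration, not from the patch: this BPF path points the libopcodes
 * disassembler's fprintf callback at an open_memstream(3) FILE, so each
 * disassemble() call appends text to a growing in-memory buffer, and the
 * loop below slices individual instructions out of it by remembering the
 * previous buffer size. A minimal standalone demonstration of the capture
 * idiom:
 *
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *
 *	int main(void)
 *	{
 *		char *buf = NULL;
 *		size_t len = 0;
 *		FILE *s = open_memstream(&buf, &len);
 *
 *		if (!s)
 *			return 1;
 *		fprintf(s, "insn one\n");
 *		fflush(s);		// buf and len are valid after a flush
 *		printf("captured %zu bytes: %s", len, buf);
 *		fclose(s);
 *		free(buf);
 *		return 0;
 *	}
 */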
dso->bpf_prog.id); - if (!info_node) { - ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF; - goto out; - } - info_linear = info_node->info_linear; - sub_id = dso->bpf_prog.sub_id; - - info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns); - info.buffer_length = info_linear->info.jited_prog_len; - - if (info_linear->info.nr_line_info) - prog_linfo = bpf_prog_linfo__new(&info_linear->info); - - if (info_linear->info.btf_id) { - struct btf_node *node; - - node = perf_env__find_btf(dso->bpf_prog.env, - info_linear->info.btf_id); - if (node) - btf = btf__new((__u8 *)(node->data), - node->data_size); - } - - disassemble_init_for_target(&info); - -#ifdef DISASM_FOUR_ARGS_SIGNATURE - disassemble = disassembler(info.arch, - bfd_big_endian(bfdf), - info.mach, - bfdf); -#else - disassemble = disassembler(bfdf); -#endif - assert(disassemble); - - fflush(s); - do { - const struct bpf_line_info *linfo = NULL; - struct disasm_line *dl; - size_t prev_buf_size; - const char *srcline; - u64 addr; - - addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id]; - count = disassemble(pc, &info); - - if (prog_linfo) - linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo, - addr, sub_id, - nr_skip); - - if (linfo && btf) { - srcline = btf__name_by_offset(btf, linfo->line_off); - nr_skip++; - } else - srcline = NULL; - - fprintf(s, "\n"); - prev_buf_size = buf_size; - fflush(s); - - if (!opts->hide_src_code && srcline) { - args->offset = -1; - args->line = strdup(srcline); - args->line_nr = 0; - args->fileloc = NULL; - args->ms.sym = sym; - dl = disasm_line__new(args); - if (dl) { - annotation_line__add(&dl->al, - ¬es->src->source); - } - } - - args->offset = pc; - args->line = buf + prev_buf_size; - args->line_nr = 0; - args->fileloc = NULL; - args->ms.sym = sym; - dl = disasm_line__new(args); - if (dl) - annotation_line__add(&dl->al, ¬es->src->source); - - pc += count; - } while (count > 0 && pc < len); - - ret = 0; -out: - free(prog_linfo); - btf__free(btf); - fclose(s); - bfd_close(bfdf); - return ret; -} -#else // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) -static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused, - struct annotate_args *args __maybe_unused) -{ - return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF; -} -#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) - -static int -symbol__disassemble_bpf_image(struct symbol *sym, - struct annotate_args *args) -{ - struct annotation *notes = symbol__annotation(sym); - struct disasm_line *dl; - - args->offset = -1; - args->line = strdup("to be implemented"); - args->line_nr = 0; - args->fileloc = NULL; - dl = disasm_line__new(args); - if (dl) - annotation_line__add(&dl->al, ¬es->src->source); - - zfree(&args->line); return 0; } -/* - * Possibly create a new version of line with tabs expanded. Returns the - * existing or new line, storage is updated if a new line is allocated. If - * allocation fails then NULL is returned. - */ -static char *expand_tabs(char *line, char **storage, size_t *storage_len) -{ - size_t i, src, dst, len, new_storage_len, num_tabs; - char *new_line; - size_t line_len = strlen(line); - - for (num_tabs = 0, i = 0; i < line_len; i++) - if (line[i] == '\t') - num_tabs++; - - if (num_tabs == 0) - return line; - - /* - * Space for the line and '\0', less the leading and trailing - * spaces. Each tab may introduce 7 additional spaces. 
- */ - new_storage_len = line_len + 1 + (num_tabs * 7); - - new_line = malloc(new_storage_len); - if (new_line == NULL) { - pr_err("Failure allocating memory for tab expansion\n"); - return NULL; - } - - /* - * Copy regions starting at src and expand tabs. If there are two - * adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces - * are inserted. - */ - for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) { - if (line[i] == '\t') { - len = i - src; - memcpy(&new_line[dst], &line[src], len); - dst += len; - new_line[dst++] = ' '; - while (dst % 8 != 0) - new_line[dst++] = ' '; - src = i + 1; - num_tabs--; - } - } - - /* Expand the last region. */ - len = line_len - src; - memcpy(&new_line[dst], &line[src], len); - dst += len; - new_line[dst] = '\0'; - - free(*storage); - *storage = new_line; - *storage_len = new_storage_len; - return new_line; - -} - -static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) -{ - struct annotation_options *opts = args->options; - struct map *map = args->ms.map; - struct dso *dso = map__dso(map); - char *command; - FILE *file; - char symfs_filename[PATH_MAX]; - struct kcore_extract kce; - bool delete_extract = false; - bool decomp = false; - int lineno = 0; - char *fileloc = NULL; - int nline; - char *line; - size_t line_len; - const char *objdump_argv[] = { - "/bin/sh", - "-c", - NULL, /* Will be the objdump command to run. */ - "--", - NULL, /* Will be the symfs path. */ - NULL, - }; - struct child_process objdump_process; - int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); - - if (err) - return err; - - pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, - symfs_filename, sym->name, map__unmap_ip(map, sym->start), - map__unmap_ip(map, sym->end)); - - pr_debug("annotating [%p] %30s : [%p] %30s\n", - dso, dso->long_name, sym, sym->name); - - if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) { - return symbol__disassemble_bpf(sym, args); - } else if (dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE) { - return symbol__disassemble_bpf_image(sym, args); - } else if (dso__is_kcore(dso)) { - kce.kcore_filename = symfs_filename; - kce.addr = map__rip_2objdump(map, sym->start); - kce.offs = sym->start; - kce.len = sym->end - sym->start; - if (!kcore_extract__create(&kce)) { - delete_extract = true; - strlcpy(symfs_filename, kce.extract_filename, - sizeof(symfs_filename)); - } - } else if (dso__needs_decompress(dso)) { - char tmp[KMOD_DECOMP_LEN]; - - if (dso__decompress_kmodule_path(dso, symfs_filename, - tmp, sizeof(tmp)) < 0) - return -1; - - decomp = true; - strcpy(symfs_filename, tmp); - } - - err = asprintf(&command, - "%s %s%s --start-address=0x%016" PRIx64 - " --stop-address=0x%016" PRIx64 - " -l -d %s %s %s %c%s%c %s%s -C \"$1\"", - opts->objdump_path ?: "objdump", - opts->disassembler_style ? "-M " : "", - opts->disassembler_style ?: "", - map__rip_2objdump(map, sym->start), - map__rip_2objdump(map, sym->end), - opts->show_asm_raw ? "" : "--no-show-raw-insn", - opts->annotate_src ? "-S" : "", - opts->prefix ? "--prefix " : "", - opts->prefix ? '"' : ' ', - opts->prefix ?: "", - opts->prefix ? '"' : ' ', - opts->prefix_strip ? 
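/*
 * Illustration, not from the patch: the asprintf() being assembled here
 * boils down to an objdump invocation restricted to the symbol's address
 * range, along these lines (flags vary with the options in effect, and
 * the addresses shown are only an example):
 *
 *	objdump --start-address=0x0000000000415500 \
 *		--stop-address=0x000000000041551a \
 *		-l -d --no-show-raw-insn -C "$1"
 *
 * "$1" is the symfs path handed to the /bin/sh -c wrapper via
 * objdump_argv[] below, and both addresses come from map__rip_2objdump()
 * so they match objdump's view of the DSO rather than runtime addresses.
 */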
"--prefix-strip=" : "", - opts->prefix_strip ?: ""); - - if (err < 0) { - pr_err("Failure allocating memory for the command to run\n"); - goto out_remove_tmp; - } - - pr_debug("Executing: %s\n", command); - - objdump_argv[2] = command; - objdump_argv[4] = symfs_filename; - - /* Create a pipe to read from for stdout */ - memset(&objdump_process, 0, sizeof(objdump_process)); - objdump_process.argv = objdump_argv; - objdump_process.out = -1; - objdump_process.err = -1; - objdump_process.no_stderr = 1; - if (start_command(&objdump_process)) { - pr_err("Failure starting to run %s\n", command); - err = -1; - goto out_free_command; - } - - file = fdopen(objdump_process.out, "r"); - if (!file) { - pr_err("Failure creating FILE stream for %s\n", command); - /* - * If we were using debug info should retry with - * original binary. - */ - err = -1; - goto out_close_stdout; - } - - /* Storage for getline. */ - line = NULL; - line_len = 0; - - nline = 0; - while (!feof(file)) { - const char *match; - char *expanded_line; - - if (getline(&line, &line_len, file) < 0 || !line) - break; - - /* Skip lines containing "filename:" */ - match = strstr(line, symfs_filename); - if (match && match[strlen(symfs_filename)] == ':') - continue; - - expanded_line = strim(line); - expanded_line = expand_tabs(expanded_line, &line, &line_len); - if (!expanded_line) - break; - - /* - * The source code line number (lineno) needs to be kept in - * across calls to symbol__parse_objdump_line(), so that it - * can associate it with the instructions till the next one. - * See disasm_line__new() and struct disasm_line::line_nr. - */ - if (symbol__parse_objdump_line(sym, args, expanded_line, - &lineno, &fileloc) < 0) - break; - nline++; - } - free(line); - free(fileloc); - - err = finish_command(&objdump_process); - if (err) - pr_err("Error running %s\n", command); - - if (nline == 0) { - err = -1; - pr_err("No output from %s\n", command); - } - - /* - * kallsyms does not have symbol sizes so there may a nop at the end. - * Remove it. - */ - if (dso__is_kcore(dso)) - delete_last_nop(sym); - - fclose(file); - -out_close_stdout: - close(objdump_process.out); - -out_free_command: - free(command); - -out_remove_tmp: - if (decomp) - unlink(symfs_filename); - - if (delete_extract) - kcore_extract__delete(&kce); - - return err; -} - -static void calc_percent(struct sym_hist *sym_hist, - struct hists *hists, +static void calc_percent(struct annotation *notes, + struct evsel *evsel, struct annotation_data *data, s64 offset, s64 end) { + struct hists *hists = evsel__hists(evsel); + struct sym_hist *sym_hist = annotation__histogram(notes, evsel); unsigned int hits = 0; u64 period = 0; while (offset < end) { - hits += sym_hist->addr[offset].nr_samples; - period += sym_hist->addr[offset].period; + struct sym_hist_entry *entry; + + entry = annotated_source__hist_entry(notes->src, evsel, offset); + if (entry) { + hits += entry->nr_samples; + period += entry->period; + } ++offset; } @@ -2274,16 +958,17 @@ static void annotation__calc_percent(struct annotation *notes, end = next ? 
next->offset : len; for_each_group_evsel(evsel, leader) { - struct hists *hists = evsel__hists(evsel); struct annotation_data *data; - struct sym_hist *sym_hist; BUG_ON(i >= al->data_nr); - sym_hist = annotation__histogram(notes, evsel->core.idx); + if (symbol_conf.skip_empty && + evsel__hists(evsel)->stats.nr_samples == 0) + continue; + data = &al->data[i++]; - calc_percent(sym_hist, hists, data, al->offset, end); + calc_percent(notes, evsel, data, al->offset, end); } } } @@ -2295,55 +980,92 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel) annotation__calc_percent(notes, evsel, symbol__size(sym)); } -int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch) +int evsel__get_arch(struct evsel *evsel, struct arch **parch) { - struct symbol *sym = ms->sym; - struct annotation *notes = symbol__annotation(sym); - struct annotate_args args = { - .evsel = evsel, - .options = options, - }; struct perf_env *env = evsel__env(evsel); const char *arch_name = perf_env__arch(env); struct arch *arch; int err; - if (!arch_name) + if (!arch_name) { + *parch = NULL; return errno; + } - args.arch = arch = arch__find(arch_name); + *parch = arch = arch__find(arch_name); if (arch == NULL) { pr_err("%s: unsupported arch %s\n", __func__, arch_name); return ENOTSUP; } - if (parch) - *parch = arch; - if (arch->init) { err = arch->init(arch, env ? env->cpuid : NULL); if (err) { - pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); + pr_err("%s: failed to initialize %s arch priv area\n", + __func__, arch->name); return err; } } + return 0; +} + +int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, + struct arch **parch) +{ + struct symbol *sym = ms->sym; + struct annotation *notes = symbol__annotation(sym); + struct annotate_args args = { + .options = &annotate_opts, + }; + struct arch *arch = NULL; + int err, nr; + + err = evsel__get_arch(evsel, &arch); + if (err) + return err; + + if (parch) + *parch = arch; + + if (notes->src && !list_empty(¬es->src->source)) + return 0; + args.arch = arch; args.ms = *ms; - if (notes->options && notes->options->full_addr) - notes->start = map__objdump_2mem(ms->map, ms->sym->start); + + if (notes->src == NULL) { + notes->src = annotated_source__new(); + if (notes->src == NULL) + return -1; + } + + nr = 0; + if (evsel__is_group_event(evsel)) { + struct evsel *pos; + + for_each_group_evsel(pos, evsel) { + if (symbol_conf.skip_empty && + evsel__hists(pos)->stats.nr_samples == 0) + continue; + nr++; + } + } + notes->src->nr_events = nr ? 
nr : 1; + + if (annotate_opts.full_addr) + notes->src->start = map__objdump_2mem(ms->map, ms->sym->start); else - notes->start = map__rip_2objdump(ms->map, ms->sym->start); + notes->src->start = map__rip_2objdump(ms->map, ms->sym->start); return symbol__disassemble(sym, &args); } -static void insert_source_line(struct rb_root *root, struct annotation_line *al, - struct annotation_options *opts) +static void insert_source_line(struct rb_root *root, struct annotation_line *al) { struct annotation_line *iter; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; + unsigned int percent_type = annotate_opts.percent_type; int i, ret; while (*p != NULL) { @@ -2354,7 +1076,7 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al, if (ret == 0) { for (i = 0; i < al->data_nr; i++) { iter->data[i].percent_sum += annotation_data__percent(&al->data[i], - opts->percent_type); + percent_type); } return; } @@ -2367,7 +1089,7 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al, for (i = 0; i < al->data_nr; i++) { al->data[i].percent_sum = annotation_data__percent(&al->data[i], - opts->percent_type); + percent_type); } rb_link_node(&al->rb_node, parent, p); @@ -2466,13 +1188,17 @@ static void print_summary(struct rb_root *root, const char *filename) static void symbol__annotate_hits(struct symbol *sym, struct evsel *evsel) { struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evsel->core.idx); + struct sym_hist *h = annotation__histogram(notes, evsel); u64 len = symbol__size(sym), offset; - for (offset = 0; offset < len; ++offset) - if (h->addr[offset].nr_samples != 0) + for (offset = 0; offset < len; ++offset) { + struct sym_hist_entry *entry; + + entry = annotated_source__hist_entry(notes->src, evsel, offset); + if (entry && entry->nr_samples != 0) printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2, - sym->start + offset, h->addr[offset].nr_samples); + sym->start + offset, entry->nr_samples); + } printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples); } @@ -2489,9 +1215,9 @@ static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start) return 0; } -int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int hist_entry__annotate_printf(struct hist_entry *he, struct evsel *evsel) { + struct map_symbol *ms = &he->ms; struct map *map = ms->map; struct symbol *sym = ms->sym; struct dso *dso = map__dso(map); @@ -2499,18 +1225,21 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, const char *d_filename; const char *evsel_name = evsel__name(evsel); struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evsel->core.idx); + struct sym_hist *h = annotation__histogram(notes, evsel); struct annotation_line *pos, *queue = NULL; - u64 start = map__rip_2objdump(map, sym->start); - int printed = 2, queue_len = 0, addr_fmt_width; + struct annotation_options *opts = &annotate_opts; + struct annotation_print_data apd = { + .he = he, + .evsel = evsel, + }; + int printed = 2, queue_len = 0; int more = 0; bool context = opts->context; - u64 len; - int width = symbol_conf.show_total_period ? 
12 : 8; + int width = annotation__pcnt_width(notes); int graph_dotted_len; char buf[512]; - filename = strdup(dso->long_name); + filename = strdup(dso__long_name(dso)); if (!filename) return -ENOMEM; @@ -2519,10 +1248,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, else d_filename = basename(filename); - len = symbol__size(sym); - if (evsel__is_group_event(evsel)) { - width *= evsel->core.nr_members; evsel__group_desc(evsel, buf, sizeof(buf)); evsel_name = buf; } @@ -2540,7 +1266,10 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, if (verbose > 0) symbol__annotate_hits(sym, evsel); - addr_fmt_width = annotated_source__addr_fmt_width(¬es->src->source, start); + apd.addr_fmt_width = annotated_source__addr_fmt_width(¬es->src->source, + notes->src->start); + evsel__get_arch(evsel, &apd.arch); + apd.dbg = dso__debuginfo(dso); list_for_each_entry(pos, ¬es->src->source, node) { int err; @@ -2550,9 +1279,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, queue_len = 0; } - err = annotation_line__print(pos, sym, start, evsel, len, - opts->min_pcnt, printed, opts->max_lines, - queue, addr_fmt_width, opts->percent_type); + err = annotation_line__print(pos, &apd, opts, printed, queue); switch (err) { case 0: @@ -2583,6 +1310,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, } } + debuginfo__delete(apd.dbg); free(filename); return more; @@ -2630,7 +1358,7 @@ static void FILE__write_graph(void *fp, int graph) } static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, - struct annotation_options *opts) + struct annotation_print_data *apd) { struct annotation *notes = symbol__annotation(sym); struct annotation_write_ops wops = { @@ -2644,25 +1372,37 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, }; struct annotation_line *al; + if (annotate_opts.code_with_type) { + evsel__get_arch(apd->evsel, &apd->arch); + apd->dbg = dso__debuginfo(map__dso(apd->he->ms.map)); + } + list_for_each_entry(al, ¬es->src->source, node) { - if (annotation_line__filter(al, notes)) + if (annotation_line__filter(al)) continue; - annotation_line__write(al, notes, &wops, opts); + annotation_line__write(al, notes, &wops, apd); fputc('\n', fp); wops.first_line = false; } + if (annotate_opts.code_with_type) + debuginfo__delete(apd->dbg); + return 0; } int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) + struct hist_entry *he) { const char *ev_name = evsel__name(evsel); char buf[1024]; char *filename; int err = -1; FILE *fp; + struct annotation_print_data apd = { + .he = he, + .evsel = evsel, + }; if (asprintf(&filename, "%s.annotation", ms->sym->name) < 0) return -1; @@ -2677,8 +1417,8 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, } fprintf(fp, "%s() %s\nEvent: %s\n\n", - ms->sym->name, map__dso(ms->map)->long_name, ev_name); - symbol__annotate_fprintf2(ms->sym, fp, opts); + ms->sym->name, dso__long_name(map__dso(ms->map)), ev_name); + symbol__annotate_fprintf2(ms->sym, fp, &apd); fclose(fp); err = 0; @@ -2687,24 +1427,33 @@ out_free_filename: return err; } -void symbol__annotate_zero_histogram(struct symbol *sym, int evidx) +void symbol__annotate_zero_histogram(struct symbol *sym, struct evsel *evsel) { struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evidx); + struct sym_hist *h = annotation__histogram(notes, evsel); - memset(h, 0, 
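/*
 * Illustration, not from the patch: the decay path below ages every
 * histogram entry by 7/8 per pass, so stale samples fade geometrically;
 * after n decays a count keeps (7/8)^n of its weight, roughly half after
 * five passes (0.875^5 ~= 0.513). Worked example:
 *
 *	unsigned int n = 64;
 *	int i;
 *
 *	for (i = 0; i < 5; i++)
 *		n = n * 7 / 8;	// 56, 49, 42, 36, 31
 *
 * Integer truncation also guarantees that idle entries eventually reach
 * zero instead of lingering forever.
 */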
notes->src->sizeof_sym_hist); + memset(h, 0, sizeof(*notes->src->histograms) * notes->src->nr_histograms); } -void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) +void symbol__annotate_decay_histogram(struct symbol *sym, struct evsel *evsel) { struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evidx); - int len = symbol__size(sym), offset; + struct sym_hist *h = annotation__histogram(notes, evsel); + struct annotation_line *al; h->nr_samples = 0; - for (offset = 0; offset < len; ++offset) { - h->addr[offset].nr_samples = h->addr[offset].nr_samples * 7 / 8; - h->nr_samples += h->addr[offset].nr_samples; + list_for_each_entry(al, ¬es->src->source, node) { + struct sym_hist_entry *entry; + + if (al->offset == -1) + continue; + + entry = annotated_source__hist_entry(notes->src, evsel, al->offset); + if (entry == NULL) + continue; + + entry->nr_samples = entry->nr_samples * 7 / 8; + h->nr_samples += entry->nr_samples; } } @@ -2716,6 +1465,7 @@ void annotated_source__purge(struct annotated_source *as) list_del_init(&al->node); disasm_line__free(disasm_line(al)); } + as->tried_source = false; } static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp) @@ -2756,63 +1506,56 @@ bool disasm_line__is_valid_local_jump(struct disasm_line *dl, struct symbol *sym return true; } -void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) +static void +annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) { - u64 offset, size = symbol__size(sym); + struct annotation_line *al; /* PLT symbols contain external offsets */ if (strstr(sym->name, "@plt")) return; - for (offset = 0; offset < size; ++offset) { - struct annotation_line *al = notes->offsets[offset]; + list_for_each_entry(al, ¬es->src->source, node) { struct disasm_line *dl; + struct annotation_line *target; dl = disasm_line(al); if (!disasm_line__is_valid_local_jump(dl, sym)) continue; - al = notes->offsets[dl->ops.target.offset]; - + target = annotated_source__get_line(notes->src, + dl->ops.target.offset); /* * FIXME: Oops, no jump target? Buggy disassembler? Or do we * have to adjust to the previous offset? */ - if (al == NULL) + if (target == NULL) continue; - if (++al->jump_sources > notes->max_jump_sources) - notes->max_jump_sources = al->jump_sources; + if (++target->jump_sources > notes->src->max_jump_sources) + notes->src->max_jump_sources = target->jump_sources; } } -void annotation__set_offsets(struct annotation *notes, s64 size) +static void annotation__set_index(struct annotation *notes) { struct annotation_line *al; + struct annotated_source *src = notes->src; - notes->max_line_len = 0; - notes->nr_entries = 0; - notes->nr_asm_entries = 0; + src->widths.max_line_len = 0; + src->nr_entries = 0; + src->nr_asm_entries = 0; - list_for_each_entry(al, ¬es->src->source, node) { + list_for_each_entry(al, &src->source, node) { size_t line_len = strlen(al->line); - if (notes->max_line_len < line_len) - notes->max_line_len = line_len; - al->idx = notes->nr_entries++; - if (al->offset != -1) { - al->idx_asm = notes->nr_asm_entries++; - /* - * FIXME: short term bandaid to cope with assembly - * routines that comes with labels in the same column - * as the address in objdump, sigh. - * - * E.g. 
copy_user_generic_unrolled - */ - if (al->offset < size) - notes->offsets[al->offset] = al; - } else + if (src->widths.max_line_len < line_len) + src->widths.max_line_len = line_len; + al->idx = src->nr_entries++; + if (al->offset != -1) + al->idx_asm = src->nr_asm_entries++; + else al->idx_asm = -1; } } @@ -2843,58 +1586,59 @@ static int annotation__max_ins_name(struct annotation *notes) return max_name; } -void annotation__init_column_widths(struct annotation *notes, struct symbol *sym) +static void +annotation__init_column_widths(struct annotation *notes, struct symbol *sym) { - notes->widths.addr = notes->widths.target = - notes->widths.min_addr = hex_width(symbol__size(sym)); - notes->widths.max_addr = hex_width(sym->end); - notes->widths.jumps = width_jumps(notes->max_jump_sources); - notes->widths.max_ins_name = annotation__max_ins_name(notes); + notes->src->widths.addr = notes->src->widths.target = + notes->src->widths.min_addr = hex_width(symbol__size(sym)); + notes->src->widths.max_addr = hex_width(sym->end); + notes->src->widths.jumps = width_jumps(notes->src->max_jump_sources); + notes->src->widths.max_ins_name = annotation__max_ins_name(notes); } void annotation__update_column_widths(struct annotation *notes) { - if (notes->options->use_offset) - notes->widths.target = notes->widths.min_addr; - else if (notes->options->full_addr) - notes->widths.target = BITS_PER_LONG / 4; + if (annotate_opts.use_offset) + notes->src->widths.target = notes->src->widths.min_addr; + else if (annotate_opts.full_addr) + notes->src->widths.target = BITS_PER_LONG / 4; else - notes->widths.target = notes->widths.max_addr; + notes->src->widths.target = notes->src->widths.max_addr; - notes->widths.addr = notes->widths.target; + notes->src->widths.addr = notes->src->widths.target; - if (notes->options->show_nr_jumps) - notes->widths.addr += notes->widths.jumps + 1; + if (annotate_opts.show_nr_jumps) + notes->src->widths.addr += notes->src->widths.jumps + 1; } void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms) { - notes->options->full_addr = !notes->options->full_addr; + annotate_opts.full_addr = !annotate_opts.full_addr; - if (notes->options->full_addr) - notes->start = map__objdump_2mem(ms->map, ms->sym->start); + if (annotate_opts.full_addr) + notes->src->start = map__objdump_2mem(ms->map, ms->sym->start); else - notes->start = map__rip_2objdump(ms->map, ms->sym->start); + notes->src->start = map__rip_2objdump(ms->map, ms->sym->start); annotation__update_column_widths(notes); } -static void annotation__calc_lines(struct annotation *notes, struct map *map, - struct rb_root *root, - struct annotation_options *opts) +static void annotation__calc_lines(struct annotation *notes, struct map_symbol *ms, + struct rb_root *root) { struct annotation_line *al; struct rb_root tmp_root = RB_ROOT; list_for_each_entry(al, ¬es->src->source, node) { double percent_max = 0.0; + u64 addr; int i; for (i = 0; i < al->data_nr; i++) { double percent; percent = annotation_data__percent(&al->data[i], - opts->percent_type); + annotate_opts.percent_type); if (percent > percent_max) percent_max = percent; @@ -2903,71 +1647,75 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, if (percent_max <= 0.5) continue; - al->path = get_srcline(map__dso(map), notes->start + al->offset, NULL, - false, true, notes->start + al->offset); - insert_source_line(&tmp_root, al, opts); + addr = map__rip_2objdump(ms->map, ms->sym->start); + al->path = get_srcline(map__dso(ms->map), addr + 
al->offset, NULL, + false, true, ms->sym->start + al->offset); + insert_source_line(&tmp_root, al); } resort_source_line(root, &tmp_root); } -static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root, - struct annotation_options *opts) +static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root) { struct annotation *notes = symbol__annotation(ms->sym); - annotation__calc_lines(notes, ms->map, root, opts); + annotation__calc_lines(notes, ms, root); } -int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int hist_entry__tty_annotate2(struct hist_entry *he, struct evsel *evsel) { + struct map_symbol *ms = &he->ms; struct dso *dso = map__dso(ms->map); struct symbol *sym = ms->sym; struct rb_root source_line = RB_ROOT; struct hists *hists = evsel__hists(evsel); + struct annotation_print_data apd = { + .he = he, + .evsel = evsel, + }; char buf[1024]; int err; - err = symbol__annotate2(ms, evsel, opts, NULL); + err = symbol__annotate2(ms, evsel, NULL); if (err) { char msg[BUFSIZ]; - dso->annotate_warned = true; + dso__set_annotate_warned(dso); symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); ui__error("Couldn't annotate %s:\n%s", sym->name, msg); return -1; } - if (opts->print_lines) { - srcline_full_filename = opts->full_path; - symbol__calc_lines(ms, &source_line, opts); - print_summary(&source_line, dso->long_name); + if (annotate_opts.print_lines) { + srcline_full_filename = annotate_opts.full_path; + symbol__calc_lines(ms, &source_line); + print_summary(&source_line, dso__long_name(dso)); } hists__scnprintf_title(hists, buf, sizeof(buf)); fprintf(stdout, "%s, [percent: %s]\n%s() %s\n", - buf, percent_type_str(opts->percent_type), sym->name, dso->long_name); - symbol__annotate_fprintf2(sym, stdout, opts); + buf, percent_type_str(annotate_opts.percent_type), sym->name, dso__long_name(dso)); + symbol__annotate_fprintf2(sym, stdout, &apd); annotated_source__purge(symbol__annotation(sym)->src); return 0; } -int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts) +int hist_entry__tty_annotate(struct hist_entry *he, struct evsel *evsel) { + struct map_symbol *ms = &he->ms; struct dso *dso = map__dso(ms->map); struct symbol *sym = ms->sym; struct rb_root source_line = RB_ROOT; int err; - err = symbol__annotate(ms, evsel, opts, NULL); + err = symbol__annotate(ms, evsel, NULL); if (err) { char msg[BUFSIZ]; - dso->annotate_warned = true; + dso__set_annotate_warned(dso); symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); ui__error("Couldn't annotate %s:\n%s", sym->name, msg); return -1; @@ -2975,13 +1723,13 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, symbol__calc_percent(sym, evsel); - if (opts->print_lines) { - srcline_full_filename = opts->full_path; - symbol__calc_lines(ms, &source_line, opts); - print_summary(&source_line, dso->long_name); + if (annotate_opts.print_lines) { + srcline_full_filename = annotate_opts.full_path; + symbol__calc_lines(ms, &source_line); + print_summary(&source_line, dso__long_name(dso)); } - symbol__annotate_printf(ms, evsel, opts); + hist_entry__annotate_printf(he, evsel); annotated_source__purge(symbol__annotation(sym)->src); @@ -2995,13 +1743,12 @@ bool ui__has_annotation(void) static double annotation_line__max_percent(struct annotation_line *al, - struct annotation *notes, unsigned int percent_type) { double percent_max = 0.0; int i; - for (i = 0; i < notes->nr_events; i++) { + for (i = 0; i < 
al->data_nr; i++) {
 double percent;
 percent = annotation_data__percent(&al->data[i],
@@ -3014,7 +1761,7 @@ static double annotation_line__max_percent(struct annotation_line *al,
 return percent_max;
 }
-static void disasm_line__write(struct disasm_line *dl, struct annotation *notes,
+static int disasm_line__write(struct disasm_line *dl, struct annotation *notes,
 void *obj, char *bf, size_t size,
 void (*obj__printf)(void *obj, const char *fmt, ...),
 void (*obj__write_graph)(void *obj, int graph))
@@ -3042,45 +1789,248 @@ call_like:
 obj__printf(obj, " ");
 }
- disasm_line__scnprintf(dl, bf, size, !notes->options->use_offset, notes->widths.max_ins_name);
+ return disasm_line__scnprintf(dl, bf, size, !annotate_opts.use_offset,
+ notes->src->widths.max_ins_name) + 2;
 }
 static void ipc_coverage_string(char *bf, int size, struct annotation *notes)
 {
 double ipc = 0.0, coverage = 0.0;
+ struct annotated_branch *branch = annotation__get_branch(notes);
- if (notes->hit_cycles)
- ipc = notes->hit_insn / ((double)notes->hit_cycles);
+ if (branch && branch->hit_cycles)
+ ipc = branch->hit_insn / ((double)branch->hit_cycles);
- if (notes->total_insn) {
- coverage = notes->cover_insn * 100.0 /
- ((double)notes->total_insn);
+ if (branch && branch->total_insn) {
+ coverage = branch->cover_insn * 100.0 /
+ ((double)branch->total_insn);
 }
 scnprintf(bf, size, "(Average IPC: %.2f, IPC Coverage: %.1f%%)",
 ipc, coverage);
 }
-static void __annotation_line__write(struct annotation_line *al, struct annotation *notes,
- bool first_line, bool current_entry, bool change_color, int width,
- void *obj, unsigned int percent_type,
- int (*obj__set_color)(void *obj, int color),
- void (*obj__set_percent_color)(void *obj, double percent, bool current),
- int (*obj__set_jumps_percent_color)(void *obj, int nr, bool current),
- void (*obj__printf)(void *obj, const char *fmt, ...),
- void (*obj__write_graph)(void *obj, int graph))
+int annotation_br_cntr_abbr_list(char **str, struct evsel *evsel, bool header)
+{
+ struct evsel *pos;
+ struct strbuf sb;
+
+ if (evsel->evlist->nr_br_cntr <= 0)
+ return -ENOTSUP;
+
+ strbuf_init(&sb, /*hint=*/ 0);
+
+ if (header && strbuf_addf(&sb, "# Branch counter abbr list:\n"))
+ goto err;
+
+ evlist__for_each_entry(evsel->evlist, pos) {
+ if (!(pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS))
+ continue;
+ if (header && strbuf_addf(&sb, "#"))
+ goto err;
+
+ if (strbuf_addf(&sb, " %s = %s\n", pos->name, pos->abbr_name))
+ goto err;
+ }
+
+ if (header && strbuf_addf(&sb, "#"))
+ goto err;
+ if (strbuf_addf(&sb, " '-' No event occurs\n"))
+ goto err;
+
+ if (header && strbuf_addf(&sb, "#"))
+ goto err;
+ if (strbuf_addf(&sb, " '+' Event occurrences may be lost due to branch counter saturation\n"))
+ goto err;
+
+ *str = strbuf_detach(&sb, NULL);
+
+ return 0;
+err:
+ strbuf_release(&sb);
+ return -ENOMEM;
+}
+
+/* Assume the branch counter saturates at 3 */
+#define ANNOTATION_BR_CNTR_SATURATION 3
+
+int annotation_br_cntr_entry(char **str, int br_cntr_nr,
+ u64 *br_cntr, int num_aggr,
+ struct evsel *evsel)
+{
+ struct evsel *pos = evsel ? evlist__first(evsel->evlist) : NULL;
+ bool saturated = false;
+ int i, j, avg, used;
+ struct strbuf sb;
+
+ strbuf_init(&sb, /*hint=*/ 0);
+ for (i = 0; i < br_cntr_nr; i++) {
+ used = 0;
+ avg = ceil((double)(br_cntr[i] & ~ANNOTATION__BR_CNTR_SATURATED_FLAG) /
+ (double)num_aggr);
+
+ /*
+ * A histogram with the abbr name is displayed by default.
+ * With -v, the exact number of branch counter events is displayed.
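+ *
+ * Illustrative sketch (hypothetical counters abbreviated A and B, with
+ * avg 2 for A and a saturated B): the default histogram renders roughly
+ * as |AA  |BBB+|, while -v prints something like A=2 ,B=3+ instead.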
+ */ + if (verbose) { + evlist__for_each_entry_from(evsel->evlist, pos) { + if ((pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) && + (pos->br_cntr_idx == i)) + break; + } + if (strbuf_addstr(&sb, pos->abbr_name)) + goto err; + + if (!br_cntr[i]) { + if (strbuf_addstr(&sb, "=-")) + goto err; + } else { + if (strbuf_addf(&sb, "=%d", avg)) + goto err; + } + if (br_cntr[i] & ANNOTATION__BR_CNTR_SATURATED_FLAG) { + if (strbuf_addch(&sb, '+')) + goto err; + } else { + if (strbuf_addch(&sb, ' ')) + goto err; + } + + if ((i < br_cntr_nr - 1) && strbuf_addch(&sb, ',')) + goto err; + continue; + } + + if (strbuf_addch(&sb, '|')) + goto err; + + if (!br_cntr[i]) { + if (strbuf_addch(&sb, '-')) + goto err; + used++; + } else { + evlist__for_each_entry_from(evsel->evlist, pos) { + if ((pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) && + (pos->br_cntr_idx == i)) + break; + } + if (br_cntr[i] & ANNOTATION__BR_CNTR_SATURATED_FLAG) + saturated = true; + + for (j = 0; j < avg; j++, used++) { + /* Print + if the number of logged events > 3 */ + if (j >= ANNOTATION_BR_CNTR_SATURATION) { + saturated = true; + break; + } + if (strbuf_addstr(&sb, pos->abbr_name)) + goto err; + } + + if (saturated) { + if (strbuf_addch(&sb, '+')) + goto err; + used++; + } + pos = list_next_entry(pos, core.node); + } + + for (j = used; j < ANNOTATION_BR_CNTR_SATURATION + 1; j++) { + if (strbuf_addch(&sb, ' ')) + goto err; + } + } + + if (!verbose && strbuf_addch(&sb, br_cntr_nr ? '|' : ' ')) + goto err; + + *str = strbuf_detach(&sb, NULL); + + return 0; +err: + strbuf_release(&sb); + return -ENOMEM; +} + +struct type_hash_entry { + struct annotated_data_type *type; + int offset; +}; +static int disasm_line__snprint_type_info(struct disasm_line *dl, + char *buf, int len, + struct annotation_print_data *apd) { - double percent_max = annotation_line__max_percent(al, notes, percent_type); - int pcnt_width = annotation__pcnt_width(notes), - cycles_width = annotation__cycles_width(notes); + struct annotated_data_type *data_type = NULL; + struct type_hash_entry *entry = NULL; + char member[256]; + int offset = 0; + int printed; + + scnprintf(buf, len, " "); + + if (!annotate_opts.code_with_type || apd->dbg == NULL) + return 1; + + if (apd->type_hash) { + hashmap__find(apd->type_hash, dl->al.offset, &entry); + if (entry != NULL) { + data_type = entry->type; + offset = entry->offset; + } + } + + if (data_type == NULL) + data_type = __hist_entry__get_data_type(apd->he, apd->arch, apd->dbg, dl, &offset); + + if (apd->type_hash && entry == NULL) { + entry = malloc(sizeof(*entry)); + if (entry != NULL) { + entry->type = data_type; + entry->offset = offset; + hashmap__add(apd->type_hash, dl->al.offset, entry); + } + } + + if (!needs_type_info(data_type)) + return 1; + + printed = scnprintf(buf, len, "\t\t# data-type: %s", data_type->self.type_name); + + if (data_type != &stackop_type && data_type != &canary_type && len > printed) + printed += scnprintf(buf + printed, len - printed, " +%#x", offset); + + if (annotated_data_type__get_member_name(data_type, member, sizeof(member), offset) && + len > printed) { + printed += scnprintf(buf + printed, len - printed, " (%s)", member); + } + return printed; +} + +void annotation_line__write(struct annotation_line *al, struct annotation *notes, + const struct annotation_write_ops *wops, + struct annotation_print_data *apd) +{ + bool current_entry = wops->current_entry; + bool change_color = wops->change_color; + double percent_max = annotation_line__max_percent(al, 
annotate_opts.percent_type); + int width = wops->width; + int pcnt_width = annotation__pcnt_width(notes); + int cycles_width = annotation__cycles_width(notes); bool show_title = false; char bf[256]; int printed; - - if (first_line && (al->offset == -1 || percent_max == 0.0)) { - if (notes->have_cycles) { - if (al->ipc == 0.0 && al->cycles == 0) + void *obj = wops->obj; + int (*obj__set_color)(void *obj, int color) = wops->set_color; + void (*obj__set_percent_color)(void *obj, double percent, bool current) = wops->set_percent_color; + int (*obj__set_jumps_percent_color)(void *obj, int nr, bool current) = wops->set_jumps_percent_color; + void (*obj__printf)(void *obj, const char *fmt, ...) = wops->printf; + void (*obj__write_graph)(void *obj, int graph) = wops->write_graph; + + if (wops->first_line && (al->offset == -1 || percent_max == 0.0)) { + if (notes->branch && al->cycles) { + if (al->cycles->ipc == 0.0 && al->cycles->avg == 0) show_title = true; } else show_title = true; @@ -3089,19 +2039,20 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati if (al->offset != -1 && percent_max != 0.0) { int i; - for (i = 0; i < notes->nr_events; i++) { + for (i = 0; i < al->data_nr; i++) { double percent; - percent = annotation_data__percent(&al->data[i], percent_type); + percent = annotation_data__percent(&al->data[i], + annotate_opts.percent_type); obj__set_percent_color(obj, percent, current_entry); if (symbol_conf.show_total_period) { obj__printf(obj, "%11" PRIu64 " ", al->data[i].he.period); } else if (symbol_conf.show_nr_samples) { - obj__printf(obj, "%6" PRIu64 " ", + obj__printf(obj, "%7" PRIu64 " ", al->data[i].he.nr_samples); } else { - obj__printf(obj, "%6.2f ", percent); + obj__printf(obj, "%7.2f ", percent); } } } else { @@ -3115,19 +2066,20 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati symbol_conf.show_nr_samples ? 
"Samples" : "Percent"); } } + width -= pcnt_width; - if (notes->have_cycles) { - if (al->ipc) - obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->ipc); + if (notes->branch) { + if (al->cycles && al->cycles->ipc) + obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->cycles->ipc); else if (!show_title) obj__printf(obj, "%*s", ANNOTATION__IPC_WIDTH, " "); else obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC"); - if (!notes->options->show_minmax_cycle) { - if (al->cycles) + if (!annotate_opts.show_minmax_cycle) { + if (al->cycles && al->cycles->avg) obj__printf(obj, "%*" PRIu64 " ", - ANNOTATION__CYCLES_WIDTH - 1, al->cycles); + ANNOTATION__CYCLES_WIDTH - 1, al->cycles->avg); else if (!show_title) obj__printf(obj, "%*s", ANNOTATION__CYCLES_WIDTH, " "); @@ -3141,8 +2093,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati scnprintf(str, sizeof(str), "%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")", - al->cycles, al->cycles_min, - al->cycles_max); + al->cycles->avg, al->cycles->min, + al->cycles->max); obj__printf(obj, "%*s ", ANNOTATION__MINMAX_CYCLES_WIDTH - 1, @@ -3157,39 +2109,60 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati "Cycle(min/max)"); } + if (annotate_opts.show_br_cntr) { + if (show_title) { + obj__printf(obj, "%*s ", + ANNOTATION__BR_CNTR_WIDTH, + "Branch Counter"); + } else { + char *buf; + + if (!annotation_br_cntr_entry(&buf, al->br_cntr_nr, al->br_cntr, + al->num_aggr, al->evsel)) { + obj__printf(obj, "%*s ", ANNOTATION__BR_CNTR_WIDTH, buf); + free(buf); + } + } + } + if (show_title && !*al->line) { ipc_coverage_string(bf, sizeof(bf), notes); obj__printf(obj, "%*s", ANNOTATION__AVG_IPC_WIDTH, bf); } } + width -= cycles_width; obj__printf(obj, " "); + width -= 1; if (!*al->line) - obj__printf(obj, "%-*s", width - pcnt_width - cycles_width, " "); + obj__printf(obj, "%-*s", width, " "); else if (al->offset == -1) { - if (al->line_nr && notes->options->show_linenr) - printed = scnprintf(bf, sizeof(bf), "%-*d ", notes->widths.addr + 1, al->line_nr); + if (al->line_nr && annotate_opts.show_linenr) + printed = scnprintf(bf, sizeof(bf), "%-*d ", + notes->src->widths.addr + 1, al->line_nr); else - printed = scnprintf(bf, sizeof(bf), "%-*s ", notes->widths.addr, " "); + printed = scnprintf(bf, sizeof(bf), "%-*s ", + notes->src->widths.addr, " "); obj__printf(obj, bf); - obj__printf(obj, "%-*s", width - printed - pcnt_width - cycles_width + 1, al->line); + width -= printed; + obj__printf(obj, "%-*s", width, al->line); } else { u64 addr = al->offset; int color = -1; - if (!notes->options->use_offset) - addr += notes->start; + if (!annotate_opts.use_offset) + addr += notes->src->start; - if (!notes->options->use_offset) { + if (!annotate_opts.use_offset) { printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr); } else { if (al->jump_sources && - notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { - if (notes->options->show_nr_jumps) { + annotate_opts.offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) { + if (annotate_opts.show_nr_jumps) { int prev; printed = scnprintf(bf, sizeof(bf), "%*d ", - notes->widths.jumps, + notes->src->widths.jumps, al->jump_sources); prev = obj__set_jumps_percent_color(obj, al->jump_sources, current_entry); @@ -3198,15 +2171,15 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati } print_addr: printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ", - notes->widths.target, addr); + notes->src->widths.target, addr); } else if 
(ins__is_call(&disasm_line(al)->ins) && - notes->options->offset_level >= ANNOTATION__OFFSET_CALL) { + annotate_opts.offset_level >= ANNOTATION__OFFSET_CALL) { goto print_addr; - } else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { + } else if (annotate_opts.offset_level == ANNOTATION__MAX_OFFSET_LEVEL) { goto print_addr; } else { printed = scnprintf(bf, sizeof(bf), "%-*s ", - notes->widths.addr, " "); + notes->src->widths.addr, " "); } } @@ -3216,62 +2189,97 @@ print_addr: if (change_color) obj__set_color(obj, color); - disasm_line__write(disasm_line(al), notes, obj, bf, sizeof(bf), obj__printf, obj__write_graph); + width -= printed; - obj__printf(obj, "%-*s", width - pcnt_width - cycles_width - 3 - printed, bf); - } + printed = disasm_line__write(disasm_line(al), notes, obj, bf, sizeof(bf), + obj__printf, obj__write_graph); -} + obj__printf(obj, "%s", bf); + width -= printed; + + disasm_line__snprint_type_info(disasm_line(al), bf, sizeof(bf), apd); + obj__printf(obj, "%-*s", width, bf); + } -void annotation_line__write(struct annotation_line *al, struct annotation *notes, - struct annotation_write_ops *wops, - struct annotation_options *opts) -{ - __annotation_line__write(al, notes, wops->first_line, wops->current_entry, - wops->change_color, wops->width, wops->obj, - opts->percent_type, - wops->set_color, wops->set_percent_color, - wops->set_jumps_percent_color, wops->printf, - wops->write_graph); } int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch) + struct arch **parch) { struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); size_t size = symbol__size(sym); - int nr_pcnt = 1, err; - - notes->offsets = zalloc(size * sizeof(struct annotation_line *)); - if (notes->offsets == NULL) - return ENOMEM; - - if (evsel__is_group_event(evsel)) - nr_pcnt = evsel->core.nr_members; + int err; - err = symbol__annotate(ms, evsel, options, parch); + err = symbol__annotate(ms, evsel, parch); if (err) - goto out_free_offsets; - - notes->options = options; + return err; symbol__calc_percent(sym, evsel); - annotation__set_offsets(notes, size); + annotation__set_index(notes); annotation__mark_jump_targets(notes, sym); - annotation__compute_ipc(notes, size); - annotation__init_column_widths(notes, sym); - notes->nr_events = nr_pcnt; + err = annotation__compute_ipc(notes, size, evsel); + if (err) + return err; + + annotation__init_column_widths(notes, sym); annotation__update_column_widths(notes); sym->annotate2 = 1; return 0; +} -out_free_offsets: - zfree(¬es->offsets); - return err; +const char * const perf_disassembler__strs[] = { + [PERF_DISASM_UNKNOWN] = "unknown", + [PERF_DISASM_LLVM] = "llvm", + [PERF_DISASM_CAPSTONE] = "capstone", + [PERF_DISASM_OBJDUMP] = "objdump", +}; + + +static void annotation_options__add_disassembler(struct annotation_options *options, + enum perf_disassembler dis) +{ + for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers); i++) { + if (options->disassemblers[i] == dis) { + /* Disassembler is already present then don't add again. */ + return; + } + if (options->disassemblers[i] == PERF_DISASM_UNKNOWN) { + /* Found a free slot. */ + options->disassemblers[i] = dis; + return; + } + } + pr_err("Failed to add disassembler %d\n", dis); +} + +static int annotation_options__add_disassemblers_str(struct annotation_options *options, + const char *str) +{ + while (str && *str != '\0') { + const char *comma = strchr(str, ','); + int len = comma ? 
comma - str : (int)strlen(str); + bool match = false; + + for (u8 i = 0; i < ARRAY_SIZE(perf_disassembler__strs); i++) { + const char *dis_str = perf_disassembler__strs[i]; + + if (len == (int)strlen(dis_str) && !strncmp(str, dis_str, len)) { + annotation_options__add_disassembler(options, i); + match = true; + break; + } + } + if (!match) { + pr_err("Invalid disassembler '%.*s'\n", len, str); + return -1; + } + str = comma ? comma + 1 : NULL; + } + return 0; } static int annotation__config(const char *var, const char *value, void *data) @@ -3288,6 +2296,11 @@ static int annotation__config(const char *var, const char *value, void *data) opt->offset_level = ANNOTATION__MAX_OFFSET_LEVEL; else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL) opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; + } else if (!strcmp(var, "annotate.disassemblers")) { + int err = annotation_options__add_disassemblers_str(opt, value); + + if (err) + return err; } else if (!strcmp(var, "annotate.hide_src_code")) { opt->hide_src_code = perf_config_bool("hide_src_code", value); } else if (!strcmp(var, "annotate.jump_arrows")) { @@ -3333,8 +2346,10 @@ static int annotation__config(const char *var, const char *value, void *data) return 0; } -void annotation_options__init(struct annotation_options *opt) +void annotation_options__init(void) { + struct annotation_options *opt = &annotate_opts; + memset(opt, 0, sizeof(*opt)); /* Default values. */ @@ -3343,18 +2358,35 @@ void annotation_options__init(struct annotation_options *opt) opt->annotate_src = true; opt->offset_level = ANNOTATION__OFFSET_JUMP_TARGETS; opt->percent_type = PERCENT_PERIOD_LOCAL; + opt->hide_src_code = true; + opt->hide_src_code_on_title = true; } +void annotation_options__exit(void) +{ + zfree(&annotate_opts.disassembler_style); + zfree(&annotate_opts.objdump_path); +} -void annotation_options__exit(struct annotation_options *opt) +static void annotation_options__default_init_disassemblers(struct annotation_options *options) { - zfree(&opt->disassembler_style); - zfree(&opt->objdump_path); + if (options->disassemblers[0] != PERF_DISASM_UNKNOWN) { + /* Already initialized. 
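+ * (e.g. an order already chosen via the annotate.disassemblers config,
+ * which annotation_config__init() parses before applying these defaults)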
*/
+ return;
+ }
+#ifdef HAVE_LIBLLVM_SUPPORT
+ annotation_options__add_disassembler(options, PERF_DISASM_LLVM);
+#endif
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+ annotation_options__add_disassembler(options, PERF_DISASM_CAPSTONE);
+#endif
+ annotation_options__add_disassembler(options, PERF_DISASM_OBJDUMP);
 }
-void annotation_config__init(struct annotation_options *opt)
+void annotation_config__init(void)
 {
- perf_config(annotation__config, opt);
+ perf_config(annotation__config, &annotate_opts);
+ annotation_options__default_init_disassemblers(&annotate_opts);
 }
 static unsigned int parse_percent_type(char *str1, char *str2)
@@ -3378,10 +2410,9 @@ static unsigned int parse_percent_type(char *str1, char *str2)
 return type;
 }
-int annotate_parse_percent_type(const struct option *opt, const char *_str,
+int annotate_parse_percent_type(const struct option *opt __maybe_unused, const char *_str,
 int unset __maybe_unused)
 {
- struct annotation_options *opts = opt->value;
 unsigned int type;
 char *str1, *str2;
 int err = -1;
@@ -3400,7 +2431,7 @@ int annotate_parse_percent_type(const struct option *opt, const char *_str,
 if (type == (unsigned int) -1)
 type = parse_percent_type(str2, str1);
 if (type != (unsigned int) -1) {
- opts->percent_type = type;
+ annotate_opts.percent_type = type;
 err = 0;
 }
@@ -3409,11 +2440,766 @@ out:
 return err;
 }
-int annotate_check_args(struct annotation_options *args)
+int annotate_check_args(void)
 {
+ struct annotation_options *args = &annotate_opts;
+
 if (args->prefix_strip && !args->prefix) {
 pr_err("--prefix-strip requires --prefix\n");
 return -1;
 }
 return 0;
 }
+
+/*
+ * Get register number and access offset from the given instruction.
+ * It assumes AT&T x86 asm format like OFFSET(REG). Maybe it needs
+ * to revisit the format when it handles different architectures.
+ * Fills @reg and @offset when it returns 0.
+ */
+static int extract_reg_offset(struct arch *arch, const char *str,
+ struct annotated_op_loc *op_loc)
+{
+ char *p;
+ char *regname;
+
+ if (arch->objdump.register_char == 0)
+ return -1;
+
+ /*
+ * It should start from offset, but it's possible to skip 0
+ * in the asm. So 0(%rax) should be the same as (%rax).
+ *
+ * However, it can also start with a segment selector register like
+ * %gs:0x18(%rbx). In that case it should skip that part.
+ */
+ if (*str == arch->objdump.register_char) {
+ if (arch__is(arch, "x86")) {
+ /* FIXME: Handle other segment registers */
+ if (!strncmp(str, "%gs:", 4))
+ op_loc->segment = INSN_SEG_X86_GS;
+ }
+
+ while (*str && !isdigit(*str) &&
+ *str != arch->objdump.memory_ref_char)
+ str++;
+ }
+
+ op_loc->offset = strtol(str, &p, 0);
+
+ p = strchr(p, arch->objdump.register_char);
+ if (p == NULL)
+ return -1;
+
+ regname = strdup(p);
+ if (regname == NULL)
+ return -1;
+
+ op_loc->reg1 = get_dwarf_regnum(regname, arch->e_machine, arch->e_flags);
+ free(regname);
+
+ /* Get the second register */
+ if (op_loc->multi_regs) {
+ p = strchr(p + 1, arch->objdump.register_char);
+ if (p == NULL)
+ return -1;
+
+ regname = strdup(p);
+ if (regname == NULL)
+ return -1;
+
+ op_loc->reg2 = get_dwarf_regnum(regname, arch->e_machine, arch->e_flags);
+ free(regname);
+ }
+ return 0;
+}
+
+/**
+ * annotate_get_insn_location - Get location of instruction
+ * @arch: the architecture info
+ * @dl: the target instruction
+ * @loc: a buffer to save the data
+ *
+ * Get detailed location info (register and offset) in the instruction.
+ * It needs both source and target operand and whether it accesses a
+ * memory location.
The offset field is meaningful only when the
+ * corresponding mem flag is set. The reg2 field is meaningful only
+ * when the multi_regs flag is set.
+ *
+ * Some examples on x86:
+ *
+ * mov (%rax), %rcx # src_reg1 = rax, src_mem = 1, src_offset = 0
+ * # dst_reg1 = rcx, dst_mem = 0
+ *
+ * mov 0x18, %r8 # src_reg1 = -1, src_mem = 0
+ * # dst_reg1 = r8, dst_mem = 0
+ *
+ * mov %rsi, 8(%rbx,%rcx,4) # src_reg1 = rsi, src_mem = 0, src_multi_regs = 0
+ * # dst_reg1 = rbx, dst_reg2 = rcx, dst_mem = 1
+ * # dst_multi_regs = 1, dst_offset = 8
+ */
+int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl,
+ struct annotated_insn_loc *loc)
+{
+ struct ins_operands *ops;
+ struct annotated_op_loc *op_loc;
+ int i;
+
+ if (ins__is_lock(&dl->ins))
+ ops = dl->ops.locked.ops;
+ else
+ ops = &dl->ops;
+
+ if (ops == NULL)
+ return -1;
+
+ memset(loc, 0, sizeof(*loc));
+
+ for_each_insn_op_loc(loc, i, op_loc) {
+ const char *insn_str = ops->source.raw;
+ bool multi_regs = ops->source.multi_regs;
+ bool mem_ref = ops->source.mem_ref;
+
+ if (i == INSN_OP_TARGET) {
+ insn_str = ops->target.raw;
+ multi_regs = ops->target.multi_regs;
+ mem_ref = ops->target.mem_ref;
+ }
+
+ /* Invalidate the register by default */
+ op_loc->reg1 = -1;
+ op_loc->reg2 = -1;
+
+ if (insn_str == NULL) {
+ if (!arch__is(arch, "powerpc"))
+ continue;
+ }
+
+ /*
+ * For powerpc, call the get_powerpc_regs function which extracts the
+ * required fields for op_loc, i.e. reg1, reg2 and offset, from the
+ * raw instruction.
+ */
+ if (arch__is(arch, "powerpc")) {
+ op_loc->mem_ref = mem_ref;
+ op_loc->multi_regs = multi_regs;
+ get_powerpc_regs(dl->raw.raw_insn, !i, op_loc);
+ } else if (strchr(insn_str, arch->objdump.memory_ref_char)) {
+ op_loc->mem_ref = true;
+ op_loc->multi_regs = multi_regs;
+ extract_reg_offset(arch, insn_str, op_loc);
+ } else {
+ char *s, *p = NULL;
+
+ if (arch__is(arch, "x86")) {
+ /* FIXME: Handle other segment registers */
+ if (!strncmp(insn_str, "%gs:", 4)) {
+ op_loc->segment = INSN_SEG_X86_GS;
+ op_loc->offset = strtol(insn_str + 4,
+ &p, 0);
+ if (p && p != insn_str + 4)
+ op_loc->imm = true;
+ continue;
+ }
+ }
+
+ s = strdup(insn_str);
+ if (s == NULL)
+ return -1;
+
+ if (*s == arch->objdump.register_char)
+ op_loc->reg1 = get_dwarf_regnum(s, arch->e_machine, arch->e_flags);
+ else if (*s == arch->objdump.imm_char) {
+ op_loc->offset = strtol(s + 1, &p, 0);
+ if (p && p != s + 1)
+ op_loc->imm = true;
+ }
+ free(s);
+ }
+ }
+
+ return 0;
+}
+
+static struct disasm_line *find_disasm_line(struct symbol *sym, u64 ip,
+ bool allow_update)
+{
+ struct disasm_line *dl;
+ struct annotation *notes;
+
+ notes = symbol__annotation(sym);
+
+ list_for_each_entry(dl, &notes->src->source, al.node) {
+ if (dl->al.offset == -1)
+ continue;
+
+ if (sym->start + dl->al.offset == ip) {
+ /*
+ * llvm-objdump places "lock" on a separate line and
+ * in that case, we want to get the next line.
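+ *
+ * Hypothetical illustration: if offset 0x10 holds only a bare "lock"
+ * line and the access itself is on the next line at offset 0x11, a
+ * lookup for ip == sym->start + 0x10 bumps ip and returns the 0x11 line.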
+ */
+ if (ins__is_lock(&dl->ins) &&
+ *dl->ops.raw == '\0' && allow_update) {
+ ip++;
+ continue;
+ }
+ return dl;
+ }
+ }
+ return NULL;
+}
+
+static struct annotated_item_stat *annotate_data_stat(struct list_head *head,
+ const char *name)
+{
+ struct annotated_item_stat *istat;
+
+ list_for_each_entry(istat, head, list) {
+ if (!strcmp(istat->name, name))
+ return istat;
+ }
+
+ istat = zalloc(sizeof(*istat));
+ if (istat == NULL)
+ return NULL;
+
+ istat->name = strdup(name);
+ if ((istat->name == NULL) || (!strlen(istat->name))) {
+ free(istat);
+ return NULL;
+ }
+
+ list_add_tail(&istat->list, head);
+ return istat;
+}
+
+static bool is_stack_operation(struct arch *arch, struct disasm_line *dl)
+{
+ if (arch__is(arch, "x86")) {
+ if (!strncmp(dl->ins.name, "push", 4) ||
+ !strncmp(dl->ins.name, "pop", 3) ||
+ !strncmp(dl->ins.name, "call", 4) ||
+ !strncmp(dl->ins.name, "ret", 3))
+ return true;
+ }
+
+ return false;
+}
+
+static bool is_stack_canary(struct arch *arch, struct annotated_op_loc *loc)
+{
+ /* On x86_64, %gs:40 is used for the stack canary */
+ if (arch__is(arch, "x86")) {
+ if (loc->segment == INSN_SEG_X86_GS && loc->imm &&
+ loc->offset == 40)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * Returns true if the instruction has a memory operand without
+ * performing a load/store.
+ */
+static bool is_address_gen_insn(struct arch *arch, struct disasm_line *dl)
+{
+ if (arch__is(arch, "x86")) {
+ if (!strncmp(dl->ins.name, "lea", 3))
+ return true;
+ }
+
+ return false;
+}
+
+static struct disasm_line *
+annotation__prev_asm_line(struct annotation *notes, struct disasm_line *curr)
+{
+ struct list_head *sources = &notes->src->source;
+ struct disasm_line *prev;
+
+ if (curr == list_first_entry(sources, struct disasm_line, al.node))
+ return NULL;
+
+ prev = list_prev_entry(curr, al.node);
+ while (prev->al.offset == -1 &&
+ prev != list_first_entry(sources, struct disasm_line, al.node))
+ prev = list_prev_entry(prev, al.node);
+
+ if (prev->al.offset == -1)
+ return NULL;
+
+ return prev;
+}
+
+static struct disasm_line *
+annotation__next_asm_line(struct annotation *notes, struct disasm_line *curr)
+{
+ struct list_head *sources = &notes->src->source;
+ struct disasm_line *next;
+
+ if (curr == list_last_entry(sources, struct disasm_line, al.node))
+ return NULL;
+
+ next = list_next_entry(curr, al.node);
+ while (next->al.offset == -1 &&
+ next != list_last_entry(sources, struct disasm_line, al.node))
+ next = list_next_entry(next, al.node);
+
+ if (next->al.offset == -1)
+ return NULL;
+
+ return next;
+}
+
+u64 annotate_calc_pcrel(struct map_symbol *ms, u64 ip, int offset,
+ struct disasm_line *dl)
+{
+ struct annotation *notes;
+ struct disasm_line *next;
+ u64 addr;
+
+ notes = symbol__annotation(ms->sym);
+ /*
+ * PC-relative addressing starts from the next instruction address,
+ * but the IP is for the current instruction. Since disasm_line
+ * doesn't have the instruction size, calculate it using the next
+ * disasm_line. If it's the last one, we can use the symbol's end
+ * address directly.
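+ *
+ * Worked example (hypothetical offsets): for an insn at offset 0x40
+ * whose successor is at 0x43, ip = sym->start + 0x40 and a displacement
+ * of 0x1234 give addr = ip + 3 + 0x1234 before map__rip_2objdump().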
+ */ + next = annotation__next_asm_line(notes, dl); + if (next == NULL) + addr = ms->sym->end + offset; + else + addr = ip + (next->al.offset - dl->al.offset) + offset; + + return map__rip_2objdump(ms->map, addr); +} + +static struct debuginfo_cache { + struct dso *dso; + struct debuginfo *dbg; +} di_cache; + +void debuginfo_cache__delete(void) +{ + dso__put(di_cache.dso); + di_cache.dso = NULL; + + debuginfo__delete(di_cache.dbg); + di_cache.dbg = NULL; +} + +static struct annotated_data_type * +__hist_entry__get_data_type(struct hist_entry *he, struct arch *arch, + struct debuginfo *dbg, struct disasm_line *dl, + int *type_offset) +{ + struct map_symbol *ms = &he->ms; + struct annotated_insn_loc loc; + struct annotated_op_loc *op_loc; + struct annotated_data_type *mem_type; + struct annotated_item_stat *istat; + int i; + + istat = annotate_data_stat(&ann_insn_stat, dl->ins.name); + if (istat == NULL) { + ann_data_stat.no_insn++; + return NO_TYPE; + } + + if (annotate_get_insn_location(arch, dl, &loc) < 0) { + ann_data_stat.no_insn_ops++; + istat->bad++; + return NO_TYPE; + } + + if (is_stack_operation(arch, dl)) { + istat->good++; + *type_offset = 0; + return &stackop_type; + } + + if (is_address_gen_insn(arch, dl)) { + istat->bad++; + ann_data_stat.no_mem_ops++; + return NO_TYPE; + } + + for_each_insn_op_loc(&loc, i, op_loc) { + struct data_loc_info dloc = { + .arch = arch, + .thread = he->thread, + .ms = ms, + .ip = ms->sym->start + dl->al.offset, + .cpumode = he->cpumode, + .op = op_loc, + .di = dbg, + }; + + if (!op_loc->mem_ref && op_loc->segment == INSN_SEG_NONE) + continue; + + /* PC-relative addressing */ + if (op_loc->reg1 == DWARF_REG_PC) { + dloc.var_addr = annotate_calc_pcrel(ms, dloc.ip, + op_loc->offset, dl); + } + + /* This CPU access in kernel - pretend PC-relative addressing */ + if (dso__kernel(map__dso(ms->map)) && arch__is(arch, "x86") && + op_loc->segment == INSN_SEG_X86_GS && op_loc->imm) { + dloc.var_addr = op_loc->offset; + op_loc->reg1 = DWARF_REG_PC; + } + + mem_type = find_data_type(&dloc); + + if (mem_type == NULL && is_stack_canary(arch, op_loc)) { + istat->good++; + *type_offset = 0; + return &canary_type; + } + + if (mem_type) + istat->good++; + else + istat->bad++; + + if (symbol_conf.annotate_data_sample) { + struct evsel *evsel = hists_to_evsel(he->hists); + + annotated_data_type__update_samples(mem_type, evsel, + dloc.type_offset, + he->stat.nr_events, + he->stat.period); + } + *type_offset = dloc.type_offset; + return mem_type ?: NO_TYPE; + } + + /* retry with a fused instruction */ + return NULL; +} + +/** + * hist_entry__get_data_type - find data type for given hist entry + * @he: hist entry + * + * This function first annotates the instruction at @he->ip and extracts + * register and offset info from it. Then it searches the DWARF debug + * info to get a variable and type information using the address, register, + * and offset. 
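+ *
+ * E.g. (illustrative, assuming x86 AT&T syntax): a sample on
+ * 'mov 0x18(%rax),%rcx' resolves the type DWARF associates with %rax
+ * and reports the member at offset 0x18 within it.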
+ */
+struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he)
+{
+ struct map_symbol *ms = &he->ms;
+ struct evsel *evsel = hists_to_evsel(he->hists);
+ struct arch *arch;
+ struct disasm_line *dl;
+ struct annotated_data_type *mem_type;
+ struct annotated_item_stat *istat;
+ u64 ip = he->ip;
+
+ ann_data_stat.total++;
+
+ if (ms->map == NULL || ms->sym == NULL) {
+ ann_data_stat.no_sym++;
+ return NULL;
+ }
+
+ if (!symbol_conf.init_annotation) {
+ ann_data_stat.no_sym++;
+ return NULL;
+ }
+
+ /*
+ * di_cache holds a pair of values, but code below assumes
+ * di_cache.dso can be compared/updated and di_cache.dbg can be
+ * read/updated independently from each other. That assumption only
+ * holds in single-threaded code.
+ */
+ assert(perf_singlethreaded);
+
+ if (map__dso(ms->map) != di_cache.dso) {
+ dso__put(di_cache.dso);
+ di_cache.dso = dso__get(map__dso(ms->map));
+
+ debuginfo__delete(di_cache.dbg);
+ di_cache.dbg = dso__debuginfo(di_cache.dso);
+ }
+
+ if (di_cache.dbg == NULL) {
+ ann_data_stat.no_dbginfo++;
+ return NULL;
+ }
+
+ /* Make sure it has the disasm of the function */
+ if (symbol__annotate(ms, evsel, &arch) < 0) {
+ ann_data_stat.no_insn++;
+ return NULL;
+ }
+
+ /*
+ * Get a disasm to extract the location from the insn.
+ * This is too slow...
+ */
+ dl = find_disasm_line(ms->sym, ip, /*allow_update=*/true);
+ if (dl == NULL) {
+ ann_data_stat.no_insn++;
+ return NULL;
+ }
+
+retry:
+ mem_type = __hist_entry__get_data_type(he, arch, di_cache.dbg, dl,
+ &he->mem_type_off);
+ if (mem_type)
+ return mem_type == NO_TYPE ? NULL : mem_type;
+
+ /*
+ * Some instructions can be fused and the actual memory access came
+ * from the previous instruction.
+ */
+ if (dl->al.offset > 0) {
+ struct annotation *notes;
+ struct disasm_line *prev_dl;
+
+ notes = symbol__annotation(ms->sym);
+ prev_dl = annotation__prev_asm_line(notes, dl);
+
+ if (prev_dl && ins__is_fused(arch, prev_dl->ins.name, dl->ins.name)) {
+ dl = prev_dl;
+ goto retry;
+ }
+ }
+
+ ann_data_stat.no_mem_ops++;
+ istat = annotate_data_stat(&ann_insn_stat, dl->ins.name);
+ if (istat)
+ istat->bad++;
+ return NULL;
+}
+
+/* Basic block traversal (BFS) data structure */
+struct basic_block_data {
+ struct list_head queue;
+ struct list_head visited;
+};
+
+/*
+ * During the traversal, it needs to know the parent block where the current
+ * block started from. Note that a single basic block can be the parent of
+ * two child basic blocks (in the case of a conditional jump).
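+ *
+ * For illustration: a conditional 'jne' ending block A queues two links
+ * whose ->parent both point at A - one for the jump target and one for
+ * the fall-through line (see process_basic_block()).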
+ */
+struct basic_block_link {
+ struct list_head node;
+ struct basic_block_link *parent;
+ struct annotated_basic_block *bb;
+};
+
+/* Check whether any basic block in the list already has the offset */
+static bool basic_block_has_offset(struct list_head *head, s64 offset)
+{
+ struct basic_block_link *link;
+
+ list_for_each_entry(link, head, node) {
+ s64 begin_offset = link->bb->begin->al.offset;
+ s64 end_offset = link->bb->end->al.offset;
+
+ if (begin_offset <= offset && offset <= end_offset)
+ return true;
+ }
+ return false;
+}
+
+static bool is_new_basic_block(struct basic_block_data *bb_data,
+ struct disasm_line *dl)
+{
+ s64 offset = dl->al.offset;
+
+ if (basic_block_has_offset(&bb_data->visited, offset))
+ return false;
+ if (basic_block_has_offset(&bb_data->queue, offset))
+ return false;
+ return true;
+}
+
+/* Add a basic block starting from dl and link it to the parent */
+static int add_basic_block(struct basic_block_data *bb_data,
+ struct basic_block_link *parent,
+ struct disasm_line *dl)
+{
+ struct annotated_basic_block *bb;
+ struct basic_block_link *link;
+
+ if (dl == NULL)
+ return -1;
+
+ if (!is_new_basic_block(bb_data, dl))
+ return 0;
+
+ bb = zalloc(sizeof(*bb));
+ if (bb == NULL)
+ return -1;
+
+ bb->begin = dl;
+ bb->end = dl;
+ INIT_LIST_HEAD(&bb->list);
+
+ link = malloc(sizeof(*link));
+ if (link == NULL) {
+ free(bb);
+ return -1;
+ }
+
+ link->bb = bb;
+ link->parent = parent;
+ list_add_tail(&link->node, &bb_data->queue);
+ return 0;
+}
+
+/* Returns true when it finds the target in the current basic block */
+static bool process_basic_block(struct basic_block_data *bb_data,
+ struct basic_block_link *link,
+ struct symbol *sym, u64 target)
+{
+ struct disasm_line *dl, *next_dl, *last_dl;
+ struct annotation *notes = symbol__annotation(sym);
+ bool found = false;
+
+ dl = link->bb->begin;
+ /* Check if it's already visited */
+ if (basic_block_has_offset(&bb_data->visited, dl->al.offset))
+ return false;
+
+ last_dl = list_last_entry(&notes->src->source,
+ struct disasm_line, al.node);
+ if (last_dl->al.offset == -1)
+ last_dl = annotation__prev_asm_line(notes, last_dl);
+
+ if (last_dl == NULL)
+ return false;
+
+ list_for_each_entry_from(dl, &notes->src->source, al.node) {
+ /* Skip comment or debug info line */
+ if (dl->al.offset == -1)
+ continue;
+ /* Found the target instruction */
+ if (sym->start + dl->al.offset == target) {
+ found = true;
+ break;
+ }
+ /* End of the function, finish the block */
+ if (dl == last_dl)
+ break;
+ /* 'return' instruction finishes the block */
+ if (ins__is_ret(&dl->ins))
+ break;
+ /* normal instructions are part of the basic block */
+ if (!ins__is_jump(&dl->ins))
+ continue;
+ /* jump to a different function, tail call or return */
+ if (dl->ops.target.outside)
+ break;
+ /* jump instruction creates new basic block(s) */
+ next_dl = find_disasm_line(sym, sym->start + dl->ops.target.offset,
+ /*allow_update=*/false);
+ if (next_dl)
+ add_basic_block(bb_data, link, next_dl);
+
+ /*
+ * FIXME: determine conditional jumps properly.
+ * Conditional jumps create another basic block with the
+ * next disasm line.
+ */
+ if (!strstr(dl->ins.name, "jmp")) {
+ next_dl = annotation__next_asm_line(notes, dl);
+ if (next_dl)
+ add_basic_block(bb_data, link, next_dl);
+ }
+ break;
+
+ }
+ link->bb->end = dl;
+ return found;
+}
+
+/*
+ * When it finds the target basic block, build a proper linked list of basic
+ * blocks by following the parent links recursively.
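+ *
+ * E.g. with a parent chain D -> C -> B -> A (A being the source block),
+ * each list_move() prepends to @head, so the final list reads A, B, C, D
+ * from source to destination.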
+ */ +static void link_found_basic_blocks(struct basic_block_link *link, + struct list_head *head) +{ + while (link) { + struct basic_block_link *parent = link->parent; + + list_move(&link->bb->list, head); + list_del(&link->node); + free(link); + + link = parent; + } +} + +static void delete_basic_blocks(struct basic_block_data *bb_data) +{ + struct basic_block_link *link, *tmp; + + list_for_each_entry_safe(link, tmp, &bb_data->queue, node) { + list_del(&link->node); + zfree(&link->bb); + free(link); + } + + list_for_each_entry_safe(link, tmp, &bb_data->visited, node) { + list_del(&link->node); + zfree(&link->bb); + free(link); + } +} + +/** + * annotate_get_basic_blocks - Get basic blocks for given address range + * @sym: symbol to annotate + * @src: source address + * @dst: destination address + * @head: list head to save basic blocks + * + * This function traverses disasm_lines from @src to @dst and save them in a + * list of annotated_basic_block to @head. It uses BFS to find the shortest + * path between two. The basic_block_link is to maintain parent links so + * that it can build a list of blocks from the start. + */ +int annotate_get_basic_blocks(struct symbol *sym, s64 src, s64 dst, + struct list_head *head) +{ + struct basic_block_data bb_data = { + .queue = LIST_HEAD_INIT(bb_data.queue), + .visited = LIST_HEAD_INIT(bb_data.visited), + }; + struct basic_block_link *link; + struct disasm_line *dl; + int ret = -1; + + dl = find_disasm_line(sym, src, /*allow_update=*/false); + if (dl == NULL) + return -1; + + if (add_basic_block(&bb_data, /*parent=*/NULL, dl) < 0) + return -1; + + /* Find shortest path from src to dst using BFS */ + while (!list_empty(&bb_data.queue)) { + link = list_first_entry(&bb_data.queue, struct basic_block_link, node); + + if (process_basic_block(&bb_data, link, sym, dst)) { + link_found_basic_blocks(link, head); + ret = 0; + break; + } + list_move(&link->node, &bb_data.visited); + } + delete_basic_blocks(&bb_data); + return ret; +} diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 962780559176..d4990bff29a7 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -12,73 +12,39 @@ #include "symbol_conf.h" #include "mutex.h" #include "spark.h" +#include "hashmap.h" +#include "disasm.h" +#include "branch.h" +#include "evsel.h" struct hist_browser_timer; struct hist_entry; -struct ins_ops; struct map; struct map_symbol; struct addr_map_symbol; struct option; struct perf_sample; -struct evsel; struct symbol; - -struct ins { - const char *name; - struct ins_ops *ops; -}; - -struct ins_operands { - char *raw; - char *raw_comment; - char *raw_func_start; - struct { - char *raw; - char *name; - struct symbol *sym; - u64 addr; - s64 offset; - bool offset_avail; - bool outside; - } target; - union { - struct { - char *raw; - char *name; - u64 addr; - } source; - struct { - struct ins ins; - struct ins_operands *ops; - } locked; - }; -}; - -struct arch; - -struct ins_ops { - void (*free)(struct ins_operands *ops); - int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms); - int (*scnprintf)(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name); -}; - -bool ins__is_jump(const struct ins *ins); -bool ins__is_call(const struct ins *ins); -bool ins__is_ret(const struct ins *ins); -bool ins__is_lock(const struct ins *ins); -int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops, int max_ins_name); -bool ins__is_fused(struct arch *arch, const char 
*ins1, const char *ins2); +struct annotated_data_type; #define ANNOTATION__IPC_WIDTH 6 #define ANNOTATION__CYCLES_WIDTH 6 #define ANNOTATION__MINMAX_CYCLES_WIDTH 19 #define ANNOTATION__AVG_IPC_WIDTH 36 +#define ANNOTATION__BR_CNTR_WIDTH 30 #define ANNOTATION_DUMMY_LEN 256 +enum perf_disassembler { + PERF_DISASM_UNKNOWN = 0, + PERF_DISASM_LLVM, + PERF_DISASM_CAPSTONE, + PERF_DISASM_OBJDUMP, +}; +#define MAX_DISASSEMBLERS (PERF_DISASM_OBJDUMP + 1) + struct annotation_options { bool hide_src_code, + hide_src_code_on_title, use_offset, jump_arrows, print_lines, @@ -88,9 +54,13 @@ struct annotation_options { show_nr_jumps, show_minmax_cycle, show_asm_raw, + show_br_cntr, annotate_src, + code_with_type, full_addr; u8 offset_level; + u8 disassemblers[MAX_DISASSEMBLERS]; + u8 disassembler_used; int min_pcnt; int max_lines; int context; @@ -101,6 +71,8 @@ struct annotation_options { unsigned int percent_type; }; +extern struct annotation_options annotate_opts; + enum { ANNOTATION__OFFSET_JUMP_TARGETS = 1, ANNOTATION__OFFSET_CALL, @@ -130,6 +102,13 @@ struct annotation_data { struct sym_hist_entry he; }; +struct cycles_info { + float ipc; + u64 avg; + u64 max; + u64 min; +}; + struct annotation_line { struct list_head node; struct rb_node rb_node; @@ -137,12 +116,13 @@ struct annotation_line { char *line; int line_nr; char *fileloc; - int jump_sources; - float ipc; - u64 cycles; - u64 cycles_max; - u64 cycles_min; char *path; + struct cycles_info *cycles; + int num_aggr; + int br_cntr_nr; + u64 *br_cntr; + struct evsel *evsel; + int jump_sources; u32 idx; int idx_asm; int data_nr; @@ -152,11 +132,18 @@ struct annotation_line { struct disasm_line { struct ins ins; struct ins_operands ops; - + union { + u8 bytes[4]; + u32 raw_insn; + } raw; /* This needs to be at the end. */ struct annotation_line al; }; +extern const char * const perf_disassembler__strs[]; + +void annotation_line__add(struct annotation_line *al, struct list_head *head); + static inline double annotation_data__percent(struct annotation_data *data, unsigned int which) { @@ -198,7 +185,6 @@ static inline bool disasm_line__has_local_offset(const struct disasm_line *dl) */ bool disasm_line__is_valid_local_jump(struct disasm_line *dl, struct symbol *sym); -void disasm_line__free(struct disasm_line *dl); struct annotation_line * annotation_line__next(struct annotation_line *pos, struct list_head *head); @@ -213,25 +199,65 @@ struct annotation_write_ops { void (*write_graph)(void *obj, int graph); }; +struct annotation_print_data { + struct hist_entry *he; + struct evsel *evsel; + struct arch *arch; + struct debuginfo *dbg; + /* save data type info keyed by al->offset */ + struct hashmap *type_hash; + /* It'll be set in hist_entry__annotate_printf() */ + int addr_fmt_width; +}; + void annotation_line__write(struct annotation_line *al, struct annotation *notes, - struct annotation_write_ops *ops, - struct annotation_options *opts); + const struct annotation_write_ops *ops, + struct annotation_print_data *apd); int __annotation__scnprintf_samples_period(struct annotation *notes, char *bf, size_t size, struct evsel *evsel, bool show_freq); -int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name); size_t disasm__fprintf(struct list_head *head, FILE *fp); void symbol__calc_percent(struct symbol *sym, struct evsel *evsel); +/** + * struct sym_hist - symbol histogram information for an event + * + * @nr_samples: Total number of samples. + * @period: Sum of sample periods. 
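+ *
+ * These are per-event aggregates; the per-offset counts live in the
+ * sym_hist_entry objects kept in the annotated_source samples hashmap
+ * (see annotated_source__hist_entry()).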
+ */
 struct sym_hist {
 u64 nr_samples;
 u64 period;
- struct sym_hist_entry addr[];
 };
+/**
+ * struct cyc_hist - (CPU) cycle histogram for a basic block
+ *
+ * @start: Start address of current block (if known).
+ * @cycles: Sum of cycles for the longest basic block.
+ * @cycles_aggr: Total cycles for this address.
+ * @cycles_max: Max cycles for this address.
+ * @cycles_min: Min cycles for this address.
+ * @cycles_spark: History of cycles for the longest basic block.
+ * @num: Number of samples for the longest basic block.
+ * @num_aggr: Total number of samples for this address.
+ * @have_start: Whether the current branch info has a start address.
+ * @reset: Number of resets due to a different start address.
+ *
+ * If a sample has branch_stack and cycles info, it can construct basic blocks
+ * between two adjacent branches. It'd have start and end addresses but
+ * sometimes the start address may not be available. So the cycles are
+ * accounted at the end address. If multiple basic blocks end at the same
+ * address, it will take the longest one.
+ *
+ * The @start, @cycles, @cycles_spark and @num fields are used for the longest
+ * block only. Other fields are used for all cases.
+ *
+ * See __symbol__account_cycles().
+ */
 struct cyc_hist {
 u64 start;
 u64 cycles;
@@ -246,45 +272,42 @@ struct cyc_hist {
 u16 reset;
 };
-/** struct annotated_source - symbols with hits have this attached as in sannotation
+/**
+ * struct annotated_source - symbols with hits have this attached as in annotation
 *
- * @histograms: Array of addr hit histograms per event being monitored
- * nr_histograms: This may not be the same as evsel->evlist->core.nr_entries if
+ * @source: List head for annotated_line (embedded in disasm_line).
+ * @histograms: Array of symbol histograms per event to maintain the total number
+ * of samples and period.
+ * @nr_histograms: This may not be the same as evsel->evlist->core.nr_entries if
 * we have more than a group in a evlist, where we will want
 * to see each group separately, that is why symbol__annotate2()
 * sets src->nr_histograms to evsel->nr_members.
- * @lines: If 'print_lines' is specified, per source code line percentages
- * @source: source parsed from a disassembler like objdump -dS
- * @cyc_hist: Average cycles per basic block
+ * @samples: Hash map of sym_hist_entry. Keyed by event index and offset in symbol.
+ * @nr_events: Number of events in the current output.
+ * @nr_entries: Number of annotated_line in the source list.
+ * @nr_asm_entries: Number of annotated_line with actual asm instruction in the
+ * source list.
+ * @max_jump_sources: Maximum number of jump instructions targeting the same
+ * instruction.
+ * @widths: Precalculated width of each column in the TUI output.
 *
- * lines is allocated, percentages calculated and all sorted by percentage
+ * disasm_lines are allocated, percentages calculated and all sorted by percentage
 * when the annotation is about to be presented, so the percentages are for
 * one of the entries in the histogram array, i.e. for the event/counter being
 * presented. It is deallocated right after symbol__{tui,tty,etc}_annotate
 * returns.
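 *
 * (Illustrative lookup: annotated_source__hist_entry() packs the symbol
 * offset and event index into one key, e.g. offset 0x40 for event 2
 * maps to (0x40 << 16) | 2.)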
*/ struct annotated_source { - struct list_head source; - int nr_histograms; - size_t sizeof_sym_hist; - struct cyc_hist *cycles_hist; - struct sym_hist *histograms; -}; - -struct LOCKABLE annotation { - u64 max_coverage; - u64 start; - u64 hit_cycles; - u64 hit_insn; - unsigned int total_insn; - unsigned int cover_insn; - struct annotation_options *options; - struct annotation_line **offsets; - int nr_events; - int max_jump_sources; + struct list_head source; + struct sym_hist *histograms; + struct hashmap *samples; + int nr_histograms; + int nr_events; int nr_entries; int nr_asm_entries; - u16 max_line_len; + int max_jump_sources; + bool tried_source; + u64 start; struct { u8 addr; u8 jumps; @@ -292,9 +315,48 @@ struct LOCKABLE annotation { u8 min_addr; u8 max_addr; u8 max_ins_name; + u16 max_line_len; } widths; - bool have_cycles; +}; + +struct annotation_line *annotated_source__get_line(struct annotated_source *src, + s64 offset); + +/* A branch counter once saturated */ +#define ANNOTATION__BR_CNTR_SATURATED_FLAG (1ULL << 63) + +/** + * struct annotated_branch - basic block and IPC information for a symbol. + * + * @hit_cycles: Total executed cycles. + * @hit_insn: Total number of instructions executed. + * @total_insn: Number of instructions in the function. + * @cover_insn: Number of distinct, actually executed instructions. + * @cycles_hist: Array of cyc_hist for each instruction. + * @max_coverage: Maximum number of covered basic block (used for block-range). + * @br_cntr: Array of the occurrences of events (branch counters) during a block. + * + * This struct is used by two different codes when the sample has branch stack + * and cycles information. annotation__compute_ipc() calculates average IPC + * using @hit_insn / @hit_cycles. The actual coverage can be calculated using + * @cover_insn / @total_insn. The @cycles_hist can give IPC for each (longest) + * basic block ends at the given address. + * process_basic_block() calculates coverage of instructions (or basic blocks) + * in the function. + */ +struct annotated_branch { + u64 hit_cycles; + u64 hit_insn; + unsigned int total_insn; + unsigned int cover_insn; + struct cyc_hist *cycles_hist; + u64 max_coverage; + u64 *br_cntr; +}; + +struct LOCKABLE annotation { struct annotated_source *src; + struct annotated_branch *branch; }; static inline void annotation__init(struct annotation *notes __maybe_unused) @@ -308,37 +370,51 @@ bool annotation__trylock(struct annotation *notes) EXCLUSIVE_TRYLOCK_FUNCTION(tr static inline int annotation__cycles_width(struct annotation *notes) { - if (notes->have_cycles && notes->options->show_minmax_cycle) + if (notes->branch && annotate_opts.show_minmax_cycle) return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH; - return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; + return notes->branch ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; } static inline int annotation__pcnt_width(struct annotation *notes) { - return (symbol_conf.show_total_period ? 12 : 7) * notes->nr_events; + return (symbol_conf.show_total_period ? 12 : 8) * notes->src->nr_events; } -static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes) +static inline bool annotation_line__filter(struct annotation_line *al) { - return notes->options->hide_src_code && al->offset == -1; + return annotate_opts.hide_src_code && al->offset == -1; +} + +static inline u8 annotation__br_cntr_width(void) +{ + return annotate_opts.show_br_cntr ? 
ANNOTATION__BR_CNTR_WIDTH : 0; } -void annotation__set_offsets(struct annotation *notes, s64 size); -void annotation__compute_ipc(struct annotation *notes, size_t size); -void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym); void annotation__update_column_widths(struct annotation *notes); -void annotation__init_column_widths(struct annotation *notes, struct symbol *sym); void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms); -static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx) +static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, + const struct evsel *evsel) +{ + return &src->histograms[evsel->core.idx]; +} + +static inline struct sym_hist *annotation__histogram(struct annotation *notes, + const struct evsel *evsel) { - return ((void *)src->histograms) + (src->sizeof_sym_hist * idx); + return annotated_source__histogram(notes->src, evsel); } -static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx) +static inline struct sym_hist_entry * +annotated_source__hist_entry(struct annotated_source *src, const struct evsel *evsel, u64 offset) { - return annotated_source__histogram(notes->src, idx); + struct sym_hist_entry *entry; + long key = offset << 16 | evsel->core.idx; + + if (!hashmap__find(src->samples, key, &entry)) + return NULL; + return entry; } static inline struct annotation *symbol__annotation(struct symbol *sym) @@ -349,9 +425,13 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, struct evsel *evsel); +struct annotated_branch *annotation__get_branch(struct annotation *notes); + int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, struct addr_map_symbol *start, - unsigned cycles); + unsigned cycles, + struct evsel *evsel, + u64 br_cntr); int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample, struct evsel *evsel, u64 addr); @@ -361,11 +441,9 @@ void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch); int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options, struct arch **parch); enum symbol_disassemble_errno { @@ -386,49 +464,127 @@ enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP, SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE, SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF, + SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE, __SYMBOL_ANNOTATE_ERRNO__END, }; int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen); -int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *options); -void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); -void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); +void symbol__annotate_zero_histogram(struct symbol *sym, struct evsel *evsel); +void symbol__annotate_decay_histogram(struct symbol *sym, struct evsel *evsel); void annotated_source__purge(struct annotated_source *as); int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, - struct annotation_options *opts); + struct hist_entry *he); bool ui__has_annotation(void); -int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts); - 
-int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts); - -#ifdef HAVE_SLANG_SUPPORT -int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *opts); -#else -static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused, - struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *opts __maybe_unused) -{ - return 0; -} -#endif +int hist_entry__annotate_printf(struct hist_entry *he, struct evsel *evsel); +int hist_entry__tty_annotate(struct hist_entry *he, struct evsel *evsel); +int hist_entry__tty_annotate2(struct hist_entry *he, struct evsel *evsel); -void annotation_options__init(struct annotation_options *opt); -void annotation_options__exit(struct annotation_options *opt); +void annotation_options__init(void); +void annotation_options__exit(void); -void annotation_config__init(struct annotation_options *opt); +void annotation_config__init(void); int annotate_parse_percent_type(const struct option *opt, const char *_str, int unset); -int annotate_check_args(struct annotation_options *args); +int annotate_check_args(void); + +/** + * struct annotated_op_loc - Location info of instruction operand + * @reg1: First register in the operand + * @reg2: Second register in the operand + * @offset: Memory access offset in the operand + * @segment: Segment selector register + * @mem_ref: Whether the operand accesses memory + * @multi_regs: Whether the second register is used + * @imm: Whether the operand is an immediate value (in offset) + */ +struct annotated_op_loc { + int reg1; + int reg2; + int offset; + u8 segment; + bool mem_ref; + bool multi_regs; + bool imm; +}; + +enum annotated_insn_ops { + INSN_OP_SOURCE = 0, + INSN_OP_TARGET = 1, + + INSN_OP_MAX, +}; + +enum annotated_x86_segment { + INSN_SEG_NONE = 0, + + INSN_SEG_X86_CS, + INSN_SEG_X86_DS, + INSN_SEG_X86_ES, + INSN_SEG_X86_FS, + INSN_SEG_X86_GS, + INSN_SEG_X86_SS, +}; + +/** + * struct annotated_insn_loc - Location info of instruction + * @ops: Array of location info for source and target operands + */ +struct annotated_insn_loc { + struct annotated_op_loc ops[INSN_OP_MAX]; +}; + +#define for_each_insn_op_loc(insn_loc, i, op_loc) \ + for (i = INSN_OP_SOURCE, op_loc = &(insn_loc)->ops[i]; \ + i < INSN_OP_MAX; \ + i++, op_loc++) + +/* Get detailed location info in the instruction */ +int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, + struct annotated_insn_loc *loc); + +/* Returns a data type from the sample instruction (if any) */ +struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he); + +struct annotated_item_stat { + struct list_head list; + char *name; + int good; + int bad; +}; +extern struct list_head ann_insn_stat; + +/* Calculate PC-relative address */ +u64 annotate_calc_pcrel(struct map_symbol *ms, u64 ip, int offset, + struct disasm_line *dl); + +/** + * struct annotated_basic_block - Basic block of instructions + * @list: List node + * @begin: start instruction in the block + * @end: end instruction in the block + */ +struct annotated_basic_block { + struct list_head list; + struct disasm_line *begin; + struct disasm_line *end; +}; + +/* Get a list of basic blocks from src to dst addresses */ +int annotate_get_basic_blocks(struct symbol *sym, s64 src, s64 dst, + struct list_head *head); + +void debuginfo_cache__delete(void); + +int annotation_br_cntr_entry(char **str, int br_cntr_nr, u64 
*br_cntr, + int num_aggr, struct evsel *evsel); +int annotation_br_cntr_abbr_list(char **str, struct evsel *evsel, bool header); +int evsel__get_arch(struct evsel *evsel, struct arch **parch); #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/arm-spe-decoder/Build b/tools/perf/util/arm-spe-decoder/Build index f8dae13fc876..ab500e0efe24 100644 --- a/tools/perf/util/arm-spe-decoder/Build +++ b/tools/perf/util/arm-spe-decoder/Build @@ -1 +1 @@ -perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o +perf-util-y += arm-spe-pkt-decoder.o arm-spe-decoder.o diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index ba807071d3c1..9e02b2bdd117 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -28,7 +28,8 @@ static u64 arm_spe_calc_ip(int index, u64 payload) /* Instruction virtual address or Branch target address */ if (index == SPE_ADDR_PKT_HDR_INDEX_INS || - index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) { + index == SPE_ADDR_PKT_HDR_INDEX_BRANCH || + index == SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH) { ns = SPE_ADDR_PKT_GET_NS(payload); el = SPE_ADDR_PKT_GET_EL(payload); @@ -181,6 +182,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) decoder->record.virt_addr = ip; else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) decoder->record.phys_addr = ip; + else if (idx == SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH) + decoder->record.prev_br_tgt = ip; break; case ARM_SPE_COUNTER: if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT) @@ -197,16 +200,76 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) decoder->record.op |= ARM_SPE_OP_ST; else decoder->record.op |= ARM_SPE_OP_LD; - if (SPE_OP_PKT_IS_LDST_SVE(payload)) - decoder->record.op |= ARM_SPE_OP_SVE_LDST; + + if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload)) { + decoder->record.op |= ARM_SPE_OP_GP_REG; + } else if (SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload)) { + decoder->record.op |= ARM_SPE_OP_SIMD_FP; + } else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload)) { + decoder->record.op |= ARM_SPE_OP_UNSPEC_REG; + } else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload)) { + decoder->record.op |= ARM_SPE_OP_NV_SYSREG; + } else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload)) { + decoder->record.op |= ARM_SPE_OP_MTE_TAG; + } else if (SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(payload)) { + if (payload & SPE_OP_PKT_AR) + decoder->record.op |= ARM_SPE_OP_AR; + if (payload & SPE_OP_PKT_EXCL) + decoder->record.op |= ARM_SPE_OP_EXCL; + if (payload & SPE_OP_PKT_AT) + decoder->record.op |= ARM_SPE_OP_ATOMIC; + } else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) { + decoder->record.op |= ARM_SPE_OP_SVE; + if (payload & SPE_OP_PKT_SVE_PRED) + decoder->record.op |= ARM_SPE_OP_PRED; + if (payload & SPE_OP_PKT_SVE_SG) + decoder->record.op |= ARM_SPE_OP_SG; + } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload)) { + decoder->record.op |= ARM_SPE_OP_MEMCPY; + } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload)) { + decoder->record.op |= ARM_SPE_OP_MEMSET; + } else if (SPE_OP_PKT_LDST_SUBCLASS_GCS(payload)) { + decoder->record.op |= ARM_SPE_OP_GCS; + if (payload & SPE_OP_PKT_GCS_COMM) + decoder->record.op |= ARM_SPE_OP_COMM; + } + break; case SPE_OP_PKT_HDR_CLASS_OTHER: decoder->record.op |= ARM_SPE_OP_OTHER; - if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) - decoder->record.op |= ARM_SPE_OP_SVE_OTHER; + if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload)) { + decoder->record.op |= ARM_SPE_OP_SVE | ARM_SPE_OP_DP; + if (payload & 
SPE_OP_PKT_OTHER_FP) + decoder->record.op |= ARM_SPE_OP_FP; + if (payload & SPE_OP_PKT_SVE_PRED) + decoder->record.op |= ARM_SPE_OP_PRED; + } else if (SPE_OP_PKT_OTHER_SUBCLASS_SME(payload)) { + decoder->record.op |= ARM_SPE_OP_SME; + if (payload & SPE_OP_PKT_OTHER_FP) + decoder->record.op |= ARM_SPE_OP_FP; + } else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) { + if (payload & SPE_OP_PKT_OTHER_ASE) + decoder->record.op |= ARM_SPE_OP_ASE; + if (payload & SPE_OP_PKT_OTHER_FP) + decoder->record.op |= ARM_SPE_OP_FP; + if (payload & SPE_OP_PKT_COND) + decoder->record.op |= ARM_SPE_OP_COND; + } break; case SPE_OP_PKT_HDR_CLASS_BR_ERET: decoder->record.op |= ARM_SPE_OP_BRANCH_ERET; + if (payload & SPE_OP_PKT_COND) + decoder->record.op |= ARM_SPE_OP_BR_COND; + if (payload & SPE_OP_PKT_INDIRECT_BRANCH) + decoder->record.op |= ARM_SPE_OP_BR_INDIRECT; + if (payload & SPE_OP_PKT_GCS) + decoder->record.op |= ARM_SPE_OP_BR_GCS; + if (SPE_OP_PKT_CR_BL(payload)) + decoder->record.op |= ARM_SPE_OP_BR_CR_BL; + if (SPE_OP_PKT_CR_RET(payload)) + decoder->record.op |= ARM_SPE_OP_BR_CR_RET; + if (SPE_OP_PKT_CR_NON_BL_RET(payload)) + decoder->record.op |= ARM_SPE_OP_BR_CR_NON_BL_RET; break; default: pr_err("Get packet error!\n"); @@ -214,36 +277,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) } break; case ARM_SPE_EVENTS: - if (payload & BIT(EV_L1D_REFILL)) - decoder->record.type |= ARM_SPE_L1D_MISS; - - if (payload & BIT(EV_L1D_ACCESS)) - decoder->record.type |= ARM_SPE_L1D_ACCESS; - - if (payload & BIT(EV_TLB_WALK)) - decoder->record.type |= ARM_SPE_TLB_MISS; - - if (payload & BIT(EV_TLB_ACCESS)) - decoder->record.type |= ARM_SPE_TLB_ACCESS; - - if (payload & BIT(EV_LLC_MISS)) - decoder->record.type |= ARM_SPE_LLC_MISS; - - if (payload & BIT(EV_LLC_ACCESS)) - decoder->record.type |= ARM_SPE_LLC_ACCESS; - - if (payload & BIT(EV_REMOTE_ACCESS)) - decoder->record.type |= ARM_SPE_REMOTE_ACCESS; - - if (payload & BIT(EV_MISPRED)) - decoder->record.type |= ARM_SPE_BRANCH_MISS; - - if (payload & BIT(EV_PARTIAL_PREDICATE)) - decoder->record.type |= ARM_SPE_SVE_PARTIAL_PRED; - - if (payload & BIT(EV_EMPTY_PREDICATE)) - decoder->record.type |= ARM_SPE_SVE_EMPTY_PRED; - + decoder->record.type = payload; break; case ARM_SPE_DATA_SOURCE: decoder->record.source = payload; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 1443c28545a9..3310e05122f0 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -13,67 +13,112 @@ #include "arm-spe-pkt-decoder.h" -enum arm_spe_sample_type { - ARM_SPE_L1D_ACCESS = 1 << 0, - ARM_SPE_L1D_MISS = 1 << 1, - ARM_SPE_LLC_ACCESS = 1 << 2, - ARM_SPE_LLC_MISS = 1 << 3, - ARM_SPE_TLB_ACCESS = 1 << 4, - ARM_SPE_TLB_MISS = 1 << 5, - ARM_SPE_BRANCH_MISS = 1 << 6, - ARM_SPE_REMOTE_ACCESS = 1 << 7, - ARM_SPE_SVE_PARTIAL_PRED = 1 << 8, - ARM_SPE_SVE_EMPTY_PRED = 1 << 9, -}; +#define ARM_SPE_L1D_ACCESS BIT(EV_L1D_ACCESS) +#define ARM_SPE_L1D_MISS BIT(EV_L1D_REFILL) +#define ARM_SPE_LLC_ACCESS BIT(EV_LLC_ACCESS) +#define ARM_SPE_LLC_MISS BIT(EV_LLC_MISS) +#define ARM_SPE_TLB_ACCESS BIT(EV_TLB_ACCESS) +#define ARM_SPE_TLB_MISS BIT(EV_TLB_WALK) +#define ARM_SPE_BRANCH_MISS BIT(EV_MISPRED) +#define ARM_SPE_BRANCH_NOT_TAKEN BIT(EV_NOT_TAKEN) +#define ARM_SPE_REMOTE_ACCESS BIT(EV_REMOTE_ACCESS) +#define ARM_SPE_SVE_PARTIAL_PRED BIT(EV_PARTIAL_PREDICATE) +#define ARM_SPE_SVE_EMPTY_PRED BIT(EV_EMPTY_PREDICATE) +#define ARM_SPE_IN_TXN 
BIT(EV_TRANSACTIONAL) +#define ARM_SPE_L2D_ACCESS BIT(EV_L2D_ACCESS) +#define ARM_SPE_L2D_MISS BIT(EV_L2D_MISS) +#define ARM_SPE_RECENTLY_FETCHED BIT(EV_RECENTLY_FETCHED) +#define ARM_SPE_DATA_SNOOPED BIT(EV_DATA_SNOOPED) +#define ARM_SPE_HITM BIT(EV_CACHE_DATA_MODIFIED) enum arm_spe_op_type { /* First level operation type */ ARM_SPE_OP_OTHER = 1 << 0, ARM_SPE_OP_LDST = 1 << 1, ARM_SPE_OP_BRANCH_ERET = 1 << 2, +}; + +enum arm_spe_2nd_op_ldst { + ARM_SPE_OP_GP_REG = 1 << 8, + ARM_SPE_OP_UNSPEC_REG = 1 << 9, + ARM_SPE_OP_NV_SYSREG = 1 << 10, + ARM_SPE_OP_SIMD_FP = 1 << 11, + ARM_SPE_OP_SVE = 1 << 12, + ARM_SPE_OP_MTE_TAG = 1 << 13, + ARM_SPE_OP_MEMCPY = 1 << 14, + ARM_SPE_OP_MEMSET = 1 << 15, + ARM_SPE_OP_GCS = 1 << 16, + ARM_SPE_OP_SME = 1 << 17, + ARM_SPE_OP_ASE = 1 << 18, + + /* Assisted information for memory / SIMD */ + ARM_SPE_OP_LD = 1 << 20, + ARM_SPE_OP_ST = 1 << 21, + ARM_SPE_OP_ATOMIC = 1 << 22, + ARM_SPE_OP_EXCL = 1 << 23, + ARM_SPE_OP_AR = 1 << 24, + ARM_SPE_OP_DP = 1 << 25, /* Data processing */ + ARM_SPE_OP_PRED = 1 << 26, /* Predicated */ + ARM_SPE_OP_SG = 1 << 27, /* Gather/Scatter */ + ARM_SPE_OP_COMM = 1 << 28, /* Common */ + ARM_SPE_OP_FP = 1 << 29, /* Floating-point */ + ARM_SPE_OP_COND = 1 << 30, /* Conditional */ +}; + +enum arm_spe_2nd_op_branch { + ARM_SPE_OP_BR_COND = 1 << 8, + ARM_SPE_OP_BR_INDIRECT = 1 << 9, + ARM_SPE_OP_BR_GCS = 1 << 10, + ARM_SPE_OP_BR_CR_BL = 1 << 11, + ARM_SPE_OP_BR_CR_RET = 1 << 12, + ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 13, +}; + +enum arm_spe_common_data_source { + ARM_SPE_COMMON_DS_L1D = 0x0, + ARM_SPE_COMMON_DS_L2 = 0x8, + ARM_SPE_COMMON_DS_PEER_CORE = 0x9, + ARM_SPE_COMMON_DS_LOCAL_CLUSTER = 0xa, + ARM_SPE_COMMON_DS_SYS_CACHE = 0xb, + ARM_SPE_COMMON_DS_PEER_CLUSTER = 0xc, + ARM_SPE_COMMON_DS_REMOTE = 0xd, + ARM_SPE_COMMON_DS_DRAM = 0xe, +}; - /* Second level operation type for OTHER */ - ARM_SPE_OP_SVE_OTHER = 1 << 16, - ARM_SPE_OP_SVE_FP = 1 << 17, - ARM_SPE_OP_SVE_PRED_OTHER = 1 << 18, - - /* Second level operation type for LDST */ - ARM_SPE_OP_LD = 1 << 16, - ARM_SPE_OP_ST = 1 << 17, - ARM_SPE_OP_ATOMIC = 1 << 18, - ARM_SPE_OP_EXCL = 1 << 19, - ARM_SPE_OP_AR = 1 << 20, - ARM_SPE_OP_SIMD_FP = 1 << 21, - ARM_SPE_OP_GP_REG = 1 << 22, - ARM_SPE_OP_UNSPEC_REG = 1 << 23, - ARM_SPE_OP_NV_SYSREG = 1 << 24, - ARM_SPE_OP_SVE_LDST = 1 << 25, - ARM_SPE_OP_SVE_PRED_LDST = 1 << 26, - ARM_SPE_OP_SVE_SG = 1 << 27, - - /* Second level operation type for BRANCH_ERET */ - ARM_SPE_OP_BR_COND = 1 << 16, - ARM_SPE_OP_BR_INDIRECT = 1 << 17, +enum arm_spe_ampereone_data_source { + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0, + ARM_SPE_AMPEREONE_SLC = 0x3, + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5, + ARM_SPE_AMPEREONE_DDR = 0x7, + ARM_SPE_AMPEREONE_L1D = 0x8, + ARM_SPE_AMPEREONE_L2D = 0x9, }; -enum arm_spe_neoverse_data_source { - ARM_SPE_NV_L1D = 0x0, - ARM_SPE_NV_L2 = 0x8, - ARM_SPE_NV_PEER_CORE = 0x9, - ARM_SPE_NV_LOCAL_CLUSTER = 0xa, - ARM_SPE_NV_SYS_CACHE = 0xb, - ARM_SPE_NV_PEER_CLUSTER = 0xc, - ARM_SPE_NV_REMOTE = 0xd, - ARM_SPE_NV_DRAM = 0xe, +enum arm_spe_hisi_hip_data_source { + ARM_SPE_HISI_HIP_PEER_CPU = 0, + ARM_SPE_HISI_HIP_PEER_CPU_HITM = 1, + ARM_SPE_HISI_HIP_L3 = 2, + ARM_SPE_HISI_HIP_L3_HITM = 3, + ARM_SPE_HISI_HIP_PEER_CLUSTER = 4, + ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM = 5, + ARM_SPE_HISI_HIP_REMOTE_SOCKET = 6, + ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM = 7, + ARM_SPE_HISI_HIP_LOCAL_MEM = 8, + ARM_SPE_HISI_HIP_REMOTE_MEM = 9, + ARM_SPE_HISI_HIP_NC_DEV = 13, + ARM_SPE_HISI_HIP_L2 = 16, + ARM_SPE_HISI_HIP_L2_HITM = 17, + ARM_SPE_HISI_HIP_L1 
= 18, }; struct arm_spe_record { - enum arm_spe_sample_type type; + u64 type; int err; u32 op; u32 latency; u64 from_ip; u64 to_ip; + u64 prev_br_tgt; u64 timestamp; u64 virt_addr; u64 phys_addr; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index a454c6737563..5769ba2f4140 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -10,24 +10,11 @@ #include <byteswap.h> #include <linux/bitops.h> #include <stdarg.h> +#include <linux/kernel.h> +#include <linux/unaligned.h> #include "arm-spe-pkt-decoder.h" -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define le16_to_cpu bswap_16 -#define le32_to_cpu bswap_32 -#define le64_to_cpu bswap_64 -#define memcpy_le64(d, s, n) do { \ - memcpy((d), (s), (n)); \ - *(d) = le64_to_cpu(*(d)); \ -} while (0) -#else -#define le16_to_cpu -#define le32_to_cpu -#define le64_to_cpu -#define memcpy_le64 memcpy -#endif - static const char * const arm_spe_packet_name[] = { [ARM_SPE_PAD] = "PAD", [ARM_SPE_END] = "END", @@ -70,9 +57,9 @@ static int arm_spe_get_payload(const unsigned char *buf, size_t len, switch (payload_len) { case 1: packet->payload = *(uint8_t *)buf; break; - case 2: packet->payload = le16_to_cpu(*(uint16_t *)buf); break; - case 4: packet->payload = le32_to_cpu(*(uint32_t *)buf); break; - case 8: packet->payload = le64_to_cpu(*(uint64_t *)buf); break; + case 2: packet->payload = get_unaligned_le16(buf); break; + case 4: packet->payload = get_unaligned_le32(buf); break; + case 8: packet->payload = get_unaligned_le64(buf); break; default: return ARM_SPE_BAD_PACKET; } @@ -321,10 +308,26 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); if (payload & BIT(EV_ALIGNMENT)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " ALIGNMENT"); + if (payload & BIT(EV_TRANSACTIONAL)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " TXN"); if (payload & BIT(EV_PARTIAL_PREDICATE)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-PARTIAL-PRED"); if (payload & BIT(EV_EMPTY_PREDICATE)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-EMPTY-PRED"); + if (payload & BIT(EV_L2D_ACCESS)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " L2D-ACCESS"); + if (payload & BIT(EV_L2D_MISS)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " L2D-MISS"); + if (payload & BIT(EV_CACHE_DATA_MODIFIED)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " HITM"); + if (payload & BIT(EV_RECENTLY_FETCHED)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " LFB"); + if (payload & BIT(EV_DATA_SNOOPED)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SNOOPED"); + if (payload & BIT(EV_STREAMING_SVE_MODE)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " STREAMING-SVE"); + if (payload & BIT(EV_SMCU)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SMCU"); return err; } @@ -337,7 +340,7 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, switch (packet->index) { case SPE_OP_PKT_HDR_CLASS_OTHER: - if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) { + if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, "SVE-OTHER"); /* SVE effective vector length */ @@ -348,8 +351,21 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP"); if (payload & SPE_OP_PKT_SVE_PRED) arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED"); - } else { + } else if 
(SPE_OP_PKT_OTHER_SUBCLASS_SME(payload)) { + arm_spe_pkt_out_string(&err, &buf, &buf_len, "SME-OTHER"); + + /* SME effective vector length or tile size */ + arm_spe_pkt_out_string(&err, &buf, &buf_len, " ETS %d", + SPE_OP_PKG_SME_ETS(payload)); + + if (payload & SPE_OP_PKT_OTHER_FP) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP"); + } else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER"); + if (payload & SPE_OP_PKT_OTHER_ASE) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " ASE"); + if (payload & SPE_OP_PKT_OTHER_FP) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP"); arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s", payload & SPE_OP_PKT_COND ? "COND-SELECT" : "INSN-OTHER"); @@ -359,42 +375,30 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, payload & 0x1 ? "ST" : "LD"); - if (SPE_OP_PKT_IS_LDST_ATOMIC(payload)) { + if (SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(payload)) { if (payload & SPE_OP_PKT_AT) arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT"); if (payload & SPE_OP_PKT_EXCL) arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL"); if (payload & SPE_OP_PKT_AR) arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR"); - } - - switch (SPE_OP_PKT_LDST_SUBCLASS_GET(payload)) { - case SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP: + } else if (SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_GP_REG: + } else if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG: + } else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG: + } else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG: + } else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " MTE-TAG"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_MEMCPY: + } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMCPY"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_MEMSET: + } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMSET"); - break; - default: - break; - } + } else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) { + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-SME-REG"); - if (SPE_OP_PKT_IS_LDST_SVE(payload)) { /* SVE effective vector length */ arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d", SPE_OP_PKG_SVE_EVL(payload)); @@ -403,6 +407,10 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED"); if (payload & SPE_OP_PKT_SVE_SG) arm_spe_pkt_out_string(&err, &buf, &buf_len, " SG"); + } else if (SPE_OP_PKT_LDST_SUBCLASS_GCS(payload)) { + arm_spe_pkt_out_string(&err, &buf, &buf_len, " GCS"); + if (payload & SPE_OP_PKT_GCS_COMM) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " COMM"); } break; case SPE_OP_PKT_HDR_CLASS_BR_ERET: @@ -410,10 +418,16 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, if (payload & SPE_OP_PKT_COND) arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND"); - - if (SPE_OP_PKT_IS_INDIRECT_BRANCH(payload)) + if (payload & SPE_OP_PKT_INDIRECT_BRANCH) 
arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND"); - + if (payload & SPE_OP_PKT_GCS) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " GCS"); + if (SPE_OP_PKT_CR_BL(payload)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-BL"); + if (SPE_OP_PKT_CR_RET(payload)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-RET"); + if (SPE_OP_PKT_CR_NON_BL_RET(payload)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-NON-BL-RET"); break; default: /* Unknown index */ diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 464a912b221c..adf4cde320aa 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -7,6 +7,7 @@ #ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__ #define INCLUDE__ARM_SPE_PKT_DECODER_H__ +#include <linux/bitfield.h> #include <stddef.h> #include <stdint.h> @@ -104,8 +105,16 @@ enum arm_spe_events { EV_LLC_MISS = 9, EV_REMOTE_ACCESS = 10, EV_ALIGNMENT = 11, + EV_TRANSACTIONAL = 16, EV_PARTIAL_PREDICATE = 17, EV_EMPTY_PREDICATE = 18, + EV_L2D_ACCESS = 19, + EV_L2D_MISS = 20, + EV_CACHE_DATA_MODIFIED = 21, + EV_RECENTLY_FETCHED = 22, + EV_DATA_SNOOPED = 23, + EV_STREAMING_SVE_MODE = 24, + EV_SMCU = 25, }; /* Operation packet header */ @@ -114,27 +123,39 @@ enum arm_spe_events { #define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1 #define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2 -#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8) +#define SPE_OP_PKT_OTHER_SUBCLASS_OTHER(v) (((v) & GENMASK_ULL(7, 3)) == 0x0) +#define SPE_OP_PKT_OTHER_SUBCLASS_SVE(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8) +#define SPE_OP_PKT_OTHER_SUBCLASS_SME(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x88) -#define SPE_OP_PKT_COND BIT(0) +#define SPE_OP_PKT_OTHER_ASE BIT(2) +#define SPE_OP_PKT_OTHER_FP BIT(1) + +/* + * SME effective vector length or tile size (ETS) is stored in byte 0 + * bits [6:4,2]; the length is rounded up to a power of two and use 128 + * as one step, so ETS calculation is: + * + * 128 * (2 ^ bits [6:4,2]) = 32 << (bits [6:4,2]) + */ +#define SPE_OP_PKG_SME_ETS(v) (128 << (FIELD_GET(GENMASK_ULL(6, 4), (v)) << 1 | \ + (FIELD_GET(BIT(2), (v))))) -#define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1)) -#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0 -#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4 -#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG 0x10 -#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG 0x30 -#define SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG 0x14 -#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY 0x20 -#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET 0x25 +#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x0) +#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(v) (((v) & GENMASK_ULL(7, 1)) == 0x4) +#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x10) +#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(v) (((v) & GENMASK_ULL(7, 1)) == 0x30) +#define SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(v) (((v) & GENMASK_ULL(7, 1)) == 0x14) +#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(v) (((v) & GENMASK_ULL(7, 1)) == 0x20) +#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET(v) (((v) & GENMASK_ULL(7, 0)) == 0x25) -#define SPE_OP_PKT_IS_LDST_ATOMIC(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2) +#define SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2) #define SPE_OP_PKT_AR BIT(4) #define SPE_OP_PKT_EXCL BIT(3) #define SPE_OP_PKT_AT BIT(2) #define SPE_OP_PKT_ST BIT(0) -#define SPE_OP_PKT_IS_LDST_SVE(v) (((v) & (BIT(3) | 
BIT(1))) == 0x8) +#define SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(v) (((v) & (BIT(3) | BIT(1))) == 0x8) #define SPE_OP_PKT_SVE_SG BIT(7) /* @@ -148,7 +169,17 @@ enum arm_spe_events { #define SPE_OP_PKT_SVE_PRED BIT(2) #define SPE_OP_PKT_SVE_FP BIT(1) -#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & GENMASK_ULL(7, 1)) == 0x2) +#define SPE_OP_PKT_LDST_SUBCLASS_GCS(v) (((v) & (GENMASK_ULL(7, 3) | BIT(1))) == 0x40) + +#define SPE_OP_PKT_GCS_COMM BIT(2) + +#define SPE_OP_PKT_CR_MASK GENMASK_ULL(4, 3) +#define SPE_OP_PKT_CR_BL(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 1) +#define SPE_OP_PKT_CR_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 2) +#define SPE_OP_PKT_CR_NON_BL_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 3) +#define SPE_OP_PKT_GCS BIT(2) +#define SPE_OP_PKT_INDIRECT_BRANCH BIT(1) +#define SPE_OP_PKT_COND BIT(0) const char *arm_spe_pkt_name(enum arm_spe_pkt_type); diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index afbd5869f6bf..dc19e72258f3 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -37,6 +37,25 @@ #include "../../arch/arm64/include/asm/cputype.h" #define MAX_TIMESTAMP (~0ULL) +#define is_ldst_op(op) (!!((op) & ARM_SPE_OP_LDST)) + +#define is_simd_op(op) (!!((op) & (ARM_SPE_OP_SIMD_FP | ARM_SPE_OP_SVE | \ + ARM_SPE_OP_SME | ARM_SPE_OP_ASE))) + +#define is_mem_op(op) (is_ldst_op(op) || is_simd_op(op)) + +#define ARM_SPE_CACHE_EVENT(lvl) \ + (ARM_SPE_##lvl##_ACCESS | ARM_SPE_##lvl##_MISS) + +#define arm_spe_is_cache_level(type, lvl) \ + ((type) & ARM_SPE_CACHE_EVENT(lvl)) + +#define arm_spe_is_cache_hit(type, lvl) \ + (((type) & ARM_SPE_CACHE_EVENT(lvl)) == ARM_SPE_##lvl##_ACCESS) + +#define arm_spe_is_cache_miss(type, lvl) \ + ((type) & ARM_SPE_##lvl##_MISS) + struct arm_spe { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -46,7 +65,6 @@ struct arm_spe { struct perf_session *session; struct machine *machine; u32 pmu_type; - u64 midr; struct perf_tsc_conversion tc; @@ -61,7 +79,6 @@ struct arm_spe { u8 sample_remote_access; u8 sample_memory; u8 sample_instructions; - u64 instructions_sample_period; u64 l1d_miss_id; u64 l1d_access_id; @@ -69,7 +86,7 @@ struct arm_spe { u64 llc_access_id; u64 tlb_miss_id; u64 tlb_access_id; - u64 branch_miss_id; + u64 branch_id; u64 remote_access_id; u64 memory_id; u64 instructions_id; @@ -78,6 +95,11 @@ struct arm_spe { unsigned long num_events; u8 use_ctx_pkt_for_pid; + + u64 **metadata; + u64 metadata_ver; + u64 metadata_nr_cpu; + bool is_homogeneous; }; struct arm_spe_queue { @@ -95,9 +117,23 @@ struct arm_spe_queue { u64 time; u64 timestamp; struct thread *thread; - u64 period_instructions; + u64 sample_count; + u32 flags; + struct branch_stack *last_branch; +}; + +struct data_source_handle { + const struct midr_range *midr_ranges; + void (*ds_synth)(const struct arm_spe_record *record, + union perf_mem_data_src *data_src); }; +#define DS(range, func) \ + { \ + .midr_ranges = range, \ + .ds_synth = arm_spe__synth_##func, \ + } + static void arm_spe_dump(struct arm_spe *spe __maybe_unused, unsigned char *buf, size_t len) { @@ -118,7 +154,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused, else pkt_len = 1; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < pkt_len; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < 16; i++) @@ -208,12 +244,22 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, speq->pid = -1; speq->tid = -1; speq->cpu = -1; - speq->period_instructions 
= 0; /* params set */ params.get_trace = arm_spe_get_trace; params.data = speq; + if (spe->synth_opts.last_branch) { + size_t sz = sizeof(struct branch_stack); + + /* Allocate up to two entries for PBT + TGT */ + sz += sizeof(struct branch_entry) * + min(spe->synth_opts.last_branch_sz, 2U); + speq->last_branch = zalloc(sz); + if (!speq->last_branch) + goto out_free; + } + /* create new decoder */ speq->decoder = arm_spe_decoder_new(¶ms); if (!speq->decoder) @@ -223,6 +269,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, out_free: zfree(&speq->event_buf); + zfree(&speq->last_branch); free(speq); return NULL; @@ -273,14 +320,38 @@ static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) return 0; } +static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, int cpu) +{ + u64 i; + + if (!spe->metadata) + return NULL; + + /* CPU ID is -1 for per-thread mode */ + if (cpu < 0) { + /* + * On the heterogeneous system, due to CPU ID is -1, + * cannot confirm the data source packet is supported. + */ + if (!spe->is_homogeneous) + return NULL; + + /* In homogeneous system, simply use CPU0's metadata */ + return spe->metadata[0]; + } + + for (i = 0; i < spe->metadata_nr_cpu; i++) + if (spe->metadata[i][ARM_SPE_CPU] == (u64)cpu) + return spe->metadata[i]; + + return NULL; +} + static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record) { struct simd_flags simd_flags = {}; - if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST)) - simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE; - - if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER)) + if (record->op & ARM_SPE_OP_SVE) simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE; if (record->type & ARM_SPE_SVE_PARTIAL_PRED) @@ -306,7 +377,7 @@ static void arm_spe_prep_sample(struct arm_spe *spe, sample->cpumode = arm_spe_cpumode(spe, sample->ip); sample->pid = speq->pid; sample->tid = speq->tid; - sample->period = 1; + sample->period = spe->synth_opts.period; sample->cpu = speq->cpu; sample->simd_flags = arm_spe__synth_simd_flags(record); @@ -315,6 +386,88 @@ static void arm_spe_prep_sample(struct arm_spe *spe, event->sample.header.size = sizeof(struct perf_event_header); } +static void arm_spe__prep_branch_stack(struct arm_spe_queue *speq) +{ + struct arm_spe *spe = speq->spe; + struct arm_spe_record *record = &speq->decoder->record; + struct branch_stack *bstack = speq->last_branch; + struct branch_flags *bs_flags; + unsigned int last_branch_sz = spe->synth_opts.last_branch_sz; + bool have_tgt = !!(speq->flags & PERF_IP_FLAG_BRANCH); + bool have_pbt = last_branch_sz >= (have_tgt + 1U) && record->prev_br_tgt; + size_t sz = sizeof(struct branch_stack) + + sizeof(struct branch_entry) * min(last_branch_sz, 2U) /* PBT + TGT */; + int i = 0; + + /* Clean up branch stack */ + memset(bstack, 0x0, sz); + + if (!have_tgt && !have_pbt) + return; + + if (have_tgt) { + bstack->entries[i].from = record->from_ip; + bstack->entries[i].to = record->to_ip; + + bs_flags = &bstack->entries[i].flags; + bs_flags->value = 0; + + if (record->op & ARM_SPE_OP_BR_CR_BL) { + if (record->op & ARM_SPE_OP_BR_COND) + bs_flags->type |= PERF_BR_COND_CALL; + else + bs_flags->type |= PERF_BR_CALL; + /* + * Indirect branch instruction without link (e.g. BR), + * take this case as function return. 
+ */ + } else if (record->op & ARM_SPE_OP_BR_CR_RET || + record->op & ARM_SPE_OP_BR_INDIRECT) { + if (record->op & ARM_SPE_OP_BR_COND) + bs_flags->type |= PERF_BR_COND_RET; + else + bs_flags->type |= PERF_BR_RET; + } else if (record->op & ARM_SPE_OP_BR_CR_NON_BL_RET) { + if (record->op & ARM_SPE_OP_BR_COND) + bs_flags->type |= PERF_BR_COND; + else + bs_flags->type |= PERF_BR_UNCOND; + } else { + if (record->op & ARM_SPE_OP_BR_COND) + bs_flags->type |= PERF_BR_COND; + else + bs_flags->type |= PERF_BR_UNKNOWN; + } + + if (record->type & ARM_SPE_BRANCH_MISS) { + bs_flags->mispred = 1; + bs_flags->predicted = 0; + } else { + bs_flags->mispred = 0; + bs_flags->predicted = 1; + } + + if (record->type & ARM_SPE_BRANCH_NOT_TAKEN) + bs_flags->not_taken = 1; + + if (record->type & ARM_SPE_IN_TXN) + bs_flags->in_tx = 1; + + bs_flags->cycles = min(record->latency, 0xFFFFU); + i++; + } + + if (have_pbt) { + bs_flags = &bstack->entries[i].flags; + bs_flags->type |= PERF_BR_UNKNOWN; + bstack->entries[i].to = record->prev_br_tgt; + i++; + } + + bstack->nr = i; + bstack->hw_idx = -1ULL; +} + static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type) { event->header.size = perf_event__sample_event_size(sample, type, 0); @@ -343,23 +496,28 @@ arm_spe_deliver_synth_event(struct arm_spe *spe, } static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq, - u64 spe_events_id, u64 data_src) + u64 spe_events_id, + union perf_mem_data_src data_src) { struct arm_spe *spe = speq->spe; struct arm_spe_record *record = &speq->decoder->record; union perf_event *event = speq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; + perf_sample__init(&sample, /*all=*/true); arm_spe_prep_sample(spe, speq, event, &sample); sample.id = spe_events_id; sample.stream_id = spe_events_id; sample.addr = record->virt_addr; sample.phys_addr = record->phys_addr; - sample.data_src = data_src; + sample.data_src = data_src.val; sample.weight = record->latency; - return arm_spe_deliver_synth_event(spe, speq, event, &sample); + ret = arm_spe_deliver_synth_event(spe, speq, event, &sample); + perf_sample__exit(&sample); + return ret; } static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, @@ -368,56 +526,115 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, struct arm_spe *spe = speq->spe; struct arm_spe_record *record = &speq->decoder->record; union perf_event *event = speq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; + perf_sample__init(&sample, /*all=*/true); arm_spe_prep_sample(spe, speq, event, &sample); sample.id = spe_events_id; sample.stream_id = spe_events_id; sample.addr = record->to_ip; sample.weight = record->latency; + sample.flags = speq->flags; + sample.branch_stack = speq->last_branch; - return arm_spe_deliver_synth_event(spe, speq, event, &sample); + ret = arm_spe_deliver_synth_event(spe, speq, event, &sample); + perf_sample__exit(&sample); + return ret; } static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, - u64 spe_events_id, u64 data_src) + u64 spe_events_id, + union perf_mem_data_src data_src) { struct arm_spe *spe = speq->spe; struct arm_spe_record *record = &speq->decoder->record; union perf_event *event = speq->event_buf; - struct perf_sample sample = { .ip = 0, }; - - /* - * Handles perf instruction sampling period. 
- */ - speq->period_instructions++; - if (speq->period_instructions < spe->instructions_sample_period) - return 0; - speq->period_instructions = 0; + struct perf_sample sample; + int ret; + perf_sample__init(&sample, /*all=*/true); arm_spe_prep_sample(spe, speq, event, &sample); sample.id = spe_events_id; sample.stream_id = spe_events_id; - sample.addr = record->virt_addr; + sample.addr = record->to_ip; sample.phys_addr = record->phys_addr; - sample.data_src = data_src; - sample.period = spe->instructions_sample_period; + sample.data_src = data_src.val; sample.weight = record->latency; + sample.flags = speq->flags; + sample.branch_stack = speq->last_branch; - return arm_spe_deliver_synth_event(spe, speq, event, &sample); + ret = arm_spe_deliver_synth_event(spe, speq, event, &sample); + perf_sample__exit(&sample); + return ret; } -static const struct midr_range neoverse_spe[] = { +static const struct midr_range common_ds_encoding_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X4), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), {}, }; -static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record, - union perf_mem_data_src *data_src) +static const struct midr_range ampereone_ds_encoding_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_AMPERE1A), + {}, +}; + +static const struct midr_range hisi_hip_ds_encoding_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_HISI_HIP12), + {}, +}; + +static void arm_spe__sample_flags(struct arm_spe_queue *speq) +{ + const struct arm_spe_record *record = &speq->decoder->record; + + speq->flags = 0; + if (record->op & ARM_SPE_OP_BRANCH_ERET) { + speq->flags = PERF_IP_FLAG_BRANCH; + + if (record->type & ARM_SPE_BRANCH_MISS) + speq->flags |= PERF_IP_FLAG_BRANCH_MISS; + + if (record->type & ARM_SPE_BRANCH_NOT_TAKEN) + speq->flags |= PERF_IP_FLAG_NOT_TAKEN; + + if (record->type & ARM_SPE_IN_TXN) + speq->flags |= PERF_IP_FLAG_IN_TX; + + if (record->op & ARM_SPE_OP_BR_COND) + speq->flags |= PERF_IP_FLAG_CONDITIONAL; + + if (record->op & ARM_SPE_OP_BR_CR_BL) + speq->flags |= PERF_IP_FLAG_CALL; + else if (record->op & ARM_SPE_OP_BR_CR_RET) + speq->flags |= PERF_IP_FLAG_RETURN; + /* + * Indirect branch instruction without link (e.g. BR), + * take it as a function return. 
+ */ + else if (record->op & ARM_SPE_OP_BR_INDIRECT) + speq->flags |= PERF_IP_FLAG_RETURN; + } +} + +static void arm_spe__synth_data_source_common(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) { /* * Even though four levels of cache hierarchy are possible, no known @@ -439,17 +656,17 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec } switch (record->source) { - case ARM_SPE_NV_L1D: + case ARM_SPE_COMMON_DS_L1D: data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; break; - case ARM_SPE_NV_L2: + case ARM_SPE_COMMON_DS_L2: data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; break; - case ARM_SPE_NV_PEER_CORE: + case ARM_SPE_COMMON_DS_PEER_CORE: data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; @@ -458,8 +675,8 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec * We don't know if this is L1, L2 but we do know it was a cache-2-cache * transfer, so set SNOOPX_PEER */ - case ARM_SPE_NV_LOCAL_CLUSTER: - case ARM_SPE_NV_PEER_CLUSTER: + case ARM_SPE_COMMON_DS_LOCAL_CLUSTER: + case ARM_SPE_COMMON_DS_PEER_CLUSTER: data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; @@ -467,7 +684,7 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec /* * System cache is assumed to be L3 */ - case ARM_SPE_NV_SYS_CACHE: + case ARM_SPE_COMMON_DS_SYS_CACHE: data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; data_src->mem_snoop = PERF_MEM_SNOOP_HIT; @@ -476,61 +693,321 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec * We don't know what level it hit in, except it came from the other * socket */ - case ARM_SPE_NV_REMOTE: - data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1; + case ARM_SPE_COMMON_DS_REMOTE: + data_src->mem_lvl = PERF_MEM_LVL_NA; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_COMMON_DS_DRAM: + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + default: + break; + } +} + +/* + * Source is IMPDEF. Here we convert the source code used on AmpereOne cores + * to the common (Neoverse, Cortex) to avoid duplicating the decoding code. 
+ */ +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + struct arm_spe_record common_record; + + switch (record->source) { + case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE: + common_record.source = ARM_SPE_COMMON_DS_PEER_CORE; + break; + case ARM_SPE_AMPEREONE_SLC: + common_record.source = ARM_SPE_COMMON_DS_SYS_CACHE; + break; + case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE: + common_record.source = ARM_SPE_COMMON_DS_REMOTE; + break; + case ARM_SPE_AMPEREONE_DDR: + common_record.source = ARM_SPE_COMMON_DS_DRAM; + break; + case ARM_SPE_AMPEREONE_L1D: + common_record.source = ARM_SPE_COMMON_DS_L1D; + break; + case ARM_SPE_AMPEREONE_L2D: + common_record.source = ARM_SPE_COMMON_DS_L2; + break; + default: + pr_warning_once("AmpereOne: Unknown data source (0x%x)\n", + record->source); + return; + } + + common_record.op = record->op; + arm_spe__synth_data_source_common(&common_record, data_src); +} + +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + /* Use common synthesis method to handle store operations */ + if (record->op & ARM_SPE_OP_ST) { + arm_spe__synth_data_source_common(record, data_src); + return; + } + + switch (record->source) { + case ARM_SPE_HISI_HIP_PEER_CPU: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_PEER_CPU_HITM: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_L3: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HIT; + break; + case ARM_SPE_HISI_HIP_L3_HITM: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + break; + case ARM_SPE_HISI_HIP_PEER_CLUSTER: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_REMOTE_SOCKET: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2; data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; break; - case ARM_SPE_NV_DRAM: + case ARM_SPE_HISI_HIP_LOCAL_MEM: data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; break; + case ARM_SPE_HISI_HIP_REMOTE_MEM: + data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + break; + case 
ARM_SPE_HISI_HIP_NC_DEV: + data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_HISI_HIP_L2: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_HISI_HIP_L2_HITM: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + break; + case ARM_SPE_HISI_HIP_L1: + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; default: break; } } -static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record, - union perf_mem_data_src *data_src) +static const struct data_source_handle data_source_handles[] = { + DS(common_ds_encoding_cpus, data_source_common), + DS(ampereone_ds_encoding_cpus, data_source_ampereone), + DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip), +}; + +static void arm_spe__synth_ld_memory_level(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) { - if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) { - data_src->mem_lvl = PERF_MEM_LVL_L3; + /* + * To find a cache hit, search in ascending order from the lower level + * caches to the higher level caches. This reflects the best scenario + * for a cache hit. + */ + if (arm_spe_is_cache_hit(record->type, L1D)) { + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + } else if (record->type & ARM_SPE_RECENTLY_FETCHED) { + data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_LFB; + } else if (arm_spe_is_cache_hit(record->type, L2D)) { + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + } else if (arm_spe_is_cache_hit(record->type, LLC)) { + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + /* + * To find a cache miss, search in descending order from the higher + * level cache to the lower level cache. This represents the worst + * scenario for a cache miss. + */ + } else if (arm_spe_is_cache_miss(record->type, LLC)) { + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_MISS; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + } else if (arm_spe_is_cache_miss(record->type, L2D)) { + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_MISS; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + } else if (arm_spe_is_cache_miss(record->type, L1D)) { + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + } +} - if (record->type & ARM_SPE_LLC_MISS) - data_src->mem_lvl |= PERF_MEM_LVL_MISS; - else - data_src->mem_lvl |= PERF_MEM_LVL_HIT; - } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) { +static void arm_spe__synth_st_memory_level(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + /* Record the greatest level info for a store operation. */ + if (arm_spe_is_cache_level(record->type, LLC)) { + data_src->mem_lvl = PERF_MEM_LVL_L3; + data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, LLC) ? 
+ PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + } else if (arm_spe_is_cache_level(record->type, L2D)) { + data_src->mem_lvl = PERF_MEM_LVL_L2; + data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L2D) ? + PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + } else if (arm_spe_is_cache_level(record->type, L1D)) { data_src->mem_lvl = PERF_MEM_LVL_L1; + data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L1D) ? + PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + } +} - if (record->type & ARM_SPE_L1D_MISS) - data_src->mem_lvl |= PERF_MEM_LVL_MISS; - else - data_src->mem_lvl |= PERF_MEM_LVL_HIT; +static void arm_spe__synth_memory_level(struct arm_spe_queue *speq, + const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + struct arm_spe *spe = speq->spe; + + /* + * The data source packet contains more info for cache levels for + * peer snooping. So respect the memory level if has been set by + * data source parsing. + */ + if (!data_src->mem_lvl) { + if (data_src->mem_op == PERF_MEM_OP_LOAD) + arm_spe__synth_ld_memory_level(record, data_src); + if (data_src->mem_op == PERF_MEM_OP_STORE) + arm_spe__synth_st_memory_level(record, data_src); + } + + if (!data_src->mem_lvl) { + data_src->mem_lvl = PERF_MEM_LVL_NA; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA; + } + + /* + * If 'mem_snoop' has been set by data source packet, skip to set + * it at here. + */ + if (!data_src->mem_snoop) { + if (record->type & ARM_SPE_DATA_SNOOPED) { + if (record->type & ARM_SPE_HITM) + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + else + data_src->mem_snoop = PERF_MEM_SNOOP_HIT; + } else { + u64 *metadata = + arm_spe__get_metadata_by_cpu(spe, speq->cpu); + + /* + * Set NA ("Not available") mode if no meta data or the + * SNOOPED event is not supported. 
+ */ + if (!metadata || + !(metadata[ARM_SPE_CAP_EVENT_FILTER] & ARM_SPE_DATA_SNOOPED)) + data_src->mem_snoop = PERF_MEM_SNOOP_NA; + else + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + } + } + + if (!data_src->mem_remote) { + if (record->type & ARM_SPE_REMOTE_ACCESS) + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + } +} + +static void arm_spe__synth_ds(struct arm_spe_queue *speq, + const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + struct arm_spe *spe = speq->spe; + u64 *metadata = NULL; + u64 midr; + unsigned int i; + + /* Metadata version 1 assumes all CPUs are the same (old behavior) */ + if (spe->metadata_ver == 1) { + const char *cpuid; + + pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n"); + cpuid = perf_env__cpuid(perf_session__env(spe->session)); + midr = strtol(cpuid, NULL, 16); + } else { + metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu); + if (!metadata) + return; + + midr = metadata[ARM_SPE_CPU_MIDR]; } - if (record->type & ARM_SPE_REMOTE_ACCESS) - data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1; + for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) { + if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) { + return data_source_handles[i].ds_synth(record, data_src); + } + } + + return; } -static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) +static union perf_mem_data_src +arm_spe__synth_data_source(struct arm_spe_queue *speq, + const struct arm_spe_record *record) { - union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; - bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); + union perf_mem_data_src data_src = {}; + + if (!is_mem_op(record->op)) + return data_src; if (record->op & ARM_SPE_OP_LD) data_src.mem_op = PERF_MEM_OP_LOAD; else if (record->op & ARM_SPE_OP_ST) data_src.mem_op = PERF_MEM_OP_STORE; else - return 0; + data_src.mem_op = PERF_MEM_OP_NA; - if (is_neoverse) - arm_spe__synth_data_source_neoverse(record, &data_src); - else - arm_spe__synth_data_source_generic(record, &data_src); + arm_spe__synth_ds(speq, record, &data_src); + arm_spe__synth_memory_level(speq, record, &data_src); if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) { data_src.mem_dtlb = PERF_MEM_TLB_WK; @@ -541,17 +1018,26 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m data_src.mem_dtlb |= PERF_MEM_TLB_HIT; } - return data_src.val; + return data_src; } static int arm_spe_sample(struct arm_spe_queue *speq) { const struct arm_spe_record *record = &speq->decoder->record; struct arm_spe *spe = speq->spe; - u64 data_src; + union perf_mem_data_src data_src; int err; - data_src = arm_spe__synth_data_source(record, spe->midr); + /* + * Discard all samples until period is reached + */ + speq->sample_count++; + if (speq->sample_count < spe->synth_opts.period) + return 0; + speq->sample_count = 0; + + arm_spe__sample_flags(speq); + data_src = arm_spe__synth_data_source(speq, record); if (spe->sample_flc) { if (record->type & ARM_SPE_L1D_MISS) { @@ -601,8 +1087,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq) } } - if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) { - err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id); + if (spe->synth_opts.last_branch && + (spe->sample_branch || spe->sample_instructions)) + arm_spe__prep_branch_stack(speq); + + if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) { + err = arm_spe__synth_branch_sample(speq, spe->branch_id); if (err) return err; } @@ 
-615,11 +1105,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq) return err; } - /* - * When data_src is zero it means the record is not a memory operation, - * skip to synthesize memory sample for this case. - */ - if (spe->sample_memory && data_src) { + if (spe->sample_memory && is_mem_op(record->op)) { err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src); if (err) return err; @@ -899,7 +1385,7 @@ static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event, static int arm_spe_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, - struct perf_tool *tool) + const struct perf_tool *tool) { int err = 0; u64 timestamp; @@ -947,7 +1433,7 @@ static int arm_spe_process_event(struct perf_session *session, static int arm_spe_process_auxtrace_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace); @@ -985,7 +1471,7 @@ static int arm_spe_process_auxtrace_event(struct perf_session *session, } static int arm_spe_flush(struct perf_session *session __maybe_unused, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace); @@ -1016,6 +1502,73 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused, return 0; } +static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size) +{ + u64 *metadata; + + metadata = zalloc(per_cpu_size); + if (!metadata) + return NULL; + + memcpy(metadata, buf, per_cpu_size); + return metadata; +} + +static void arm_spe__free_metadata(u64 **metadata, int nr_cpu) +{ + int i; + + for (i = 0; i < nr_cpu; i++) + zfree(&metadata[i]); + free(metadata); +} + +static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info, + u64 *ver, int *nr_cpu) +{ + u64 *ptr = (u64 *)info->priv; + u64 metadata_size; + u64 **metadata = NULL; + int hdr_sz, per_cpu_sz, i; + + metadata_size = info->header.size - + sizeof(struct perf_record_auxtrace_info); + + /* Metadata version 1 */ + if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) { + *ver = 1; + *nr_cpu = 0; + /* No per CPU metadata */ + return NULL; + } + + *ver = ptr[ARM_SPE_HEADER_VERSION]; + hdr_sz = ptr[ARM_SPE_HEADER_SIZE]; + *nr_cpu = ptr[ARM_SPE_CPUS_NUM]; + + metadata = calloc(*nr_cpu, sizeof(*metadata)); + if (!metadata) + return NULL; + + /* Locate the start address of per CPU metadata */ + ptr += hdr_sz; + per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu); + + for (i = 0; i < *nr_cpu; i++) { + metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz); + if (!metadata[i]) + goto err_per_cpu_metadata; + + ptr += per_cpu_sz / sizeof(u64); + } + + return metadata; + +err_per_cpu_metadata: + arm_spe__free_metadata(metadata, *nr_cpu); + return NULL; +} + static void arm_spe_free_queue(void *priv) { struct arm_spe_queue *speq = priv; @@ -1025,6 +1578,7 @@ static void arm_spe_free_queue(void *priv) thread__zput(speq->thread); arm_spe_decoder_free(speq->decoder); zfree(&speq->event_buf); + zfree(&speq->last_branch); free(speq); } @@ -1050,6 +1604,7 @@ static void arm_spe_free(struct perf_session *session) auxtrace_heap__free(&spe->heap); arm_spe_free_events(session); session->auxtrace = NULL; + arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu); free(spe); } @@ -1061,45 +1616,61 @@ static bool 
arm_spe_evsel_is_auxtrace(struct perf_session *session, return evsel->core.attr.type == spe->pmu_type; } -static const char * const arm_spe_info_fmts[] = { - [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n", +static const char * const metadata_hdr_v1_fmts[] = { + [ARM_SPE_PMU_TYPE] = " PMU Type :%"PRId64"\n", + [ARM_SPE_PER_CPU_MMAPS] = " Per CPU mmaps :%"PRId64"\n", }; -static void arm_spe_print_info(__u64 *arr) -{ - if (!dump_trace) - return; - - fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]); -} +static const char * const metadata_hdr_fmts[] = { + [ARM_SPE_HEADER_VERSION] = " Header version :%"PRId64"\n", + [ARM_SPE_HEADER_SIZE] = " Header size :%"PRId64"\n", + [ARM_SPE_PMU_TYPE_V2] = " PMU type v2 :%"PRId64"\n", + [ARM_SPE_CPUS_NUM] = " CPU number :%"PRId64"\n", +}; -struct arm_spe_synth { - struct perf_tool dummy_tool; - struct perf_session *session; +static const char * const metadata_per_cpu_fmts[] = { + [ARM_SPE_MAGIC] = " Magic :0x%"PRIx64"\n", + [ARM_SPE_CPU] = " CPU # :%"PRId64"\n", + [ARM_SPE_CPU_NR_PARAMS] = " Num of params :%"PRId64"\n", + [ARM_SPE_CPU_MIDR] = " MIDR :0x%"PRIx64"\n", + [ARM_SPE_CPU_PMU_TYPE] = " PMU Type :%"PRId64"\n", + [ARM_SPE_CAP_MIN_IVAL] = " Min Interval :%"PRId64"\n", + [ARM_SPE_CAP_EVENT_FILTER] = " Event Filter :0x%"PRIx64"\n", }; -static int arm_spe_event_synth(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample __maybe_unused, - struct machine *machine __maybe_unused) +static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr) { - struct arm_spe_synth *arm_spe_synth = - container_of(tool, struct arm_spe_synth, dummy_tool); + unsigned int i, cpu, hdr_size, cpu_num, cpu_size; + const char * const *hdr_fmts; - return perf_session__deliver_synth_event(arm_spe_synth->session, - event, NULL); -} + if (!dump_trace) + return; -static int arm_spe_synth_event(struct perf_session *session, - struct perf_event_attr *attr, u64 id) -{ - struct arm_spe_synth arm_spe_synth; + if (spe->metadata_ver == 1) { + cpu_num = 0; + hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX; + hdr_fmts = metadata_hdr_v1_fmts; + } else { + cpu_num = arr[ARM_SPE_CPUS_NUM]; + hdr_size = arr[ARM_SPE_HEADER_SIZE]; + hdr_fmts = metadata_hdr_fmts; + } - memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth)); - arm_spe_synth.session = session; + for (i = 0; i < hdr_size; i++) + fprintf(stdout, hdr_fmts[i], arr[i]); - return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1, - &id, arm_spe_event_synth); + arr += hdr_size; + for (cpu = 0; cpu < cpu_num; cpu++) { + /* + * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS + * are fixed. The sequential parameter size is decided by the + * field 'ARM_SPE_CPU_NR_PARAMS'. 
+ */ + cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS]; + for (i = 0; i < cpu_size; i++) + fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]); + arr += cpu_size; + } } static void arm_spe_set_event_name(struct evlist *evlist, u64 id, @@ -1161,18 +1732,16 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) attr.exclude_guest = evsel->core.attr.exclude_guest; attr.sample_id_all = evsel->core.attr.sample_id_all; attr.read_format = evsel->core.attr.read_format; + attr.sample_period = spe->synth_opts.period; /* create new id val to be a fixed offset from evsel id */ - id = evsel->core.id[0] + 1000000000; - - if (!id) - id = 1; + id = auxtrace_synth_id_range_start(evsel); if (spe->synth_opts.flc) { spe->sample_flc = true; /* Level 1 data cache miss */ - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->l1d_miss_id = id; @@ -1180,7 +1749,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) id += 1; /* Level 1 data cache access */ - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->l1d_access_id = id; @@ -1192,7 +1761,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) spe->sample_llc = true; /* Last level cache miss */ - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->llc_miss_id = id; @@ -1200,7 +1769,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) id += 1; /* Last level cache access */ - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->llc_access_id = id; @@ -1212,7 +1781,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) spe->sample_tlb = true; /* TLB miss */ - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->tlb_miss_id = id; @@ -1220,7 +1789,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) id += 1; /* TLB access */ - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->tlb_access_id = id; @@ -1228,15 +1797,28 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) id += 1; } + if (spe->synth_opts.last_branch) { + if (spe->synth_opts.last_branch_sz > 2) + pr_debug("Arm SPE supports only two bstack entries (PBT+TGT).\n"); + + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + /* + * We don't use the hardware index, but the sample generation + * code uses the new format branch_stack with this field, + * so the event attributes must indicate that it's present. 
+ */ + attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; + } + if (spe->synth_opts.branches) { spe->sample_branch = true; - /* Branch miss */ - err = arm_spe_synth_event(session, &attr, id); + /* Branch */ + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; - spe->branch_miss_id = id; - arm_spe_set_event_name(evlist, id, "branch-miss"); + spe->branch_id = id; + arm_spe_set_event_name(evlist, id, "branch"); id += 1; } @@ -1244,7 +1826,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) spe->sample_remote_access = true; /* Remote access */ - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->remote_access_id = id; @@ -1255,7 +1837,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) if (spe->synth_opts.mem) { spe->sample_memory = true; - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->memory_id = id; @@ -1264,47 +1846,70 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) } if (spe->synth_opts.instructions) { - if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) { - pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n"); - goto synth_instructions_out; - } - if (spe->synth_opts.period > 1) - pr_warning("Arm SPE has a hardware-based sample period.\n" - "Additional instruction events will be discarded by --itrace\n"); - spe->sample_instructions = true; attr.config = PERF_COUNT_HW_INSTRUCTIONS; - attr.sample_period = spe->synth_opts.period; - spe->instructions_sample_period = attr.sample_period; - err = arm_spe_synth_event(session, &attr, id); + + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->instructions_id = id; arm_spe_set_event_name(evlist, id, "instructions"); } -synth_instructions_out: return 0; } +static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu) +{ + u64 midr; + int i; + + if (!nr_cpu) + return false; + + for (i = 0; i < nr_cpu; i++) { + if (!metadata[i]) + return false; + + if (i == 0) { + midr = metadata[i][ARM_SPE_CPU_MIDR]; + continue; + } + + if (midr != metadata[i][ARM_SPE_CPU_MIDR]) + return false; + } + + return true; +} + int arm_spe_process_auxtrace_info(union perf_event *event, struct perf_session *session) { struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; - size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX; + size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE; struct perf_record_time_conv *tc = &session->time_conv; - const char *cpuid = perf_env__cpuid(session->evlist->env); - u64 midr = strtol(cpuid, NULL, 16); struct arm_spe *spe; - int err; + u64 **metadata = NULL; + u64 metadata_ver; + int nr_cpu, err; if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + min_sz) return -EINVAL; + metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver, + &nr_cpu); + if (!metadata && metadata_ver != 1) { + pr_err("Failed to parse Arm SPE metadata.\n"); + return -EINVAL; + } + spe = zalloc(sizeof(struct arm_spe)); - if (!spe) - return -ENOMEM; + if (!spe) { + err = -ENOMEM; + goto err_free_metadata; + } err = auxtrace_queues__init(&spe->queues); if (err) @@ -1313,8 +1918,14 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->session = session; spe->machine = &session->machines.host; /* No kvm support */ 
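+	/*
+	 * The PMU type sits at priv[0] (ARM_SPE_PMU_TYPE) in the v1
+	 * layout; the v2 header prepends the version and size fields,
+	 * moving it to priv[2] (ARM_SPE_PMU_TYPE_V2).
+	 */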
spe->auxtrace_type = auxtrace_info->type; - spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; - spe->midr = midr; + if (metadata_ver == 1) + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; + else + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2]; + spe->metadata = metadata; + spe->metadata_ver = metadata_ver; + spe->metadata_nr_cpu = nr_cpu; + spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu); spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); @@ -1347,15 +1958,28 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace; session->auxtrace = &spe->auxtrace; - arm_spe_print_info(&auxtrace_info->priv[0]); + arm_spe_print_info(spe, &auxtrace_info->priv[0]); if (dump_trace) return 0; - if (session->itrace_synth_opts && session->itrace_synth_opts->set) + if (session->itrace_synth_opts && session->itrace_synth_opts->set) { spe->synth_opts = *session->itrace_synth_opts; - else + } else { itrace_synth_opts__set_default(&spe->synth_opts, false); + /* Default nanoseconds period not supported */ + spe->synth_opts.period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS; + spe->synth_opts.period = 1; + } + + if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) { + ui__error("Only an instruction-based (i) --itrace period is supported with Arm SPE, e.g. --itrace=i1i\n"); + err = -EINVAL; + goto err_free_queues; + } + if (spe->synth_opts.period > 1) + ui__warning("Arm SPE has a hardware-based sampling period.\n\n" + "--itrace periods > 1i downsample by an interval of n SPE samples rather than n instructions.\n"); err = arm_spe_synth_events(spe, session); if (err) @@ -1375,5 +1999,7 @@ err_free_queues: session->auxtrace = NULL; err_free: free(spe); +err_free_metadata: + arm_spe__free_metadata(metadata, nr_cpu); return err; } diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h index 98d3235781c3..3966df1856d8 100644 --- a/tools/perf/util/arm-spe.h +++ b/tools/perf/util/arm-spe.h @@ -12,10 +12,48 @@ enum { ARM_SPE_PMU_TYPE, ARM_SPE_PER_CPU_MMAPS, + ARM_SPE_AUXTRACE_V1_PRIV_MAX, +}; + +#define ARM_SPE_AUXTRACE_V1_PRIV_SIZE \ + (ARM_SPE_AUXTRACE_V1_PRIV_MAX * sizeof(u64)) + +enum { + /* + * The old metadata format (defined above) does not include a + * version number field. Version 1 is reserved for that format, + * so the versioned format below starts from version 2.
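+	 * A v2 metadata block therefore consists of the four header
+	 * fields below, followed by 'ARM_SPE_CPUS_NUM' per-CPU blocks
+	 * laid out as described by the next enum.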
+ */ + ARM_SPE_HEADER_VERSION, + /* Number of sizeof(u64) */ + ARM_SPE_HEADER_SIZE, + /* PMU type shared by CPUs */ + ARM_SPE_PMU_TYPE_V2, + /* Number of CPUs */ + ARM_SPE_CPUS_NUM, ARM_SPE_AUXTRACE_PRIV_MAX, }; -#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64)) +enum { + /* Magic number */ + ARM_SPE_MAGIC, + /* CPU logical number in system */ + ARM_SPE_CPU, + /* Number of parameters */ + ARM_SPE_CPU_NR_PARAMS, + /* CPU MIDR */ + ARM_SPE_CPU_MIDR, + /* Associated PMU type */ + ARM_SPE_CPU_PMU_TYPE, + /* Minimal interval */ + ARM_SPE_CAP_MIN_IVAL, + /* Event filter */ + ARM_SPE_CAP_EVENT_FILTER, + ARM_SPE_CPU_PRIV_MAX, +}; + +#define ARM_SPE_HEADER_CURRENT_VERSION 2 + union perf_event; struct perf_session; @@ -27,5 +65,7 @@ struct auxtrace_record *arm_spe_recording_init(int *err, int arm_spe_process_auxtrace_info(union perf_event *event, struct perf_session *session); -struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu); +void arm_spe_pmu_default_config(const struct perf_pmu *arm_spe_pmu, + struct perf_event_attr *attr); + #endif diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c index 4940be4a0569..958afe8b821e 100644 --- a/tools/perf/util/arm64-frame-pointer-unwind-support.c +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c @@ -4,6 +4,7 @@ #include "event.h" #include "perf_regs.h" // SMPL_REG_MASK #include "unwind.h" +#include <string.h> #define perf_event_arm_regs perf_event_arm64_regs #include "../../arch/arm64/include/uapi/asm/perf_regs.h" @@ -16,8 +17,13 @@ struct entries { static bool get_leaf_frame_caller_enabled(struct perf_sample *sample) { - return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs - && sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR); + struct regs_dump *regs; + + if (callchain_param.record_mode != CALLCHAIN_FP) + return false; + + regs = perf_sample__user_regs(sample); + return regs->regs && regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_LR); } static int add_entry(struct unwind_entry *entry, void *arg) @@ -32,7 +38,7 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr { int ret; struct entries entries = {}; - struct regs_dump old_regs = sample->user_regs; + struct regs_dump old_regs, *regs; if (!get_leaf_frame_caller_enabled(sample)) return 0; @@ -42,19 +48,20 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr * and set its mask. SP is not used when doing the unwinding but it * still needs to be set to prevent failures. 
*/ - - if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) { - sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC); - sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1]; + regs = perf_sample__user_regs(sample); + memcpy(&old_regs, regs, sizeof(*regs)); + if (!(regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) { + regs->cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC); + regs->cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1]; } - if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) { - sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP); - sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0; + if (!(regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) { + regs->cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP); + regs->cache_regs[PERF_REG_ARM64_SP] = 0; } ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true); - sample->user_regs = old_regs; + memcpy(regs, &old_regs, sizeof(*regs)); if (ret || entries.length != 2) return ret; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a0368202a746..a224687ffbc1 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -55,12 +55,29 @@ #include "hisi-ptt.h" #include "s390-cpumsf.h" #include "util/mmap.h" +#include "powerpc-vpadtl.h" #include <linux/ctype.h> #include "symbol/kallsyms.h" #include <internal/lib.h> #include "util/sample.h" +#define AUXTRACE_SYNTH_EVENT_ID_OFFSET 1000000000ULL + +/* + * Event IDs are allocated sequentially, so a big offset from any + * existing ID will reach an unused range.
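+ * For example, an evsel id of 7 maps to a first synthesized event
+ * id of 1000000007.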
@@ -174,7 +191,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, struct evlist *evlist, struct evsel *evsel, int idx) { - bool per_cpu = !perf_cpu_map__empty(evlist->core.user_requested_cpus); + bool per_cpu = !perf_cpu_map__has_any_cpu(evlist->core.user_requested_cpus); mp->mmap_needed = evsel->needs_auxtrace_mmap; @@ -185,10 +202,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, if (per_cpu) { mp->cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx); - if (evlist->core.threads) - mp->tid = perf_thread_map__pid(evlist->core.threads, 0); - else - mp->tid = -1; + mp->tid = perf_thread_map__pid(evlist->core.threads, 0); } else { mp->cpu.cpu = -1; mp->tid = perf_thread_map__pid(evlist->core.threads, idx); @@ -218,15 +232,20 @@ static struct auxtrace_queue *auxtrace_alloc_queue_array(unsigned int nr_queues) return queue_array; } -int auxtrace_queues__init(struct auxtrace_queues *queues) +int auxtrace_queues__init_nr(struct auxtrace_queues *queues, int nr_queues) { - queues->nr_queues = AUXTRACE_INIT_NR_QUEUES; + queues->nr_queues = nr_queues; queues->queue_array = auxtrace_alloc_queue_array(queues->nr_queues); if (!queues->queue_array) return -ENOMEM; return 0; } +int auxtrace_queues__init(struct auxtrace_queues *queues) +{ + return auxtrace_queues__init_nr(queues, AUXTRACE_INIT_NR_QUEUES); +} + static int auxtrace_queues__grow(struct auxtrace_queues *queues, unsigned int new_nr_queues) { @@ -648,7 +667,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, static int evlist__enable_event_idx(struct evlist *evlist, struct evsel *evsel, int idx) { - bool per_cpu_mmaps = !perf_cpu_map__empty(evlist->core.user_requested_cpus); + bool per_cpu_mmaps = !perf_cpu_map__has_any_cpu(evlist->core.user_requested_cpus); if (per_cpu_mmaps) { struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx); @@ -666,11 +685,11 @@ int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx) { struct evsel *evsel; - if (!itr->evlist || !itr->pmu) + if (!itr->evlist) return -EINVAL; evlist__for_each_entry(itr->evlist, evsel) { - if (evsel->core.attr.type == itr->pmu->type) { + if (evsel__is_aux_event(evsel)) { if (evsel->disabled) return 0; return evlist__enable_event_idx(itr->evlist, evsel, idx); @@ -805,19 +824,76 @@ no_opt: return auxtrace_validate_aux_sample_size(evlist, opts); } -void auxtrace_regroup_aux_output(struct evlist *evlist) +static struct aux_action_opt { + const char *str; + u32 aux_action; + bool aux_event_opt; +} aux_action_opts[] = { + {"start-paused", BIT(0), true}, + {"pause", BIT(1), false}, + {"resume", BIT(2), false}, + {.str = NULL}, +}; + +static const struct aux_action_opt *auxtrace_parse_aux_action_str(const char *str) +{ + const struct aux_action_opt *opt; + + if (!str) + return NULL; + + for (opt = aux_action_opts; opt->str; opt++) + if (!strcmp(str, opt->str)) + return opt; + + return NULL; +} + +int auxtrace_parse_aux_action(struct evlist *evlist) { - struct evsel *evsel, *aux_evsel = NULL; struct evsel_config_term *term; + struct evsel *aux_evsel = NULL; + struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel__is_aux_event(evsel)) + bool is_aux_event = evsel__is_aux_event(evsel); + const struct aux_action_opt *opt; + + if (is_aux_event) aux_evsel = evsel; - term = evsel__get_config_term(evsel, AUX_OUTPUT); + term = evsel__get_config_term(evsel, AUX_ACTION); + if (!term) { + if (evsel__get_config_term(evsel, AUX_OUTPUT)) + goto regroup; + continue; + } + opt = 
auxtrace_parse_aux_action_str(term->val.str); + if (!opt) { + pr_err("Bad aux-action '%s'\n", term->val.str); + return -EINVAL; + } + if (opt->aux_event_opt && !is_aux_event) { + pr_err("aux-action '%s' can only be used with AUX area event\n", + term->val.str); + return -EINVAL; + } + if (!opt->aux_event_opt && is_aux_event) { + pr_err("aux-action '%s' cannot be used for AUX area event itself\n", + term->val.str); + return -EINVAL; + } + evsel->core.attr.aux_action = opt->aux_action; +regroup: /* If possible, group with the AUX event */ - if (term && aux_evsel) + if (aux_evsel) evlist__regroup(evlist, aux_evsel, evsel); + if (!evsel__is_aux_event(evsel__leader(evsel))) { + pr_err("Events with aux-action must have AUX area event group leader\n"); + return -EINVAL; + } } + + return 0; } struct auxtrace_record *__weak @@ -1111,16 +1187,19 @@ static int auxtrace_queue_data_cb(struct perf_session *session, if (!qd->samples || event->header.type != PERF_RECORD_SAMPLE) return 0; + perf_sample__init(&sample, /*all=*/false); err = evlist__parse_sample(session->evlist, event, &sample); if (err) - return err; - - if (!sample.aux_sample.size) - return 0; + goto out; - offset += sample.aux_sample.data - (void *)event; + if (sample.aux_sample.size) { + offset += sample.aux_sample.data - (void *)event; - return session->auxtrace->queue_data(session, &sample, NULL, offset); + err = session->auxtrace->queue_data(session, &sample, NULL, offset); + } +out: + perf_sample__exit(&sample); + return err; } int auxtrace_queue_data(struct perf_session *session, bool samples, bool events) @@ -1235,7 +1314,7 @@ void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int } int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, - struct perf_tool *tool, + const struct perf_tool *tool, struct perf_session *session, perf_event__handler_t process) { @@ -1300,7 +1379,8 @@ static void unleader_auxtrace(struct perf_session *session) } } -int perf_event__process_auxtrace_info(struct perf_session *session, +int perf_event__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { enum auxtrace_type type = event->auxtrace_info.type; @@ -1328,6 +1408,9 @@ int perf_event__process_auxtrace_info(struct perf_session *session, case PERF_AUXTRACE_HISI_PTT: err = hisi_ptt_process_auxtrace_info(event, session); break; + case PERF_AUXTRACE_VPA_DTL: + err = powerpc_vpadtl_process_auxtrace_info(event, session); + break; case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; @@ -1341,7 +1424,8 @@ int perf_event__process_auxtrace_info(struct perf_session *session, return 0; } -s64 perf_event__process_auxtrace(struct perf_session *session, +s64 perf_event__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { s64 err; @@ -1466,6 +1550,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, char *endptr; bool period_type_set = false; bool period_set = false; + bool iy = false; synth_opts->set = true; @@ -1484,6 +1569,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, switch (*p++) { case 'i': case 'y': + iy = true; if (p[-1] == 'y') synth_opts->cycles = true; else @@ -1638,6 +1724,9 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, case 'Z': synth_opts->timeless_decoding = true; break; + case 'T': + synth_opts->use_timestamp = true; + break; case ' ': case ',': break; @@ -1646,7 +1735,7 @@ int 
itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, } } out: - if (synth_opts->instructions || synth_opts->cycles) { + if (iy) { if (!period_type_set) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; @@ -1733,7 +1822,8 @@ void events_stats__auxtrace_error_warn(const struct events_stats *stats) } } -int perf_event__process_auxtrace_error(struct perf_session *session, +int perf_event__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { if (auxtrace__dont_decode(session)) @@ -1820,8 +1910,8 @@ int __weak compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail) } static int __auxtrace_mmap__read(struct mmap *map, - struct auxtrace_record *itr, - struct perf_tool *tool, process_auxtrace_t fn, + struct auxtrace_record *itr, struct perf_env *env, + const struct perf_tool *tool, process_auxtrace_t fn, bool snapshot, size_t snapshot_size) { struct auxtrace_mmap *mm = &map->auxtrace_mmap; @@ -1830,7 +1920,7 @@ static int __auxtrace_mmap__read(struct mmap *map, size_t size, head_off, old_off, len1, len2, padding; union perf_event ev; void *data1, *data2; - int kernel_is_64_bit = perf_env__kernel_is_64_bit(evsel__env(NULL)); + int kernel_is_64_bit = perf_env__kernel_is_64_bit(env); head = auxtrace_mmap__read_head(mm, kernel_is_64_bit); @@ -1932,17 +2022,18 @@ static int __auxtrace_mmap__read(struct mmap *map, } int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr, - struct perf_tool *tool, process_auxtrace_t fn) + struct perf_env *env, const struct perf_tool *tool, + process_auxtrace_t fn) { - return __auxtrace_mmap__read(map, itr, tool, fn, false, 0); + return __auxtrace_mmap__read(map, itr, env, tool, fn, false, 0); } int auxtrace_mmap__read_snapshot(struct mmap *map, - struct auxtrace_record *itr, - struct perf_tool *tool, process_auxtrace_t fn, + struct auxtrace_record *itr, struct perf_env *env, + const struct perf_tool *tool, process_auxtrace_t fn, size_t snapshot_size) { - return __auxtrace_mmap__read(map, itr, tool, fn, true, snapshot_size); + return __auxtrace_mmap__read(map, itr, env, tool, fn, true, snapshot_size); } /** @@ -2649,7 +2740,7 @@ static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso) } filt->addr = 0; - filt->size = dso->data.file_size; + filt->size = dso__data(dso)->file_size; return 0; } @@ -2819,7 +2910,7 @@ int auxtrace_parse_filters(struct evlist *evlist) } int auxtrace__process_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool *tool) + struct perf_sample *sample, const struct perf_tool *tool) { if (!session->auxtrace) return 0; @@ -2837,7 +2928,7 @@ void auxtrace__dump_auxtrace_sample(struct perf_session *session, session->auxtrace->dump_auxtrace_sample(session, sample); } -int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool) +int auxtrace__flush_events(struct perf_session *session, const struct perf_tool *tool) { if (!session->auxtrace) return 0; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 29eb82dff574..6947f3f284c0 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -8,21 +8,17 @@ #define __PERF_AUXTRACE_H #include <sys/types.h> -#include <errno.h> -#include <stdbool.h> -#include <stddef.h> #include <stdio.h> // FILE -#include <linux/list.h> #include <linux/perf_event.h> #include <linux/types.h> -#include <perf/cpumap.h> -#include <asm/bitsperlong.h> #include <asm/barrier.h> +#include 
<perf/cpumap.h> union perf_event; struct perf_session; struct evlist; struct evsel; +struct perf_env; struct perf_tool; struct mmap; struct perf_sample; @@ -49,6 +45,7 @@ enum auxtrace_type { PERF_AUXTRACE_ARM_SPE, PERF_AUXTRACE_S390_CPUMSF, PERF_AUXTRACE_HISI_PTT, + PERF_AUXTRACE_VPA_DTL, }; enum itrace_period_type { @@ -75,7 +72,6 @@ enum itrace_period_type { * (not fully accurate, since CYC packets are only emitted * together with other events, such as branches) * @branches: whether to synthesize 'branches' events - * (branch misses only for Arm SPE) * @transactions: whether to synthesize events for transactions * @ptwrites: whether to synthesize events for ptwrites * @pwr_events: whether to synthesize power events @@ -99,6 +95,7 @@ enum itrace_period_type { * @remote_access: whether to synthesize remote access events * @mem: whether to synthesize memory events * @timeless_decoding: prefer "timeless" decoding i.e. ignore timestamps + * @use_timestamp: use the timestamp trace as kernel time * @vm_time_correlation: perform VM Time Correlation * @vm_tm_corr_dry_run: VM Time Correlation dry-run * @vm_tm_corr_args: VM Time Correlation implementation-specific arguments @@ -146,6 +143,7 @@ struct itrace_synth_opts { bool remote_access; bool mem; bool timeless_decoding; + bool use_timestamp; bool vm_time_correlation; bool vm_tm_corr_dry_run; char *vm_tm_corr_args; @@ -206,17 +204,17 @@ struct auxtrace { int (*process_event)(struct perf_session *session, union perf_event *event, struct perf_sample *sample, - struct perf_tool *tool); + const struct perf_tool *tool); int (*process_auxtrace_event)(struct perf_session *session, union perf_event *event, - struct perf_tool *tool); + const struct perf_tool *tool); int (*queue_data)(struct perf_session *session, struct perf_sample *sample, union perf_event *event, u64 data_offset); void (*dump_auxtrace_sample)(struct perf_session *session, struct perf_sample *sample); int (*flush_events)(struct perf_session *session, - struct perf_tool *tool); + const struct perf_tool *tool); void (*free_events)(struct perf_session *session); void (*free)(struct perf_session *session); bool (*evsel_is_auxtrace)(struct perf_session *session, @@ -409,7 +407,6 @@ struct auxtrace_record { int (*read_finish)(struct auxtrace_record *itr, int idx); unsigned int alignment; unsigned int default_aux_sample_size; - struct perf_pmu *pmu; struct evlist *evlist; }; @@ -457,8 +454,6 @@ struct addr_filters { struct auxtrace_cache; -#ifdef HAVE_AUXTRACE_SUPPORT - u64 compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm); int compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail); @@ -506,19 +501,21 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, struct evlist *evlist, struct evsel *evsel, int idx); -typedef int (*process_auxtrace_t)(struct perf_tool *tool, +typedef int (*process_auxtrace_t)(const struct perf_tool *tool, struct mmap *map, union perf_event *event, void *data1, size_t len1, void *data2, size_t len2); int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr, - struct perf_tool *tool, process_auxtrace_t fn); + struct perf_env *env, const struct perf_tool *tool, + process_auxtrace_t fn); int auxtrace_mmap__read_snapshot(struct mmap *map, - struct auxtrace_record *itr, - struct perf_tool *tool, process_auxtrace_t fn, + struct auxtrace_record *itr, struct perf_env *env, + const struct perf_tool *tool, process_auxtrace_t fn, size_t snapshot_size); +int auxtrace_queues__init_nr(struct auxtrace_queues *queues, int nr_queues); 
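/*
 * auxtrace_queues__init_nr() sizes the queue array up front (e.g. one
 * queue per CPU); auxtrace_queues__init() starts from
 * AUXTRACE_INIT_NR_QUEUES and the array grows on demand.
 */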
int auxtrace_queues__init(struct auxtrace_queues *queues); int auxtrace_queues__add_event(struct auxtrace_queues *queues, struct perf_session *session, @@ -577,7 +574,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, int auxtrace_parse_sample_options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts, const char *str); -void auxtrace_regroup_aux_output(struct evlist *evlist); +int auxtrace_parse_aux_action(struct evlist *evlist); int auxtrace_record__options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts); @@ -611,11 +608,14 @@ void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int int code, int cpu, pid_t pid, pid_t tid, u64 ip, const char *msg, u64 timestamp); -int perf_event__process_auxtrace_info(struct perf_session *session, +int perf_event__process_auxtrace_info(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); -s64 perf_event__process_auxtrace(struct perf_session *session, +s64 perf_event__process_auxtrace(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); -int perf_event__process_auxtrace_error(struct perf_session *session, +int perf_event__process_auxtrace_error(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, const char *str, int unset); @@ -636,19 +636,20 @@ int addr_filters__parse_bare_filter(struct addr_filters *filts, int auxtrace_parse_filters(struct evlist *evlist); int auxtrace__process_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool *tool); + struct perf_sample *sample, const struct perf_tool *tool); void auxtrace__dump_auxtrace_sample(struct perf_session *session, struct perf_sample *sample); -int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool); +int auxtrace__flush_events(struct perf_session *session, const struct perf_tool *tool); void auxtrace__free_events(struct perf_session *session); void auxtrace__free(struct perf_session *session); bool auxtrace__evsel_is_auxtrace(struct perf_session *session, struct evsel *evsel); +u64 auxtrace_synth_id_range_start(struct evsel *evsel); #define ITRACE_HELP \ " i[period]: synthesize instructions events\n" \ " y[period]: synthesize cycles events (same period as i)\n" \ -" b: synthesize branches events (branch misses for Arm SPE)\n" \ +" b: synthesize branches events\n" \ " c: synthesize branches events (calls only)\n" \ " r: synthesize branches events (returns only)\n" \ " x: synthesize transactions events\n" \ @@ -678,6 +679,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " q: quicker (less detailed) decoding\n" \ " A: approximate IPC\n" \ " Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \ +" T: use the timestamp trace as kernel time\n" \ " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ " concatenate multiple options. 
Default is iybxwpe or cewp\n" @@ -697,210 +699,4 @@ void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts) opts->range_num = 0; } -#else -#include "debug.h" - -static inline struct auxtrace_record * -auxtrace_record__init(struct evlist *evlist __maybe_unused, - int *err) -{ - *err = 0; - return NULL; -} - -static inline -void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused) -{ -} - -static inline -int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused, - struct evlist *evlist __maybe_unused, - struct record_opts *opts __maybe_unused) -{ - return 0; -} - -static inline -int perf_event__process_auxtrace_info(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused) -{ - return 0; -} - -static inline -s64 perf_event__process_auxtrace(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused) -{ - return 0; -} - -static inline -int perf_event__process_auxtrace_error(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused) -{ - return 0; -} - -static inline -void perf_session__auxtrace_error_inc(struct perf_session *session - __maybe_unused, - union perf_event *event - __maybe_unused) -{ -} - -static inline -void events_stats__auxtrace_error_warn(const struct events_stats *stats - __maybe_unused) -{ -} - -static inline -int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts __maybe_unused, - const char *str __maybe_unused, int unset __maybe_unused) -{ - pr_err("AUX area tracing not supported\n"); - return -EINVAL; -} - -static inline -int itrace_parse_synth_opts(const struct option *opt __maybe_unused, - const char *str __maybe_unused, - int unset __maybe_unused) -{ - pr_err("AUX area tracing not supported\n"); - return -EINVAL; -} - -static inline -int auxtrace_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused, - struct record_opts *opts __maybe_unused, - const char *str) -{ - if (!str) - return 0; - pr_err("AUX area tracing not supported\n"); - return -EINVAL; -} - -static inline -int auxtrace_parse_sample_options(struct auxtrace_record *itr __maybe_unused, - struct evlist *evlist __maybe_unused, - struct record_opts *opts __maybe_unused, - const char *str) -{ - if (!str) - return 0; - pr_err("AUX area tracing not supported\n"); - return -EINVAL; -} - -static inline -void auxtrace_regroup_aux_output(struct evlist *evlist __maybe_unused) -{ -} - -static inline -int auxtrace__process_event(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_sample *sample __maybe_unused, - struct perf_tool *tool __maybe_unused) -{ - return 0; -} - -static inline -void auxtrace__dump_auxtrace_sample(struct perf_session *session __maybe_unused, - struct perf_sample *sample __maybe_unused) -{ -} - -static inline -int auxtrace__flush_events(struct perf_session *session __maybe_unused, - struct perf_tool *tool __maybe_unused) -{ - return 0; -} - -static inline -void auxtrace__free_events(struct perf_session *session __maybe_unused) -{ -} - -static inline -void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache __maybe_unused) -{ -} - -static inline -void auxtrace__free(struct perf_session *session __maybe_unused) -{ -} - -static inline -int auxtrace_index__write(int fd __maybe_unused, - struct list_head *head __maybe_unused) -{ - return -EINVAL; -} - -static inline -int auxtrace_index__process(int fd __maybe_unused, - u64 size __maybe_unused, - struct perf_session *session __maybe_unused, - bool 
needs_swap __maybe_unused) -{ - return -EINVAL; -} - -static inline -void auxtrace_index__free(struct list_head *head __maybe_unused) -{ -} - -static inline -bool auxtrace__evsel_is_auxtrace(struct perf_session *session __maybe_unused, - struct evsel *evsel __maybe_unused) -{ - return false; -} - -static inline -int auxtrace_parse_filters(struct evlist *evlist __maybe_unused) -{ - return 0; -} - -int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, - struct auxtrace_mmap_params *mp, - void *userpg, int fd); -void auxtrace_mmap__munmap(struct auxtrace_mmap *mm); -void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp, - off_t auxtrace_offset, - unsigned int auxtrace_pages, - bool auxtrace_overwrite); -void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, - struct evlist *evlist, - struct evsel *evsel, int idx); - -#define ITRACE_HELP "" - -static inline -void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts - __maybe_unused, - struct perf_time_interval *ptime_range - __maybe_unused, - int range_num __maybe_unused) -{ -} - -static inline -void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts - __maybe_unused) -{ -} - -#endif - #endif diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 591fc1edd385..649392bee7ed 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -40,29 +40,33 @@ static struct block_header_column { [PERF_HPP_REPORT__BLOCK_DSO] = { .name = "Shared Object", .width = 20, + }, + [PERF_HPP_REPORT__BLOCK_BRANCH_COUNTER] = { + .name = "Branch Counter", + .width = 30, } }; -struct block_info *block_info__get(struct block_info *bi) +static struct block_info *block_info__new(unsigned int br_cntr_nr) { - if (bi) - refcount_inc(&bi->refcnt); - return bi; -} + struct block_info *bi = zalloc(sizeof(struct block_info)); -void block_info__put(struct block_info *bi) -{ - if (bi && refcount_dec_and_test(&bi->refcnt)) - free(bi); + if (bi && br_cntr_nr) { + bi->br_cntr = calloc(br_cntr_nr, sizeof(u64)); + if (!bi->br_cntr) { + free(bi); + return NULL; + } + } + + return bi; } -struct block_info *block_info__new(void) +void block_info__delete(struct block_info *bi) { - struct block_info *bi = zalloc(sizeof(*bi)); - if (bi) - refcount_set(&bi->refcnt, 1); - return bi; + free(bi->br_cntr); + free(bi); } int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right) @@ -98,7 +102,8 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, static void init_block_info(struct block_info *bi, struct symbol *sym, struct cyc_hist *ch, int offset, - u64 total_cycles) + u64 total_cycles, unsigned int br_cntr_nr, + u64 *br_cntr, struct evsel *evsel) { bi->sym = sym; bi->start = ch->start; @@ -111,10 +116,18 @@ static void init_block_info(struct block_info *bi, struct symbol *sym, memcpy(bi->cycles_spark, ch->cycles_spark, NUM_SPARKS * sizeof(u64)); + + if (br_cntr && br_cntr_nr) { + bi->br_cntr_nr = br_cntr_nr; + memcpy(bi->br_cntr, &br_cntr[offset * br_cntr_nr], + br_cntr_nr * sizeof(u64)); + } + bi->evsel = evsel; } int block_info__process_sym(struct hist_entry *he, struct block_hist *bh, - u64 *block_cycles_aggr, u64 total_cycles) + u64 *block_cycles_aggr, u64 total_cycles, + unsigned int br_cntr_nr) { struct annotation *notes; struct cyc_hist *ch; @@ -129,26 +142,28 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh, al.sym = he->ms.sym; notes = symbol__annotation(he->ms.sym); - if (!notes || !notes->src || !notes->src->cycles_hist) + if (!notes || 
!notes->branch || !notes->branch->cycles_hist) return 0; - ch = notes->src->cycles_hist; + ch = notes->branch->cycles_hist; for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) { if (ch[i].num_aggr) { struct block_info *bi; struct hist_entry *he_block; - bi = block_info__new(); + bi = block_info__new(br_cntr_nr); if (!bi) return -1; init_block_info(bi, he->ms.sym, &ch[i], i, - total_cycles); + total_cycles, br_cntr_nr, + notes->branch->br_cntr, + hists_to_evsel(he->hists)); cycles += bi->cycles_aggr / bi->num_aggr; he_block = hists__add_entry_block(&bh->block_hists, &al, bi); if (!he_block) { - block_info__put(bi); + block_info__delete(bi); return -1; } } @@ -319,7 +334,7 @@ static int block_dso_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, if (map && map__dso(map)) { return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, - map__dso(map)->short_name); + dso__short_name(map__dso(map))); } return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, @@ -339,6 +354,24 @@ static void init_block_header(struct block_fmt *block_fmt) fmt->width = block_column_width; } +static int block_branch_counter_entry(struct perf_hpp_fmt *fmt, + struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt); + struct block_info *bi = he->block_info; + char *buf; + int ret; + + if (annotation_br_cntr_entry(&buf, bi->br_cntr_nr, bi->br_cntr, + bi->num_aggr, bi->evsel)) + return 0; + + ret = scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf); + free(buf); + return ret; +} + static void hpp_register(struct block_fmt *block_fmt, int idx, struct perf_hpp_list *hpp_list) { @@ -369,6 +402,9 @@ static void hpp_register(struct block_fmt *block_fmt, int idx, case PERF_HPP_REPORT__BLOCK_DSO: fmt->entry = block_dso_entry; break; + case PERF_HPP_REPORT__BLOCK_BRANCH_COUNTER: + fmt->entry = block_branch_counter_entry; + break; default: return; } @@ -402,7 +438,7 @@ static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts, static int process_block_report(struct hists *hists, struct block_report *block_report, u64 total_cycles, int *block_hpps, - int nr_hpps) + int nr_hpps, unsigned int br_cntr_nr) { struct rb_node *next = rb_first_cached(&hists->entries); struct block_hist *bh = &block_report->hist; @@ -417,7 +453,7 @@ static int process_block_report(struct hists *hists, while (next) { he = rb_entry(next, struct hist_entry, rb_node); block_info__process_sym(he, bh, &block_report->cycles, - total_cycles); + total_cycles, br_cntr_nr); next = rb_next(&he->rb_node); } @@ -447,7 +483,7 @@ struct block_report *block_info__create_report(struct evlist *evlist, struct hists *hists = evsel__hists(pos); process_block_report(hists, &block_reports[i], total_cycles, - block_hpps, nr_hpps); + block_hpps, nr_hpps, evlist->nr_br_cntr); i++; } @@ -464,8 +500,7 @@ void block_info__free_report(struct block_report *reps, int nr_reps) } int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel, struct perf_env *env, - struct annotation_options *annotation_opts) + struct evsel *evsel, struct perf_env *env) { int ret; @@ -477,8 +512,7 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent, return 0; case 1: symbol_conf.report_individual_block = true; - ret = block_hists_tui_browse(bh, evsel, min_percent, - env, annotation_opts); + ret = block_hists_tui_browse(bh, evsel, min_percent, env); return ret; default: return -1; diff --git a/tools/perf/util/block-info.h 
b/tools/perf/util/block-info.h index 42e9dcc4cf0a..b9329dc3ab59 100644 --- a/tools/perf/util/block-info.h +++ b/tools/perf/util/block-info.h @@ -3,7 +3,6 @@ #define __PERF_BLOCK_H #include <linux/types.h> -#include <linux/refcount.h> #include "hist.h" #include "symbol.h" #include "sort.h" @@ -19,7 +18,9 @@ struct block_info { u64 total_cycles; int num; int num_aggr; - refcount_t refcnt; + int br_cntr_nr; + u64 *br_cntr; + struct evsel *evsel; }; struct block_fmt { @@ -38,6 +39,7 @@ enum { PERF_HPP_REPORT__BLOCK_AVG_CYCLES, PERF_HPP_REPORT__BLOCK_RANGE, PERF_HPP_REPORT__BLOCK_DSO, + PERF_HPP_REPORT__BLOCK_BRANCH_COUNTER, PERF_HPP_REPORT__BLOCK_MAX_INDEX }; @@ -48,19 +50,7 @@ struct block_report { int nr_fmts; }; -struct block_hist; - -struct block_info *block_info__new(void); -struct block_info *block_info__get(struct block_info *bi); -void block_info__put(struct block_info *bi); - -static inline void __block_info__zput(struct block_info **bi) -{ - block_info__put(*bi); - *bi = NULL; -} - -#define block_info__zput(bi) __block_info__zput(&bi) +void block_info__delete(struct block_info *bi); int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right); @@ -68,7 +58,8 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, struct hist_entry *left, struct hist_entry *right); int block_info__process_sym(struct hist_entry *he, struct block_hist *bh, - u64 *block_cycles_aggr, u64 total_cycles); + u64 *block_cycles_aggr, u64 total_cycles, + unsigned int br_cntr_nr); struct block_report *block_info__create_report(struct evlist *evlist, u64 total_cycles, @@ -78,8 +69,7 @@ struct block_report *block_info__create_report(struct evlist *evlist, void block_info__free_report(struct block_report *reps, int nr_reps); int report__browse_block_hists(struct block_hist *bh, float min_percent, - struct evsel *evsel, struct perf_env *env, - struct annotation_options *annotation_opts); + struct evsel *evsel, struct perf_env *env); float block_info__total_cycles_percent(struct hist_entry *he); diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c index 680e92774d0c..15c42196c24c 100644 --- a/tools/perf/util/block-range.c +++ b/tools/perf/util/block-range.c @@ -311,6 +311,7 @@ done: double block_range__coverage(struct block_range *br) { struct symbol *sym; + struct annotated_branch *branch; if (!br) { if (block_ranges.blocks) @@ -323,5 +324,9 @@ double block_range__coverage(struct block_range *br) if (!sym) return -1; - return (double)br->coverage / symbol__annotation(sym)->max_coverage; + branch = symbol__annotation(sym)->branch; + if (!branch) + return -1; + + return (double)br->coverage / branch->max_coverage; } diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 38fcf3ba5749..2298cd396c42 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -1,13 +1,21 @@ // SPDX-License-Identifier: GPL-2.0 #include <errno.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> #include <stdlib.h> +#include <string.h> #include <bpf/bpf.h> #include <bpf/btf.h> #include <bpf/libbpf.h> +#include <linux/bpf.h> #include <linux/btf.h> #include <linux/err.h> +#include <linux/perf_event.h> #include <linux/string.h> +#include <linux/zalloc.h> #include <internal/lib.h> +#include <perf/event.h> #include <symbol/kallsyms.h> #include "bpf-event.h" #include "bpf-utils.h" @@ -59,10 +67,11 @@ static int machine__process_bpf_event_load(struct machine *machine, if (map) { struct dso *dso = map__dso(map); - dso->binary_type = 
DSO_BINARY_TYPE__BPF_PROG_INFO; - dso->bpf_prog.id = id; - dso->bpf_prog.sub_id = i; - dso->bpf_prog.env = env; + dso__set_binary_type(dso, DSO_BINARY_TYPE__BPF_PROG_INFO); + dso__bpf_prog(dso)->id = id; + dso__bpf_prog(dso)->sub_id = i; + dso__bpf_prog(dso)->env = env; + map__put(map); } } return 0; @@ -150,6 +159,362 @@ static int synthesize_bpf_prog_name(char *buf, int size, return name_len; } +#ifdef HAVE_LIBBPF_STRINGS_SUPPORT + +#define BPF_METADATA_PREFIX "bpf_metadata_" +#define BPF_METADATA_PREFIX_LEN (sizeof(BPF_METADATA_PREFIX) - 1) + +static bool name_has_bpf_metadata_prefix(const char **s) +{ + if (strncmp(*s, BPF_METADATA_PREFIX, BPF_METADATA_PREFIX_LEN) != 0) + return false; + *s += BPF_METADATA_PREFIX_LEN; + return true; +} + +struct bpf_metadata_map { + struct btf *btf; + const struct btf_type *datasec; + void *rodata; + size_t rodata_size; + unsigned int num_vars; +}; + +static int bpf_metadata_read_map_data(__u32 map_id, struct bpf_metadata_map *map) +{ + int map_fd; + struct bpf_map_info map_info; + __u32 map_info_len; + int key; + struct btf *btf; + const struct btf_type *datasec; + struct btf_var_secinfo *vsi; + unsigned int vlen, vars; + void *rodata; + + map_fd = bpf_map_get_fd_by_id(map_id); + if (map_fd < 0) + return -1; + + memset(&map_info, 0, sizeof(map_info)); + map_info_len = sizeof(map_info); + if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len) < 0) + goto out_close; + + /* If it's not an .rodata map, don't bother. */ + if (map_info.type != BPF_MAP_TYPE_ARRAY || + map_info.key_size != sizeof(int) || + map_info.max_entries != 1 || + !map_info.btf_value_type_id || + !strstr(map_info.name, ".rodata")) { + goto out_close; + } + + btf = btf__load_from_kernel_by_id(map_info.btf_id); + if (!btf) + goto out_close; + datasec = btf__type_by_id(btf, map_info.btf_value_type_id); + if (!btf_is_datasec(datasec)) + goto out_free_btf; + + /* + * If there aren't any variables with the "bpf_metadata_" prefix, + * don't bother. 
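+	 * (For example, a variable named "bpf_metadata_version" would be
+	 * picked up and later reported under the key "version".)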
+ */ + vlen = btf_vlen(datasec); + vsi = btf_var_secinfos(datasec); + vars = 0; + for (unsigned int i = 0; i < vlen; i++, vsi++) { + const struct btf_type *t_var = btf__type_by_id(btf, vsi->type); + const char *name = btf__name_by_offset(btf, t_var->name_off); + + if (name_has_bpf_metadata_prefix(&name)) + vars++; + } + if (vars == 0) + goto out_free_btf; + + rodata = zalloc(map_info.value_size); + if (!rodata) + goto out_free_btf; + key = 0; + if (bpf_map_lookup_elem(map_fd, &key, rodata)) { + free(rodata); + goto out_free_btf; + } + close(map_fd); + + map->btf = btf; + map->datasec = datasec; + map->rodata = rodata; + map->rodata_size = map_info.value_size; + map->num_vars = vars; + return 0; + +out_free_btf: + btf__free(btf); +out_close: + close(map_fd); + return -1; +} + +struct format_btf_ctx { + char *buf; + size_t buf_size; + size_t buf_idx; +}; + +static void format_btf_cb(void *arg, const char *fmt, va_list ap) +{ + int n; + struct format_btf_ctx *ctx = (struct format_btf_ctx *)arg; + + n = vsnprintf(ctx->buf + ctx->buf_idx, ctx->buf_size - ctx->buf_idx, + fmt, ap); + ctx->buf_idx += n; + if (ctx->buf_idx >= ctx->buf_size) + ctx->buf_idx = ctx->buf_size; +} + +static void format_btf_variable(struct btf *btf, char *buf, size_t buf_size, + const struct btf_type *t, const void *btf_data) +{ + struct format_btf_ctx ctx = { + .buf = buf, + .buf_idx = 0, + .buf_size = buf_size, + }; + const struct btf_dump_type_data_opts opts = { + .sz = sizeof(struct btf_dump_type_data_opts), + .skip_names = 1, + .compact = 1, + .emit_strings = 1, + }; + struct btf_dump *d; + size_t btf_size; + + d = btf_dump__new(btf, format_btf_cb, &ctx, NULL); + btf_size = btf__resolve_size(btf, t->type); + btf_dump__dump_type_data(d, t->type, btf_data, btf_size, &opts); + btf_dump__free(d); +} + +static void bpf_metadata_fill_event(struct bpf_metadata_map *map, + struct perf_record_bpf_metadata *bpf_metadata_event) +{ + struct btf_var_secinfo *vsi; + unsigned int i, vlen; + + memset(bpf_metadata_event->prog_name, 0, BPF_PROG_NAME_LEN); + vlen = btf_vlen(map->datasec); + vsi = btf_var_secinfos(map->datasec); + + for (i = 0; i < vlen; i++, vsi++) { + const struct btf_type *t_var = btf__type_by_id(map->btf, + vsi->type); + const char *name = btf__name_by_offset(map->btf, + t_var->name_off); + const __u64 nr_entries = bpf_metadata_event->nr_entries; + struct perf_record_bpf_metadata_entry *entry; + + if (!name_has_bpf_metadata_prefix(&name)) + continue; + + if (nr_entries >= (__u64)map->num_vars) + break; + + entry = &bpf_metadata_event->entries[nr_entries]; + memset(entry, 0, sizeof(*entry)); + snprintf(entry->key, BPF_METADATA_KEY_LEN, "%s", name); + format_btf_variable(map->btf, entry->value, + BPF_METADATA_VALUE_LEN, t_var, + map->rodata + vsi->offset); + bpf_metadata_event->nr_entries++; + } +} + +static void bpf_metadata_free_map_data(struct bpf_metadata_map *map) +{ + btf__free(map->btf); + free(map->rodata); +} + +static struct bpf_metadata *bpf_metadata_alloc(__u32 nr_prog_tags, + __u32 nr_variables) +{ + struct bpf_metadata *metadata; + size_t event_size; + + metadata = zalloc(sizeof(struct bpf_metadata)); + if (!metadata) + return NULL; + + metadata->prog_names = zalloc(nr_prog_tags * sizeof(char *)); + if (!metadata->prog_names) { + bpf_metadata_free(metadata); + return NULL; + } + for (__u32 prog_index = 0; prog_index < nr_prog_tags; prog_index++) { + metadata->prog_names[prog_index] = zalloc(BPF_PROG_NAME_LEN); + if (!metadata->prog_names[prog_index]) { + bpf_metadata_free(metadata); + return NULL; + } + 
metadata->nr_prog_names++; + } + + event_size = sizeof(metadata->event->bpf_metadata) + + nr_variables * sizeof(metadata->event->bpf_metadata.entries[0]); + metadata->event = zalloc(event_size); + if (!metadata->event) { + bpf_metadata_free(metadata); + return NULL; + } + metadata->event->bpf_metadata = (struct perf_record_bpf_metadata) { + .header = { + .type = PERF_RECORD_BPF_METADATA, + .size = event_size, + }, + .nr_entries = 0, + }; + + return metadata; +} + +static struct bpf_metadata *bpf_metadata_create(struct bpf_prog_info *info) +{ + struct bpf_metadata *metadata; + const __u32 *map_ids = (__u32 *)(uintptr_t)info->map_ids; + + for (__u32 map_index = 0; map_index < info->nr_map_ids; map_index++) { + struct bpf_metadata_map map; + + if (bpf_metadata_read_map_data(map_ids[map_index], &map) != 0) + continue; + + metadata = bpf_metadata_alloc(info->nr_prog_tags, map.num_vars); + if (!metadata) + continue; + + bpf_metadata_fill_event(&map, &metadata->event->bpf_metadata); + + for (__u32 index = 0; index < info->nr_prog_tags; index++) { + synthesize_bpf_prog_name(metadata->prog_names[index], + BPF_PROG_NAME_LEN, info, + map.btf, index); + } + + bpf_metadata_free_map_data(&map); + + return metadata; + } + + return NULL; +} + +static int synthesize_perf_record_bpf_metadata(const struct bpf_metadata *metadata, + const struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + const size_t event_size = metadata->event->header.size; + union perf_event *event; + int err = 0; + + event = zalloc(event_size + machine->id_hdr_size); + if (!event) + return -1; + memcpy(event, metadata->event, event_size); + memset((void *)event + event->header.size, 0, machine->id_hdr_size); + event->header.size += machine->id_hdr_size; + for (__u32 index = 0; index < metadata->nr_prog_names; index++) { + memcpy(event->bpf_metadata.prog_name, + metadata->prog_names[index], BPF_PROG_NAME_LEN); + err = perf_tool__process_synth_event(tool, event, machine, + process); + if (err != 0) + break; + } + + free(event); + return err; +} + +void bpf_metadata_free(struct bpf_metadata *metadata) +{ + if (metadata == NULL) + return; + for (__u32 index = 0; index < metadata->nr_prog_names; index++) + free(metadata->prog_names[index]); + free(metadata->prog_names); + free(metadata->event); + free(metadata); +} + +#else /* HAVE_LIBBPF_STRINGS_SUPPORT */ + +static struct bpf_metadata *bpf_metadata_create(struct bpf_prog_info *info __maybe_unused) +{ + return NULL; +} + +static int synthesize_perf_record_bpf_metadata(const struct bpf_metadata *metadata __maybe_unused, + const struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +void bpf_metadata_free(struct bpf_metadata *metadata __maybe_unused) +{ +} + +#endif /* HAVE_LIBBPF_STRINGS_SUPPORT */ + +struct bpf_metadata_final_ctx { + const struct perf_tool *tool; + perf_event__handler_t process; + struct machine *machine; +}; + +static void synthesize_final_bpf_metadata_cb(struct bpf_prog_info_node *node, + void *data) +{ + struct bpf_metadata_final_ctx *ctx = (struct bpf_metadata_final_ctx *)data; + struct bpf_metadata *metadata = node->metadata; + int err; + + if (metadata == NULL) + return; + err = synthesize_perf_record_bpf_metadata(metadata, ctx->tool, + ctx->process, ctx->machine); + if (err != 0) { + const char *prog_name = metadata->prog_names[0]; + + if (prog_name != NULL) + pr_warning("Couldn't synthesize final BPF metadata for %s.\n", prog_name); + 
else + pr_warning("Couldn't synthesize final BPF metadata.\n"); + } + bpf_metadata_free(metadata); + node->metadata = NULL; +} + +void perf_event__synthesize_final_bpf_metadata(struct perf_session *session, + perf_event__handler_t process) +{ + struct perf_env *env = &session->header.env; + struct bpf_metadata_final_ctx ctx = { + .tool = session->tool, + .process = process, + .machine = &session->machines.host, + }; + + perf_env__iterate_bpf_prog_info(env, synthesize_final_bpf_metadata_cb, + &ctx); +} + /* * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf * program. One PERF_RECORD_BPF_EVENT is generated for the program. And @@ -169,9 +534,10 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, { struct perf_record_ksymbol *ksymbol_event = &event->ksymbol; struct perf_record_bpf_event *bpf_event = &event->bpf; - struct perf_tool *tool = session->tool; + const struct perf_tool *tool = session->tool; struct bpf_prog_info_node *info_node; struct perf_bpil *info_linear; + struct bpf_metadata *metadata; struct bpf_prog_info *info; struct btf *btf = NULL; struct perf_env *env; @@ -183,7 +549,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, * for perf-record and perf-report use header.env; * otherwise, use global perf_env. */ - env = session->data ? &session->header.env : &perf_env; + env = perf_session__env(session); arrays = 1UL << PERF_BPIL_JITED_KSYMS; arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; @@ -192,6 +558,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, arrays |= 1UL << PERF_BPIL_JITED_INSNS; arrays |= 1UL << PERF_BPIL_LINE_INFO; arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO; + arrays |= 1UL << PERF_BPIL_MAP_IDS; info_linear = get_bpf_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { @@ -288,7 +655,17 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, } info_node->info_linear = info_linear; - perf_env__insert_bpf_prog_info(env, info_node); + info_node->metadata = NULL; + if (!perf_env__insert_bpf_prog_info(env, info_node)) { + /* + * Insert failed, likely because of a duplicate event + * made by the sideband thread. Ignore synthesizing the + * metadata. + */ + free(info_node); + goto out; + } + /* info_linear is now owned by info_node and shouldn't be freed below. 
*/ info_linear = NULL; /* @@ -297,6 +674,15 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, */ err = perf_tool__process_synth_event(tool, event, machine, process); + + /* Synthesize PERF_RECORD_BPF_METADATA */ + metadata = bpf_metadata_create(info); + if (metadata != NULL) { + err = synthesize_perf_record_bpf_metadata(metadata, + tool, process, + machine); + bpf_metadata_free(metadata); + } } out: @@ -309,7 +695,7 @@ struct kallsyms_parse { union perf_event *event; perf_event__handler_t process; struct machine *machine; - struct perf_tool *tool; + const struct perf_tool *tool; }; static int @@ -386,6 +772,9 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, int err; int fd; + if (opts->no_bpf_event) + return 0; + event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size); if (!event) return -1; @@ -444,18 +833,18 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, return err; } -static void perf_env__add_bpf_info(struct perf_env *env, u32 id) +static int perf_env__add_bpf_info(struct perf_env *env, u32 id) { struct bpf_prog_info_node *info_node; struct perf_bpil *info_linear; struct btf *btf = NULL; u64 arrays; u32 btf_id; - int fd; + int fd, err = 0; fd = bpf_prog_get_fd_by_id(id); if (fd < 0) - return; + return -EINVAL; arrays = 1UL << PERF_BPIL_JITED_KSYMS; arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; @@ -464,10 +853,12 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) arrays |= 1UL << PERF_BPIL_JITED_INSNS; arrays |= 1UL << PERF_BPIL_LINE_INFO; arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO; + arrays |= 1UL << PERF_BPIL_MAP_IDS; info_linear = get_bpf_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { pr_debug("%s: failed to get BPF program info. 
aborting\n", __func__); + err = PTR_ERR(info_linear); goto out; } @@ -476,36 +867,48 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) info_node = malloc(sizeof(struct bpf_prog_info_node)); if (info_node) { info_node->info_linear = info_linear; - perf_env__insert_bpf_prog_info(env, info_node); - } else + info_node->metadata = bpf_metadata_create(&info_linear->info); + if (!perf_env__insert_bpf_prog_info(env, info_node)) { + pr_debug("%s: duplicate add bpf info request for id %u\n", + __func__, btf_id); + free(info_linear); + free(info_node); + goto out; + } + } else { free(info_linear); + err = -ENOMEM; + goto out; + } if (btf_id == 0) goto out; btf = btf__load_from_kernel_by_id(btf_id); - if (libbpf_get_error(btf)) { - pr_debug("%s: failed to get BTF of id %u, aborting\n", - __func__, btf_id); - goto out; + if (!btf) { + err = -errno; + pr_debug("%s: failed to get BTF of id %u %d\n", __func__, btf_id, err); + } else { + perf_env__fetch_btf(env, btf_id, btf); } - perf_env__fetch_btf(env, btf_id, btf); out: btf__free(btf); close(fd); + return err; } static int bpf_event__sb_cb(union perf_event *event, void *data) { struct perf_env *env = data; + int ret = 0; if (event->header.type != PERF_RECORD_BPF_EVENT) return -1; switch (event->bpf.type) { case PERF_BPF_EVENT_PROG_LOAD: - perf_env__add_bpf_info(env, event->bpf.id); + ret = perf_env__add_bpf_info(env, event->bpf.id); case PERF_BPF_EVENT_PROG_UNLOAD: /* @@ -519,7 +922,7 @@ static int bpf_event__sb_cb(union perf_event *event, void *data) break; } - return 0; + return ret; } int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env) @@ -542,9 +945,9 @@ int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env) return evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env); } -void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, - struct perf_env *env, - FILE *fp) +void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, + struct perf_env *env, + FILE *fp) { __u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens); __u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms); @@ -560,7 +963,7 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, if (info->btf_id) { struct btf_node *node; - node = perf_env__find_btf(env, info->btf_id); + node = __perf_env__find_btf(env, info->btf_id); if (node) btf = btf__new((__u8 *)(node->data), node->data_size); diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 1bcbd4fb6c66..60d2c6637af5 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -17,8 +17,15 @@ struct record_opts; struct evlist; struct target; +struct bpf_metadata { + union perf_event *event; + char **prog_names; + __u64 nr_prog_names; +}; + struct bpf_prog_info_node { struct perf_bpil *info_linear; + struct bpf_metadata *metadata; struct rb_node rb_node; }; @@ -33,9 +40,10 @@ struct btf_node { int machine__process_bpf(struct machine *machine, union perf_event *event, struct perf_sample *sample); int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env); -void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, - struct perf_env *env, - FILE *fp); +void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, + struct perf_env *env, + FILE *fp); +void bpf_metadata_free(struct bpf_metadata *metadata); #else static inline int machine__process_bpf(struct machine *machine __maybe_unused, union perf_event *event __maybe_unused, @@ -50,9 +58,14 @@ static inline int 
evlist__add_bpf_sb_event(struct evlist *evlist __maybe_unused,
 	return 0;
 }
 
-static inline void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused,
-						  struct perf_env *env __maybe_unused,
-						  FILE *fp __maybe_unused)
+static inline void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused,
+						    struct perf_env *env __maybe_unused,
+						    FILE *fp __maybe_unused)
+{
+
+}
+
+static inline void bpf_metadata_free(struct bpf_metadata *metadata __maybe_unused)
 {
 }
diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c
index 0b30688d78a7..1a2e7b388d57 100644
--- a/tools/perf/util/bpf-filter.c
+++ b/tools/perf/util/bpf-filter.c
@@ -1,27 +1,87 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+/**
+ * Generic event filter for sampling events in BPF.
+ *
+ * The BPF program is fixed and just to read filter expressions in the 'filters'
+ * map and compare the sample data in order to reject samples that don't match.
+ * Each filter expression contains a sample flag (term) to compare, an operation
+ * (==, >=, and so on) and a value.
+ *
+ * Note that each entry has an array of filter expressions and it only succeeds
+ * when all of the expressions are satisfied. But it supports the logical OR
+ * using a GROUP operation which is satisfied when any of its member expression
+ * is evaluated to true. But it doesn't allow nested GROUP operations for now.
+ *
+ * To support non-root users, the filters map can be loaded and pinned in the BPF
+ * filesystem by root (perf record --setup-filter pin). Then each user will get
+ * a new entry in the shared filters map to fill the filter expressions. And the
+ * BPF program will find the filter using (task-id, event-id) as a key.
+ *
+ * The pinned BPF object (shared for regular users) has:
+ *
+ *                  event_hash                      |
+ *                 |        |                       |
+ *   event->id ---> |  id  | ---+     idx_hash      |     filters
+ *                 |        |   |    |        |     |    |       |
+ *                 |  ....  |   +-> |  idx  | --+--> | exprs | ---> perf_bpf_filter_entry[]
+ *                              |   |        |  |   |       |        .op
+ *   task id (tgid) ------------+   |  ....  |  |   |  ...  |        .term (+ part)
+ *                                              |                    .value
+ *                                              |
+ *                 ======= (root would skip this part) ========     (compares it in a loop)
+ *
+ * This is used for per-task use cases while system-wide profiling (normally from
+ * root user) uses a separate copy of the program and the maps for its own so that
+ * it can proceed even if a lot of non-root users are using the filters at the
+ * same time. In this case the filters map has a single entry and no need to use
+ * the hash maps to get the index (key) of the filters map (IOW it's always 0).
+ *
+ * The BPF program returns 1 to accept the sample or 0 to drop it.
+ * The 'dropped' map is to keep how many samples it dropped by the filter and
+ * it will be reported as lost samples.
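The chain in the diagram can also be walked from user space against the pinned maps. A minimal sketch, assuming the pin path resolves to /sys/fs/bpf/perf_filter and mirroring the idx_hash key fields named in this patch (the exact field types and padding are assumptions):

```c
#include <unistd.h>
#include <bpf/bpf.h>
#include <linux/types.h>

/* Assumed mirror of the patch's idx_hash key; layout is illustrative only. */
struct idx_hash_key {
	__u64 evt_id;
	__u32 tgid;
	__u32 pad;
};

/* Resolve (per-instance event id, tgid) to an index into 'filters',
 * following the same chain the BPF program walks. */
static int lookup_filter_idx(__u64 sample_id, __u32 tgid)
{
	struct idx_hash_key key = { .tgid = tgid };
	int evt_fd, idx_fd, idx = -1;
	__u64 evt_id;

	evt_fd = bpf_obj_get("/sys/fs/bpf/perf_filter/event_hash");
	idx_fd = bpf_obj_get("/sys/fs/bpf/perf_filter/idx_hash");
	if (evt_fd < 0 || idx_fd < 0)
		goto out;

	/* event_hash: any per-instance event id -> representative id */
	if (bpf_map_lookup_elem(evt_fd, &sample_id, &evt_id) == 0) {
		key.evt_id = evt_id;
		/* idx_hash: (event id, tgid) -> index into 'filters' */
		bpf_map_lookup_elem(idx_fd, &key, &idx);
	}
out:
	if (evt_fd >= 0)
		close(evt_fd);
	if (idx_fd >= 0)
		close(idx_fd);
	return idx;
}
```

A negative return means the objects are not pinned or no filter was registered for this (task, event) pair, matching the "root would skip this part" note above.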
+ */ #include <stdlib.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/stat.h> #include <bpf/bpf.h> #include <linux/err.h> +#include <linux/list.h> +#include <api/fs/fs.h> #include <internal/xyarray.h> +#include <perf/threadmap.h> +#include "util/cap.h" #include "util/debug.h" #include "util/evsel.h" +#include "util/target.h" +#include "util/bpf-utils.h" #include "util/bpf-filter.h" -#include "util/bpf-filter-flex.h" -#include "util/bpf-filter-bison.h" +#include <util/bpf-filter-flex.h> +#include <util/bpf-filter-bison.h> #include "bpf_skel/sample-filter.h" #include "bpf_skel/sample_filter.skel.h" #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) -#define __PERF_SAMPLE_TYPE(st, opt) { st, #st, opt } -#define PERF_SAMPLE_TYPE(_st, opt) __PERF_SAMPLE_TYPE(PERF_SAMPLE_##_st, opt) +#define __PERF_SAMPLE_TYPE(tt, st, opt) { tt, #st, opt } +#define PERF_SAMPLE_TYPE(_st, opt) __PERF_SAMPLE_TYPE(PBF_TERM_##_st, PERF_SAMPLE_##_st, opt) + +/* Index in the pinned 'filters' map. Should be released after use. */ +struct pinned_filter_idx { + struct list_head list; + struct evsel *evsel; + u64 event_id; + int hash_idx; +}; + +static LIST_HEAD(pinned_filters); static const struct perf_sample_info { - u64 type; + enum perf_bpf_filter_term type; const char *name; const char *option; } sample_table[] = { @@ -42,14 +102,17 @@ static const struct perf_sample_info { PERF_SAMPLE_TYPE(TRANSACTION, "--transaction"), PERF_SAMPLE_TYPE(CODE_PAGE_SIZE, "--code-page-size"), PERF_SAMPLE_TYPE(DATA_PAGE_SIZE, "--data-page-size"), + PERF_SAMPLE_TYPE(CGROUP, "--all-cgroups"), }; -static const struct perf_sample_info *get_sample_info(u64 flags) +static int get_pinned_fd(const char *name); + +static const struct perf_sample_info *get_sample_info(enum perf_bpf_filter_term type) { size_t i; for (i = 0; i < ARRAY_SIZE(sample_table); i++) { - if (sample_table[i].type == flags) + if (sample_table[i].type == type) return &sample_table[i]; } return NULL; @@ -59,13 +122,29 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr * { const struct perf_sample_info *info; - if (evsel->core.attr.sample_type & expr->sample_flags) + if (expr->term >= PBF_TERM_SAMPLE_START && expr->term <= PBF_TERM_SAMPLE_END && + (evsel->core.attr.sample_type & (1 << (expr->term - PBF_TERM_SAMPLE_START)))) + return 0; + + if (expr->term == PBF_TERM_UID || expr->term == PBF_TERM_GID) { + /* Not dependent on the sample_type as computed from a BPF helper. 
*/ + return 0; + } + + if (expr->op == PBF_OP_GROUP_BEGIN) { + struct perf_bpf_filter_expr *group; + + list_for_each_entry(group, &expr->groups, list) { + if (check_sample_flags(evsel, group) < 0) + return -1; + } return 0; + } - info = get_sample_info(expr->sample_flags); + info = get_sample_info(expr->term); if (info == NULL) { - pr_err("Error: %s event does not have sample flags %lx\n", - evsel__name(evsel), expr->sample_flags); + pr_err("Error: %s event does not have sample flags %d\n", + evsel__name(evsel), expr->term); return -1; } @@ -75,95 +154,463 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr * return -1; } -int perf_bpf_filter__prepare(struct evsel *evsel) +static int get_filter_entries(struct evsel *evsel, struct perf_bpf_filter_entry *entry) { - int i, x, y, fd; - struct sample_filter_bpf *skel; - struct bpf_program *prog; - struct bpf_link *link; + int i = 0; struct perf_bpf_filter_expr *expr; - skel = sample_filter_bpf__open_and_load(); - if (!skel) { - pr_err("Failed to load perf sample-filter BPF skeleton\n"); - return -1; - } - - i = 0; - fd = bpf_map__fd(skel->maps.filters); list_for_each_entry(expr, &evsel->bpf_filters, list) { - struct perf_bpf_filter_entry entry = { - .op = expr->op, - .part = expr->part, - .flags = expr->sample_flags, - .value = expr->val, - }; - if (check_sample_flags(evsel, expr) < 0) - return -1; + return -EINVAL; + + if (i == MAX_FILTERS) + return -E2BIG; - bpf_map_update_elem(fd, &i, &entry, BPF_ANY); + entry[i].op = expr->op; + entry[i].part = expr->part; + entry[i].term = expr->term; + entry[i].value = expr->val; i++; if (expr->op == PBF_OP_GROUP_BEGIN) { struct perf_bpf_filter_expr *group; list_for_each_entry(group, &expr->groups, list) { - struct perf_bpf_filter_entry group_entry = { - .op = group->op, - .part = group->part, - .flags = group->sample_flags, - .value = group->val, - }; - bpf_map_update_elem(fd, &i, &group_entry, BPF_ANY); + if (i == MAX_FILTERS) + return -E2BIG; + + entry[i].op = group->op; + entry[i].part = group->part; + entry[i].term = group->term; + entry[i].value = group->val; i++; } - memset(&entry, 0, sizeof(entry)); - entry.op = PBF_OP_GROUP_END; - bpf_map_update_elem(fd, &i, &entry, BPF_ANY); + if (i == MAX_FILTERS) + return -E2BIG; + + entry[i].op = PBF_OP_GROUP_END; i++; } } - if (i > MAX_FILTERS) { - pr_err("Too many filters: %d (max = %d)\n", i, MAX_FILTERS); + if (i < MAX_FILTERS) { + /* to terminate the loop early */ + entry[i].op = PBF_OP_DONE; + i++; + } + return 0; +} + +static int convert_to_tgid(int tid) +{ + char path[128]; + char *buf, *p, *q; + int tgid; + size_t len; + + scnprintf(path, sizeof(path), "%d/status", tid); + if (procfs__read_str(path, &buf, &len) < 0) + return -1; + + p = strstr(buf, "Tgid:"); + if (p == NULL) { + free(buf); + return -1; + } + + tgid = strtol(p + 6, &q, 0); + free(buf); + if (*q != '\n') + return -1; + + return tgid; +} + +/* + * The event might be closed already so we cannot get the list of ids using FD + * like in create_event_hash() below, let's iterate the event_hash map and + * delete all entries that have the event id as a key. + */ +static void destroy_event_hash(u64 event_id) +{ + int fd; + u64 key, *prev_key = NULL; + int num = 0, alloced = 32; + u64 *ids = calloc(alloced, sizeof(*ids)); + + if (ids == NULL) + return; + + fd = get_pinned_fd("event_hash"); + if (fd < 0) { + pr_debug("cannot get fd for 'event_hash' map\n"); + free(ids); + return; + } + + /* Iterate the whole map to collect keys for the event id. 
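convert_to_tgid() above leans on perf's procfs__read_str() helper; outside of perf the same lookup is a few lines of stdio. A standalone equivalent (tid_to_tgid is a hypothetical name):

```c
#include <stdio.h>

/* Resolve a thread id to its process id by reading the "Tgid:" line
 * in /proc/<tid>/status, as convert_to_tgid() does. */
static int tid_to_tgid(int tid)
{
	char path[64], line[256];
	int tgid = -1;
	FILE *fp;

	snprintf(path, sizeof(path), "/proc/%d/status", tid);
	fp = fopen(path, "r");
	if (!fp)
		return -1;

	while (fgets(line, sizeof(line), fp)) {
		/* the blank in the format skips the tab after "Tgid:" */
		if (sscanf(line, "Tgid: %d", &tgid) == 1)
			break;
	}
	fclose(fp);
	return tgid;
}
```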
*/ + while (!bpf_map_get_next_key(fd, prev_key, &key)) { + u64 id; + + if (bpf_map_lookup_elem(fd, &key, &id) == 0 && id == event_id) { + if (num == alloced) { + void *tmp; + + alloced *= 2; + tmp = realloc(ids, alloced * sizeof(*ids)); + if (tmp == NULL) + break; + + ids = tmp; + } + ids[num++] = key; + } + + prev_key = &key; + } + + for (int i = 0; i < num; i++) + bpf_map_delete_elem(fd, &ids[i]); + + free(ids); + close(fd); +} + +/* + * Return a representative id if ok, or 0 for failures. + * + * The perf_event->id is good for this, but an evsel would have multiple + * instances for CPUs and tasks. So pick up the first id and setup a hash + * from id of each instance to the representative id (the first one). + */ +static u64 create_event_hash(struct evsel *evsel) +{ + int x, y, fd; + u64 the_id = 0, id; + + fd = get_pinned_fd("event_hash"); + if (fd < 0) { + pr_err("cannot get fd for 'event_hash' map\n"); + return 0; + } + + for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) { + for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) { + int ret = ioctl(FD(evsel, x, y), PERF_EVENT_IOC_ID, &id); + + if (ret < 0) { + pr_err("Failed to get the event id\n"); + if (the_id) + destroy_event_hash(the_id); + return 0; + } + + if (the_id == 0) + the_id = id; + + bpf_map_update_elem(fd, &id, &the_id, BPF_ANY); + } + } + + close(fd); + return the_id; +} + +static void destroy_idx_hash(struct pinned_filter_idx *pfi) +{ + int fd, nr; + struct perf_thread_map *threads; + + fd = get_pinned_fd("filters"); + bpf_map_delete_elem(fd, &pfi->hash_idx); + close(fd); + + if (pfi->event_id) + destroy_event_hash(pfi->event_id); + + threads = perf_evsel__threads(&pfi->evsel->core); + if (threads == NULL) + return; + + fd = get_pinned_fd("idx_hash"); + nr = perf_thread_map__nr(threads); + for (int i = 0; i < nr; i++) { + /* The target task might be dead already, just try the pid */ + struct idx_hash_key key = { + .evt_id = pfi->event_id, + .tgid = perf_thread_map__pid(threads, i), + }; + + bpf_map_delete_elem(fd, &key); + } + close(fd); +} + +/* Maintain a hashmap from (tgid, event-id) to filter index */ +static int create_idx_hash(struct evsel *evsel, struct perf_bpf_filter_entry *entry) +{ + int filter_idx; + int fd, nr, last; + u64 event_id = 0; + struct pinned_filter_idx *pfi = NULL; + struct perf_thread_map *threads; + + fd = get_pinned_fd("filters"); + if (fd < 0) { + pr_err("cannot get fd for 'filters' map\n"); + return fd; + } + + /* Find the first available entry in the filters map */ + for (filter_idx = 0; filter_idx < MAX_FILTERS; filter_idx++) { + if (bpf_map_update_elem(fd, &filter_idx, entry, BPF_NOEXIST) == 0) + break; + } + close(fd); + + if (filter_idx == MAX_FILTERS) { + pr_err("Too many users for the filter map\n"); + return -EBUSY; + } + + pfi = zalloc(sizeof(*pfi)); + if (pfi == NULL) { + pr_err("Cannot save pinned filter index\n"); + return -ENOMEM; + } + + pfi->evsel = evsel; + pfi->hash_idx = filter_idx; + + event_id = create_event_hash(evsel); + if (event_id == 0) { + pr_err("Cannot update the event hash\n"); + goto err; + } + + pfi->event_id = event_id; + + threads = perf_evsel__threads(&evsel->core); + if (threads == NULL) { + pr_err("Cannot get the thread list of the event\n"); + goto err; + } + + /* save the index to a hash map */ + fd = get_pinned_fd("idx_hash"); + if (fd < 0) { + pr_err("cannot get fd for 'idx_hash' map\n"); + goto err; + } + + last = -1; + nr = perf_thread_map__nr(threads); + for (int i = 0; i < nr; i++) { + int pid = perf_thread_map__pid(threads, i); + int tgid; + 
struct idx_hash_key key = { + .evt_id = event_id, + }; + + /* it actually needs tgid, let's get tgid from /proc. */ + tgid = convert_to_tgid(pid); + if (tgid < 0) { + /* the thread may be dead, ignore. */ + continue; + } + + if (tgid == last) + continue; + last = tgid; + key.tgid = tgid; + + if (bpf_map_update_elem(fd, &key, &filter_idx, BPF_ANY) < 0) { + pr_err("Failed to update the idx_hash\n"); + close(fd); + goto err; + } + pr_debug("bpf-filter: idx_hash (task=%d,%s) -> %d\n", + tgid, evsel__name(evsel), filter_idx); + } + + list_add(&pfi->list, &pinned_filters); + close(fd); + return filter_idx; + +err: + destroy_idx_hash(pfi); + free(pfi); + return -1; +} + +int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target) +{ + int i, x, y, fd, ret; + struct sample_filter_bpf *skel = NULL; + struct bpf_program *prog; + struct bpf_link *link; + struct perf_bpf_filter_entry *entry; + bool needs_idx_hash = !target__has_cpu(target); +#if LIBBPF_CURRENT_VERSION_GEQ(1, 7) + DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts, + .dont_enable = true); +#else + DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); +#endif + + entry = calloc(MAX_FILTERS, sizeof(*entry)); + if (entry == NULL) return -1; + + ret = get_filter_entries(evsel, entry); + if (ret < 0) { + pr_err("Failed to process filter entries\n"); + goto err; + } + + if (needs_idx_hash && geteuid() != 0) { + int zero = 0; + + /* The filters map is shared among other processes */ + ret = create_idx_hash(evsel, entry); + if (ret < 0) + goto err; + + fd = get_pinned_fd("dropped"); + if (fd < 0) { + ret = fd; + goto err; + } + + /* Reset the lost count */ + bpf_map_update_elem(fd, &ret, &zero, BPF_ANY); + close(fd); + + fd = get_pinned_fd("perf_sample_filter"); + if (fd < 0) { + ret = fd; + goto err; + } + + for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) { + for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) { + ret = ioctl(FD(evsel, x, y), PERF_EVENT_IOC_SET_BPF, fd); + if (ret < 0) { + pr_err("Failed to attach perf sample-filter\n"); + close(fd); + goto err; + } + } + } + + close(fd); + free(entry); + return 0; + } + + skel = sample_filter_bpf__open_and_load(); + if (!skel) { + ret = -errno; + pr_err("Failed to load perf sample-filter BPF skeleton\n"); + goto err; + } + + i = 0; + fd = bpf_map__fd(skel->maps.filters); + + /* The filters map has only one entry in this case */ + if (bpf_map_update_elem(fd, &i, entry, BPF_ANY) < 0) { + ret = -errno; + pr_err("Failed to update the filter map\n"); + goto err; } + prog = skel->progs.perf_sample_filter; for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) { for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) { - link = bpf_program__attach_perf_event(prog, FD(evsel, x, y)); + link = bpf_program__attach_perf_event_opts(prog, FD(evsel, x, y), + &pe_opts); if (IS_ERR(link)) { pr_err("Failed to attach perf sample-filter program\n"); - return PTR_ERR(link); + ret = PTR_ERR(link); + goto err; } } } + free(entry); evsel->bpf_skel = skel; return 0; + +err: + free(entry); + if (!list_empty(&pinned_filters)) { + struct pinned_filter_idx *pfi, *tmp; + + list_for_each_entry_safe(pfi, tmp, &pinned_filters, list) { + destroy_idx_hash(pfi); + list_del(&pfi->list); + free(pfi); + } + } + sample_filter_bpf__destroy(skel); + return ret; } int perf_bpf_filter__destroy(struct evsel *evsel) { struct perf_bpf_filter_expr *expr, *tmp; + struct pinned_filter_idx *pfi, *pos; list_for_each_entry_safe(expr, tmp, &evsel->bpf_filters, list) { list_del(&expr->list); free(expr); } 
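Note that the non-root path in perf_bpf_filter__prepare() above never creates a bpf_link; it points an already-open perf event at the pinned program via PERF_EVENT_IOC_SET_BPF. A reduced sketch of that step, assuming perf_event_fd came from perf_event_open() and root has pinned the objects under perf_filter:

```c
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/perf_event.h>
#include <bpf/bpf.h>

static int attach_pinned_filter(int perf_event_fd)
{
	int prog_fd = bpf_obj_get("/sys/fs/bpf/perf_filter/perf_sample_filter");

	if (prog_fd < 0)
		return -1;

	/* Associate the BPF program with the event: from now on every
	 * sample runs through the filter before reaching the buffer. */
	if (ioctl(perf_event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
		close(prog_fd);
		return -1;
	}
	close(prog_fd);
	return 0;
}
```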
sample_filter_bpf__destroy(evsel->bpf_skel); + + list_for_each_entry_safe(pfi, pos, &pinned_filters, list) { + destroy_idx_hash(pfi); + list_del(&pfi->list); + free(pfi); + } return 0; } u64 perf_bpf_filter__lost_count(struct evsel *evsel) { - struct sample_filter_bpf *skel = evsel->bpf_skel; + int count = 0; + + if (list_empty(&evsel->bpf_filters)) + return 0; + + if (!list_empty(&pinned_filters)) { + int fd = get_pinned_fd("dropped"); + struct pinned_filter_idx *pfi; + + if (fd < 0) + return 0; - return skel ? skel->bss->dropped : 0; + list_for_each_entry(pfi, &pinned_filters, list) { + if (pfi->evsel != evsel) + continue; + + bpf_map_lookup_elem(fd, &pfi->hash_idx, &count); + break; + } + close(fd); + } else if (evsel->bpf_skel) { + struct sample_filter_bpf *skel = evsel->bpf_skel; + int fd = bpf_map__fd(skel->maps.dropped); + int idx = 0; + + bpf_map_lookup_elem(fd, &idx, &count); + } + + return count; } -struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flags, int part, +struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term term, + int part, enum perf_bpf_filter_op op, unsigned long val) { @@ -171,7 +618,7 @@ struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flag expr = malloc(sizeof(*expr)); if (expr != NULL) { - expr->sample_flags = sample_flags; + expr->term = term; expr->part = part; expr->op = op; expr->val = val; @@ -180,11 +627,38 @@ struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flag return expr; } +static bool check_bpf_filter_capable(void) +{ + bool used_root; + + if (perf_cap__capable(CAP_BPF, &used_root)) + return true; + + if (!used_root) { + /* Check if root already pinned the filter programs and maps */ + int fd = get_pinned_fd("filters"); + + if (fd >= 0) { + close(fd); + return true; + } + } + + pr_err("Error: BPF filter only works for %s!\n" + "\tPlease run 'perf record --setup-filter pin' as root first.\n", + used_root ? 
"root" : "users with the CAP_BPF capability"); + + return false; +} + int perf_bpf_filter__parse(struct list_head *expr_head, const char *str) { YY_BUFFER_STATE buffer; int ret; + if (!check_bpf_filter_capable()) + return -EPERM; + buffer = perf_bpf_filter__scan_string(str); ret = perf_bpf_filter_parse(expr_head); @@ -195,3 +669,139 @@ int perf_bpf_filter__parse(struct list_head *expr_head, const char *str) return ret; } + +int perf_bpf_filter__pin(void) +{ + struct sample_filter_bpf *skel; + char *path = NULL; + int dir_fd, ret = -1; + + skel = sample_filter_bpf__open(); + if (!skel) { + ret = -errno; + pr_err("Failed to open perf sample-filter BPF skeleton\n"); + goto err; + } + + /* pinned program will use pid-hash */ + bpf_map__set_max_entries(skel->maps.filters, MAX_FILTERS); + bpf_map__set_max_entries(skel->maps.event_hash, MAX_EVT_HASH); + bpf_map__set_max_entries(skel->maps.idx_hash, MAX_IDX_HASH); + bpf_map__set_max_entries(skel->maps.dropped, MAX_FILTERS); + skel->rodata->use_idx_hash = 1; + + if (sample_filter_bpf__load(skel) < 0) { + ret = -errno; + pr_err("Failed to load perf sample-filter BPF skeleton\n"); + goto err; + } + + if (asprintf(&path, "%s/fs/bpf/%s", sysfs__mountpoint(), + PERF_BPF_FILTER_PIN_PATH) < 0) { + ret = -errno; + pr_err("Failed to allocate pathname in the BPF-fs\n"); + goto err; + } + + ret = bpf_object__pin(skel->obj, path); + if (ret < 0) { + pr_err("Failed to pin BPF filter objects\n"); + goto err; + } + + /* setup access permissions for the pinned objects */ + dir_fd = open(path, O_PATH); + if (dir_fd < 0) { + bpf_object__unpin(skel->obj, path); + ret = dir_fd; + goto err; + } + + /* BPF-fs root has the sticky bit */ + if (fchmodat(dir_fd, "..", 01755, 0) < 0) { + pr_debug("chmod for BPF-fs failed\n"); + ret = -errno; + goto err_close; + } + + /* perf_filter directory */ + if (fchmodat(dir_fd, ".", 0755, 0) < 0) { + pr_debug("chmod for perf_filter directory failed?\n"); + ret = -errno; + goto err_close; + } + + /* programs need write permission for some reason */ + if (fchmodat(dir_fd, "perf_sample_filter", 0777, 0) < 0) { + pr_debug("chmod for perf_sample_filter failed\n"); + ret = -errno; + } + /* maps */ + if (fchmodat(dir_fd, "filters", 0666, 0) < 0) { + pr_debug("chmod for filters failed\n"); + ret = -errno; + } + if (fchmodat(dir_fd, "event_hash", 0666, 0) < 0) { + pr_debug("chmod for event_hash failed\n"); + ret = -errno; + } + if (fchmodat(dir_fd, "idx_hash", 0666, 0) < 0) { + pr_debug("chmod for idx_hash failed\n"); + ret = -errno; + } + if (fchmodat(dir_fd, "dropped", 0666, 0) < 0) { + pr_debug("chmod for dropped failed\n"); + ret = -errno; + } + +err_close: + close(dir_fd); + +err: + free(path); + sample_filter_bpf__destroy(skel); + return ret; +} + +int perf_bpf_filter__unpin(void) +{ + struct sample_filter_bpf *skel; + char *path = NULL; + int ret = -1; + + skel = sample_filter_bpf__open_and_load(); + if (!skel) { + ret = -errno; + pr_err("Failed to open perf sample-filter BPF skeleton\n"); + goto err; + } + + if (asprintf(&path, "%s/fs/bpf/%s", sysfs__mountpoint(), + PERF_BPF_FILTER_PIN_PATH) < 0) { + ret = -errno; + pr_err("Failed to allocate pathname in the BPF-fs\n"); + goto err; + } + + ret = bpf_object__unpin(skel->obj, path); + +err: + free(path); + sample_filter_bpf__destroy(skel); + return ret; +} + +static int get_pinned_fd(const char *name) +{ + char *path = NULL; + int fd; + + if (asprintf(&path, "%s/fs/bpf/%s/%s", sysfs__mountpoint(), + PERF_BPF_FILTER_PIN_PATH, name) < 0) + return -1; + + fd = bpf_obj_get(path); + + 
free(path); + return fd; +} diff --git a/tools/perf/util/bpf-filter.h b/tools/perf/util/bpf-filter.h index 7afd159411b8..818c554b91b2 100644 --- a/tools/perf/util/bpf-filter.h +++ b/tools/perf/util/bpf-filter.h @@ -5,35 +5,48 @@ #include <linux/list.h> #include "bpf_skel/sample-filter.h" +#include "util/debug.h" struct perf_bpf_filter_expr { struct list_head list; struct list_head groups; enum perf_bpf_filter_op op; int part; - unsigned long sample_flags; + enum perf_bpf_filter_term term; unsigned long val; }; struct evsel; +struct target; + +/* path in BPF-fs for the pinned program and maps */ +#define PERF_BPF_FILTER_PIN_PATH "perf_filter" #ifdef HAVE_BPF_SKEL -struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flags, int part, +struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term term, + int part, enum perf_bpf_filter_op op, unsigned long val); int perf_bpf_filter__parse(struct list_head *expr_head, const char *str); -int perf_bpf_filter__prepare(struct evsel *evsel); +int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target); int perf_bpf_filter__destroy(struct evsel *evsel); u64 perf_bpf_filter__lost_count(struct evsel *evsel); +int perf_bpf_filter__pin(void); +int perf_bpf_filter__unpin(void); #else /* !HAVE_BPF_SKEL */ +#include <errno.h> + static inline int perf_bpf_filter__parse(struct list_head *expr_head __maybe_unused, const char *str __maybe_unused) { + pr_err("Error: BPF filter is requested but perf is not built with BPF.\n" + "\tPlease make sure to build with libbpf and BPF skeleton.\n"); return -EOPNOTSUPP; } -static inline int perf_bpf_filter__prepare(struct evsel *evsel __maybe_unused) +static inline int perf_bpf_filter__prepare(struct evsel *evsel __maybe_unused, + struct target *target __maybe_unused) { return -EOPNOTSUPP; } @@ -45,5 +58,13 @@ static inline u64 perf_bpf_filter__lost_count(struct evsel *evsel __maybe_unused { return 0; } +static inline int perf_bpf_filter__pin(void) +{ + return -EOPNOTSUPP; +} +static inline int perf_bpf_filter__unpin(void) +{ + return -EOPNOTSUPP; +} #endif /* HAVE_BPF_SKEL*/ #endif /* PERF_UTIL_BPF_FILTER_H */ diff --git a/tools/perf/util/bpf-filter.l b/tools/perf/util/bpf-filter.l index d4ff0f1345cd..6aa65ade3385 100644 --- a/tools/perf/util/bpf-filter.l +++ b/tools/perf/util/bpf-filter.l @@ -9,20 +9,32 @@ #include "bpf-filter.h" #include "bpf-filter-bison.h" -static int sample(unsigned long sample_flag) +extern int perf_bpf_filter_needs_path; + +static int sample(enum perf_bpf_filter_term term) { - perf_bpf_filter_lval.sample.type = sample_flag; + perf_bpf_filter_needs_path = 0; + perf_bpf_filter_lval.sample.term = term; perf_bpf_filter_lval.sample.part = 0; return BFT_SAMPLE; } -static int sample_part(unsigned long sample_flag, int part) +static int sample_part(enum perf_bpf_filter_term term, int part) { - perf_bpf_filter_lval.sample.type = sample_flag; + perf_bpf_filter_needs_path = 0; + perf_bpf_filter_lval.sample.term = term; perf_bpf_filter_lval.sample.part = part; return BFT_SAMPLE; } +static int sample_path(enum perf_bpf_filter_term term) +{ + perf_bpf_filter_needs_path = 1; + perf_bpf_filter_lval.sample.term = term; + perf_bpf_filter_lval.sample.part = 0; + return BFT_SAMPLE_PATH; +} + static int operator(enum perf_bpf_filter_op op) { perf_bpf_filter_lval.op = op; @@ -48,10 +60,15 @@ static int constant(int val) return BFT_NUM; } -static int error(const char *str) +static int path_or_error(void) { - printf("perf_bpf_filter: Unexpected filter %s: %s\n", str, 
perf_bpf_filter_text); - return BFT_ERROR; + if (!perf_bpf_filter_needs_path) { + printf("perf_bpf_filter: Error: Unexpected item: %s\n", + perf_bpf_filter_text); + return BFT_ERROR; + } + perf_bpf_filter_lval.path = perf_bpf_filter_text; + return BFT_PATH; } %} @@ -59,6 +76,7 @@ static int error(const char *str) num_dec [0-9]+ num_hex 0[Xx][0-9a-fA-F]+ space [ \t]+ +path [^ \t\n,]+ ident [_a-zA-Z][_a-zA-Z0-9]+ %% @@ -67,34 +85,37 @@ ident [_a-zA-Z][_a-zA-Z0-9]+ {num_hex} { return value(16); } {space} { } -ip { return sample(PERF_SAMPLE_IP); } -id { return sample(PERF_SAMPLE_ID); } -tid { return sample(PERF_SAMPLE_TID); } -pid { return sample_part(PERF_SAMPLE_TID, 1); } -cpu { return sample(PERF_SAMPLE_CPU); } -time { return sample(PERF_SAMPLE_TIME); } -addr { return sample(PERF_SAMPLE_ADDR); } -period { return sample(PERF_SAMPLE_PERIOD); } -txn { return sample(PERF_SAMPLE_TRANSACTION); } -weight { return sample(PERF_SAMPLE_WEIGHT); } -weight1 { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 1); } -weight2 { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 2); } -weight3 { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 3); } -ins_lat { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 2); } /* alias for weight2 */ -p_stage_cyc { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 3); } /* alias for weight3 */ -retire_lat { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 3); } /* alias for weight3 */ -phys_addr { return sample(PERF_SAMPLE_PHYS_ADDR); } -code_pgsz { return sample(PERF_SAMPLE_CODE_PAGE_SIZE); } -data_pgsz { return sample(PERF_SAMPLE_DATA_PAGE_SIZE); } -mem_op { return sample_part(PERF_SAMPLE_DATA_SRC, 1); } -mem_lvlnum { return sample_part(PERF_SAMPLE_DATA_SRC, 2); } -mem_lvl { return sample_part(PERF_SAMPLE_DATA_SRC, 2); } /* alias for mem_lvlnum */ -mem_snoop { return sample_part(PERF_SAMPLE_DATA_SRC, 3); } /* include snoopx */ -mem_remote { return sample_part(PERF_SAMPLE_DATA_SRC, 4); } -mem_lock { return sample_part(PERF_SAMPLE_DATA_SRC, 5); } -mem_dtlb { return sample_part(PERF_SAMPLE_DATA_SRC, 6); } -mem_blk { return sample_part(PERF_SAMPLE_DATA_SRC, 7); } -mem_hops { return sample_part(PERF_SAMPLE_DATA_SRC, 8); } +ip { return sample(PBF_TERM_IP); } +id { return sample(PBF_TERM_ID); } +tid { return sample(PBF_TERM_TID); } +pid { return sample_part(PBF_TERM_TID, 1); } +cpu { return sample(PBF_TERM_CPU); } +time { return sample(PBF_TERM_TIME); } +addr { return sample(PBF_TERM_ADDR); } +period { return sample(PBF_TERM_PERIOD); } +txn { return sample(PBF_TERM_TRANSACTION); } +weight { return sample(PBF_TERM_WEIGHT); } +weight1 { return sample_part(PBF_TERM_WEIGHT_STRUCT, 1); } +weight2 { return sample_part(PBF_TERM_WEIGHT_STRUCT, 2); } +weight3 { return sample_part(PBF_TERM_WEIGHT_STRUCT, 3); } +ins_lat { return sample_part(PBF_TERM_WEIGHT_STRUCT, 2); } /* alias for weight2 */ +p_stage_cyc { return sample_part(PBF_TERM_WEIGHT_STRUCT, 3); } /* alias for weight3 */ +retire_lat { return sample_part(PBF_TERM_WEIGHT_STRUCT, 3); } /* alias for weight3 */ +phys_addr { return sample(PBF_TERM_PHYS_ADDR); } +code_pgsz { return sample(PBF_TERM_CODE_PAGE_SIZE); } +data_pgsz { return sample(PBF_TERM_DATA_PAGE_SIZE); } +mem_op { return sample_part(PBF_TERM_DATA_SRC, 1); } +mem_lvlnum { return sample_part(PBF_TERM_DATA_SRC, 2); } +mem_lvl { return sample_part(PBF_TERM_DATA_SRC, 2); } /* alias for mem_lvlnum */ +mem_snoop { return sample_part(PBF_TERM_DATA_SRC, 3); } /* include snoopx */ +mem_remote { return sample_part(PBF_TERM_DATA_SRC, 4); } +mem_lock { return sample_part(PBF_TERM_DATA_SRC, 5); } 
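The rules above and below amount to a keyword table: each token selects a term and, for composite samples such as PERF_SAMPLE_DATA_SRC, a part number picking a field inside it. A trimmed-down illustration, with a local enum standing in for enum perf_bpf_filter_term from bpf_skel/sample-filter.h:

```c
#include <stddef.h>
#include <string.h>

enum term { TERM_TID, TERM_WEIGHT_STRUCT, TERM_DATA_SRC };

struct keyword { const char *name; enum term term; int part; };

static const struct keyword keywords[] = {
	{ "tid",     TERM_TID,           0 },
	{ "pid",     TERM_TID,           1 },	/* upper half of the TID sample */
	{ "ins_lat", TERM_WEIGHT_STRUCT, 2 },	/* alias for weight2 */
	{ "mem_op",  TERM_DATA_SRC,      1 },
};

static const struct keyword *find_keyword(const char *name)
{
	for (size_t i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) {
		if (!strcmp(keywords[i].name, name))
			return &keywords[i];
	}
	return NULL;
}
```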
+mem_dtlb { return sample_part(PBF_TERM_DATA_SRC, 6); } +mem_blk { return sample_part(PBF_TERM_DATA_SRC, 7); } +mem_hops { return sample_part(PBF_TERM_DATA_SRC, 8); } +uid { return sample(PBF_TERM_UID); } +gid { return sample(PBF_TERM_GID); } +cgroup { return sample_path(PBF_TERM_CGROUP); } "==" { return operator(PBF_OP_EQ); } "!=" { return operator(PBF_OP_NEQ); } @@ -153,7 +174,6 @@ hops3 { return constant(PERF_MEM_HOPS_3); } "," { return ','; } "||" { return BFT_LOGICAL_OR; } -{ident} { return error("ident"); } -. { return error("input"); } +{path} { return path_or_error(); } %% diff --git a/tools/perf/util/bpf-filter.y b/tools/perf/util/bpf-filter.y index 07d6c7926c13..5a79a8e7a45b 100644 --- a/tools/perf/util/bpf-filter.y +++ b/tools/perf/util/bpf-filter.y @@ -3,11 +3,21 @@ %{ +#ifndef NDEBUG +#define YYDEBUG 1 +#endif + #include <stdio.h> #include <string.h> #include <linux/compiler.h> #include <linux/list.h> #include "bpf-filter.h" +#include "cgroup.h" + +int perf_bpf_filter_lex(void); + +/* To indicate if the current term needs a pathname or not */ +int perf_bpf_filter_needs_path; static void perf_bpf_filter_error(struct list_head *expr __maybe_unused, char const *msg) @@ -20,20 +30,22 @@ static void perf_bpf_filter_error(struct list_head *expr __maybe_unused, %union { unsigned long num; + char *path; struct { - unsigned long type; + enum perf_bpf_filter_term term; int part; } sample; enum perf_bpf_filter_op op; struct perf_bpf_filter_expr *expr; } -%token BFT_SAMPLE BFT_OP BFT_ERROR BFT_NUM BFT_LOGICAL_OR +%token BFT_SAMPLE BFT_SAMPLE_PATH BFT_OP BFT_ERROR BFT_NUM BFT_LOGICAL_OR BFT_PATH %type <expr> filter_term filter_expr %destructor { free ($$); } <expr> -%type <sample> BFT_SAMPLE +%type <sample> BFT_SAMPLE BFT_SAMPLE_PATH %type <op> BFT_OP %type <num> BFT_NUM +%type <path> BFT_PATH %% @@ -56,7 +68,8 @@ filter_term BFT_LOGICAL_OR filter_expr if ($1->op == PBF_OP_GROUP_BEGIN) { expr = $1; } else { - expr = perf_bpf_filter_expr__new(0, 0, PBF_OP_GROUP_BEGIN, 1); + expr = perf_bpf_filter_expr__new(PBF_TERM_NONE, /*part=*/0, + PBF_OP_GROUP_BEGIN, /*val=*/1); list_add_tail(&$1->list, &expr->groups); } expr->val++; @@ -72,7 +85,25 @@ filter_expr filter_expr: BFT_SAMPLE BFT_OP BFT_NUM { - $$ = perf_bpf_filter_expr__new($1.type, $1.part, $2, $3); + $$ = perf_bpf_filter_expr__new($1.term, $1.part, $2, $3); +} +| +BFT_SAMPLE_PATH BFT_OP BFT_PATH +{ + struct cgroup *cgrp; + unsigned long cgroup_id = 0; + + if ($2 != PBF_OP_EQ && $2 != PBF_OP_NEQ) { + printf("perf_bpf_filter: cgroup accepts '==' or '!=' only\n"); + YYERROR; + } + + cgrp = cgroup__new($3, /*do_open=*/false); + if (cgrp && read_cgroup_id(cgrp) == 0) + cgroup_id = cgrp->id; + + $$ = perf_bpf_filter_expr__new($1.term, $1.part, $2, cgroup_id); + cgroup__put(cgrp); } %% diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c deleted file mode 100644 index 44cde27d6389..000000000000 --- a/tools/perf/util/bpf-loader.c +++ /dev/null @@ -1,2110 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * bpf-loader.c - * - * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> - * Copyright (C) 2015 Huawei Inc. 
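In the bpf-filter.y hunk above, a cgroup filter is resolved to a numeric id at parse time via cgroup__new() and read_cgroup_id(). For cgroup v2 that id is the kernfs file handle of the cgroup directory, which user space can fetch with name_to_handle_at(2); a minimal sketch of the technique (the path is an example, and the wrapper-struct trick mirrors what perf's read_cgroup_id() does):

```c
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdint.h>

static uint64_t cgroup_path_to_id(const char *path)
{
	struct {
		struct file_handle fh;
		uint64_t cgroup_id;	/* storage for the 8-byte handle */
	} handle;
	int mount_id;

	handle.fh.handle_bytes = sizeof(handle.cgroup_id);
	if (name_to_handle_at(AT_FDCWD, path, &handle.fh, &mount_id, 0) < 0)
		return 0;

	return handle.cgroup_id;
}
```

For example, cgroup_path_to_id("/sys/fs/cgroup/mygroup") yields the value the filter compares against the sample's PERF_SAMPLE_CGROUP data (hence the --all-cgroups dependency in the sample table).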
- */ - -#include <linux/bpf.h> -#include <bpf/libbpf.h> -#include <bpf/bpf.h> -#include <linux/filter.h> -#include <linux/err.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/zalloc.h> -#include <errno.h> -#include <stdlib.h> -#include "debug.h" -#include "evlist.h" -#include "bpf-loader.h" -#include "bpf-prologue.h" -#include "probe-event.h" -#include "probe-finder.h" // for MAX_PROBES -#include "parse-events.h" -#include "strfilter.h" -#include "util.h" -#include "llvm-utils.h" -#include "c++/clang-c.h" -#include "util/hashmap.h" -#include "asm/bug.h" - -#include <internal/xyarray.h> - -/* temporarily disable libbpf deprecation warnings */ -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - -static int libbpf_perf_print(enum libbpf_print_level level __attribute__((unused)), - const char *fmt, va_list args) -{ - return veprintf(1, verbose, pr_fmt(fmt), args); -} - -struct bpf_prog_priv { - bool is_tp; - char *sys_name; - char *evt_name; - struct perf_probe_event pev; - bool need_prologue; - struct bpf_insn *insns_buf; - int nr_types; - int *type_mapping; - int *prologue_fds; -}; - -struct bpf_perf_object { - struct list_head list; - struct bpf_object *obj; -}; - -struct bpf_preproc_result { - struct bpf_insn *new_insn_ptr; - int new_insn_cnt; -}; - -static LIST_HEAD(bpf_objects_list); -static struct hashmap *bpf_program_hash; -static struct hashmap *bpf_map_hash; - -static struct bpf_perf_object * -bpf_perf_object__next(struct bpf_perf_object *prev) -{ - if (!prev) { - if (list_empty(&bpf_objects_list)) - return NULL; - - return list_first_entry(&bpf_objects_list, struct bpf_perf_object, list); - } - if (list_is_last(&prev->list, &bpf_objects_list)) - return NULL; - - return list_next_entry(prev, list); -} - -#define bpf_perf_object__for_each(perf_obj, tmp) \ - for ((perf_obj) = bpf_perf_object__next(NULL), \ - (tmp) = bpf_perf_object__next(perf_obj); \ - (perf_obj) != NULL; \ - (perf_obj) = (tmp), (tmp) = bpf_perf_object__next(tmp)) - -static bool libbpf_initialized; -static int libbpf_sec_handler; - -static int bpf_perf_object__add(struct bpf_object *obj) -{ - struct bpf_perf_object *perf_obj = zalloc(sizeof(*perf_obj)); - - if (perf_obj) { - INIT_LIST_HEAD(&perf_obj->list); - perf_obj->obj = obj; - list_add_tail(&perf_obj->list, &bpf_objects_list); - } - return perf_obj ? 
0 : -ENOMEM; -} - -static void *program_priv(const struct bpf_program *prog) -{ - void *priv; - - if (IS_ERR_OR_NULL(bpf_program_hash)) - return NULL; - if (!hashmap__find(bpf_program_hash, prog, &priv)) - return NULL; - return priv; -} - -static struct bpf_insn prologue_init_insn[] = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_MOV64_IMM(BPF_REG_4, 0), - BPF_MOV64_IMM(BPF_REG_5, 0), -}; - -static int libbpf_prog_prepare_load_fn(struct bpf_program *prog, - struct bpf_prog_load_opts *opts __maybe_unused, - long cookie __maybe_unused) -{ - size_t init_size_cnt = ARRAY_SIZE(prologue_init_insn); - size_t orig_insn_cnt, insn_cnt, init_size, orig_size; - struct bpf_prog_priv *priv = program_priv(prog); - const struct bpf_insn *orig_insn; - struct bpf_insn *insn; - - if (IS_ERR_OR_NULL(priv)) { - pr_debug("bpf: failed to get private field\n"); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - if (!priv->need_prologue) - return 0; - - /* prepend initialization code to program instructions */ - orig_insn = bpf_program__insns(prog); - orig_insn_cnt = bpf_program__insn_cnt(prog); - init_size = init_size_cnt * sizeof(*insn); - orig_size = orig_insn_cnt * sizeof(*insn); - - insn_cnt = orig_insn_cnt + init_size_cnt; - insn = malloc(insn_cnt * sizeof(*insn)); - if (!insn) - return -ENOMEM; - - memcpy(insn, prologue_init_insn, init_size); - memcpy((char *) insn + init_size, orig_insn, orig_size); - bpf_program__set_insns(prog, insn, insn_cnt); - return 0; -} - -static int libbpf_init(void) -{ - LIBBPF_OPTS(libbpf_prog_handler_opts, handler_opts, - .prog_prepare_load_fn = libbpf_prog_prepare_load_fn, - ); - - if (libbpf_initialized) - return 0; - - libbpf_set_print(libbpf_perf_print); - libbpf_sec_handler = libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_KPROBE, - 0, &handler_opts); - if (libbpf_sec_handler < 0) { - pr_debug("bpf: failed to register libbpf section handler: %d\n", - libbpf_sec_handler); - return -BPF_LOADER_ERRNO__INTERNAL; - } - libbpf_initialized = true; - return 0; -} - -struct bpf_object * -bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = name); - struct bpf_object *obj; - int err; - - err = libbpf_init(); - if (err) - return ERR_PTR(err); - - obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts); - if (IS_ERR_OR_NULL(obj)) { - pr_debug("bpf: failed to load buffer\n"); - return ERR_PTR(-EINVAL); - } - - if (bpf_perf_object__add(obj)) { - bpf_object__close(obj); - return ERR_PTR(-ENOMEM); - } - - return obj; -} - -static void bpf_perf_object__close(struct bpf_perf_object *perf_obj) -{ - list_del(&perf_obj->list); - bpf_object__close(perf_obj->obj); - free(perf_obj); -} - -struct bpf_object *bpf__prepare_load(const char *filename, bool source) -{ - LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = filename); - struct bpf_object *obj; - int err; - - err = libbpf_init(); - if (err) - return ERR_PTR(err); - - if (source) { - void *obj_buf; - size_t obj_buf_sz; - - perf_clang__init(); - err = perf_clang__compile_bpf(filename, &obj_buf, &obj_buf_sz); - perf_clang__cleanup(); - if (err) { - pr_debug("bpf: builtin compilation failed: %d, try external compiler\n", err); - err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz); - if (err) - return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); - } else - pr_debug("bpf: successful builtin compilation\n"); - obj = bpf_object__open_mem(obj_buf, obj_buf_sz, &opts); - - if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj) - llvm__dump_obj(filename, 
obj_buf, obj_buf_sz); - - free(obj_buf); - } else { - obj = bpf_object__open(filename); - } - - if (IS_ERR_OR_NULL(obj)) { - pr_debug("bpf: failed to load %s\n", filename); - return obj; - } - - if (bpf_perf_object__add(obj)) { - bpf_object__close(obj); - return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); - } - - return obj; -} - -static void close_prologue_programs(struct bpf_prog_priv *priv) -{ - struct perf_probe_event *pev; - int i, fd; - - if (!priv->need_prologue) - return; - pev = &priv->pev; - for (i = 0; i < pev->ntevs; i++) { - fd = priv->prologue_fds[i]; - if (fd != -1) - close(fd); - } -} - -static void -clear_prog_priv(const struct bpf_program *prog __maybe_unused, - void *_priv) -{ - struct bpf_prog_priv *priv = _priv; - - close_prologue_programs(priv); - cleanup_perf_probe_events(&priv->pev, 1); - zfree(&priv->insns_buf); - zfree(&priv->prologue_fds); - zfree(&priv->type_mapping); - zfree(&priv->sys_name); - zfree(&priv->evt_name); - free(priv); -} - -static void bpf_program_hash_free(void) -{ - struct hashmap_entry *cur; - size_t bkt; - - if (IS_ERR_OR_NULL(bpf_program_hash)) - return; - - hashmap__for_each_entry(bpf_program_hash, cur, bkt) - clear_prog_priv(cur->pkey, cur->pvalue); - - hashmap__free(bpf_program_hash); - bpf_program_hash = NULL; -} - -static void bpf_map_hash_free(void); - -void bpf__clear(void) -{ - struct bpf_perf_object *perf_obj, *tmp; - - bpf_perf_object__for_each(perf_obj, tmp) { - bpf__unprobe(perf_obj->obj); - bpf_perf_object__close(perf_obj); - } - - bpf_program_hash_free(); - bpf_map_hash_free(); -} - -static size_t ptr_hash(const long __key, void *ctx __maybe_unused) -{ - return __key; -} - -static bool ptr_equal(long key1, long key2, void *ctx __maybe_unused) -{ - return key1 == key2; -} - -static int program_set_priv(struct bpf_program *prog, void *priv) -{ - void *old_priv; - - /* - * Should not happen, we warn about it in the - * caller function - config_bpf_program - */ - if (IS_ERR(bpf_program_hash)) - return PTR_ERR(bpf_program_hash); - - if (!bpf_program_hash) { - bpf_program_hash = hashmap__new(ptr_hash, ptr_equal, NULL); - if (IS_ERR(bpf_program_hash)) - return PTR_ERR(bpf_program_hash); - } - - old_priv = program_priv(prog); - if (old_priv) { - clear_prog_priv(prog, old_priv); - return hashmap__set(bpf_program_hash, prog, priv, NULL, NULL); - } - return hashmap__add(bpf_program_hash, prog, priv); -} - -static int -prog_config__exec(const char *value, struct perf_probe_event *pev) -{ - pev->uprobes = true; - pev->target = strdup(value); - if (!pev->target) - return -ENOMEM; - return 0; -} - -static int -prog_config__module(const char *value, struct perf_probe_event *pev) -{ - pev->uprobes = false; - pev->target = strdup(value); - if (!pev->target) - return -ENOMEM; - return 0; -} - -static int -prog_config__bool(const char *value, bool *pbool, bool invert) -{ - int err; - bool bool_value; - - if (!pbool) - return -EINVAL; - - err = strtobool(value, &bool_value); - if (err) - return err; - - *pbool = invert ? 
!bool_value : bool_value; - return 0; -} - -static int -prog_config__inlines(const char *value, - struct perf_probe_event *pev __maybe_unused) -{ - return prog_config__bool(value, &probe_conf.no_inlines, true); -} - -static int -prog_config__force(const char *value, - struct perf_probe_event *pev __maybe_unused) -{ - return prog_config__bool(value, &probe_conf.force_add, false); -} - -static struct { - const char *key; - const char *usage; - const char *desc; - int (*func)(const char *, struct perf_probe_event *); -} bpf_prog_config_terms[] = { - { - .key = "exec", - .usage = "exec=<full path of file>", - .desc = "Set uprobe target", - .func = prog_config__exec, - }, - { - .key = "module", - .usage = "module=<module name> ", - .desc = "Set kprobe module", - .func = prog_config__module, - }, - { - .key = "inlines", - .usage = "inlines=[yes|no] ", - .desc = "Probe at inline symbol", - .func = prog_config__inlines, - }, - { - .key = "force", - .usage = "force=[yes|no] ", - .desc = "Forcibly add events with existing name", - .func = prog_config__force, - }, -}; - -static int -do_prog_config(const char *key, const char *value, - struct perf_probe_event *pev) -{ - unsigned int i; - - pr_debug("config bpf program: %s=%s\n", key, value); - for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++) - if (strcmp(key, bpf_prog_config_terms[i].key) == 0) - return bpf_prog_config_terms[i].func(value, pev); - - pr_debug("BPF: ERROR: invalid program config option: %s=%s\n", - key, value); - - pr_debug("\nHint: Valid options are:\n"); - for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++) - pr_debug("\t%s:\t%s\n", bpf_prog_config_terms[i].usage, - bpf_prog_config_terms[i].desc); - pr_debug("\n"); - - return -BPF_LOADER_ERRNO__PROGCONF_TERM; -} - -static const char * -parse_prog_config_kvpair(const char *config_str, struct perf_probe_event *pev) -{ - char *text = strdup(config_str); - char *sep, *line; - const char *main_str = NULL; - int err = 0; - - if (!text) { - pr_debug("Not enough memory: dup config_str failed\n"); - return ERR_PTR(-ENOMEM); - } - - line = text; - while ((sep = strchr(line, ';'))) { - char *equ; - - *sep = '\0'; - equ = strchr(line, '='); - if (!equ) { - pr_warning("WARNING: invalid config in BPF object: %s\n", - line); - pr_warning("\tShould be 'key=value'.\n"); - goto nextline; - } - *equ = '\0'; - - err = do_prog_config(line, equ + 1, pev); - if (err) - break; -nextline: - line = sep + 1; - } - - if (!err) - main_str = config_str + (line - text); - free(text); - - return err ? ERR_PTR(err) : main_str; -} - -static int -parse_prog_config(const char *config_str, const char **p_main_str, - bool *is_tp, struct perf_probe_event *pev) -{ - int err; - const char *main_str = parse_prog_config_kvpair(config_str, pev); - - if (IS_ERR(main_str)) - return PTR_ERR(main_str); - - *p_main_str = main_str; - if (!strchr(main_str, '=')) { - /* Is a tracepoint event? */ - const char *s = strchr(main_str, ':'); - - if (!s) { - pr_debug("bpf: '%s' is not a valid tracepoint\n", - config_str); - return -BPF_LOADER_ERRNO__CONFIG; - } - - *is_tp = true; - return 0; - } - - *is_tp = false; - err = parse_perf_probe_command(main_str, pev); - if (err < 0) { - pr_debug("bpf: '%s' is not a valid config string\n", - config_str); - /* parse failed, don't need clear pev. 
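The removed loader encoded these options in ELF section names, e.g. "exec=/bin/sh;force=yes;" followed by the probe spec. Stripped of error handling, the prefix parsing of parse_prog_config_kvpair() boils down to this (a sketch; it operates on a writable copy, as the original did on a strdup'ed string):

```c
#include <stdio.h>
#include <string.h>

/* Consume leading "key=value;" pairs, return the remaining probe spec. */
static const char *parse_kv_prefix(char *str)
{
	char *line = str, *sep;

	while ((sep = strchr(line, ';'))) {
		char *equ;

		*sep = '\0';
		equ = strchr(line, '=');
		if (equ) {
			*equ = '\0';
			printf("config: %s = %s\n", line, equ + 1);
		}
		line = sep + 1;
	}
	return line;	/* e.g. "func@file" or "sys:event" */
}
```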
*/ - return -BPF_LOADER_ERRNO__CONFIG; - } - return 0; -} - -static int -config_bpf_program(struct bpf_program *prog) -{ - struct perf_probe_event *pev = NULL; - struct bpf_prog_priv *priv = NULL; - const char *config_str, *main_str; - bool is_tp = false; - int err; - - /* Initialize per-program probing setting */ - probe_conf.no_inlines = false; - probe_conf.force_add = false; - - priv = calloc(sizeof(*priv), 1); - if (!priv) { - pr_debug("bpf: failed to alloc priv\n"); - return -ENOMEM; - } - pev = &priv->pev; - - config_str = bpf_program__section_name(prog); - pr_debug("bpf: config program '%s'\n", config_str); - err = parse_prog_config(config_str, &main_str, &is_tp, pev); - if (err) - goto errout; - - if (is_tp) { - char *s = strchr(main_str, ':'); - - priv->is_tp = true; - priv->sys_name = strndup(main_str, s - main_str); - priv->evt_name = strdup(s + 1); - goto set_priv; - } - - if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) { - pr_debug("bpf: '%s': group for event is set and not '%s'.\n", - config_str, PERF_BPF_PROBE_GROUP); - err = -BPF_LOADER_ERRNO__GROUP; - goto errout; - } else if (!pev->group) - pev->group = strdup(PERF_BPF_PROBE_GROUP); - - if (!pev->group) { - pr_debug("bpf: strdup failed\n"); - err = -ENOMEM; - goto errout; - } - - if (!pev->event) { - pr_debug("bpf: '%s': event name is missing. Section name should be 'key=value'\n", - config_str); - err = -BPF_LOADER_ERRNO__EVENTNAME; - goto errout; - } - pr_debug("bpf: config '%s' is ok\n", config_str); - -set_priv: - err = program_set_priv(prog, priv); - if (err) { - pr_debug("Failed to set priv for program '%s'\n", config_str); - goto errout; - } - - return 0; - -errout: - if (pev) - clear_perf_probe_event(pev); - free(priv); - return err; -} - -static int bpf__prepare_probe(void) -{ - static int err = 0; - static bool initialized = false; - - /* - * Make err static, so if init failed the first, bpf__prepare_probe() - * fails each time without calling init_probe_symbol_maps multiple - * times. 
- */ - if (initialized) - return err; - - initialized = true; - err = init_probe_symbol_maps(false); - if (err < 0) - pr_debug("Failed to init_probe_symbol_maps\n"); - probe_conf.max_probes = MAX_PROBES; - return err; -} - -static int -preproc_gen_prologue(struct bpf_program *prog, int n, - const struct bpf_insn *orig_insns, int orig_insns_cnt, - struct bpf_preproc_result *res) -{ - struct bpf_prog_priv *priv = program_priv(prog); - struct probe_trace_event *tev; - struct perf_probe_event *pev; - struct bpf_insn *buf; - size_t prologue_cnt = 0; - int i, err; - - if (IS_ERR_OR_NULL(priv) || priv->is_tp) - goto errout; - - pev = &priv->pev; - - if (n < 0 || n >= priv->nr_types) - goto errout; - - /* Find a tev belongs to that type */ - for (i = 0; i < pev->ntevs; i++) { - if (priv->type_mapping[i] == n) - break; - } - - if (i >= pev->ntevs) { - pr_debug("Internal error: prologue type %d not found\n", n); - return -BPF_LOADER_ERRNO__PROLOGUE; - } - - tev = &pev->tevs[i]; - - buf = priv->insns_buf; - err = bpf__gen_prologue(tev->args, tev->nargs, - buf, &prologue_cnt, - BPF_MAXINSNS - orig_insns_cnt); - if (err) { - const char *title; - - title = bpf_program__section_name(prog); - pr_debug("Failed to generate prologue for program %s\n", - title); - return err; - } - - memcpy(&buf[prologue_cnt], orig_insns, - sizeof(struct bpf_insn) * orig_insns_cnt); - - res->new_insn_ptr = buf; - res->new_insn_cnt = prologue_cnt + orig_insns_cnt; - return 0; - -errout: - pr_debug("Internal error in preproc_gen_prologue\n"); - return -BPF_LOADER_ERRNO__PROLOGUE; -} - -/* - * compare_tev_args is reflexive, transitive and antisymmetric. - * I can proof it but this margin is too narrow to contain. - */ -static int compare_tev_args(const void *ptev1, const void *ptev2) -{ - int i, ret; - const struct probe_trace_event *tev1 = - *(const struct probe_trace_event **)ptev1; - const struct probe_trace_event *tev2 = - *(const struct probe_trace_event **)ptev2; - - ret = tev2->nargs - tev1->nargs; - if (ret) - return ret; - - for (i = 0; i < tev1->nargs; i++) { - struct probe_trace_arg *arg1, *arg2; - struct probe_trace_arg_ref *ref1, *ref2; - - arg1 = &tev1->args[i]; - arg2 = &tev2->args[i]; - - ret = strcmp(arg1->value, arg2->value); - if (ret) - return ret; - - ref1 = arg1->ref; - ref2 = arg2->ref; - - while (ref1 && ref2) { - ret = ref2->offset - ref1->offset; - if (ret) - return ret; - - ref1 = ref1->next; - ref2 = ref2->next; - } - - if (ref1 || ref2) - return ref2 ? 1 : -1; - } - - return 0; -} - -/* - * Assign a type number to each tevs in a pev. - * mapping is an array with same slots as tevs in that pev. - * nr_types will be set to number of types. 
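The comment above is the whole algorithm of the function that follows: sort, then number runs of equal neighbours so identical argument lists share one prologue type. Detached from the probe structures it looks like this (ints stand in for probe_trace_event, cmp_val for compare_tev_args):

```c
#include <stdlib.h>

static int cmp_val(const void *a, const void *b)
{
	const int *x = *(const int * const *)a;
	const int *y = *(const int * const *)b;
	return *x - *y;
}

/* Fill mapping[i] with the type number of items[i]; return the number
 * of distinct types, or a negative value on error. */
static int assign_types(const int *items, int n, int *mapping)
{
	const int **ptrs;
	int type = 0;

	if (n <= 0)
		return 0;
	ptrs = malloc(n * sizeof(*ptrs));
	if (!ptrs)
		return -1;
	for (int i = 0; i < n; i++)
		ptrs[i] = &items[i];

	qsort(ptrs, n, sizeof(*ptrs), cmp_val);

	for (int i = 0; i < n; i++) {
		int orig = ptrs[i] - items;	/* recover the original index */

		if (i > 0 && cmp_val(&ptrs[i], &ptrs[i - 1]) != 0)
			type++;
		mapping[orig] = type;
	}
	free(ptrs);
	return type + 1;
}
```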
- */ -static int map_prologue(struct perf_probe_event *pev, int *mapping, - int *nr_types) -{ - int i, type = 0; - struct probe_trace_event **ptevs; - - size_t array_sz = sizeof(*ptevs) * pev->ntevs; - - ptevs = malloc(array_sz); - if (!ptevs) { - pr_debug("Not enough memory: alloc ptevs failed\n"); - return -ENOMEM; - } - - pr_debug("In map_prologue, ntevs=%d\n", pev->ntevs); - for (i = 0; i < pev->ntevs; i++) - ptevs[i] = &pev->tevs[i]; - - qsort(ptevs, pev->ntevs, sizeof(*ptevs), - compare_tev_args); - - for (i = 0; i < pev->ntevs; i++) { - int n; - - n = ptevs[i] - pev->tevs; - if (i == 0) { - mapping[n] = type; - pr_debug("mapping[%d]=%d\n", n, type); - continue; - } - - if (compare_tev_args(ptevs + i, ptevs + i - 1) == 0) - mapping[n] = type; - else - mapping[n] = ++type; - - pr_debug("mapping[%d]=%d\n", n, mapping[n]); - } - free(ptevs); - *nr_types = type + 1; - - return 0; -} - -static int hook_load_preprocessor(struct bpf_program *prog) -{ - struct bpf_prog_priv *priv = program_priv(prog); - struct perf_probe_event *pev; - bool need_prologue = false; - int i; - - if (IS_ERR_OR_NULL(priv)) { - pr_debug("Internal error when hook preprocessor\n"); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - if (priv->is_tp) { - priv->need_prologue = false; - return 0; - } - - pev = &priv->pev; - for (i = 0; i < pev->ntevs; i++) { - struct probe_trace_event *tev = &pev->tevs[i]; - - if (tev->nargs > 0) { - need_prologue = true; - break; - } - } - - /* - * Since all tevs don't have argument, we don't need generate - * prologue. - */ - if (!need_prologue) { - priv->need_prologue = false; - return 0; - } - - priv->need_prologue = true; - priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS); - if (!priv->insns_buf) { - pr_debug("Not enough memory: alloc insns_buf failed\n"); - return -ENOMEM; - } - - priv->prologue_fds = malloc(sizeof(int) * pev->ntevs); - if (!priv->prologue_fds) { - pr_debug("Not enough memory: alloc prologue fds failed\n"); - return -ENOMEM; - } - memset(priv->prologue_fds, -1, sizeof(int) * pev->ntevs); - - priv->type_mapping = malloc(sizeof(int) * pev->ntevs); - if (!priv->type_mapping) { - pr_debug("Not enough memory: alloc type_mapping failed\n"); - return -ENOMEM; - } - memset(priv->type_mapping, -1, - sizeof(int) * pev->ntevs); - - return map_prologue(pev, priv->type_mapping, &priv->nr_types); -} - -int bpf__probe(struct bpf_object *obj) -{ - int err = 0; - struct bpf_program *prog; - struct bpf_prog_priv *priv; - struct perf_probe_event *pev; - - err = bpf__prepare_probe(); - if (err) { - pr_debug("bpf__prepare_probe failed\n"); - return err; - } - - bpf_object__for_each_program(prog, obj) { - err = config_bpf_program(prog); - if (err) - goto out; - - priv = program_priv(prog); - if (IS_ERR_OR_NULL(priv)) { - if (!priv) - err = -BPF_LOADER_ERRNO__INTERNAL; - else - err = PTR_ERR(priv); - goto out; - } - - if (priv->is_tp) { - bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT); - continue; - } - - bpf_program__set_type(prog, BPF_PROG_TYPE_KPROBE); - pev = &priv->pev; - - err = convert_perf_probe_events(pev, 1); - if (err < 0) { - pr_debug("bpf_probe: failed to convert perf probe events\n"); - goto out; - } - - err = apply_perf_probe_events(pev, 1); - if (err < 0) { - pr_debug("bpf_probe: failed to apply perf probe events\n"); - goto out; - } - - /* - * After probing, let's consider prologue, which - * adds program fetcher to BPF programs. 
- * - * hook_load_preprocessor() hooks pre-processor - * to bpf_program, let it generate prologue - * dynamically during loading. - */ - err = hook_load_preprocessor(prog); - if (err) - goto out; - } -out: - return err < 0 ? err : 0; -} - -#define EVENTS_WRITE_BUFSIZE 4096 -int bpf__unprobe(struct bpf_object *obj) -{ - int err, ret = 0; - struct bpf_program *prog; - - bpf_object__for_each_program(prog, obj) { - struct bpf_prog_priv *priv = program_priv(prog); - int i; - - if (IS_ERR_OR_NULL(priv) || priv->is_tp) - continue; - - for (i = 0; i < priv->pev.ntevs; i++) { - struct probe_trace_event *tev = &priv->pev.tevs[i]; - char name_buf[EVENTS_WRITE_BUFSIZE]; - struct strfilter *delfilter; - - snprintf(name_buf, EVENTS_WRITE_BUFSIZE, - "%s:%s", tev->group, tev->event); - name_buf[EVENTS_WRITE_BUFSIZE - 1] = '\0'; - - delfilter = strfilter__new(name_buf, NULL); - if (!delfilter) { - pr_debug("Failed to create filter for unprobing\n"); - ret = -ENOMEM; - continue; - } - - err = del_perf_probe_events(delfilter); - strfilter__delete(delfilter); - if (err) { - pr_debug("Failed to delete %s\n", name_buf); - ret = err; - continue; - } - } - } - return ret; -} - -static int bpf_object__load_prologue(struct bpf_object *obj) -{ - int init_cnt = ARRAY_SIZE(prologue_init_insn); - const struct bpf_insn *orig_insns; - struct bpf_preproc_result res; - struct perf_probe_event *pev; - struct bpf_program *prog; - int orig_insns_cnt; - - bpf_object__for_each_program(prog, obj) { - struct bpf_prog_priv *priv = program_priv(prog); - int err, i, fd; - - if (IS_ERR_OR_NULL(priv)) { - pr_debug("bpf: failed to get private field\n"); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - if (!priv->need_prologue) - continue; - - /* - * For each program that needs prologue we do following: - * - * - take its current instructions and use them - * to generate the new code with prologue - * - load new instructions with bpf_prog_load - * and keep the fd in prologue_fds - * - new fd will be used in bpf__foreach_event - * to connect this program with perf evsel - */ - orig_insns = bpf_program__insns(prog); - orig_insns_cnt = bpf_program__insn_cnt(prog); - - pev = &priv->pev; - for (i = 0; i < pev->ntevs; i++) { - /* - * Skipping artificall prologue_init_insn instructions - * (init_cnt), so the prologue can be generated instead - * of them. - */ - err = preproc_gen_prologue(prog, i, - orig_insns + init_cnt, - orig_insns_cnt - init_cnt, - &res); - if (err) - return err; - - fd = bpf_prog_load(bpf_program__get_type(prog), - bpf_program__name(prog), "GPL", - res.new_insn_ptr, - res.new_insn_cnt, NULL); - if (fd < 0) { - char bf[128]; - - libbpf_strerror(-errno, bf, sizeof(bf)); - pr_debug("bpf: load objects with prologue failed: err=%d: (%s)\n", - -errno, bf); - return -errno; - } - priv->prologue_fds[i] = fd; - } - /* - * We no longer need the original program, - * we can unload it. 
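bpf_object__load_prologue() above was the last user of raw-instruction loading in perf: generated prologue plus original insns went straight to bpf_prog_load() instead of through an ELF object. Stripped to its essence, the call shape looks like this sketch (a do-nothing kprobe program, hand-encoded because the BPF_MOV64_IMM()/BPF_EXIT_INSN() macros live in the tools tree rather than the UAPI headers):

```c
#include <bpf/bpf.h>
#include <linux/bpf.h>

static int load_trivial_kprobe_prog(void)
{
	struct bpf_insn insns[] = {
		/* r0 = 0 */
		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0 },
		/* return r0 */
		{ .code = BPF_JMP | BPF_EXIT },
	};

	/* Same call shape as the removed code: type, name, license,
	 * instruction buffer, count, optional opts. Returns a prog fd. */
	return bpf_prog_load(BPF_PROG_TYPE_KPROBE, "trivial", "GPL",
			     insns, sizeof(insns) / sizeof(insns[0]), NULL);
}
```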
- */ - bpf_program__unload(prog); - } - return 0; -} - -int bpf__load(struct bpf_object *obj) -{ - int err; - - err = bpf_object__load(obj); - if (err) { - char bf[128]; - libbpf_strerror(err, bf, sizeof(bf)); - pr_debug("bpf: load objects failed: err=%d: (%s)\n", err, bf); - return err; - } - return bpf_object__load_prologue(obj); -} - -int bpf__foreach_event(struct bpf_object *obj, - bpf_prog_iter_callback_t func, - void *arg) -{ - struct bpf_program *prog; - int err; - - bpf_object__for_each_program(prog, obj) { - struct bpf_prog_priv *priv = program_priv(prog); - struct probe_trace_event *tev; - struct perf_probe_event *pev; - int i, fd; - - if (IS_ERR_OR_NULL(priv)) { - pr_debug("bpf: failed to get private field\n"); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - if (priv->is_tp) { - fd = bpf_program__fd(prog); - err = (*func)(priv->sys_name, priv->evt_name, fd, obj, arg); - if (err) { - pr_debug("bpf: tracepoint call back failed, stop iterate\n"); - return err; - } - continue; - } - - pev = &priv->pev; - for (i = 0; i < pev->ntevs; i++) { - tev = &pev->tevs[i]; - - if (priv->need_prologue) - fd = priv->prologue_fds[i]; - else - fd = bpf_program__fd(prog); - - if (fd < 0) { - pr_debug("bpf: failed to get file descriptor\n"); - return fd; - } - - err = (*func)(tev->group, tev->event, fd, obj, arg); - if (err) { - pr_debug("bpf: call back failed, stop iterate\n"); - return err; - } - } - } - return 0; -} - -enum bpf_map_op_type { - BPF_MAP_OP_SET_VALUE, - BPF_MAP_OP_SET_EVSEL, -}; - -enum bpf_map_key_type { - BPF_MAP_KEY_ALL, - BPF_MAP_KEY_RANGES, -}; - -struct bpf_map_op { - struct list_head list; - enum bpf_map_op_type op_type; - enum bpf_map_key_type key_type; - union { - struct parse_events_array array; - } k; - union { - u64 value; - struct evsel *evsel; - } v; -}; - -struct bpf_map_priv { - struct list_head ops_list; -}; - -static void -bpf_map_op__delete(struct bpf_map_op *op) -{ - if (!list_empty(&op->list)) - list_del_init(&op->list); - if (op->key_type == BPF_MAP_KEY_RANGES) - parse_events__clear_array(&op->k.array); - free(op); -} - -static void -bpf_map_priv__purge(struct bpf_map_priv *priv) -{ - struct bpf_map_op *pos, *n; - - list_for_each_entry_safe(pos, n, &priv->ops_list, list) { - list_del_init(&pos->list); - bpf_map_op__delete(pos); - } -} - -static void -bpf_map_priv__clear(const struct bpf_map *map __maybe_unused, - void *_priv) -{ - struct bpf_map_priv *priv = _priv; - - bpf_map_priv__purge(priv); - free(priv); -} - -static void *map_priv(const struct bpf_map *map) -{ - void *priv; - - if (IS_ERR_OR_NULL(bpf_map_hash)) - return NULL; - if (!hashmap__find(bpf_map_hash, map, &priv)) - return NULL; - return priv; -} - -static void bpf_map_hash_free(void) -{ - struct hashmap_entry *cur; - size_t bkt; - - if (IS_ERR_OR_NULL(bpf_map_hash)) - return; - - hashmap__for_each_entry(bpf_map_hash, cur, bkt) - bpf_map_priv__clear(cur->pkey, cur->pvalue); - - hashmap__free(bpf_map_hash); - bpf_map_hash = NULL; -} - -static int map_set_priv(struct bpf_map *map, void *priv) -{ - void *old_priv; - - if (WARN_ON_ONCE(IS_ERR(bpf_map_hash))) - return PTR_ERR(bpf_program_hash); - - if (!bpf_map_hash) { - bpf_map_hash = hashmap__new(ptr_hash, ptr_equal, NULL); - if (IS_ERR(bpf_map_hash)) - return PTR_ERR(bpf_map_hash); - } - - old_priv = map_priv(map); - if (old_priv) { - bpf_map_priv__clear(map, old_priv); - return hashmap__set(bpf_map_hash, map, priv, NULL, NULL); - } - return hashmap__add(bpf_map_hash, map, priv); -} - -static int -bpf_map_op_setkey(struct bpf_map_op *op, struct 
parse_events_term *term) -{ - op->key_type = BPF_MAP_KEY_ALL; - if (!term) - return 0; - - if (term->array.nr_ranges) { - size_t memsz = term->array.nr_ranges * - sizeof(op->k.array.ranges[0]); - - op->k.array.ranges = memdup(term->array.ranges, memsz); - if (!op->k.array.ranges) { - pr_debug("Not enough memory to alloc indices for map\n"); - return -ENOMEM; - } - op->key_type = BPF_MAP_KEY_RANGES; - op->k.array.nr_ranges = term->array.nr_ranges; - } - return 0; -} - -static struct bpf_map_op * -bpf_map_op__new(struct parse_events_term *term) -{ - struct bpf_map_op *op; - int err; - - op = zalloc(sizeof(*op)); - if (!op) { - pr_debug("Failed to alloc bpf_map_op\n"); - return ERR_PTR(-ENOMEM); - } - INIT_LIST_HEAD(&op->list); - - err = bpf_map_op_setkey(op, term); - if (err) { - free(op); - return ERR_PTR(err); - } - return op; -} - -static struct bpf_map_op * -bpf_map_op__clone(struct bpf_map_op *op) -{ - struct bpf_map_op *newop; - - newop = memdup(op, sizeof(*op)); - if (!newop) { - pr_debug("Failed to alloc bpf_map_op\n"); - return NULL; - } - - INIT_LIST_HEAD(&newop->list); - if (op->key_type == BPF_MAP_KEY_RANGES) { - size_t memsz = op->k.array.nr_ranges * - sizeof(op->k.array.ranges[0]); - - newop->k.array.ranges = memdup(op->k.array.ranges, memsz); - if (!newop->k.array.ranges) { - pr_debug("Failed to alloc indices for map\n"); - free(newop); - return NULL; - } - } - - return newop; -} - -static struct bpf_map_priv * -bpf_map_priv__clone(struct bpf_map_priv *priv) -{ - struct bpf_map_priv *newpriv; - struct bpf_map_op *pos, *newop; - - newpriv = zalloc(sizeof(*newpriv)); - if (!newpriv) { - pr_debug("Not enough memory to alloc map private\n"); - return NULL; - } - INIT_LIST_HEAD(&newpriv->ops_list); - - list_for_each_entry(pos, &priv->ops_list, list) { - newop = bpf_map_op__clone(pos); - if (!newop) { - bpf_map_priv__purge(newpriv); - return NULL; - } - list_add_tail(&newop->list, &newpriv->ops_list); - } - - return newpriv; -} - -static int -bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op) -{ - const char *map_name = bpf_map__name(map); - struct bpf_map_priv *priv = map_priv(map); - - if (IS_ERR(priv)) { - pr_debug("Failed to get private from map %s\n", map_name); - return PTR_ERR(priv); - } - - if (!priv) { - priv = zalloc(sizeof(*priv)); - if (!priv) { - pr_debug("Not enough memory to alloc map private\n"); - return -ENOMEM; - } - INIT_LIST_HEAD(&priv->ops_list); - - if (map_set_priv(map, priv)) { - free(priv); - return -BPF_LOADER_ERRNO__INTERNAL; - } - } - - list_add_tail(&op->list, &priv->ops_list); - return 0; -} - -static struct bpf_map_op * -bpf_map__add_newop(struct bpf_map *map, struct parse_events_term *term) -{ - struct bpf_map_op *op; - int err; - - op = bpf_map_op__new(term); - if (IS_ERR(op)) - return op; - - err = bpf_map__add_op(map, op); - if (err) { - bpf_map_op__delete(op); - return ERR_PTR(err); - } - return op; -} - -static int -__bpf_map__config_value(struct bpf_map *map, - struct parse_events_term *term) -{ - struct bpf_map_op *op; - const char *map_name = bpf_map__name(map); - - if (!map) { - pr_debug("Map '%s' is invalid\n", map_name); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY) { - pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n", - map_name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; - } - if (bpf_map__key_size(map) < sizeof(unsigned int)) { - pr_debug("Map %s has incorrect key size\n", map_name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE; - } - switch (bpf_map__value_size(map)) { - 
case 1: - case 2: - case 4: - case 8: - break; - default: - pr_debug("Map %s has incorrect value size\n", map_name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE; - } - - op = bpf_map__add_newop(map, term); - if (IS_ERR(op)) - return PTR_ERR(op); - op->op_type = BPF_MAP_OP_SET_VALUE; - op->v.value = term->val.num; - return 0; -} - -static int -bpf_map__config_value(struct bpf_map *map, - struct parse_events_term *term, - struct evlist *evlist __maybe_unused) -{ - if (!term->err_val) { - pr_debug("Config value not set\n"); - return -BPF_LOADER_ERRNO__OBJCONF_CONF; - } - - if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) { - pr_debug("ERROR: wrong value type for 'value'\n"); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE; - } - - return __bpf_map__config_value(map, term); -} - -static int -__bpf_map__config_event(struct bpf_map *map, - struct parse_events_term *term, - struct evlist *evlist) -{ - struct bpf_map_op *op; - const char *map_name = bpf_map__name(map); - struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str); - - if (!evsel) { - pr_debug("Event (for '%s') '%s' doesn't exist\n", - map_name, term->val.str); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT; - } - - if (!map) { - pr_debug("Map '%s' is invalid\n", map_name); - return PTR_ERR(map); - } - - /* - * No need to check key_size and value_size: - * kernel has already checked them. - */ - if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { - pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", - map_name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; - } - - op = bpf_map__add_newop(map, term); - if (IS_ERR(op)) - return PTR_ERR(op); - op->op_type = BPF_MAP_OP_SET_EVSEL; - op->v.evsel = evsel; - return 0; -} - -static int -bpf_map__config_event(struct bpf_map *map, - struct parse_events_term *term, - struct evlist *evlist) -{ - if (!term->err_val) { - pr_debug("Config value not set\n"); - return -BPF_LOADER_ERRNO__OBJCONF_CONF; - } - - if (term->type_val != PARSE_EVENTS__TERM_TYPE_STR) { - pr_debug("ERROR: wrong value type for 'event'\n"); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE; - } - - return __bpf_map__config_event(map, term, evlist); -} - -struct bpf_obj_config__map_func { - const char *config_opt; - int (*config_func)(struct bpf_map *, struct parse_events_term *, - struct evlist *); -}; - -struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = { - {"value", bpf_map__config_value}, - {"event", bpf_map__config_event}, -}; - -static int -config_map_indices_range_check(struct parse_events_term *term, - struct bpf_map *map, - const char *map_name) -{ - struct parse_events_array *array = &term->array; - unsigned int i; - - if (!array->nr_ranges) - return 0; - if (!array->ranges) { - pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n", - map_name, (int)array->nr_ranges); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - if (!map) { - pr_debug("Map '%s' is invalid\n", map_name); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - for (i = 0; i < array->nr_ranges; i++) { - unsigned int start = array->ranges[i].start; - size_t length = array->ranges[i].length; - unsigned int idx = start + length - 1; - - if (idx >= bpf_map__max_entries(map)) { - pr_debug("ERROR: index %d too large\n", idx); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG; - } - } - return 0; -} - -static int -bpf__obj_config_map(struct bpf_object *obj, - struct parse_events_term *term, - struct evlist *evlist, - int *key_scan_pos) -{ - /* key is "map:<mapname>.<config opt>" */ - char *map_name = 
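__bpf_map__config_value() above leans entirely on libbpf's read-only map getters for its sanity checks. The same validation as a free-standing predicate (error codes collapsed to -EINVAL for brevity):

#include <errno.h>
#include <bpf/libbpf.h>

static int check_settable_array(const struct bpf_map *map)
{
	if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY)
		return -EINVAL;		/* only plain arrays take "value" */
	if (bpf_map__key_size(map) < sizeof(unsigned int))
		return -EINVAL;		/* keys must hold an array index */

	switch (bpf_map__value_size(map)) {
	case 1: case 2: case 4: case 8:
		return 0;		/* fits one integer store */
	default:
		return -EINVAL;
	}
}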
strdup(term->config + sizeof("map:") - 1); - struct bpf_map *map; - int err = -BPF_LOADER_ERRNO__OBJCONF_OPT; - char *map_opt; - size_t i; - - if (!map_name) - return -ENOMEM; - - map_opt = strchr(map_name, '.'); - if (!map_opt) { - pr_debug("ERROR: Invalid map config: %s\n", map_name); - goto out; - } - - *map_opt++ = '\0'; - if (*map_opt == '\0') { - pr_debug("ERROR: Invalid map option: %s\n", term->config); - goto out; - } - - map = bpf_object__find_map_by_name(obj, map_name); - if (!map) { - pr_debug("ERROR: Map %s doesn't exist\n", map_name); - err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST; - goto out; - } - - *key_scan_pos += strlen(map_opt); - err = config_map_indices_range_check(term, map, map_name); - if (err) - goto out; - *key_scan_pos -= strlen(map_opt); - - for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) { - struct bpf_obj_config__map_func *func = - &bpf_obj_config__map_funcs[i]; - - if (strcmp(map_opt, func->config_opt) == 0) { - err = func->config_func(map, term, evlist); - goto out; - } - } - - pr_debug("ERROR: Invalid map config option '%s'\n", map_opt); - err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT; -out: - if (!err) - *key_scan_pos += strlen(map_opt); - - free(map_name); - return err; -} - -int bpf__config_obj(struct bpf_object *obj, - struct parse_events_term *term, - struct evlist *evlist, - int *error_pos) -{ - int key_scan_pos = 0; - int err; - - if (!obj || !term || !term->config) - return -EINVAL; - - if (strstarts(term->config, "map:")) { - key_scan_pos = sizeof("map:") - 1; - err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos); - goto out; - } - err = -BPF_LOADER_ERRNO__OBJCONF_OPT; -out: - if (error_pos) - *error_pos = key_scan_pos; - return err; - -} - -typedef int (*map_config_func_t)(const char *name, int map_fd, - const struct bpf_map *map, - struct bpf_map_op *op, - void *pkey, void *arg); - -static int -foreach_key_array_all(map_config_func_t func, - void *arg, const char *name, - int map_fd, const struct bpf_map *map, - struct bpf_map_op *op) -{ - unsigned int i; - int err; - - for (i = 0; i < bpf_map__max_entries(map); i++) { - err = func(name, map_fd, map, op, &i, arg); - if (err) { - pr_debug("ERROR: failed to insert value to %s[%u]\n", - name, i); - return err; - } - } - return 0; -} - -static int -foreach_key_array_ranges(map_config_func_t func, void *arg, - const char *name, int map_fd, - const struct bpf_map *map, - struct bpf_map_op *op) -{ - unsigned int i, j; - int err; - - for (i = 0; i < op->k.array.nr_ranges; i++) { - unsigned int start = op->k.array.ranges[i].start; - size_t length = op->k.array.ranges[i].length; - - for (j = 0; j < length; j++) { - unsigned int idx = start + j; - - err = func(name, map_fd, map, op, &idx, arg); - if (err) { - pr_debug("ERROR: failed to insert value to %s[%u]\n", - name, idx); - return err; - } - } - } - return 0; -} - -static int -bpf_map_config_foreach_key(struct bpf_map *map, - map_config_func_t func, - void *arg) -{ - int err, map_fd, type; - struct bpf_map_op *op; - const char *name = bpf_map__name(map); - struct bpf_map_priv *priv = map_priv(map); - - if (IS_ERR(priv)) { - pr_debug("ERROR: failed to get private from map %s\n", name); - return -BPF_LOADER_ERRNO__INTERNAL; - } - if (!priv || list_empty(&priv->ops_list)) { - pr_debug("INFO: nothing to config for map %s\n", name); - return 0; - } - - if (!map) { - pr_debug("Map '%s' is invalid\n", name); - return -BPF_LOADER_ERRNO__INTERNAL; - } - map_fd = bpf_map__fd(map); - if (map_fd < 0) { - pr_debug("ERROR: failed to get fd from map 
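End to end, bpf__config_obj() strips the "map:" prefix, bpf__obj_config_map() splits the remainder at the first '.', and the per-option handlers validate the rest, so these terms surface inside the event specification on the command line. An illustrative invocation (source file and map name hypothetical; the exact spelling may differ across perf versions, but the term follows the "map:<mapname>.<opt>" shape parsed above):

	# preset element(s) of map "counts" to 42 before the object runs
	perf record -e './sample.c/map:counts.value=42/' -a -- sleep 1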
%s\n", name); - return map_fd; - } - - type = bpf_map__type(map); - list_for_each_entry(op, &priv->ops_list, list) { - switch (type) { - case BPF_MAP_TYPE_ARRAY: - case BPF_MAP_TYPE_PERF_EVENT_ARRAY: - switch (op->key_type) { - case BPF_MAP_KEY_ALL: - err = foreach_key_array_all(func, arg, name, - map_fd, map, op); - break; - case BPF_MAP_KEY_RANGES: - err = foreach_key_array_ranges(func, arg, name, - map_fd, map, op); - break; - default: - pr_debug("ERROR: keytype for map '%s' invalid\n", - name); - return -BPF_LOADER_ERRNO__INTERNAL; - } - if (err) - return err; - break; - default: - pr_debug("ERROR: type of '%s' incorrect\n", name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; - } - } - - return 0; -} - -static int -apply_config_value_for_key(int map_fd, void *pkey, - size_t val_size, u64 val) -{ - int err = 0; - - switch (val_size) { - case 1: { - u8 _val = (u8)(val); - err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY); - break; - } - case 2: { - u16 _val = (u16)(val); - err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY); - break; - } - case 4: { - u32 _val = (u32)(val); - err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY); - break; - } - case 8: { - err = bpf_map_update_elem(map_fd, pkey, &val, BPF_ANY); - break; - } - default: - pr_debug("ERROR: invalid value size\n"); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE; - } - if (err && errno) - err = -errno; - return err; -} - -static int -apply_config_evsel_for_key(const char *name, int map_fd, void *pkey, - struct evsel *evsel) -{ - struct xyarray *xy = evsel->core.fd; - struct perf_event_attr *attr; - unsigned int key, events; - bool check_pass = false; - int *evt_fd; - int err; - - if (!xy) { - pr_debug("ERROR: evsel not ready for map %s\n", name); - return -BPF_LOADER_ERRNO__INTERNAL; - } - - if (xy->row_size / xy->entry_size != 1) { - pr_debug("ERROR: Dimension of target event is incorrect for map %s\n", - name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM; - } - - attr = &evsel->core.attr; - if (attr->inherit) { - pr_debug("ERROR: Can't put inherit event into map %s\n", name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH; - } - - if (evsel__is_bpf_output(evsel)) - check_pass = true; - if (attr->type == PERF_TYPE_RAW) - check_pass = true; - if (attr->type == PERF_TYPE_HARDWARE) - check_pass = true; - if (!check_pass) { - pr_debug("ERROR: Event type is wrong for map %s\n", name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE; - } - - events = xy->entries / (xy->row_size / xy->entry_size); - key = *((unsigned int *)pkey); - if (key >= events) { - pr_debug("ERROR: there is no event %d for map %s\n", - key, name); - return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE; - } - evt_fd = xyarray__entry(xy, key, 0); - err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY); - if (err && errno) - err = -errno; - return err; -} - -static int -apply_obj_config_map_for_key(const char *name, int map_fd, - const struct bpf_map *map, - struct bpf_map_op *op, - void *pkey, void *arg __maybe_unused) -{ - int err; - - switch (op->op_type) { - case BPF_MAP_OP_SET_VALUE: - err = apply_config_value_for_key(map_fd, pkey, - bpf_map__value_size(map), - op->v.value); - break; - case BPF_MAP_OP_SET_EVSEL: - err = apply_config_evsel_for_key(name, map_fd, pkey, - op->v.evsel); - break; - default: - pr_debug("ERROR: unknown value type for '%s'\n", name); - err = -BPF_LOADER_ERRNO__INTERNAL; - } - return err; -} - -static int -apply_obj_config_map(struct bpf_map *map) -{ - return bpf_map_config_foreach_key(map, - 
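foreach_key_array_all() above is the fd-level idiom for touching every slot of a BPF_MAP_TYPE_ARRAY. Reduced to its core (8-byte map values assumed):

#include <linux/bpf.h>
#include <bpf/bpf.h>		/* bpf_map_update_elem() */

static int fill_array_map(int map_fd, unsigned int nr_entries, __u64 val)
{
	for (unsigned int key = 0; key < nr_entries; key++) {
		/* BPF_ANY: create the slot or overwrite what is there */
		int err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);

		if (err)
			return err;	/* negative errno from libbpf */
	}
	return 0;
}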
apply_obj_config_map_for_key, - NULL); -} - -static int -apply_obj_config_object(struct bpf_object *obj) -{ - struct bpf_map *map; - int err; - - bpf_object__for_each_map(map, obj) { - err = apply_obj_config_map(map); - if (err) - return err; - } - return 0; -} - -int bpf__apply_obj_config(void) -{ - struct bpf_perf_object *perf_obj, *tmp; - int err; - - bpf_perf_object__for_each(perf_obj, tmp) { - err = apply_obj_config_object(perf_obj->obj); - if (err) - return err; - } - - return 0; -} - -#define bpf__perf_for_each_map(map, pobj, tmp) \ - bpf_perf_object__for_each(pobj, tmp) \ - bpf_object__for_each_map(map, pobj->obj) - -#define bpf__perf_for_each_map_named(map, pobj, pobjtmp, name) \ - bpf__perf_for_each_map(map, pobj, pobjtmp) \ - if (bpf_map__name(map) && (strcmp(name, bpf_map__name(map)) == 0)) - -struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name) -{ - struct bpf_map_priv *tmpl_priv = NULL; - struct bpf_perf_object *perf_obj, *tmp; - struct evsel *evsel = NULL; - struct bpf_map *map; - int err; - bool need_init = false; - - bpf__perf_for_each_map_named(map, perf_obj, tmp, name) { - struct bpf_map_priv *priv = map_priv(map); - - if (IS_ERR(priv)) - return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL); - - /* - * No need to check map type: type should have been - * verified by kernel. - */ - if (!need_init && !priv) - need_init = !priv; - if (!tmpl_priv && priv) - tmpl_priv = priv; - } - - if (!need_init) - return NULL; - - if (!tmpl_priv) { - char *event_definition = NULL; - - if (asprintf(&event_definition, "bpf-output/no-inherit=1,name=%s/", name) < 0) - return ERR_PTR(-ENOMEM); - - err = parse_event(evlist, event_definition); - free(event_definition); - - if (err) { - pr_debug("ERROR: failed to create the \"%s\" bpf-output event\n", name); - return ERR_PTR(-err); - } - - evsel = evlist__last(evlist); - } - - bpf__perf_for_each_map_named(map, perf_obj, tmp, name) { - struct bpf_map_priv *priv = map_priv(map); - - if (IS_ERR(priv)) - return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL); - if (priv) - continue; - - if (tmpl_priv) { - priv = bpf_map_priv__clone(tmpl_priv); - if (!priv) - return ERR_PTR(-ENOMEM); - - err = map_set_priv(map, priv); - if (err) { - bpf_map_priv__clear(map, priv); - return ERR_PTR(err); - } - } else if (evsel) { - struct bpf_map_op *op; - - op = bpf_map__add_newop(map, NULL); - if (IS_ERR(op)) - return ERR_CAST(op); - op->op_type = BPF_MAP_OP_SET_EVSEL; - op->v.evsel = evsel; - } - } - - return evsel; -} - -int bpf__setup_stdout(struct evlist *evlist) -{ - struct evsel *evsel = bpf__setup_output_event(evlist, "__bpf_stdout__"); - return PTR_ERR_OR_ZERO(evsel); -} - -#define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START) -#define ERRCODE_OFFSET(c) ERRNO_OFFSET(BPF_LOADER_ERRNO__##c) -#define NR_ERRNO (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START) - -static const char *bpf_loader_strerror_table[NR_ERRNO] = { - [ERRCODE_OFFSET(CONFIG)] = "Invalid config string", - [ERRCODE_OFFSET(GROUP)] = "Invalid group name", - [ERRCODE_OFFSET(EVENTNAME)] = "No event name found in config string", - [ERRCODE_OFFSET(INTERNAL)] = "BPF loader internal error", - [ERRCODE_OFFSET(COMPILE)] = "Error when compiling BPF scriptlet", - [ERRCODE_OFFSET(PROGCONF_TERM)] = "Invalid program config term in config string", - [ERRCODE_OFFSET(PROLOGUE)] = "Failed to generate prologue", - [ERRCODE_OFFSET(PROLOGUE2BIG)] = "Prologue too big for program", - [ERRCODE_OFFSET(PROLOGUEOOB)] = "Offset out of bound for prologue", - [ERRCODE_OFFSET(OBJCONF_OPT)] = "Invalid object 
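apply_config_evsel_for_key(), defined above, ultimately stores a perf event file descriptor into one slot of a BPF_MAP_TYPE_PERF_EVENT_ARRAY, which is what lets the program emit records with bpf_perf_event_output() on that CPU. Stripped of the evsel bookkeeping (perf_fd assumed to come from perf_event_open()):

#include <linux/bpf.h>
#include <bpf/bpf.h>

static int set_output_slot(int map_fd, unsigned int key, int perf_fd)
{
	/* a PERF_EVENT_ARRAY's value is the event fd itself */
	return bpf_map_update_elem(map_fd, &key, &perf_fd, BPF_ANY);
}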
config option", - [ERRCODE_OFFSET(OBJCONF_CONF)] = "Config value not set (missing '=')", - [ERRCODE_OFFSET(OBJCONF_MAP_OPT)] = "Invalid object map config option", - [ERRCODE_OFFSET(OBJCONF_MAP_NOTEXIST)] = "Target map doesn't exist", - [ERRCODE_OFFSET(OBJCONF_MAP_VALUE)] = "Incorrect value type for map", - [ERRCODE_OFFSET(OBJCONF_MAP_TYPE)] = "Incorrect map type", - [ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)] = "Incorrect map key size", - [ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size", - [ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)] = "Event not found for map setting", - [ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)] = "Invalid map size for event setting", - [ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)] = "Event dimension too large", - [ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)] = "Doesn't support inherit event", - [ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)] = "Wrong event type for map", - [ERRCODE_OFFSET(OBJCONF_MAP_IDX2BIG)] = "Index too large", -}; - -static int -bpf_loader_strerror(int err, char *buf, size_t size) -{ - char sbuf[STRERR_BUFSIZE]; - const char *msg; - - if (!buf || !size) - return -1; - - err = err > 0 ? err : -err; - - if (err >= __LIBBPF_ERRNO__START) - return libbpf_strerror(err, buf, size); - - if (err >= __BPF_LOADER_ERRNO__START && err < __BPF_LOADER_ERRNO__END) { - msg = bpf_loader_strerror_table[ERRNO_OFFSET(err)]; - snprintf(buf, size, "%s", msg); - buf[size - 1] = '\0'; - return 0; - } - - if (err >= __BPF_LOADER_ERRNO__END) - snprintf(buf, size, "Unknown bpf loader error %d", err); - else - snprintf(buf, size, "%s", - str_error_r(err, sbuf, sizeof(sbuf))); - - buf[size - 1] = '\0'; - return -1; -} - -#define bpf__strerror_head(err, buf, size) \ - char sbuf[STRERR_BUFSIZE], *emsg;\ - if (!size)\ - return 0;\ - if (err < 0)\ - err = -err;\ - bpf_loader_strerror(err, sbuf, sizeof(sbuf));\ - emsg = sbuf;\ - switch (err) {\ - default:\ - scnprintf(buf, size, "%s", emsg);\ - break; - -#define bpf__strerror_entry(val, fmt...)\ - case val: {\ - scnprintf(buf, size, fmt);\ - break;\ - } - -#define bpf__strerror_end(buf, size)\ - }\ - buf[size - 1] = '\0'; - -int bpf__strerror_prepare_load(const char *filename, bool source, - int err, char *buf, size_t size) -{ - size_t n; - int ret; - - n = snprintf(buf, size, "Failed to load %s%s: ", - filename, source ? " from source" : ""); - if (n >= size) { - buf[size - 1] = '\0'; - return 0; - } - buf += n; - size -= n; - - ret = bpf_loader_strerror(err, buf, size); - buf[size - 1] = '\0'; - return ret; -} - -int bpf__strerror_probe(struct bpf_object *obj __maybe_unused, - int err, char *buf, size_t size) -{ - bpf__strerror_head(err, buf, size); - case BPF_LOADER_ERRNO__PROGCONF_TERM: { - scnprintf(buf, size, "%s (add -v to see detail)", emsg); - break; - } - bpf__strerror_entry(EEXIST, "Probe point exist. 
Try 'perf probe -d \"*\"' and set 'force=yes'"); - bpf__strerror_entry(EACCES, "You need to be root"); - bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0"); - bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file"); - bpf__strerror_end(buf, size); - return 0; -} - -int bpf__strerror_load(struct bpf_object *obj, - int err, char *buf, size_t size) -{ - bpf__strerror_head(err, buf, size); - case LIBBPF_ERRNO__KVER: { - unsigned int obj_kver = bpf_object__kversion(obj); - unsigned int real_kver; - - if (fetch_kernel_version(&real_kver, NULL, 0)) { - scnprintf(buf, size, "Unable to fetch kernel version"); - break; - } - - if (obj_kver != real_kver) { - scnprintf(buf, size, - "'version' ("KVER_FMT") doesn't match running kernel ("KVER_FMT")", - KVER_PARAM(obj_kver), - KVER_PARAM(real_kver)); - break; - } - - scnprintf(buf, size, "Failed to load program for unknown reason"); - break; - } - bpf__strerror_end(buf, size); - return 0; -} - -int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused, - struct parse_events_term *term __maybe_unused, - struct evlist *evlist __maybe_unused, - int *error_pos __maybe_unused, int err, - char *buf, size_t size) -{ - bpf__strerror_head(err, buf, size); - bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, - "Can't use this config term with this map type"); - bpf__strerror_end(buf, size); - return 0; -} - -int bpf__strerror_apply_obj_config(int err, char *buf, size_t size) -{ - bpf__strerror_head(err, buf, size); - bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, - "Cannot set event to BPF map in multi-thread tracing"); - bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, - "%s (Hint: use -i to turn off inherit)", emsg); - bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, - "Can only put raw, hardware and BPF output event into a BPF map"); - bpf__strerror_end(buf, size); - return 0; -} - -int bpf__strerror_setup_output_event(struct evlist *evlist __maybe_unused, - int err, char *buf, size_t size) -{ - bpf__strerror_head(err, buf, size); - bpf__strerror_end(buf, size); - return 0; -} diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h deleted file mode 100644 index 5d1c725cea29..000000000000 --- a/tools/perf/util/bpf-loader.h +++ /dev/null @@ -1,216 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com> - * Copyright (C) 2015, Huawei Inc. 
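The bpf__strerror_head()/entry()/end() macros above assemble a plain switch statement. Expanded by hand (and with the entry list abbreviated), bpf__strerror_probe() reads roughly as:

int bpf__strerror_probe_expanded(struct bpf_object *obj __maybe_unused,
				 int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg;

	if (!size)
		return 0;
	if (err < 0)
		err = -err;
	bpf_loader_strerror(err, sbuf, sizeof(sbuf));
	emsg = sbuf;
	switch (err) {
	default:		/* the head emits the default case first */
		scnprintf(buf, size, "%s", emsg);
		break;
	case EACCES:		/* one bpf__strerror_entry() per errno */
		scnprintf(buf, size, "You need to be root");
		break;
	/* ... remaining entries elided ... */
	}
	buf[size - 1] = '\0';
	return 0;
}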
- */ -#ifndef __BPF_LOADER_H -#define __BPF_LOADER_H - -#include <linux/compiler.h> -#include <linux/err.h> - -#ifdef HAVE_LIBBPF_SUPPORT -#include <bpf/libbpf.h> - -enum bpf_loader_errno { - __BPF_LOADER_ERRNO__START = __LIBBPF_ERRNO__START - 100, - /* Invalid config string */ - BPF_LOADER_ERRNO__CONFIG = __BPF_LOADER_ERRNO__START, - BPF_LOADER_ERRNO__GROUP, /* Invalid group name */ - BPF_LOADER_ERRNO__EVENTNAME, /* Event name is missing */ - BPF_LOADER_ERRNO__INTERNAL, /* BPF loader internal error */ - BPF_LOADER_ERRNO__COMPILE, /* Error when compiling BPF scriptlet */ - BPF_LOADER_ERRNO__PROGCONF_TERM,/* Invalid program config term in config string */ - BPF_LOADER_ERRNO__PROLOGUE, /* Failed to generate prologue */ - BPF_LOADER_ERRNO__PROLOGUE2BIG, /* Prologue too big for program */ - BPF_LOADER_ERRNO__PROLOGUEOOB, /* Offset out of bound for prologue */ - BPF_LOADER_ERRNO__OBJCONF_OPT, /* Invalid object config option */ - BPF_LOADER_ERRNO__OBJCONF_CONF, /* Config value not set (lost '=')) */ - BPF_LOADER_ERRNO__OBJCONF_MAP_OPT, /* Invalid object map config option */ - BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST, /* Target map not exist */ - BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE, /* Incorrect value type for map */ - BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, /* Incorrect map type */ - BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE, /* Incorrect map key size */ - BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */ - BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT, /* Event not found for map setting */ - BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE, /* Invalid map size for event setting */ - BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, /* Event dimension too large */ - BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, /* Doesn't support inherit event */ - BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, /* Wrong event type for map */ - BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG, /* Index too large */ - __BPF_LOADER_ERRNO__END, -}; -#endif // HAVE_LIBBPF_SUPPORT - -struct evsel; -struct evlist; -struct bpf_object; -struct parse_events_term; -#define PERF_BPF_PROBE_GROUP "perf_bpf_probe" - -typedef int (*bpf_prog_iter_callback_t)(const char *group, const char *event, - int fd, struct bpf_object *obj, void *arg); - -#ifdef HAVE_LIBBPF_SUPPORT -struct bpf_object *bpf__prepare_load(const char *filename, bool source); -int bpf__strerror_prepare_load(const char *filename, bool source, - int err, char *buf, size_t size); - -struct bpf_object *bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, - const char *name); - -void bpf__clear(void); - -int bpf__probe(struct bpf_object *obj); -int bpf__unprobe(struct bpf_object *obj); -int bpf__strerror_probe(struct bpf_object *obj, int err, - char *buf, size_t size); - -int bpf__load(struct bpf_object *obj); -int bpf__strerror_load(struct bpf_object *obj, int err, - char *buf, size_t size); -int bpf__foreach_event(struct bpf_object *obj, - bpf_prog_iter_callback_t func, void *arg); - -int bpf__config_obj(struct bpf_object *obj, struct parse_events_term *term, - struct evlist *evlist, int *error_pos); -int bpf__strerror_config_obj(struct bpf_object *obj, - struct parse_events_term *term, - struct evlist *evlist, - int *error_pos, int err, char *buf, - size_t size); -int bpf__apply_obj_config(void); -int bpf__strerror_apply_obj_config(int err, char *buf, size_t size); - -int bpf__setup_stdout(struct evlist *evlist); -struct evsel *bpf__setup_output_event(struct evlist *evlist, const char *name); -int bpf__strerror_setup_output_event(struct evlist *evlist, int err, char *buf, size_t size); -#else -#include 
<errno.h> -#include <string.h> -#include "debug.h" - -static inline struct bpf_object * -bpf__prepare_load(const char *filename __maybe_unused, - bool source __maybe_unused) -{ - pr_debug("ERROR: eBPF object loading is disabled during compiling.\n"); - return ERR_PTR(-ENOTSUP); -} - -static inline struct bpf_object * -bpf__prepare_load_buffer(void *obj_buf __maybe_unused, - size_t obj_buf_sz __maybe_unused) -{ - return ERR_PTR(-ENOTSUP); -} - -static inline void bpf__clear(void) { } - -static inline int bpf__probe(struct bpf_object *obj __maybe_unused) { return 0;} -static inline int bpf__unprobe(struct bpf_object *obj __maybe_unused) { return 0;} -static inline int bpf__load(struct bpf_object *obj __maybe_unused) { return 0; } - -static inline int -bpf__foreach_event(struct bpf_object *obj __maybe_unused, - bpf_prog_iter_callback_t func __maybe_unused, - void *arg __maybe_unused) -{ - return 0; -} - -static inline int -bpf__config_obj(struct bpf_object *obj __maybe_unused, - struct parse_events_term *term __maybe_unused, - struct evlist *evlist __maybe_unused, - int *error_pos __maybe_unused) -{ - return 0; -} - -static inline int -bpf__apply_obj_config(void) -{ - return 0; -} - -static inline int -bpf__setup_stdout(struct evlist *evlist __maybe_unused) -{ - return 0; -} - -static inline struct evsel * -bpf__setup_output_event(struct evlist *evlist __maybe_unused, const char *name __maybe_unused) -{ - return NULL; -} - -static inline int -__bpf_strerror(char *buf, size_t size) -{ - if (!size) - return 0; - strncpy(buf, - "ERROR: eBPF object loading is disabled during compiling.\n", - size); - buf[size - 1] = '\0'; - return 0; -} - -static inline -int bpf__strerror_prepare_load(const char *filename __maybe_unused, - bool source __maybe_unused, - int err __maybe_unused, - char *buf, size_t size) -{ - return __bpf_strerror(buf, size); -} - -static inline int -bpf__strerror_probe(struct bpf_object *obj __maybe_unused, - int err __maybe_unused, - char *buf, size_t size) -{ - return __bpf_strerror(buf, size); -} - -static inline int bpf__strerror_load(struct bpf_object *obj __maybe_unused, - int err __maybe_unused, - char *buf, size_t size) -{ - return __bpf_strerror(buf, size); -} - -static inline int -bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused, - struct parse_events_term *term __maybe_unused, - struct evlist *evlist __maybe_unused, - int *error_pos __maybe_unused, - int err __maybe_unused, - char *buf, size_t size) -{ - return __bpf_strerror(buf, size); -} - -static inline int -bpf__strerror_apply_obj_config(int err __maybe_unused, - char *buf, size_t size) -{ - return __bpf_strerror(buf, size); -} - -static inline int -bpf__strerror_setup_output_event(struct evlist *evlist __maybe_unused, - int err __maybe_unused, char *buf, size_t size) -{ - return __bpf_strerror(buf, size); -} - -#endif - -static inline int bpf__strerror_setup_stdout(struct evlist *evlist, int err, char *buf, size_t size) -{ - return bpf__strerror_setup_output_event(evlist, err, buf, size); -} -#endif diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c deleted file mode 100644 index 9887ae09242d..000000000000 --- a/tools/perf/util/bpf-prologue.c +++ /dev/null @@ -1,508 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * bpf-prologue.c - * - * Copyright (C) 2015 He Kuang <hekuang@huawei.com> - * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> - * Copyright (C) 2015 Huawei Inc. 
- */ - -#include <bpf/libbpf.h> -#include "debug.h" -#include "bpf-loader.h" -#include "bpf-prologue.h" -#include "probe-finder.h" -#include <errno.h> -#include <stdlib.h> -#include <dwarf-regs.h> -#include <linux/filter.h> - -#define BPF_REG_SIZE 8 - -#define JMP_TO_ERROR_CODE -1 -#define JMP_TO_SUCCESS_CODE -2 -#define JMP_TO_USER_CODE -3 - -struct bpf_insn_pos { - struct bpf_insn *begin; - struct bpf_insn *end; - struct bpf_insn *pos; -}; - -static inline int -pos_get_cnt(struct bpf_insn_pos *pos) -{ - return pos->pos - pos->begin; -} - -static int -append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos) -{ - if (!pos->pos) - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; - - if (pos->pos + 1 >= pos->end) { - pr_err("bpf prologue: prologue too long\n"); - pos->pos = NULL; - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; - } - - *(pos->pos)++ = new_insn; - return 0; -} - -static int -check_pos(struct bpf_insn_pos *pos) -{ - if (!pos->pos || pos->pos >= pos->end) - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; - return 0; -} - -/* - * Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see - * Documentation/trace/kprobetrace.rst) to size field of BPF_LDX_MEM - * instruction (BPF_{B,H,W,DW}). - */ -static int -argtype_to_ldx_size(const char *type) -{ - int arg_size = type ? atoi(&type[1]) : 64; - - switch (arg_size) { - case 8: - return BPF_B; - case 16: - return BPF_H; - case 32: - return BPF_W; - case 64: - default: - return BPF_DW; - } -} - -static const char * -insn_sz_to_str(int insn_sz) -{ - switch (insn_sz) { - case BPF_B: - return "BPF_B"; - case BPF_H: - return "BPF_H"; - case BPF_W: - return "BPF_W"; - case BPF_DW: - return "BPF_DW"; - default: - return "UNKNOWN"; - } -} - -/* Give it a shorter name */ -#define ins(i, p) append_insn((i), (p)) - -/* - * Give a register name (in 'reg'), generate instruction to - * load register into an eBPF register rd: - * 'ldd target_reg, offset(ctx_reg)', where: - * ctx_reg is pre initialized to pointer of 'struct pt_regs'. - */ -static int -gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg, - const char *reg, int target_reg) -{ - int offset = regs_query_register_offset(reg); - - if (offset < 0) { - pr_err("bpf: prologue: failed to get register %s\n", - reg); - return offset; - } - ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, offset), pos); - - return check_pos(pos); -} - -/* - * Generate a BPF_FUNC_probe_read function call. - * - * src_base_addr_reg is a register holding base address, - * dst_addr_reg is a register holding dest address (on stack), - * result is: - * - * *[dst_addr_reg] = *([src_base_addr_reg] + offset) - * - * Arguments of BPF_FUNC_probe_read: - * ARG1: ptr to stack (dest) - * ARG2: size (8) - * ARG3: unsafe ptr (src) - */ -static int -gen_read_mem(struct bpf_insn_pos *pos, - int src_base_addr_reg, - int dst_addr_reg, - long offset, - int probeid) -{ - /* mov arg3, src_base_addr_reg */ - if (src_base_addr_reg != BPF_REG_ARG3) - ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos); - /* add arg3, #offset */ - if (offset) - ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos); - - /* mov arg2, #reg_size */ - ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos); - - /* mov arg1, dst_addr_reg */ - if (dst_addr_reg != BPF_REG_ARG1) - ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos); - - /* Call probe_read */ - ins(BPF_EMIT_CALL(probeid), pos); - /* - * Error processing: if read fail, goto error code, - * will be relocated. Target should be the start of - * error processing code. 
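The call sequence gen_read_mem() emits, written out as a literal template (the source base register r6 and the offset 16 are illustrative; the generator takes them from the probe argument):

#include <string.h>
#include <linux/bpf.h>
#include <linux/filter.h>	/* BPF_MOV64_REG(), BPF_EMIT_CALL(), ... */

/* Reads 8 bytes at (r6 + 16) into the stack slot r7 points at. */
static int emit_read_mem_example(struct bpf_insn *out)
{
	const struct bpf_insn tmpl[] = {
		BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_6),	  /* arg3 = src base  */
		BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, 16), /* arg3 += offset   */
		BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, 8),  /* arg2 = size (8)  */
		BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_7),	  /* arg1 = dst stack */
		BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, -1),	  /* -1 is relocated  */
	};

	memcpy(out, tmpl, sizeof(tmpl));
	return sizeof(tmpl) / sizeof(tmpl[0]);
}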
- */ - ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE), - pos); - - return check_pos(pos); -} - -/* - * Each arg should be bare register. Fetch and save them into argument - * registers (r3 - r5). - * - * BPF_REG_1 should have been initialized with pointer to - * 'struct pt_regs'. - */ -static int -gen_prologue_fastpath(struct bpf_insn_pos *pos, - struct probe_trace_arg *args, int nargs) -{ - int i, err = 0; - - for (i = 0; i < nargs; i++) { - err = gen_ldx_reg_from_ctx(pos, BPF_REG_1, args[i].value, - BPF_PROLOGUE_START_ARG_REG + i); - if (err) - goto errout; - } - - return check_pos(pos); -errout: - return err; -} - -/* - * Slow path: - * At least one argument has the form of 'offset($rx)'. - * - * Following code first stores them into stack, then loads all of then - * to r2 - r5. - * Before final loading, the final result should be: - * - * low address - * BPF_REG_FP - 24 ARG3 - * BPF_REG_FP - 16 ARG2 - * BPF_REG_FP - 8 ARG1 - * BPF_REG_FP - * high address - * - * For each argument (described as: offn(...off2(off1(reg)))), - * generates following code: - * - * r7 <- fp - * r7 <- r7 - stack_offset // Ideal code should initialize r7 using - * // fp before generating args. However, - * // eBPF won't regard r7 as stack pointer - * // if it is generated by minus 8 from - * // another stack pointer except fp. - * // This is why we have to set r7 - * // to fp for each variable. - * r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx() - * (r7) <- r3 // skip following instructions for bare reg - * r3 <- r3 + off1 . // skip if off1 == 0 - * r2 <- 8 \ - * r1 <- r7 |-> generated by gen_read_mem() - * call probe_read / - * jnei r0, 0, err ./ - * r3 <- (r7) - * r3 <- r3 + off2 . // skip if off2 == 0 - * r2 <- 8 \ // r2 may be broken by probe_read, so set again - * r1 <- r7 |-> generated by gen_read_mem() - * call probe_read / - * jnei r0, 0, err ./ - * ... - */ -static int -gen_prologue_slowpath(struct bpf_insn_pos *pos, - struct probe_trace_arg *args, int nargs) -{ - int err, i, probeid; - - for (i = 0; i < nargs; i++) { - struct probe_trace_arg *arg = &args[i]; - const char *reg = arg->value; - struct probe_trace_arg_ref *ref = NULL; - int stack_offset = (i + 1) * -8; - - pr_debug("prologue: fetch arg %d, base reg is %s\n", - i, reg); - - /* value of base register is stored into ARG3 */ - err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg, - BPF_REG_ARG3); - if (err) { - pr_err("prologue: failed to get offset of register %s\n", - reg); - goto errout; - } - - /* Make r7 the stack pointer. */ - ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos); - /* r7 += -8 */ - ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos); - /* - * Store r3 (base register) onto stack - * Ensure fp[offset] is set. - * fp is the only valid base register when storing - * into stack. We are not allowed to use r7 as base - * register here. - */ - ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3, - stack_offset), pos); - - ref = arg->ref; - probeid = BPF_FUNC_probe_read_kernel; - while (ref) { - pr_debug("prologue: arg %d: offset %ld\n", - i, ref->offset); - - if (ref->user_access) - probeid = BPF_FUNC_probe_read_user; - - err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7, - ref->offset, probeid); - if (err) { - pr_err("prologue: failed to generate probe_read function call\n"); - goto errout; - } - - ref = ref->next; - /* - * Load previous result into ARG3. Use - * BPF_REG_FP instead of r7 because verifier - * allows FP based addressing only. 
- */ - if (ref) - ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3, - BPF_REG_FP, stack_offset), pos); - } - } - - /* Final pass: read to registers */ - for (i = 0; i < nargs; i++) { - int insn_sz = (args[i].ref) ? argtype_to_ldx_size(args[i].type) : BPF_DW; - - pr_debug("prologue: load arg %d, insn_sz is %s\n", - i, insn_sz_to_str(insn_sz)); - ins(BPF_LDX_MEM(insn_sz, BPF_PROLOGUE_START_ARG_REG + i, - BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos); - } - - ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos); - - return check_pos(pos); -errout: - return err; -} - -static int -prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code, - struct bpf_insn *success_code, struct bpf_insn *user_code) -{ - struct bpf_insn *insn; - - if (check_pos(pos)) - return -BPF_LOADER_ERRNO__PROLOGUE2BIG; - - for (insn = pos->begin; insn < pos->pos; insn++) { - struct bpf_insn *target; - u8 class = BPF_CLASS(insn->code); - u8 opcode; - - if (class != BPF_JMP) - continue; - opcode = BPF_OP(insn->code); - if (opcode == BPF_CALL) - continue; - - switch (insn->off) { - case JMP_TO_ERROR_CODE: - target = error_code; - break; - case JMP_TO_SUCCESS_CODE: - target = success_code; - break; - case JMP_TO_USER_CODE: - target = user_code; - break; - default: - pr_err("bpf prologue: internal error: relocation failed\n"); - return -BPF_LOADER_ERRNO__PROLOGUE; - } - - insn->off = target - (insn + 1); - } - return 0; -} - -int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, - struct bpf_insn *new_prog, size_t *new_cnt, - size_t cnt_space) -{ - struct bpf_insn *success_code = NULL; - struct bpf_insn *error_code = NULL; - struct bpf_insn *user_code = NULL; - struct bpf_insn_pos pos; - bool fastpath = true; - int err = 0, i; - - if (!new_prog || !new_cnt) - return -EINVAL; - - if (cnt_space > BPF_MAXINSNS) - cnt_space = BPF_MAXINSNS; - - pos.begin = new_prog; - pos.end = new_prog + cnt_space; - pos.pos = new_prog; - - if (!nargs) { - ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), - &pos); - - if (check_pos(&pos)) - goto errout; - - *new_cnt = pos_get_cnt(&pos); - return 0; - } - - if (nargs > BPF_PROLOGUE_MAX_ARGS) { - pr_warning("bpf: prologue: %d arguments are dropped\n", - nargs - BPF_PROLOGUE_MAX_ARGS); - nargs = BPF_PROLOGUE_MAX_ARGS; - } - - /* First pass: validation */ - for (i = 0; i < nargs; i++) { - struct probe_trace_arg_ref *ref = args[i].ref; - - if (args[i].value[0] == '@') { - /* TODO: fetch global variable */ - pr_err("bpf: prologue: global %s%+ld not support\n", - args[i].value, ref ? ref->offset : 0); - return -ENOTSUP; - } - - while (ref) { - /* fastpath is true if all args has ref == NULL */ - fastpath = false; - - /* - * Instruction encodes immediate value using - * s32, ref->offset is long. On systems which - * can't fill long in s32, refuse to process if - * ref->offset too large (or small). - */ -#ifdef __LP64__ -#define OFFSET_MAX ((1LL << 31) - 1) -#define OFFSET_MIN ((1LL << 31) * -1) - if (ref->offset > OFFSET_MAX || - ref->offset < OFFSET_MIN) { - pr_err("bpf: prologue: offset out of bound: %ld\n", - ref->offset); - return -BPF_LOADER_ERRNO__PROLOGUEOOB; - } -#endif - ref = ref->next; - } - } - pr_debug("prologue: pass validation\n"); - - if (fastpath) { - /* If all variables are registers... */ - pr_debug("prologue: fast path\n"); - err = gen_prologue_fastpath(&pos, args, nargs); - if (err) - goto errout; - } else { - pr_debug("prologue: slow path\n"); - - /* Initialization: move ctx to a callee saved register. 
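A note on the fixup arithmetic in prologue_relocate() above: a BPF jump offset counts instructions from the one after the jump, hence target - (insn + 1). Worked instance: a jne placed at insns[2] whose error handler starts at insns[10] gets off = &insns[10] - (&insns[2] + 1) = 7, and 2 + 1 + 7 = 10 lands exactly on the handler.

static short reloc_off(const struct bpf_insn *insn, const struct bpf_insn *target)
{
	return target - (insn + 1);	/* measured in instructions, not bytes */
}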
*/ - ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos); - - err = gen_prologue_slowpath(&pos, args, nargs); - if (err) - goto errout; - /* - * start of ERROR_CODE (only slow pass needs error code) - * mov r2 <- 1 // r2 is error number - * mov r3 <- 0 // r3, r4... should be touched or - * // verifier would complain - * mov r4 <- 0 - * ... - * goto usercode - */ - error_code = pos.pos; - ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1), - &pos); - - for (i = 0; i < nargs; i++) - ins(BPF_ALU64_IMM(BPF_MOV, - BPF_PROLOGUE_START_ARG_REG + i, - 0), - &pos); - ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE), - &pos); - } - - /* - * start of SUCCESS_CODE: - * mov r2 <- 0 - * goto usercode // skip - */ - success_code = pos.pos; - ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos); - - /* - * start of USER_CODE: - * Restore ctx to r1 - */ - user_code = pos.pos; - if (!fastpath) { - /* - * Only slow path needs restoring of ctx. In fast path, - * register are loaded directly from r1. - */ - ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos); - err = prologue_relocate(&pos, error_code, success_code, - user_code); - if (err) - goto errout; - } - - err = check_pos(&pos); - if (err) - goto errout; - - *new_cnt = pos_get_cnt(&pos); - return 0; -errout: - return err; -} diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h deleted file mode 100644 index 66dcf751ef65..000000000000 --- a/tools/perf/util/bpf-prologue.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2015, He Kuang <hekuang@huawei.com> - * Copyright (C) 2015, Huawei Inc. - */ -#ifndef __BPF_PROLOGUE_H -#define __BPF_PROLOGUE_H - -struct probe_trace_arg; -struct bpf_insn; - -#define BPF_PROLOGUE_MAX_ARGS 3 -#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3 -#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2 - -#ifdef HAVE_BPF_PROLOGUE -int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, - struct bpf_insn *new_prog, size_t *new_cnt, - size_t cnt_space); -#else -#include <linux/compiler.h> -#include <errno.h> - -static inline int -bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused, - int nargs __maybe_unused, - struct bpf_insn *new_prog __maybe_unused, - size_t *new_cnt, - size_t cnt_space __maybe_unused) -{ - if (!new_cnt) - return -EINVAL; - *new_cnt = 0; - return -ENOTSUP; -} -#endif -#endif /* __BPF_PROLOGUE_H */ diff --git a/tools/perf/util/bpf-trace-summary.c b/tools/perf/util/bpf-trace-summary.c new file mode 100644 index 000000000000..cf6e1e4402d5 --- /dev/null +++ b/tools/perf/util/bpf-trace-summary.c @@ -0,0 +1,465 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <errno.h> +#include <inttypes.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> + +#include "dwarf-regs.h" /* for EM_HOST */ +#include "syscalltbl.h" +#include "util/cgroup.h" +#include "util/hashmap.h" +#include "util/trace.h" +#include "util/util.h" +#include <bpf/bpf.h> +#include <linux/rbtree.h> +#include <linux/time64.h> +#include <tools/libc_compat.h> /* reallocarray */ + +#include "bpf_skel/syscall_summary.h" +#include "bpf_skel/syscall_summary.skel.h" + + +static struct syscall_summary_bpf *skel; +static struct rb_root cgroups = RB_ROOT; + +int trace_prepare_bpf_summary(enum trace_summary_mode mode) +{ + skel = syscall_summary_bpf__open(); + if (skel == NULL) { + fprintf(stderr, "failed to open syscall summary bpf skeleton\n"); + return -1; + } + + if (mode == SUMMARY__BY_THREAD) + skel->rodata->aggr_mode = SYSCALL_AGGR_THREAD; + else 
if (mode == SUMMARY__BY_CGROUP) + skel->rodata->aggr_mode = SYSCALL_AGGR_CGROUP; + else + skel->rodata->aggr_mode = SYSCALL_AGGR_CPU; + + if (cgroup_is_v2("perf_event") > 0) + skel->rodata->use_cgroup_v2 = 1; + + if (syscall_summary_bpf__load(skel) < 0) { + fprintf(stderr, "failed to load syscall summary bpf skeleton\n"); + return -1; + } + + if (syscall_summary_bpf__attach(skel) < 0) { + fprintf(stderr, "failed to attach syscall summary bpf skeleton\n"); + return -1; + } + + if (mode == SUMMARY__BY_CGROUP) + read_all_cgroups(&cgroups); + + return 0; +} + +void trace_start_bpf_summary(void) +{ + skel->bss->enabled = 1; +} + +void trace_end_bpf_summary(void) +{ + skel->bss->enabled = 0; +} + +struct syscall_node { + int syscall_nr; + struct syscall_stats stats; +}; + +static double rel_stddev(struct syscall_stats *stat) +{ + double variance, average; + + if (stat->count < 2) + return 0; + + average = (double)stat->total_time / stat->count; + + variance = stat->squared_sum; + variance -= (stat->total_time * stat->total_time) / stat->count; + variance /= stat->count - 1; + + return 100 * sqrt(variance / stat->count) / average; +} + +/* + * The syscall_data struct maintains syscall stats ordered by total time. + * It supports different summary modes like per-thread or global. + * + * For per-thread stats, it uses a two-level data structure - + * syscall_data is keyed by TID and has an array of nodes, each of + * which represents one syscall of the thread. + * + * For global stats, it's still technically two-level, but per-cpu + * analysis is not needed, so it's keyed by the syscall number to combine + * stats from different CPUs. And syscall_data always has a syscall_node so + * it can effectively work as a flat hierarchy. + * + * For per-cgroup stats, it uses the same two-level structure as the + * per-thread case: syscall_data is keyed by cgroup and has an array of + * nodes, each of which represents one syscall of the cgroup. + */ +struct syscall_data { + u64 key; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU, cgroup if AGGR_CGROUP */ + int nr_events; + int nr_nodes; + u64 total_time; + struct syscall_node *nodes; +}; + +static int datacmp(const void *a, const void *b) +{ + const struct syscall_data * const *sa = a; + const struct syscall_data * const *sb = b; + + return (*sa)->total_time > (*sb)->total_time ? -1 : 1; +} + +static int nodecmp(const void *a, const void *b) +{ + const struct syscall_node *na = a; + const struct syscall_node *nb = b; + + return na->stats.total_time > nb->stats.total_time ?
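rel_stddev() above uses the one-pass identity: with n = count, S1 = total_time (the running sum of samples) and S2 = squared_sum (the running sum of squares), the sample variance is (S2 - S1*S1/n) / (n - 1), and the printed percentage is really the standard error of the mean relative to the mean, 100 * sqrt(variance / n) / mean. A tiny self-check with hypothetical samples:

#include <assert.h>
#include <math.h>

static double rel_stddev_of(double s1, double s2, unsigned int n)
{
	double mean = s1 / n;
	double variance = (s2 - s1 * s1 / n) / (n - 1);

	return 100.0 * sqrt(variance / n) / mean;
}

/* samples {1, 3}: S1 = 4, S2 = 10, mean = 2, variance = 2,
 * stderr of the mean = sqrt(2 / 2) = 1, so the figure is 50% */
static void rel_stddev_check(void)
{
	assert(fabs(rel_stddev_of(4.0, 10.0, 2) - 50.0) < 1e-9);
}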
-1 : 1; +} + +static size_t sc_node_hash(long key, void *ctx __maybe_unused) +{ + return key; +} + +static bool sc_node_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + +static int print_common_stats(struct syscall_data *data, int max_summary, FILE *fp) +{ + int printed = 0; + + if (max_summary == 0 || max_summary > data->nr_nodes) + max_summary = data->nr_nodes; + + for (int i = 0; i < max_summary; i++) { + struct syscall_node *node = &data->nodes[i]; + struct syscall_stats *stat = &node->stats; + double total = (double)(stat->total_time) / NSEC_PER_MSEC; + double min = (double)(stat->min_time) / NSEC_PER_MSEC; + double max = (double)(stat->max_time) / NSEC_PER_MSEC; + double avg = total / stat->count; + const char *name; + + /* TODO: support other ABIs */ + name = syscalltbl__name(EM_HOST, node->syscall_nr); + if (name) + printed += fprintf(fp, " %-15s", name); + else + printed += fprintf(fp, " syscall:%-7d", node->syscall_nr); + + printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n", + stat->count, stat->error, total, min, avg, max, + rel_stddev(stat)); + } + return printed; +} + +static int update_thread_stats(struct hashmap *hash, struct syscall_key *map_key, + struct syscall_stats *map_data) +{ + struct syscall_data *data; + struct syscall_node *nodes; + + if (!hashmap__find(hash, map_key->cpu_or_tid, &data)) { + data = zalloc(sizeof(*data)); + if (data == NULL) + return -ENOMEM; + + data->key = map_key->cpu_or_tid; + if (hashmap__add(hash, data->key, data) < 0) { + free(data); + return -ENOMEM; + } + } + + /* update thread total stats */ + data->nr_events += map_data->count; + data->total_time += map_data->total_time; + + nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes)); + if (nodes == NULL) + return -ENOMEM; + + data->nodes = nodes; + nodes = &data->nodes[data->nr_nodes++]; + nodes->syscall_nr = map_key->nr; + + /* each thread has an entry for each syscall, just use the stat */ + memcpy(&nodes->stats, map_data, sizeof(*map_data)); + return 0; +} + +static int print_thread_stat(struct syscall_data *data, int max_summary, FILE *fp) +{ + int printed = 0; + + qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp); + + printed += fprintf(fp, " thread (%d), ", (int)data->key); + printed += fprintf(fp, "%d events\n\n", data->nr_events); + + printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); + + printed += print_common_stats(data, max_summary, fp); + printed += fprintf(fp, "\n\n"); + + return printed; +} + +static int print_thread_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp) +{ + int printed = 0; + + for (int i = 0; i < nr_data; i++) + printed += print_thread_stat(data[i], max_summary, fp); + + return printed; +} + +static int update_total_stats(struct hashmap *hash, struct syscall_key *map_key, + struct syscall_stats *map_data) +{ + struct syscall_data *data; + struct syscall_stats *stat; + + if (!hashmap__find(hash, map_key->nr, &data)) { + data = zalloc(sizeof(*data)); + if (data == NULL) + return -ENOMEM; + + data->nodes = zalloc(sizeof(*data->nodes)); + if (data->nodes == NULL) { + free(data); + return -ENOMEM; + } + + data->nr_nodes = 1; + data->key = map_key->nr; + data->nodes->syscall_nr = data->key; + + if (hashmap__add(hash, data->key, data) < 0) { + free(data->nodes); + 
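update_thread_stats() above grows its node array with the find-or-create-then-append idiom; reallocarray() (the overflow-checking realloc variant, provided via tools/libc_compat.h when libc lacks it) keeps the old array intact on failure. The growth step in isolation (helper name illustrative):

#define _GNU_SOURCE		/* reallocarray() on glibc >= 2.26 */
#include <stdlib.h>

/* Make room for one more element of @sz bytes in @arr of length *@nr.
 * Returns the (possibly moved) array, or NULL with the old array intact. */
static void *append_one(void *arr, int *nr, size_t sz)
{
	void *tmp = reallocarray(arr, *nr + 1, sz);

	if (!tmp)
		return NULL;
	(*nr)++;
	return tmp;
}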
free(data); + return -ENOMEM; + } + } + + /* update total stats for this syscall */ + data->nr_events += map_data->count; + data->total_time += map_data->total_time; + + /* This is sum of the same syscall from different CPUs */ + stat = &data->nodes->stats; + + stat->total_time += map_data->total_time; + stat->squared_sum += map_data->squared_sum; + stat->count += map_data->count; + stat->error += map_data->error; + + if (stat->max_time < map_data->max_time) + stat->max_time = map_data->max_time; + if (stat->min_time > map_data->min_time || stat->min_time == 0) + stat->min_time = map_data->min_time; + + return 0; +} + +static int print_total_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp) +{ + int printed = 0; + int nr_events = 0; + + for (int i = 0; i < nr_data; i++) + nr_events += data[i]->nr_events; + + printed += fprintf(fp, " total, %d events\n\n", nr_events); + + printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); + + if (max_summary == 0 || max_summary > nr_data) + max_summary = nr_data; + + for (int i = 0; i < max_summary; i++) + printed += print_common_stats(data[i], max_summary, fp); + + printed += fprintf(fp, "\n\n"); + return printed; +} + +static int update_cgroup_stats(struct hashmap *hash, struct syscall_key *map_key, + struct syscall_stats *map_data) +{ + struct syscall_data *data; + struct syscall_node *nodes; + + if (!hashmap__find(hash, map_key->cgroup, &data)) { + data = zalloc(sizeof(*data)); + if (data == NULL) + return -ENOMEM; + + data->key = map_key->cgroup; + if (hashmap__add(hash, data->key, data) < 0) { + free(data); + return -ENOMEM; + } + } + + /* update cgroup total stats */ + data->nr_events += map_data->count; + data->total_time += map_data->total_time; + + nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes)); + if (nodes == NULL) + return -ENOMEM; + + data->nodes = nodes; + nodes = &data->nodes[data->nr_nodes++]; + nodes->syscall_nr = map_key->nr; + + /* each cgroup has an entry for each syscall, just use the stat */ + memcpy(&nodes->stats, map_data, sizeof(*map_data)); + return 0; +} + +static int print_cgroup_stat(struct syscall_data *data, int max_summary, FILE *fp) +{ + int printed = 0; + struct cgroup *cgrp = __cgroup__find(&cgroups, data->key); + + qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp); + + if (cgrp) + printed += fprintf(fp, " cgroup %s,", cgrp->name); + else + printed += fprintf(fp, " cgroup id:%lu,", (unsigned long)data->key); + + printed += fprintf(fp, " %d events\n\n", data->nr_events); + + printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); + + printed += print_common_stats(data, max_summary, fp); + printed += fprintf(fp, "\n\n"); + + return printed; +} + +static int print_cgroup_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp) +{ + int printed = 0; + + for (int i = 0; i < nr_data; i++) + printed += print_cgroup_stat(data[i], max_summary, fp); + + return printed; +} + +int trace_print_bpf_summary(FILE *fp, int max_summary) +{ + struct bpf_map *map = skel->maps.syscall_stats_map; + struct syscall_key *prev_key, key; + struct syscall_data **data = NULL; + struct
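The merge in update_total_stats() above is exact, not an approximation: everything kept per CPU is either an additive running sum (count, error, total_time, squared_sum) or an order statistic (min, max), so folding the per-CPU records reproduces what a single global record would have held. The same fold as a standalone helper (field names as in struct syscall_stats from the skeleton header):

static void syscall_stats__merge(struct syscall_stats *acc,
				 const struct syscall_stats *in)
{
	acc->count       += in->count;
	acc->error       += in->error;
	acc->total_time  += in->total_time;
	acc->squared_sum += in->squared_sum;

	if (acc->max_time < in->max_time)
		acc->max_time = in->max_time;
	/* min_time == 0 means "no sample yet", as in the code above */
	if (acc->min_time > in->min_time || acc->min_time == 0)
		acc->min_time = in->min_time;
}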
hashmap schash; + struct hashmap_entry *entry; + int nr_data = 0; + int printed = 0; + int i; + size_t bkt; + + hashmap__init(&schash, sc_node_hash, sc_node_equal, /*ctx=*/NULL); + + printed = fprintf(fp, "\n Summary of events:\n\n"); + + /* get stats from the bpf map */ + prev_key = NULL; + while (!bpf_map__get_next_key(map, prev_key, &key, sizeof(key))) { + struct syscall_stats stat; + + if (!bpf_map__lookup_elem(map, &key, sizeof(key), &stat, sizeof(stat), 0)) { + switch (skel->rodata->aggr_mode) { + case SYSCALL_AGGR_THREAD: + update_thread_stats(&schash, &key, &stat); + break; + case SYSCALL_AGGR_CPU: + update_total_stats(&schash, &key, &stat); + break; + case SYSCALL_AGGR_CGROUP: + update_cgroup_stats(&schash, &key, &stat); + break; + default: + break; + } + } + + prev_key = &key; + } + + nr_data = hashmap__size(&schash); + data = calloc(nr_data, sizeof(*data)); + if (data == NULL) + goto out; + + i = 0; + hashmap__for_each_entry(&schash, entry, bkt) + data[i++] = entry->pvalue; + + qsort(data, nr_data, sizeof(*data), datacmp); + + switch (skel->rodata->aggr_mode) { + case SYSCALL_AGGR_THREAD: + printed += print_thread_stats(data, nr_data, max_summary, fp); + break; + case SYSCALL_AGGR_CPU: + printed += print_total_stats(data, nr_data, max_summary, fp); + break; + case SYSCALL_AGGR_CGROUP: + printed += print_cgroup_stats(data, nr_data, max_summary, fp); + break; + default: + break; + } + + for (i = 0; i < nr_data && data; i++) { + free(data[i]->nodes); + free(data[i]); + } + free(data); + +out: + hashmap__clear(&schash); + return printed; +} + +void trace_cleanup_bpf_summary(void) +{ + if (!RB_EMPTY_ROOT(&cgroups)) { + struct cgroup *cgrp, *tmp; + + rbtree_postorder_for_each_entry_safe(cgrp, tmp, &cgroups, node) + cgroup__put(cgrp); + + cgroups = RB_ROOT; + } + + syscall_summary_bpf__destroy(skel); +} diff --git a/tools/perf/util/bpf-utils.c b/tools/perf/util/bpf-utils.c index 80b1d2b3729b..5a66dc8594aa 100644 --- a/tools/perf/util/bpf-utils.c +++ b/tools/perf/util/bpf-utils.c @@ -20,7 +20,7 @@ struct bpil_array_desc { */ }; -static struct bpil_array_desc bpil_array_desc[] = { +static const struct bpil_array_desc bpil_array_desc[] = { [PERF_BPIL_JITED_INSNS] = { offsetof(struct bpf_prog_info, jited_prog_insns), offsetof(struct bpf_prog_info, jited_prog_len), @@ -115,7 +115,7 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) __u32 info_len = sizeof(info); __u32 data_len = 0; int i, err; - void *ptr; + __u8 *ptr; if (arrays >> PERF_BPIL_LAST_ARRAY) return ERR_PTR(-EINVAL); @@ -126,15 +126,15 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) pr_debug("can't get prog info: %s", strerror(errno)); return ERR_PTR(-EFAULT); } + if (info.type >= __MAX_BPF_PROG_TYPE) + pr_debug("%s:%d: unexpected program type %u\n", __func__, __LINE__, info.type); /* step 2: calculate total size of all arrays */ for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + const struct bpil_array_desc *desc = &bpil_array_desc[i]; bool include_array = (arrays & (1UL << i)) > 0; - struct bpil_array_desc *desc; __u32 count, size; - desc = bpil_array_desc + i; - /* kernel is too old to support this field */ if (info_len < desc->array_offset + sizeof(__u32) || info_len < desc->count_offset + sizeof(__u32) || @@ -163,19 +163,20 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) ptr = info_linear->data; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; + const struct bpil_array_desc *desc = &bpil_array_desc[i]; __u32 count, size; if ((arrays & (1UL << i)) == 0) 
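The drain loop in trace_print_bpf_summary() above is the canonical libbpf map walk: bpf_map__get_next_key() with a NULL previous key starts the iteration, and each key returned seeds the next step. The generic shape (4-byte keys and 8-byte values assumed):

#include <bpf/libbpf.h>

static int walk_map(const struct bpf_map *map)
{
	__u32 key, *prev = NULL;

	while (!bpf_map__get_next_key(map, prev, &key, sizeof(key))) {
		__u64 val;

		if (bpf_map__lookup_elem(map, &key, sizeof(key),
					 &val, sizeof(val), /*flags=*/0) == 0) {
			/* consume (key, val) here */
		}
		prev = &key;	/* current key seeds the next lookup */
	}
	return 0;
}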
continue; - desc = bpil_array_desc + i; count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); bpf_prog_info_set_offset_u32(&info_linear->info, desc->count_offset, count); bpf_prog_info_set_offset_u32(&info_linear->info, desc->size_offset, size); + assert(ptr >= info_linear->data); + assert(ptr < &info_linear->data[data_len]); bpf_prog_info_set_offset_u64(&info_linear->info, desc->array_offset, ptr_to_u64(ptr)); @@ -189,27 +190,45 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) free(info_linear); return ERR_PTR(-EFAULT); } + if (info_linear->info.type >= __MAX_BPF_PROG_TYPE) { + pr_debug("%s:%d: unexpected program type %u\n", + __func__, __LINE__, info_linear->info.type); + } /* step 6: verify the data */ + ptr = info_linear->data; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; - __u32 v1, v2; + const struct bpil_array_desc *desc = &bpil_array_desc[i]; + __u32 count1, count2, size1, size2; + __u64 ptr2; if ((arrays & (1UL << i)) == 0) continue; - desc = bpil_array_desc + i; - v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + count1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + count2 = bpf_prog_info_read_offset_u32(&info_linear->info, desc->count_offset); - if (v1 != v2) - pr_warning("%s: mismatch in element count\n", __func__); + if (count1 != count2) { + pr_warning("%s: mismatch in element count %u vs %u\n", __func__, count1, count2); + free(info_linear); + return ERR_PTR(-ERANGE); + } - v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + size1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + size2 = bpf_prog_info_read_offset_u32(&info_linear->info, desc->size_offset); - if (v1 != v2) - pr_warning("%s: mismatch in rec size\n", __func__); + if (size1 != size2) { + pr_warning("%s: mismatch in rec size %u vs %u\n", __func__, size1, size2); + free(info_linear); + return ERR_PTR(-ERANGE); + } + ptr2 = bpf_prog_info_read_offset_u64(&info_linear->info, desc->array_offset); + if (ptr_to_u64(ptr) != ptr2) { + pr_warning("%s: mismatch in array %p vs %llx\n", __func__, ptr, ptr2); + free(info_linear); + return ERR_PTR(-ERANGE); + } + ptr += roundup(count1 * size1, sizeof(__u64)); } /* step 7: update info_len and data_len */ @@ -224,13 +243,12 @@ void bpil_addr_to_offs(struct perf_bpil *info_linear) int i; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; + const struct bpil_array_desc *desc = &bpil_array_desc[i]; __u64 addr, offs; if ((info_linear->arrays & (1UL << i)) == 0) continue; - desc = bpil_array_desc + i; addr = bpf_prog_info_read_offset_u64(&info_linear->info, desc->array_offset); offs = addr - ptr_to_u64(info_linear->data); @@ -244,13 +262,12 @@ void bpil_offs_to_addr(struct perf_bpil *info_linear) int i; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; + const struct bpil_array_desc *desc = &bpil_array_desc[i]; __u64 addr, offs; if ((info_linear->arrays & (1UL << i)) == 0) continue; - desc = bpil_array_desc + i; offs = bpf_prog_info_read_offset_u64(&info_linear->info, desc->array_offset); addr = offs + ptr_to_u64(info_linear->data); diff --git a/tools/perf/util/bpf-utils.h b/tools/perf/util/bpf-utils.h index 86a5055cdfad..a8bc1a232968 100644 --- a/tools/perf/util/bpf-utils.h +++ 
b/tools/perf/util/bpf-utils.h @@ -8,6 +8,16 @@ #ifdef HAVE_LIBBPF_SUPPORT #include <bpf/libbpf.h> +#include <bpf/libbpf_version.h> + +#define LIBBPF_CURRENT_VERSION_GEQ(major, minor) \ + (LIBBPF_MAJOR_VERSION > (major) || \ + (LIBBPF_MAJOR_VERSION == (major) && LIBBPF_MINOR_VERSION >= (minor))) + +#if LIBBPF_CURRENT_VERSION_GEQ(1, 7) +// libbpf 1.7+ support the btf_dump_type_data_opts.emit_strings option. +#define HAVE_LIBBPF_STRINGS_SUPPORT 1 +#endif /* * Get bpf_prog_info in continuous memory diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 6732cbbcf9b3..a5882b582205 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -6,10 +6,14 @@ #include <limits.h> #include <unistd.h> #include <sys/file.h> +#include <sys/resource.h> #include <sys/time.h> #include <linux/err.h> +#include <linux/list.h> #include <linux/zalloc.h> #include <api/fs/fs.h> +#include <bpf/bpf.h> +#include <bpf/btf.h> #include <perf/bpf_perf.h> #include "bpf_counter.h" @@ -28,13 +32,67 @@ #include "bpf_skel/bperf_leader.skel.h" #include "bpf_skel/bperf_follower.skel.h" +struct bpf_counter { + void *skel; + struct list_head list; +}; + #define ATTR_MAP_SIZE 16 -static inline void *u64_to_ptr(__u64 ptr) +static void *u64_to_ptr(__u64 ptr) { return (void *)(unsigned long)ptr; } + +void set_max_rlimit(void) +{ + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; + + setrlimit(RLIMIT_MEMLOCK, &rinf); +} + +static __u32 bpf_link_get_id(int fd) +{ + struct bpf_link_info link_info = { .id = 0, }; + __u32 link_info_len = sizeof(link_info); + + bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); + return link_info.id; +} + +static __u32 bpf_link_get_prog_id(int fd) +{ + struct bpf_link_info link_info = { .id = 0, }; + __u32 link_info_len = sizeof(link_info); + + bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); + return link_info.prog_id; +} + +static __u32 bpf_map_get_id(int fd) +{ + struct bpf_map_info map_info = { .id = 0, }; + __u32 map_info_len = sizeof(map_info); + + bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len); + return map_info.id; +} + +/* trigger the leader program on a cpu */ +int bperf_trigger_reading(int prog_fd, int cpu) +{ + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .ctx_in = NULL, + .ctx_size_in = 0, + .flags = BPF_F_TEST_RUN_ON_CPU, + .cpu = cpu, + .retval = 0, + ); + + return bpf_prog_test_run_opts(prog_fd, &opts); +} + static struct bpf_counter *bpf_counter_alloc(void) { struct bpf_counter *counter; @@ -104,7 +162,7 @@ static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id) struct bpf_prog_profiler_bpf *skel; struct bpf_counter *counter; struct bpf_program *prog; - char *prog_name; + char *prog_name = NULL; int prog_fd; int err; @@ -155,10 +213,12 @@ static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id) assert(skel != NULL); counter->skel = skel; list_add(&counter->list, &evsel->bpf_counter_list); + free(prog_name); close(prog_fd); return 0; err_out: bpf_prog_profiler_bpf__destroy(skel); + free(prog_name); free(counter); close(prog_fd); return -1; @@ -180,6 +240,7 @@ static int bpf_program_profiler__load(struct evsel *evsel, struct target *target (*p != '\0' && *p != ',')) { pr_err("Failed to parse bpf prog ids %s\n", target->bpf_str); + free(bpf_str_); return -1; } @@ -275,6 +336,7 @@ static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx { struct bpf_prog_profiler_bpf *skel; struct bpf_counter *counter; + int cpu = perf_cpu_map__cpu(evsel->core.cpus, 
cpu_map_idx).cpu; int ret; list_for_each_entry(counter, &evsel->bpf_counter_list, list) { @@ -282,7 +344,7 @@ static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx assert(skel != NULL); ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events), - &cpu_map_idx, &fd, BPF_ANY); + &cpu, &fd, BPF_ANY); if (ret) return ret; } @@ -390,7 +452,7 @@ static int bperf_check_target(struct evsel *evsel, return 0; } -static struct perf_cpu_map *all_cpu_map; +static __u32 filter_entry_cnt; static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, struct perf_event_attr_map_entry *entry) @@ -398,6 +460,7 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, struct bperf_leader_bpf *skel = bperf_leader_bpf__open(); int link_fd, diff_map_fd, err; struct bpf_link *link = NULL; + struct perf_thread_map *threads; if (!skel) { pr_err("Failed to open leader skeleton\n"); @@ -433,7 +496,11 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, * following evsel__open_per_cpu call */ evsel->leader_skel = skel; - evsel__open_per_cpu(evsel, all_cpu_map, -1); + assert(!perf_cpu_map__has_any_cpu_or_is_empty(evsel->core.cpus)); + /* Always open system wide. */ + threads = thread_map__new_by_tid(-1); + evsel__open(evsel, evsel->core.cpus, threads); + perf_thread_map__put(threads); out: bperf_leader_bpf__destroy(skel); @@ -441,22 +508,36 @@ out: return err; } +static int bperf_attach_follower_program(struct bperf_follower_bpf *skel, + enum bperf_filter_type filter_type, + bool inherit) +{ + struct bpf_link *link; + int err = 0; + + if ((filter_type == BPERF_FILTER_PID || + filter_type == BPERF_FILTER_TGID) && inherit) + /* attach all follower bpf progs to enable event inheritance */ + err = bperf_follower_bpf__attach(skel); + else { + link = bpf_program__attach(skel->progs.fexit_XXX); + if (IS_ERR(link)) + err = PTR_ERR(link); + } + + return err; +} + static int bperf__load(struct evsel *evsel, struct target *target) { struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff}; int attr_map_fd, diff_map_fd = -1, err; enum bperf_filter_type filter_type; - __u32 filter_entry_cnt, i; + __u32 i; if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt)) return -1; - if (!all_cpu_map) { - all_cpu_map = perf_cpu_map__new(NULL); - if (!all_cpu_map) - return -1; - } - evsel->bperf_leader_prog_fd = -1; evsel->bperf_leader_link_fd = -1; @@ -526,9 +607,6 @@ static int bperf__load(struct evsel *evsel, struct target *target) /* set up reading map */ bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings, filter_entry_cnt); - /* set up follower filter based on target */ - bpf_map__set_max_entries(evsel->follower_skel->maps.filter, - filter_entry_cnt); err = bperf_follower_bpf__load(evsel->follower_skel); if (err) { pr_err("Failed to load follower skeleton\n"); @@ -540,6 +618,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) for (i = 0; i < filter_entry_cnt; i++) { int filter_map_fd; __u32 key; + struct bperf_filter_value fval = { i, 0 }; if (filter_type == BPERF_FILTER_PID || filter_type == BPERF_FILTER_TGID) @@ -550,12 +629,14 @@ static int bperf__load(struct evsel *evsel, struct target *target) break; filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter); - bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY); + bpf_map_update_elem(filter_map_fd, &key, &fval, BPF_ANY); } evsel->follower_skel->bss->type = filter_type; + evsel->follower_skel->bss->inherit = target->inherit; - 
err = bperf_follower_bpf__attach(evsel->follower_skel); + err = bperf_attach_follower_program(evsel->follower_skel, filter_type, + target->inherit); out: if (err && evsel->bperf_leader_link_fd >= 0) @@ -574,9 +655,10 @@ out: static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd) { struct bperf_leader_bpf *skel = evsel->leader_skel; + int cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx).cpu; return bpf_map_update_elem(bpf_map__fd(skel->maps.events), - &cpu_map_idx, &fd, BPF_ANY); + &cpu, &fd, BPF_ANY); } /* @@ -585,13 +667,12 @@ static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd) */ static int bperf_sync_counters(struct evsel *evsel) { - int num_cpu, i, cpu; + struct perf_cpu cpu; + int idx; + + perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) + bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu.cpu); - num_cpu = perf_cpu_map__nr(all_cpu_map); - for (i = 0; i < num_cpu; i++) { - cpu = perf_cpu_map__cpu(all_cpu_map, i).cpu; - bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu); - } return 0; } @@ -620,7 +701,7 @@ static int bperf__read(struct evsel *evsel) bperf_sync_counters(evsel); reading_map_fd = bpf_map__fd(skel->maps.accum_readings); - for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) { + for (i = 0; i < filter_entry_cnt; i++) { struct perf_cpu entry; __u32 cpu; @@ -761,7 +842,7 @@ struct bpf_counter_ops bperf_ops = { extern struct bpf_counter_ops bperf_cgrp_ops; -static inline bool bpf_counter_skip(struct evsel *evsel) +static bool bpf_counter_skip(struct evsel *evsel) { return evsel->bpf_counter_ops == NULL; } diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index c6d21c07b14c..658d8e7d507e 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -2,18 +2,10 @@ #ifndef __PERF_BPF_COUNTER_H #define __PERF_BPF_COUNTER_H 1 -#include <linux/list.h> -#include <sys/resource.h> - -#ifdef HAVE_LIBBPF_SUPPORT -#include <bpf/bpf.h> -#include <bpf/btf.h> -#include <bpf/libbpf.h> -#endif - struct evsel; struct target; -struct bpf_counter; + +#ifdef HAVE_BPF_SKEL typedef int (*bpf_counter_evsel_op)(struct evsel *evsel); typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel, @@ -22,6 +14,7 @@ typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel, int cpu_map_idx, int fd); +/* Shared ops between bpf_counter, bpf_counter_cgroup, etc. 
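Each backend fills in one of these tables (bperf_ops and bperf_cgrp_ops among them) and the evsel dispatches through it. A minimal sketch of that dispatch, assuming only the evsel->bpf_counter_ops field that bpf_counter_skip() in bpf_counter.c checks:

	int bpf_counter__load(struct evsel *evsel, struct target *target)
	{
		if (evsel->bpf_counter_ops && evsel->bpf_counter_ops->load)
			return evsel->bpf_counter_ops->load(evsel, target);
		return 0;
	}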
*/ struct bpf_counter_ops { bpf_counter_evsel_target_op load; bpf_counter_evsel_op enable; @@ -31,13 +24,6 @@ struct bpf_counter_ops { bpf_counter_evsel_install_pe_op install_pe; }; -struct bpf_counter { - void *skel; - struct list_head list; -}; - -#ifdef HAVE_BPF_SKEL - int bpf_counter__load(struct evsel *evsel, struct target *target); int bpf_counter__enable(struct evsel *evsel); int bpf_counter__disable(struct evsel *evsel); @@ -45,6 +31,9 @@ int bpf_counter__read(struct evsel *evsel); void bpf_counter__destroy(struct evsel *evsel); int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd); +int bperf_trigger_reading(int prog_fd, int cpu); +void set_max_rlimit(void); + #else /* HAVE_BPF_SKEL */ #include <linux/err.h> @@ -83,55 +72,4 @@ static inline int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, #endif /* HAVE_BPF_SKEL */ -static inline void set_max_rlimit(void) -{ - struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; - - setrlimit(RLIMIT_MEMLOCK, &rinf); -} - -#ifdef HAVE_BPF_SKEL - -static inline __u32 bpf_link_get_id(int fd) -{ - struct bpf_link_info link_info = { .id = 0, }; - __u32 link_info_len = sizeof(link_info); - - bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); - return link_info.id; -} - -static inline __u32 bpf_link_get_prog_id(int fd) -{ - struct bpf_link_info link_info = { .id = 0, }; - __u32 link_info_len = sizeof(link_info); - - bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); - return link_info.prog_id; -} - -static inline __u32 bpf_map_get_id(int fd) -{ - struct bpf_map_info map_info = { .id = 0, }; - __u32 map_info_len = sizeof(map_info); - - bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len); - return map_info.id; -} - -/* trigger the leader program on a cpu */ -static inline int bperf_trigger_reading(int prog_fd, int cpu) -{ - DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, - .ctx_in = NULL, - .ctx_size_in = 0, - .flags = BPF_F_TEST_RUN_ON_CPU, - .cpu = cpu, - .retval = 0, - ); - - return bpf_prog_test_run_opts(prog_fd, &opts); -} -#endif /* HAVE_BPF_SKEL */ - #endif /* __PERF_BPF_COUNTER_H */ diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index 1c82377ed78b..17d7196c6589 100644 --- a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -4,6 +4,7 @@ /* Copyright (c) 2021 Google */ #include <assert.h> +#include <errno.h> #include <limits.h> #include <unistd.h> #include <sys/file.h> @@ -13,6 +14,7 @@ #include <linux/zalloc.h> #include <linux/perf_event.h> #include <api/fs/fs.h> +#include <bpf/bpf.h> #include <perf/bpf_perf.h> #include "affinity.h" @@ -26,6 +28,7 @@ #include "cpumap.h" #include "thread_map.h" +#include "bpf_skel/bperf_cgroup.h" #include "bpf_skel/bperf_cgroup.skel.h" static struct perf_event_attr cgrp_switch_attr = { @@ -41,6 +44,55 @@ static struct bperf_cgroup_bpf *skel; #define FD(evt, cpu) (*(int *)xyarray__entry(evt->core.fd, cpu, 0)) +static void setup_rodata(struct bperf_cgroup_bpf *sk, int evlist_size) +{ + int map_size, total_cpus = cpu__max_cpu().cpu; + + sk->rodata->num_cpus = total_cpus; + sk->rodata->num_events = evlist_size / nr_cgroups; + + if (cgroup_is_v2("perf_event") > 0) + sk->rodata->use_cgroup_v2 = 1; + + BUG_ON(evlist_size % nr_cgroups != 0); + + /* we need one copy of events per cpu for reading */ + map_size = total_cpus * evlist_size / nr_cgroups; + bpf_map__set_max_entries(sk->maps.events, map_size); + bpf_map__set_max_entries(sk->maps.cgrp_idx, nr_cgroups); + /* previous result is saved in a per-cpu 
array */ + map_size = evlist_size / nr_cgroups; + bpf_map__set_max_entries(sk->maps.prev_readings, map_size); + /* cgroup result needs all events (per-cpu) */ + map_size = evlist_size; + bpf_map__set_max_entries(sk->maps.cgrp_readings, map_size); +} + +static void test_max_events_program_load(void) +{ +#ifndef NDEBUG + /* + * Test that the program verifies with the maximum number of events. If + * this test fails unfortunately perf needs recompiling with a lower + * BPERF_CGROUP__MAX_EVENTS to avoid BPF verifier issues. + */ + int err, max_events = BPERF_CGROUP__MAX_EVENTS * nr_cgroups; + struct bperf_cgroup_bpf *test_skel = bperf_cgroup_bpf__open(); + + if (!test_skel) { + pr_err("Failed to open cgroup skeleton\n"); + return; + } + setup_rodata(test_skel, max_events); + err = bperf_cgroup_bpf__load(test_skel); + if (err) { + pr_err("Failed to load cgroup skeleton with max events %d.\n", + BPERF_CGROUP__MAX_EVENTS); + } + bperf_cgroup_bpf__destroy(test_skel); +#endif +} + static int bperf_load_program(struct evlist *evlist) { struct bpf_link *link; @@ -49,32 +101,18 @@ static int bperf_load_program(struct evlist *evlist) int i, j; struct perf_cpu cpu; int total_cpus = cpu__max_cpu().cpu; - int map_size, map_fd; - int prog_fd, err; + int map_fd, prog_fd, err; + + set_max_rlimit(); + + test_max_events_program_load(); skel = bperf_cgroup_bpf__open(); if (!skel) { pr_err("Failed to open cgroup skeleton\n"); return -1; } - - skel->rodata->num_cpus = total_cpus; - skel->rodata->num_events = evlist->core.nr_entries / nr_cgroups; - - BUG_ON(evlist->core.nr_entries % nr_cgroups != 0); - - /* we need one copy of events per cpu for reading */ - map_size = total_cpus * evlist->core.nr_entries / nr_cgroups; - bpf_map__set_max_entries(skel->maps.events, map_size); - bpf_map__set_max_entries(skel->maps.cgrp_idx, nr_cgroups); - /* previous result is saved in a per-cpu array */ - map_size = evlist->core.nr_entries / nr_cgroups; - bpf_map__set_max_entries(skel->maps.prev_readings, map_size); - /* cgroup result needs all events (per-cpu) */ - map_size = evlist->core.nr_entries; - bpf_map__set_max_entries(skel->maps.cgrp_readings, map_size); - - set_max_rlimit(); + setup_rodata(skel, evlist->core.nr_entries); err = bperf_cgroup_bpf__load(skel); if (err) { @@ -82,9 +120,6 @@ static int bperf_load_program(struct evlist *evlist) goto out; } - if (cgroup_is_v2("perf_event") > 0) - skel->bss->use_cgroup_v2 = 1; - err = -1; cgrp_switch = evsel__new(&cgrp_switch_attr); @@ -136,9 +171,8 @@ static int bperf_load_program(struct evlist *evlist) cgrp = evsel->cgrp; if (read_cgroup_id(cgrp) < 0) { - pr_err("Failed to get cgroup id\n"); - err = -1; - goto out; + pr_debug("Failed to get cgroup id for %s\n", cgrp->name); + cgrp->id = 0; } map_fd = bpf_map__fd(skel->maps.cgrp_idx); @@ -186,7 +220,8 @@ static int bperf_cgrp__load(struct evsel *evsel, } static int bperf_cgrp__install_pe(struct evsel *evsel __maybe_unused, - int cpu __maybe_unused, int fd __maybe_unused) + int cpu_map_idx __maybe_unused, + int fd __maybe_unused) { /* nothing to do */ return 0; diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c index 7a4297d8fd2c..c456d24efa30 100644 --- a/tools/perf/util/bpf_ftrace.c +++ b/tools/perf/util/bpf_ftrace.c @@ -1,8 +1,10 @@ -#include <stdio.h> +#include <errno.h> #include <fcntl.h> #include <stdint.h> +#include <stdio.h> #include <stdlib.h> +#include <bpf/bpf.h> #include <linux/err.h> #include "util/ftrace.h" @@ -11,6 +13,7 @@ #include "util/debug.h" #include "util/evlist.h" #include 
"util/bpf_counter.h" +#include "util/stat.h" #include "util/bpf_skel/func_latency.skel.h" @@ -20,15 +23,26 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) { int fd, err; int i, ncpus = 1, ntasks = 1; - struct filter_entry *func; + struct filter_entry *func = NULL; - if (!list_is_singular(&ftrace->filters)) { - pr_err("ERROR: %s target function(s).\n", - list_empty(&ftrace->filters) ? "No" : "Too many"); - return -1; - } + if (!list_empty(&ftrace->filters)) { + if (!list_is_singular(&ftrace->filters)) { + pr_err("ERROR: Too many target functions.\n"); + return -1; + } + func = list_first_entry(&ftrace->filters, struct filter_entry, list); + } else { + int count = 0; + struct list_head *pos; - func = list_first_entry(&ftrace->filters, struct filter_entry, list); + list_for_each(pos, &ftrace->event_pair) + count++; + + if (count != 2) { + pr_err("ERROR: Needs two target events.\n"); + return -1; + } + } skel = func_latency_bpf__open(); if (!skel) { @@ -36,17 +50,28 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) return -1; } + skel->rodata->bucket_range = ftrace->bucket_range; + skel->rodata->min_latency = ftrace->min_latency; + skel->rodata->bucket_num = ftrace->bucket_num; + if (ftrace->bucket_range && ftrace->bucket_num) { + bpf_map__set_max_entries(skel->maps.latency, ftrace->bucket_num); + } + /* don't need to set cpu filter for system-wide mode */ if (ftrace->target.cpu_list) { ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus); bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); + skel->rodata->has_cpu = 1; } if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) { ntasks = perf_thread_map__nr(ftrace->evlist->core.threads); bpf_map__set_max_entries(skel->maps.task_filter, ntasks); + skel->rodata->has_task = 1; } + skel->rodata->use_nsec = ftrace->use_nsec; + set_max_rlimit(); err = func_latency_bpf__load(skel); @@ -59,7 +84,6 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) u32 cpu; u8 val = 1; - skel->bss->has_cpu = 1; fd = bpf_map__fd(skel->maps.cpu_filter); for (i = 0; i < ncpus; i++) { @@ -72,7 +96,6 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) u32 pid; u8 val = 1; - skel->bss->has_task = 1; fd = bpf_map__fd(skel->maps.task_filter); for (i = 0; i < ntasks; i++) { @@ -81,22 +104,46 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) } } - skel->bss->use_nsec = ftrace->use_nsec; + skel->bss->min = INT64_MAX; - skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin, - false, func->name); - if (IS_ERR(skel->links.func_begin)) { - pr_err("Failed to attach fentry program\n"); - err = PTR_ERR(skel->links.func_begin); - goto out; - } + if (func) { + skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin, + false, func->name); + if (IS_ERR(skel->links.func_begin)) { + pr_err("Failed to attach fentry program\n"); + err = PTR_ERR(skel->links.func_begin); + goto out; + } - skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end, - true, func->name); - if (IS_ERR(skel->links.func_end)) { - pr_err("Failed to attach fexit program\n"); - err = PTR_ERR(skel->links.func_end); - goto out; + skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end, + true, func->name); + if (IS_ERR(skel->links.func_end)) { + pr_err("Failed to attach fexit program\n"); + err = PTR_ERR(skel->links.func_end); + goto out; + } + } else { + struct filter_entry *event; + + event = list_first_entry(&ftrace->event_pair, struct 
filter_entry, list); + + skel->links.event_begin = bpf_program__attach_raw_tracepoint(skel->progs.event_begin, + event->name); + if (IS_ERR(skel->links.event_begin)) { + pr_err("Failed to attach first tracepoint program\n"); + err = PTR_ERR(skel->links.event_begin); + goto out; + } + + event = list_next_entry(event, list); + + skel->links.event_end = bpf_program__attach_raw_tracepoint(skel->progs.event_end, + event->name); + if (IS_ERR(skel->links.event_end)) { + pr_err("Failed to attach second tracepoint program\n"); + err = PTR_ERR(skel->links.event_end); + goto out; + } } /* XXX: we don't actually use this fd - just for poll() */ @@ -118,8 +165,8 @@ int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused) return 0; } -int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, - int buckets[]) +int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace, + int buckets[], struct stats *stats) { int i, fd, err; u32 idx; @@ -132,7 +179,7 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, if (hist == NULL) return -ENOMEM; - for (idx = 0; idx < NUM_BUCKET; idx++) { + for (idx = 0; idx < skel->rodata->bucket_num; idx++) { err = bpf_map_lookup_elem(fd, &idx, hist); if (err) { buckets[idx] = 0; @@ -143,6 +190,19 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, buckets[idx] += hist[i]; } + if (skel->bss->count) { + stats->mean = skel->bss->total / skel->bss->count; + stats->n = skel->bss->count; + stats->max = skel->bss->max; + stats->min = skel->bss->min; + + if (!ftrace->use_nsec) { + stats->mean /= 1000; + stats->max /= 1000; + stats->min /= 1000; + } + } + free(hist); return 0; } diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c index 6eb2c78fd7f4..5cff755c71fa 100644 --- a/tools/perf/util/bpf_kwork.c +++ b/tools/perf/util/bpf_kwork.c @@ -147,12 +147,12 @@ static bool valid_kwork_class_type(enum kwork_class_type type) static int setup_filters(struct perf_kwork *kwork) { - u8 val = 1; - int i, nr_cpus, key, fd; - struct perf_cpu_map *map; - if (kwork->cpu_list != NULL) { - fd = bpf_map__fd(skel->maps.perf_kwork_cpu_filter); + int idx, nr_cpus; + struct perf_cpu_map *map; + struct perf_cpu cpu; + int fd = bpf_map__fd(skel->maps.perf_kwork_cpu_filter); + if (fd < 0) { pr_debug("Invalid cpu filter fd\n"); return -1; @@ -165,8 +165,8 @@ static int setup_filters(struct perf_kwork *kwork) } nr_cpus = libbpf_num_possible_cpus(); - for (i = 0; i < perf_cpu_map__nr(map); i++) { - struct perf_cpu cpu = perf_cpu_map__cpu(map, i); + perf_cpu_map__for_each_cpu(cpu, idx, map) { + u8 val = 1; if (cpu.cpu >= nr_cpus) { perf_cpu_map__put(map); @@ -176,11 +176,11 @@ static int setup_filters(struct perf_kwork *kwork) bpf_map_update_elem(fd, &cpu.cpu, &val, BPF_ANY); } perf_cpu_map__put(map); - - skel->bss->has_cpu_filter = 1; } if (kwork->profile_name != NULL) { + int key, fd; + if (strlen(kwork->profile_name) >= MAX_KWORKNAME) { pr_err("Requested name filter %s too large, limit to %d\n", kwork->profile_name, MAX_KWORKNAME - 1); @@ -195,8 +195,6 @@ static int setup_filters(struct perf_kwork *kwork) key = 0; bpf_map_update_elem(fd, &key, kwork->profile_name, BPF_ANY); - - skel->bss->has_name_filter = 1; } return 0; @@ -237,6 +235,11 @@ int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork) class_bpf->load_prepare(kwork); } + if (kwork->cpu_list != NULL) + skel->rodata->has_cpu_filter = 1; + if (kwork->profile_name != NULL) + skel->rodata->has_name_filter = 1; + if 
(kwork_trace_bpf__load(skel)) { pr_debug("Failed to load kwork trace skeleton\n"); goto out; @@ -282,7 +285,7 @@ static int add_work(struct perf_kwork *kwork, (bpf_trace->get_work_name(key, &tmp.name))) return -1; - work = perf_kwork_add_work(kwork, tmp.class, &tmp); + work = kwork->add_work(kwork, tmp.class, &tmp); if (work == NULL) return -1; diff --git a/tools/perf/util/bpf_kwork_top.c b/tools/perf/util/bpf_kwork_top.c new file mode 100644 index 000000000000..b6f187dd9136 --- /dev/null +++ b/tools/perf/util/bpf_kwork_top.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * bpf_kwork_top.c + * + * Copyright (c) 2022 Huawei Inc, Yang Jihong <yangjihong1@huawei.com> + */ + +#include <time.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <unistd.h> + +#include <linux/time64.h> + +#include "util/debug.h" +#include "util/evsel.h" +#include "util/kwork.h" + +#include <bpf/bpf.h> +#include <perf/cpumap.h> + +#include "util/bpf_skel/kwork_top.skel.h" + +/* + * This should be in sync with "util/kwork_top.bpf.c" + */ +#define MAX_COMMAND_LEN 16 + +struct time_data { + __u64 timestamp; +}; + +struct work_data { + __u64 runtime; +}; + +struct task_data { + __u32 tgid; + __u32 is_kthread; + char comm[MAX_COMMAND_LEN]; +}; + +struct work_key { + __u32 type; + __u32 pid; + __u64 task_p; +}; + +struct task_key { + __u32 pid; + __u32 cpu; +}; + +struct kwork_class_bpf { + struct kwork_class *class; + void (*load_prepare)(void); +}; + +static struct kwork_top_bpf *skel; + +void perf_kwork__top_start(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + skel->bss->from_timestamp = (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; + skel->bss->enabled = 1; + pr_debug("perf kwork top start at: %lld\n", skel->bss->from_timestamp); +} + +void perf_kwork__top_finish(void) +{ + struct timespec ts; + + skel->bss->enabled = 0; + clock_gettime(CLOCK_MONOTONIC, &ts); + skel->bss->to_timestamp = (u64)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; + pr_debug("perf kwork top finish at: %lld\n", skel->bss->to_timestamp); +} + +static void irq_load_prepare(void) +{ + bpf_program__set_autoload(skel->progs.on_irq_handler_entry, true); + bpf_program__set_autoload(skel->progs.on_irq_handler_exit, true); +} + +static struct kwork_class_bpf kwork_irq_bpf = { + .load_prepare = irq_load_prepare, +}; + +static void softirq_load_prepare(void) +{ + bpf_program__set_autoload(skel->progs.on_softirq_entry, true); + bpf_program__set_autoload(skel->progs.on_softirq_exit, true); +} + +static struct kwork_class_bpf kwork_softirq_bpf = { + .load_prepare = softirq_load_prepare, +}; + +static void sched_load_prepare(void) +{ + bpf_program__set_autoload(skel->progs.on_switch, true); +} + +static struct kwork_class_bpf kwork_sched_bpf = { + .load_prepare = sched_load_prepare, +}; + +static struct kwork_class_bpf * +kwork_class_bpf_supported_list[KWORK_CLASS_MAX] = { + [KWORK_CLASS_IRQ] = &kwork_irq_bpf, + [KWORK_CLASS_SOFTIRQ] = &kwork_softirq_bpf, + [KWORK_CLASS_SCHED] = &kwork_sched_bpf, +}; + +static bool valid_kwork_class_type(enum kwork_class_type type) +{ + return type >= 0 && type < KWORK_CLASS_MAX; +} + +static int setup_filters(struct perf_kwork *kwork) +{ + if (kwork->cpu_list) { + int idx, nr_cpus, fd; + struct perf_cpu_map *map; + struct perf_cpu cpu; + + fd = bpf_map__fd(skel->maps.kwork_top_cpu_filter); + if (fd < 0) { + pr_debug("Invalid cpu filter fd\n"); + return -1; + } + + map = perf_cpu_map__new(kwork->cpu_list); + if (!map) { + pr_debug("Invalid cpu_list\n"); + return -1; + } + + 
nr_cpus = libbpf_num_possible_cpus(); + perf_cpu_map__for_each_cpu(cpu, idx, map) { + u8 val = 1; + + if (cpu.cpu >= nr_cpus) { + perf_cpu_map__put(map); + pr_err("Requested cpu %d too large\n", cpu.cpu); + return -1; + } + bpf_map_update_elem(fd, &cpu.cpu, &val, BPF_ANY); + } + perf_cpu_map__put(map); + } + + return 0; +} + +int perf_kwork__top_prepare_bpf(struct perf_kwork *kwork) +{ + struct bpf_program *prog; + struct kwork_class *class; + struct kwork_class_bpf *class_bpf; + enum kwork_class_type type; + + skel = kwork_top_bpf__open(); + if (!skel) { + pr_debug("Failed to open kwork top skeleton\n"); + return -1; + } + + /* + * set all progs to non-autoload, + * then set corresponding progs according to config + */ + bpf_object__for_each_program(prog, skel->obj) + bpf_program__set_autoload(prog, false); + + list_for_each_entry(class, &kwork->class_list, list) { + type = class->type; + if (!valid_kwork_class_type(type) || + !kwork_class_bpf_supported_list[type]) { + pr_err("Unsupported bpf trace class %s\n", class->name); + goto out; + } + + class_bpf = kwork_class_bpf_supported_list[type]; + class_bpf->class = class; + + if (class_bpf->load_prepare) + class_bpf->load_prepare(); + } + + if (kwork->cpu_list) + skel->rodata->has_cpu_filter = 1; + + if (kwork_top_bpf__load(skel)) { + pr_debug("Failed to load kwork top skeleton\n"); + goto out; + } + + if (setup_filters(kwork)) + goto out; + + if (kwork_top_bpf__attach(skel)) { + pr_debug("Failed to attach kwork top skeleton\n"); + goto out; + } + + return 0; + +out: + kwork_top_bpf__destroy(skel); + return -1; +} + +static void read_task_info(struct kwork_work *work) +{ + int fd; + struct task_data data; + struct task_key key = { + .pid = work->id, + .cpu = work->cpu, + }; + + fd = bpf_map__fd(skel->maps.kwork_top_tasks); + if (fd < 0) { + pr_debug("Invalid top tasks map fd\n"); + return; + } + + if (!bpf_map_lookup_elem(fd, &key, &data)) { + work->tgid = data.tgid; + work->is_kthread = data.is_kthread; + work->name = strdup(data.comm); + } +} +static int add_work(struct perf_kwork *kwork, struct work_key *key, + struct work_data *data, int cpu) +{ + struct kwork_class_bpf *bpf_trace; + struct kwork_work *work; + struct kwork_work tmp = { + .id = key->pid, + .cpu = cpu, + .name = NULL, + }; + enum kwork_class_type type = key->type; + + if (!valid_kwork_class_type(type)) { + pr_debug("Invalid class type %d to add work\n", type); + return -1; + } + + bpf_trace = kwork_class_bpf_supported_list[type]; + tmp.class = bpf_trace->class; + + work = kwork->add_work(kwork, tmp.class, &tmp); + if (!work) + return -1; + + work->total_runtime = data->runtime; + read_task_info(work); + + return 0; +} + +int perf_kwork__top_read_bpf(struct perf_kwork *kwork) +{ + int i, fd, nr_cpus; + struct work_data *data; + struct work_key key, prev; + + fd = bpf_map__fd(skel->maps.kwork_top_works); + if (fd < 0) { + pr_debug("Invalid top runtime fd\n"); + return -1; + } + + nr_cpus = libbpf_num_possible_cpus(); + data = calloc(nr_cpus, sizeof(struct work_data)); + if (!data) + return -1; + + memset(&prev, 0, sizeof(prev)); + while (!bpf_map_get_next_key(fd, &prev, &key)) { + if ((bpf_map_lookup_elem(fd, &key, data)) != 0) { + pr_debug("Failed to lookup top elem\n"); + return -1; + } + + for (i = 0; i < nr_cpus; i++) { + if (data[i].runtime == 0) + continue; + + if (add_work(kwork, &key, &data[i], i)) + return -1; + } + prev = key; + } + free(data); + + return 0; +} + +void perf_kwork__top_cleanup_bpf(void) +{ + kwork_top_bpf__destroy(skel); +} diff --git 
a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index e7dddf0127bc..7b5671f13c53 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 +#include "util/cgroup.h" #include "util/debug.h" #include "util/evlist.h" +#include "util/hashmap.h" #include "util/machine.h" #include "util/map.h" #include "util/symbol.h" @@ -10,20 +12,181 @@ #include "util/lock-contention.h" #include <linux/zalloc.h> #include <linux/string.h> +#include <api/fs/fs.h> #include <bpf/bpf.h> +#include <bpf/btf.h> +#include <inttypes.h> #include "bpf_skel/lock_contention.skel.h" #include "bpf_skel/lock_data.h" static struct lock_contention_bpf *skel; +static bool has_slab_iter; +static struct hashmap slab_hash; + +static size_t slab_cache_hash(long key, void *ctx __maybe_unused) +{ + return key; +} + +static bool slab_cache_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + +static void check_slab_cache_iter(struct lock_contention *con) +{ + s32 ret; + + hashmap__init(&slab_hash, slab_cache_hash, slab_cache_equal, /*ctx=*/NULL); + + con->btf = btf__load_vmlinux_btf(); + if (con->btf == NULL) { + pr_debug("BTF loading failed: %s\n", strerror(errno)); + return; + } + + ret = btf__find_by_name_kind(con->btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT); + if (ret < 0) { + bpf_program__set_autoload(skel->progs.slab_cache_iter, false); + pr_debug("slab cache iterator is not available: %d\n", ret); + return; + } + + has_slab_iter = true; + + bpf_map__set_max_entries(skel->maps.slab_caches, con->map_nr_entries); +} + +static void run_slab_cache_iter(void) +{ + int fd; + char buf[256]; + long key, *prev_key; + + if (!has_slab_iter) + return; + + fd = bpf_iter_create(bpf_link__fd(skel->links.slab_cache_iter)); + if (fd < 0) { + pr_debug("cannot create slab cache iter: %d\n", fd); + return; + } + + /* This will run the bpf program */ + while (read(fd, buf, sizeof(buf)) > 0) + continue; + + close(fd); + + /* Read the slab cache map and build a hash with IDs */ + fd = bpf_map__fd(skel->maps.slab_caches); + prev_key = NULL; + while (!bpf_map_get_next_key(fd, prev_key, &key)) { + struct slab_cache_data *data; + + data = malloc(sizeof(*data)); + if (data == NULL) + break; + + if (bpf_map_lookup_elem(fd, &key, data) < 0) + break; + + hashmap__add(&slab_hash, data->id, data); + prev_key = &key; + } +} + +static void exit_slab_cache_iter(void) +{ + struct hashmap_entry *cur; + unsigned bkt; + + hashmap__for_each_entry(&slab_hash, cur, bkt) + free(cur->pvalue); + + hashmap__clear(&slab_hash); +} + +static void init_numa_data(struct lock_contention *con) +{ + struct symbol *sym; + struct map *kmap; + char *buf = NULL, *p; + size_t len; + long last = -1; + int ret; + + /* + * 'struct zone' is embedded in 'struct pglist_data' as an array. + * As we may not have full information of the struct zone in the + * (fake) vmlinux.h, let's get the actual size from BTF. + */ + ret = btf__find_by_name_kind(con->btf, "zone", BTF_KIND_STRUCT); + if (ret < 0) { + pr_debug("cannot get type of struct zone: %d\n", ret); + return; + } + + ret = btf__resolve_size(con->btf, ret); + if (ret < 0) { + pr_debug("cannot get size of struct zone: %d\n", ret); + return; + } + skel->rodata->sizeof_zone = ret; + + /* UMA system doesn't have 'node_data[]' - just use contig_page_data. 
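(With NUMA off, contig_page_data is the kernel's single pglist_data instance, so this one address is enough for the BPF program to reach its zones.)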
*/ + sym = machine__find_kernel_symbol_by_name(con->machine, + "contig_page_data", + &kmap); + if (sym) { + skel->rodata->contig_page_data_addr = map__unmap_ip(kmap, sym->start); + map__put(kmap); + return; + } + + /* + * The 'node_data' is an array of pointers to struct pglist_data. + * It needs to follow the pointer for each node in BPF to get the + * address of struct pglist_data and its zones. + */ + sym = machine__find_kernel_symbol_by_name(con->machine, + "node_data", + &kmap); + if (sym == NULL) + return; + + skel->rodata->node_data_addr = map__unmap_ip(kmap, sym->start); + map__put(kmap); + + /* get the number of online nodes using the last node number + 1 */ + ret = sysfs__read_str("devices/system/node/online", &buf, &len); + if (ret < 0) { + pr_debug("failed to read online node: %d\n", ret); + return; + } + + p = buf; + while (p && *p) { + last = strtol(p, &p, 0); + + if (p && (*p == ',' || *p == '-' || *p == '\n')) + p++; + } + skel->rodata->nr_nodes = last + 1; + free(buf); +} int lock_contention_prepare(struct lock_contention *con) { int i, fd; - int ncpus = 1, ntasks = 1, ntypes = 1, naddrs = 1; + int ncpus = 1, ntasks = 1, ntypes = 1, naddrs = 1, ncgrps = 1, nslabs = 1; struct evlist *evlist = con->evlist; struct target *target = con->target; + /* make sure it loads the kernel map before lookup */ + map__load(machine__kernel_map(con->machine)); + skel = lock_contention_bpf__open(); if (!skel) { pr_err("Failed to open lock-contention BPF skeleton\n"); @@ -39,17 +202,37 @@ int lock_contention_prepare(struct lock_contention *con) else bpf_map__set_max_entries(skel->maps.task_data, 1); - if (con->save_callstack) + if (con->save_callstack) { bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries); - else + if (con->owner) { + bpf_map__set_value_size(skel->maps.stack_buf, con->max_stack * sizeof(u64)); + bpf_map__set_key_size(skel->maps.owner_stacks, + con->max_stack * sizeof(u64)); + bpf_map__set_max_entries(skel->maps.owner_stacks, con->map_nr_entries); + bpf_map__set_max_entries(skel->maps.owner_data, con->map_nr_entries); + bpf_map__set_max_entries(skel->maps.owner_stat, con->map_nr_entries); + skel->rodata->max_stack = con->max_stack; + } + } else { bpf_map__set_max_entries(skel->maps.stacks, 1); + } - if (target__has_cpu(target)) + if (target__has_cpu(target)) { + skel->rodata->has_cpu = 1; ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); - if (target__has_task(target)) + } + if (target__has_task(target)) { + skel->rodata->has_task = 1; ntasks = perf_thread_map__nr(evlist->core.threads); - if (con->filters->nr_types) + } + if (con->filters->nr_types) { + skel->rodata->has_type = 1; ntypes = con->filters->nr_types; + } + if (con->filters->nr_cgrps) { + skel->rodata->has_cgroup = 1; + ncgrps = con->filters->nr_cgrps; + } /* resolve lock name filters to addr */ if (con->filters->nr_syms) { @@ -78,12 +261,56 @@ int lock_contention_prepare(struct lock_contention *con) con->filters->addrs = addrs; } naddrs = con->filters->nr_addrs; + skel->rodata->has_addr = 1; + } + + /* resolve lock name in delays */ + if (con->nr_delays) { + struct symbol *sym; + struct map *kmap; + + for (i = 0; i < con->nr_delays; i++) { + sym = machine__find_kernel_symbol_by_name(con->machine, + con->delays[i].sym, + &kmap); + if (sym == NULL) { + pr_warning("ignore unknown symbol: %s\n", + con->delays[i].sym); + continue; + } + + con->delays[i].addr = map__unmap_ip(kmap, sym->start); + } + skel->rodata->lock_delay = 1; + bpf_map__set_max_entries(skel->maps.lock_delays, con->nr_delays); } 
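The online-node parsing in init_numa_data() above only needs the highest node number, since node IDs form a 0-based range. A self-contained sketch of the same loop, under a hypothetical helper name, fed with sysfs strings such as "0\n" or "0,2-5\n" (the latter yields 6):

	#include <stdlib.h>

	static int nr_nodes_from_online(const char *buf)
	{
		long last = -1;
		char *p = (char *)buf;

		while (p && *p) {
			/* parse the next node number in the list */
			last = strtol(p, &p, 0);
			/* step over a ',' or '-' or '\n' separator */
			if (p && (*p == ',' || *p == '-' || *p == '\n'))
				p++;
		}
		/* nodes are numbered from 0, so the count is last ID + 1 */
		return last + 1;
	}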
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); bpf_map__set_max_entries(skel->maps.task_filter, ntasks); bpf_map__set_max_entries(skel->maps.type_filter, ntypes); bpf_map__set_max_entries(skel->maps.addr_filter, naddrs); + bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps); + + skel->rodata->stack_skip = con->stack_skip; + skel->rodata->aggr_mode = con->aggr_mode; + skel->rodata->needs_callstack = con->save_callstack; + skel->rodata->lock_owner = con->owner; + + if (con->aggr_mode == LOCK_AGGR_CGROUP || con->filters->nr_cgrps) { + if (cgroup_is_v2("perf_event")) + skel->rodata->use_cgroup_v2 = 1; + } + + check_slab_cache_iter(con); + + if (con->filters->nr_slabs && has_slab_iter) { + skel->rodata->has_slab = 1; + nslabs = con->filters->nr_slabs; + } + + bpf_map__set_max_entries(skel->maps.slab_filter, nslabs); + + init_numa_data(con); if (lock_contention_bpf__load(skel) < 0) { pr_err("Failed to load lock-contention BPF skeleton\n"); @@ -94,7 +321,6 @@ int lock_contention_prepare(struct lock_contention *con) u32 cpu; u8 val = 1; - skel->bss->has_cpu = 1; fd = bpf_map__fd(skel->maps.cpu_filter); for (i = 0; i < ncpus; i++) { @@ -107,7 +333,6 @@ int lock_contention_prepare(struct lock_contention *con) u32 pid; u8 val = 1; - skel->bss->has_task = 1; fd = bpf_map__fd(skel->maps.task_filter); for (i = 0; i < ntasks; i++) { @@ -120,7 +345,6 @@ int lock_contention_prepare(struct lock_contention *con) u32 pid = evlist->workload.pid; u8 val = 1; - skel->bss->has_task = 1; fd = bpf_map__fd(skel->maps.task_filter); bpf_map_update_elem(fd, &pid, &val, BPF_ANY); } @@ -128,7 +352,6 @@ int lock_contention_prepare(struct lock_contention *con) if (con->filters->nr_types) { u8 val = 1; - skel->bss->has_type = 1; fd = bpf_map__fd(skel->maps.type_filter); for (i = 0; i < con->filters->nr_types; i++) @@ -138,25 +361,187 @@ int lock_contention_prepare(struct lock_contention *con) if (con->filters->nr_addrs) { u8 val = 1; - skel->bss->has_addr = 1; fd = bpf_map__fd(skel->maps.addr_filter); for (i = 0; i < con->filters->nr_addrs; i++) bpf_map_update_elem(fd, &con->filters->addrs[i], &val, BPF_ANY); } - /* these don't work well if in the rodata section */ - skel->bss->stack_skip = con->stack_skip; - skel->bss->aggr_mode = con->aggr_mode; - skel->bss->needs_callstack = con->save_callstack; - skel->bss->lock_owner = con->owner; + if (con->filters->nr_cgrps) { + u8 val = 1; + + fd = bpf_map__fd(skel->maps.cgroup_filter); + + for (i = 0; i < con->filters->nr_cgrps; i++) + bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY); + } + + if (con->nr_delays) { + fd = bpf_map__fd(skel->maps.lock_delays); + + for (i = 0; i < con->nr_delays; i++) + bpf_map_update_elem(fd, &con->delays[i].addr, &con->delays[i].time, BPF_ANY); + } + + if (con->aggr_mode == LOCK_AGGR_CGROUP) + read_all_cgroups(&con->cgroups); bpf_program__set_autoload(skel->progs.collect_lock_syms, false); lock_contention_bpf__attach(skel); + + /* run the slab iterator after attaching */ + run_slab_cache_iter(); + + if (con->filters->nr_slabs) { + u8 val = 1; + int cache_fd; + long key, *prev_key; + + fd = bpf_map__fd(skel->maps.slab_filter); + + /* Read the slab cache map and build a hash with its address */ + cache_fd = bpf_map__fd(skel->maps.slab_caches); + prev_key = NULL; + while (!bpf_map_get_next_key(cache_fd, prev_key, &key)) { + struct slab_cache_data data; + + if (bpf_map_lookup_elem(cache_fd, &key, &data) < 0) + break; + + for (i = 0; i < con->filters->nr_slabs; i++) { + if (!strcmp(con->filters->slabs[i], data.name)) { + 
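/* name matched: key is the kmem_cache address collected by the slab iterator */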
bpf_map_update_elem(fd, &key, &val, BPF_ANY); + break; + } + } + prev_key = &key; + } + } + return 0; } +/* + * Run the BPF program directly using BPF_PROG_TEST_RUN to update the end + * timestamp in ktime so that it can calculate delta easily. + */ +static void mark_end_timestamp(void) +{ + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .flags = BPF_F_TEST_RUN_ON_CPU, + ); + int prog_fd = bpf_program__fd(skel->progs.end_timestamp); + + bpf_prog_test_run_opts(prog_fd, &opts); +} + +static void update_lock_stat(int map_fd, int pid, u64 end_ts, + enum lock_aggr_mode aggr_mode, + struct tstamp_data *ts_data) +{ + u64 delta; + struct contention_key stat_key = {}; + struct contention_data stat_data; + + if (ts_data->timestamp >= end_ts) + return; + + delta = end_ts - ts_data->timestamp; + + switch (aggr_mode) { + case LOCK_AGGR_CALLER: + stat_key.stack_id = ts_data->stack_id; + break; + case LOCK_AGGR_TASK: + stat_key.pid = pid; + break; + case LOCK_AGGR_ADDR: + stat_key.lock_addr_or_cgroup = ts_data->lock; + break; + case LOCK_AGGR_CGROUP: + /* TODO */ + return; + default: + return; + } + + if (bpf_map_lookup_elem(map_fd, &stat_key, &stat_data) < 0) + return; + + stat_data.total_time += delta; + stat_data.count++; + + if (delta > stat_data.max_time) + stat_data.max_time = delta; + if (delta < stat_data.min_time) + stat_data.min_time = delta; + + bpf_map_update_elem(map_fd, &stat_key, &stat_data, BPF_EXIST); +} + +/* + * Account entries in the tstamp map (which didn't see the corresponding + * lock:contention_end tracepoint) using end_ts. + */ +static void account_end_timestamp(struct lock_contention *con) +{ + int ts_fd, stat_fd; + int *prev_key, key; + u64 end_ts = skel->bss->end_ts; + int total_cpus; + enum lock_aggr_mode aggr_mode = con->aggr_mode; + struct tstamp_data ts_data, *cpu_data; + + /* Iterate per-task tstamp map (key = TID) */ + ts_fd = bpf_map__fd(skel->maps.tstamp); + stat_fd = bpf_map__fd(skel->maps.lock_stat); + + prev_key = NULL; + while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) { + if (bpf_map_lookup_elem(ts_fd, &key, &ts_data) == 0) { + int pid = key; + + if (aggr_mode == LOCK_AGGR_TASK && con->owner) + pid = ts_data.flags; + + update_lock_stat(stat_fd, pid, end_ts, aggr_mode, + &ts_data); + } + + prev_key = &key; + } + + /* Now it'll check per-cpu tstamp map which doesn't have TID. 
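Those per-cpu slots track spinlock sections, which cannot sleep or migrate, so they are keyed by CPU rather than task; update_lock_stat() below is therefore called with pid = -1.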
*/ + if (aggr_mode == LOCK_AGGR_TASK || aggr_mode == LOCK_AGGR_CGROUP) + return; + + total_cpus = cpu__max_cpu().cpu; + ts_fd = bpf_map__fd(skel->maps.tstamp_cpu); + + cpu_data = calloc(total_cpus, sizeof(*cpu_data)); + if (cpu_data == NULL) + return; + + prev_key = NULL; + while (!bpf_map_get_next_key(ts_fd, prev_key, &key)) { + if (bpf_map_lookup_elem(ts_fd, &key, cpu_data) < 0) + goto next; + + for (int i = 0; i < total_cpus; i++) { + if (cpu_data[i].lock == 0) + continue; + + update_lock_stat(stat_fd, -1, end_ts, aggr_mode, + &cpu_data[i]); + } + +next: + prev_key = &key; + } + free(cpu_data); +} + int lock_contention_start(void) { skel->bss->enabled = 1; @@ -166,6 +551,7 @@ int lock_contention_start(void) int lock_contention_stop(void) { skel->bss->enabled = 0; + mark_end_timestamp(); return 0; } @@ -175,7 +561,6 @@ static const char *lock_contention_get_name(struct lock_contention *con, { int idx = 0; u64 addr; - const char *name = ""; static char name_buf[KSYM_NAME_LEN]; struct symbol *sym; struct map *kmap; @@ -188,19 +573,21 @@ static const char *lock_contention_get_name(struct lock_contention *con, /* do not update idle comm which contains CPU number */ if (pid) { - struct thread *t = __machine__findnew_thread(machine, /*pid=*/-1, pid); + struct thread *t = machine__findnew_thread(machine, /*pid=*/-1, pid); - if (t == NULL) - return name; - if (!bpf_map_lookup_elem(task_fd, &pid, &task) && - thread__set_comm(t, task.comm, /*timestamp=*/0)) - name = task.comm; + if (t != NULL && + !bpf_map_lookup_elem(task_fd, &pid, &task) && + thread__set_comm(t, task.comm, /*timestamp=*/0)) { + snprintf(name_buf, sizeof(name_buf), "%s", task.comm); + return name_buf; + } } - return name; + return ""; } if (con->aggr_mode == LOCK_AGGR_ADDR) { int lock_fd = bpf_map__fd(skel->maps.lock_syms); + struct slab_cache_data *slab_data; /* per-process locks set upper bits of the flags */ if (flags & LCD_F_MMAP_LOCK) @@ -209,19 +596,41 @@ static const char *lock_contention_get_name(struct lock_contention *con, return "siglock"; /* global locks with symbols */ - sym = machine__find_kernel_symbol(machine, key->lock_addr, &kmap); + sym = machine__find_kernel_symbol(machine, key->lock_addr_or_cgroup, &kmap); if (sym) return sym->name; /* try semi-global locks collected separately */ - if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr, &flags)) { + if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr_or_cgroup, &flags)) { if (flags == LOCK_CLASS_RQLOCK) return "rq_lock"; } + if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr_or_cgroup, &flags)) { + if (flags == LOCK_CLASS_ZONE_LOCK) + return "zone_lock"; + } + + /* look slab_hash for dynamic locks in a slab object */ + if (hashmap__find(&slab_hash, flags & LCB_F_SLAB_ID_MASK, &slab_data)) { + snprintf(name_buf, sizeof(name_buf), "&%s", slab_data->name); + return name_buf; + } + return ""; } + if (con->aggr_mode == LOCK_AGGR_CGROUP) { + u64 cgrp_id = key->lock_addr_or_cgroup; + struct cgroup *cgrp = __cgroup__find(&con->cgroups, cgrp_id); + + if (cgrp) + return cgrp->name; + + snprintf(name_buf, sizeof(name_buf), "cgroup:%" PRIu64 "", cgrp_id); + return name_buf; + } + /* LOCK_AGGR_CALLER: skip lock internal functions */ while (machine__is_lock_function(machine, stack_trace[idx]) && idx < con->max_stack - 1) @@ -246,6 +655,63 @@ static const char *lock_contention_get_name(struct lock_contention *con, return name_buf; } +struct lock_stat *pop_owner_stack_trace(struct lock_contention *con) +{ + int stacks_fd, stat_fd; + u64 *stack_trace = NULL; + s32 stack_id; + struct 
contention_key ckey = {}; + struct contention_data cdata = {}; + size_t stack_size = con->max_stack * sizeof(*stack_trace); + struct lock_stat *st = NULL; + + stacks_fd = bpf_map__fd(skel->maps.owner_stacks); + stat_fd = bpf_map__fd(skel->maps.owner_stat); + if (!stacks_fd || !stat_fd) + goto out_err; + + stack_trace = zalloc(stack_size); + if (stack_trace == NULL) + goto out_err; + + if (bpf_map_get_next_key(stacks_fd, NULL, stack_trace)) + goto out_err; + + bpf_map_lookup_elem(stacks_fd, stack_trace, &stack_id); + ckey.stack_id = stack_id; + bpf_map_lookup_elem(stat_fd, &ckey, &cdata); + + st = zalloc(sizeof(struct lock_stat)); + if (!st) + goto out_err; + + st->name = strdup(stack_trace[0] ? lock_contention_get_name(con, NULL, stack_trace, 0) : + "unknown"); + if (!st->name) + goto out_err; + + st->flags = cdata.flags; + st->nr_contended = cdata.count; + st->wait_time_total = cdata.total_time; + st->wait_time_max = cdata.max_time; + st->wait_time_min = cdata.min_time; + st->callstack = stack_trace; + + if (cdata.count) + st->avg_wait_time = cdata.total_time / cdata.count; + + bpf_map_delete_elem(stacks_fd, stack_trace); + bpf_map_delete_elem(stat_fd, &ckey); + + return st; + +out_err: + free(stack_trace); + free(st); + + return NULL; +} + int lock_contention_read(struct lock_contention *con) { int fd, stack, err = 0; @@ -268,8 +734,10 @@ int lock_contention_read(struct lock_contention *con) if (stack_trace == NULL) return -1; + account_end_timestamp(con); + if (con->aggr_mode == LOCK_AGGR_TASK) { - struct thread *idle = __machine__findnew_thread(machine, + struct thread *idle = machine__findnew_thread(machine, /*pid=*/0, /*tid=*/0); thread__set_comm(idle, "swapper", /*timestamp=*/0); @@ -284,9 +752,6 @@ int lock_contention_read(struct lock_contention *con) bpf_prog_test_run_opts(prog_fd, &opts); } - /* make sure it loads the kernel map */ - map__load(maps__first(machine->kmaps)->map); - prev_key = NULL; while (!bpf_map_get_next_key(fd, prev_key, &key)) { s64 ls_key; @@ -299,7 +764,7 @@ int lock_contention_read(struct lock_contention *con) if (con->save_callstack) { bpf_map_lookup_elem(stack, &key.stack_id, stack_trace); - if (!match_callstack_filter(machine, stack_trace)) { + if (!match_callstack_filter(machine, stack_trace, con->max_stack)) { con->nr_filtered += data.count; goto next; } @@ -313,7 +778,8 @@ int lock_contention_read(struct lock_contention *con) ls_key = key.pid; break; case LOCK_AGGR_ADDR: - ls_key = key.lock_addr; + case LOCK_AGGR_CGROUP: + ls_key = key.lock_addr_or_cgroup; break; default: goto next; @@ -364,12 +830,23 @@ next: return err; } -int lock_contention_finish(void) +int lock_contention_finish(struct lock_contention *con) { if (skel) { skel->bss->enabled = 0; lock_contention_bpf__destroy(skel); } + while (!RB_EMPTY_ROOT(&con->cgroups)) { + struct rb_node *node = rb_first(&con->cgroups); + struct cgroup *cgrp = rb_entry(node, struct cgroup, node); + + rb_erase(node, &con->cgroups); + cgroup__put(cgrp); + } + + exit_slab_cache_iter(); + btf__free(con->btf); + return 0; } diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c index c863ae0c5cb5..442f91b4e8e1 100644 --- a/tools/perf/util/bpf_map.c +++ b/tools/perf/util/bpf_map.c @@ -5,6 +5,7 @@ #include <bpf/libbpf.h> #include <linux/err.h> #include <linux/kernel.h> +#include <errno.h> #include <stdbool.h> #include <stdlib.h> #include <unistd.h> @@ -35,9 +36,6 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp) if (fd < 0) return fd; - if (!map) - return PTR_ERR(map); - err = -ENOMEM; key = 
malloc(bpf_map__key_size(map)); if (key == NULL) diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c index 01f70b8e705a..88e0660c4bff 100644 --- a/tools/perf/util/bpf_off_cpu.c +++ b/tools/perf/util/bpf_off_cpu.c @@ -13,6 +13,9 @@ #include "util/cgroup.h" #include "util/strlist.h" #include <bpf/bpf.h> +#include <bpf/btf.h> +#include <internal/xyarray.h> +#include <linux/time64.h> #include "bpf_skel/off_cpu.skel.h" @@ -36,34 +39,25 @@ union off_cpu_data { u64 array[1024 / sizeof(u64)]; }; +u64 off_cpu_raw[MAX_STACKS + 5]; + static int off_cpu_config(struct evlist *evlist) { + char off_cpu_event[64]; struct evsel *evsel; - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_BPF_OUTPUT, - .size = sizeof(attr), /* to capture ABI version */ - }; - char *evname = strdup(OFFCPU_EVENT); - if (evname == NULL) - return -ENOMEM; - - evsel = evsel__new(&attr); - if (!evsel) { - free(evname); - return -ENOMEM; + scnprintf(off_cpu_event, sizeof(off_cpu_event), "bpf-output/name=%s/", OFFCPU_EVENT); + if (parse_event(evlist, off_cpu_event)) { + pr_err("Failed to open off-cpu event\n"); + return -1; } - evsel->core.attr.freq = 1; - evsel->core.attr.sample_period = 1; - /* off-cpu analysis depends on stack trace */ - evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN; - - evlist__add(evlist, evsel); - - free(evsel->name); - evsel->name = evname; + evlist__for_each_entry(evlist, evsel) { + if (evsel__is_offcpu_event(evsel)) { + evsel->core.system_wide = true; + break; + } + } return 0; } @@ -71,21 +65,42 @@ static int off_cpu_config(struct evlist *evlist) static void off_cpu_start(void *arg) { struct evlist *evlist = arg; + struct evsel *evsel; + struct perf_cpu pcpu; + int i; /* update task filter for the given workload */ - if (!skel->bss->has_cpu && !skel->bss->has_task && + if (skel->rodata->has_task && skel->rodata->uses_tgid && perf_thread_map__pid(evlist->core.threads, 0) != -1) { int fd; u32 pid; u8 val = 1; - skel->bss->has_task = 1; - skel->bss->uses_tgid = 1; fd = bpf_map__fd(skel->maps.task_filter); pid = perf_thread_map__pid(evlist->core.threads, 0); bpf_map_update_elem(fd, &pid, &val, BPF_ANY); } + /* update BPF perf_event map */ + evsel = evlist__find_evsel_by_str(evlist, OFFCPU_EVENT); + if (evsel == NULL) { + pr_err("%s evsel not found\n", OFFCPU_EVENT); + return; + } + + perf_cpu_map__for_each_cpu(pcpu, i, evsel->core.cpus) { + int err; + int cpu_nr = pcpu.cpu; + + err = bpf_map__update_elem(skel->maps.offcpu_output, &cpu_nr, sizeof(int), + xyarray__entry(evsel->core.fd, cpu_nr, 0), + sizeof(int), BPF_ANY); + if (err) { + pr_err("Failed to update perf event map for direct off-cpu dumping\n"); + return; + } + } + skel->bss->enabled = 1; } @@ -98,28 +113,36 @@ static void off_cpu_finish(void *arg __maybe_unused) /* v5.18 kernel added prev_state arg, so it needs to check the signature */ static void check_sched_switch_args(void) { - const struct btf *btf = bpf_object__btf(skel->obj); + struct btf *btf = btf__load_vmlinux_btf(); const struct btf_type *t1, *t2, *t3; u32 type_id; + if (!btf) { + pr_debug("Missing btf, check if CONFIG_DEBUG_INFO_BTF is enabled\n"); + goto cleanup; + } + type_id = btf__find_by_name_kind(btf, "btf_trace_sched_switch", BTF_KIND_TYPEDEF); if ((s32)type_id < 0) - return; + goto cleanup; t1 = btf__type_by_id(btf, type_id); if (t1 == NULL) - return; + goto cleanup; t2 = btf__type_by_id(btf, t1->type); if (t2 == NULL || !btf_is_ptr(t2)) - return; + goto cleanup; t3 = btf__type_by_id(btf, t2->type); - if (t3 && 
btf_is_func_proto(t3) && btf_vlen(t3) == 4) { + /* btf_trace func proto has one more argument for the context */ + if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 5) { /* new format: pass prev_state as 4th arg */ skel->rodata->has_prev_state = true; } +cleanup: + btf__free(btf); } int off_cpu_prepare(struct evlist *evlist, struct target *target, @@ -145,6 +168,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, if (target->cpu_list) { ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); + skel->rodata->has_cpu = 1; } if (target->pid) { @@ -170,11 +194,16 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, ntasks = MAX_PROC; bpf_map__set_max_entries(skel->maps.task_filter, ntasks); + skel->rodata->has_task = 1; + skel->rodata->uses_tgid = 1; } else if (target__has_task(target)) { ntasks = perf_thread_map__nr(evlist->core.threads); bpf_map__set_max_entries(skel->maps.task_filter, ntasks); + skel->rodata->has_task = 1; } else if (target__none(target)) { bpf_map__set_max_entries(skel->maps.task_filter, MAX_PROC); + skel->rodata->has_task = 1; + skel->rodata->uses_tgid = 1; } if (evlist__first(evlist)->cgrp) { @@ -183,6 +212,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, if (!cgroup_is_v2("perf_event")) skel->rodata->uses_cgroup_v1 = true; + skel->rodata->has_cgroup = 1; } if (opts->record_cgroup) { @@ -205,7 +235,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, u32 cpu; u8 val = 1; - skel->bss->has_cpu = 1; fd = bpf_map__fd(skel->maps.cpu_filter); for (i = 0; i < ncpus; i++) { @@ -217,8 +246,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, if (target->pid) { u8 val = 1; - skel->bss->has_task = 1; - skel->bss->uses_tgid = 1; fd = bpf_map__fd(skel->maps.task_filter); strlist__for_each_entry(pos, pid_slist) { @@ -237,7 +264,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, u32 pid; u8 val = 1; - skel->bss->has_task = 1; fd = bpf_map__fd(skel->maps.task_filter); for (i = 0; i < ntasks; i++) { @@ -250,7 +276,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, struct evsel *evsel; u8 val = 1; - skel->bss->has_cgroup = 1; fd = bpf_map__fd(skel->maps.cgroup_filter); evlist__for_each_entry(evlist, evsel) { @@ -269,6 +294,8 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, } } + skel->bss->offcpu_thresh_ns = opts->off_cpu_thresh_ns; + err = off_cpu_bpf__attach(skel); if (err) { pr_err("Failed to attach off-cpu BPF skeleton\n"); @@ -292,6 +319,7 @@ int off_cpu_write(struct perf_session *session) { int bytes = 0, size; int fd, stack; + u32 raw_size; u64 sample_type, val, sid = 0; struct evsel *evsel; struct perf_data_file *file = &session->data->file; @@ -331,46 +359,54 @@ int off_cpu_write(struct perf_session *session) while (!bpf_map_get_next_key(fd, &prev, &key)) { int n = 1; /* start from perf_event_header */ - int ip_pos = -1; bpf_map_lookup_elem(fd, &key, &val); + /* zero-fill some of the fields, will be overwritten by raw_data when parsing */ if (sample_type & PERF_SAMPLE_IDENTIFIER) data.array[n++] = sid; - if (sample_type & PERF_SAMPLE_IP) { - ip_pos = n; + if (sample_type & PERF_SAMPLE_IP) data.array[n++] = 0; /* will be updated */ - } if (sample_type & PERF_SAMPLE_TID) - data.array[n++] = (u64)key.pid << 32 | key.tgid; + data.array[n++] = 0; if (sample_type & PERF_SAMPLE_TIME) data.array[n++] = tstamp; - if (sample_type & PERF_SAMPLE_ID) - data.array[n++] = sid; if 
(sample_type & PERF_SAMPLE_CPU) data.array[n++] = 0; if (sample_type & PERF_SAMPLE_PERIOD) - data.array[n++] = val; - if (sample_type & PERF_SAMPLE_CALLCHAIN) { - int len = 0; - - /* data.array[n] is callchain->nr (updated later) */ - data.array[n + 1] = PERF_CONTEXT_USER; - data.array[n + 2] = 0; - - bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]); - while (data.array[n + 2 + len]) + data.array[n++] = 0; + if (sample_type & PERF_SAMPLE_RAW) { + /* + * [ size ][ data ] + * [ data ] + * [ data ] + * [ data ] + * [ data ][ empty] + */ + int len = 0, i = 0; + void *raw_data = (void *)data.array + n * sizeof(u64); + + off_cpu_raw[i++] = (u64)key.pid << 32 | key.tgid; + off_cpu_raw[i++] = val; + + /* off_cpu_raw[i] is callchain->nr (updated later) */ + off_cpu_raw[i + 1] = PERF_CONTEXT_USER; + off_cpu_raw[i + 2] = 0; + + bpf_map_lookup_elem(stack, &key.stack_id, &off_cpu_raw[i + 2]); + while (off_cpu_raw[i + 2 + len]) len++; - /* update length of callchain */ - data.array[n] = len + 1; + off_cpu_raw[i] = len + 1; + i += len + 2; + + off_cpu_raw[i++] = key.cgroup_id; - /* update sample ip with the first callchain entry */ - if (ip_pos >= 0) - data.array[ip_pos] = data.array[n + 2]; + raw_size = i * sizeof(u64) + sizeof(u32); /* 4 bytes for alignment */ + memcpy(raw_data, &raw_size, sizeof(raw_size)); + memcpy(raw_data + sizeof(u32), off_cpu_raw, i * sizeof(u64)); - /* calculate sample callchain data array length */ - n += len + 2; + n += i + 1; } if (sample_type & PERF_SAMPLE_CGROUP) data.array[n++] = key.cgroup_id; diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c new file mode 100644 index 000000000000..2a6e61864ee0 --- /dev/null +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -0,0 +1,580 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Augment the raw_syscalls tracepoints with the contents of the pointer arguments. + * + * This exactly matches what is marshalled into the raw_syscall:sys_enter + * payload expected by the 'perf trace' beautifiers. + */ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <linux/limits.h> + +#define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1) +#define __PERF_ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) + +/** + * is_power_of_2() - check if a value is a power of two + * @n: the value to check + * + * Determine whether some value is a power of two, where zero is *not* + * considered a power of two. Return: true if @n is a power of 2, otherwise + * false. + */ +#define is_power_of_2(n) (n != 0 && ((n & (n - 1)) == 0)) + +#define MAX_CPUS 4096 + +#define TRACE_AUG_MAX_BUF 32 /* for buffer augmentation in perf trace */ + +/* bpf-output associated map */ +struct __augmented_syscalls__ { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, int); + __type(value, __u32); + __uint(max_entries, MAX_CPUS); +} __augmented_syscalls__ SEC(".maps"); + +/* + * What to augment at entry? + * + * Pointer arg payloads (filenames, etc) passed from userspace to the kernel + */ +struct syscalls_sys_enter { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1024); +} syscalls_sys_enter SEC(".maps"); + +/* + * What to augment at exit? + * + * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace. 
+ */ +struct syscalls_sys_exit { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1024); +} syscalls_sys_exit SEC(".maps"); + +struct syscall_enter_args { + unsigned long long common_tp_fields; + long syscall_nr; + unsigned long args[6]; +}; + +struct syscall_exit_args { + unsigned long long common_tp_fields; + long syscall_nr; + long ret; +}; + +/* + * Desired design of maximum size and alignment (see RFC2553) + */ +#define SS_MAXSIZE 128 /* Implementation specific max size */ + +typedef unsigned short sa_family_t; + +/* + * FIXME: Should come from system headers + * + * The definition uses anonymous union and struct in order to control the + * default alignment. + */ +struct sockaddr_storage { + union { + struct { + sa_family_t ss_family; /* address family */ + /* Following field(s) are implementation specific */ + char __data[SS_MAXSIZE - sizeof(unsigned short)]; + /* space to achieve desired size, */ + /* _SS_MAXSIZE value minus size of ss_family */ + }; + void *__align; /* implementation specific desired alignment */ + }; +}; + +struct augmented_arg { + unsigned int size; + int err; + union { + char value[PATH_MAX]; + struct sockaddr_storage saddr; + }; +}; + +struct pids_filtered { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, pid_t); + __type(value, bool); + __uint(max_entries, 64); +} pids_filtered SEC(".maps"); + +struct augmented_args_payload { + struct syscall_enter_args args; + struct augmented_arg arg, arg2; // We have to reserve space for two arguments (rename, etc) +}; + +// We need more tmp space than the BPF stack can give us +struct augmented_args_tmp { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, int); + __type(value, struct augmented_args_payload); + __uint(max_entries, 1); +} augmented_args_tmp SEC(".maps"); + +struct beauty_map_enter { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, __u32[6]); + __uint(max_entries, 512); +} beauty_map_enter SEC(".maps"); + +struct beauty_payload_enter { + struct syscall_enter_args args; + struct augmented_arg aug_args[6]; +}; + +struct beauty_payload_enter_map { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, int); + __type(value, struct beauty_payload_enter); + __uint(max_entries, 1); +} beauty_payload_enter_map SEC(".maps"); + +static inline struct augmented_args_payload *augmented_args_payload(void) +{ + int key = 0; + return bpf_map_lookup_elem(&augmented_args_tmp, &key); +} + +static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len) +{ + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ + return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len); +} + +static inline int augmented__beauty_output(void *ctx, void *data, int len) +{ + return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, data, len); +} + +static inline +unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len) +{ + unsigned int augmented_len = sizeof(*augmented_arg); + int string_len = bpf_probe_read_user_str(&augmented_arg->value, arg_len, arg); + + augmented_arg->size = augmented_arg->err = 0; + /* + * probe_read_str may return < 0, e.g. 
-EFAULT
+	 * So we leave that in the augmented_arg->size that userspace will see
+	 */
+	if (string_len > 0) {
+		augmented_len -= sizeof(augmented_arg->value) - string_len;
+		_Static_assert(is_power_of_2(sizeof(augmented_arg->value)), "sizeof(augmented_arg->value) needs to be a power of two");
+		augmented_len &= sizeof(augmented_arg->value) - 1;
+		augmented_arg->size = string_len;
+	} else {
+		/*
+		 * So that userspace can notice the error while still being able
+		 * to skip this augmented arg record
+		 */
+		augmented_arg->err = string_len;
+		augmented_len = offsetof(struct augmented_arg, value);
+	}
+
+	return augmented_len;
+}
+
+SEC("tp/raw_syscalls/sys_enter")
+int syscall_unaugmented(struct syscall_enter_args *args)
+{
+	return 1;
+}
+
+/*
+ * These will be tail_called from SEC("raw_syscalls:sys_enter"), so they will
+ * find in augmented_args_tmp what was read by that raw_syscalls:sys_enter and
+ * go on from there, reading the first syscall arg as a string, i.e. open's
+ * filename.
+ */
+SEC("tp/syscalls/sys_enter_connect")
+int sys_enter_connect(struct syscall_enter_args *args)
+{
+	struct augmented_args_payload *augmented_args = augmented_args_payload();
+	const void *sockaddr_arg = (const void *)args->args[1];
+	unsigned int socklen = args->args[2];
+	unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
+
+	if (augmented_args == NULL)
+		return 1; /* Failure: don't filter */
+
+	_Static_assert(is_power_of_2(sizeof(augmented_args->arg.saddr)), "sizeof(augmented_args->arg.saddr) needs to be a power of two");
+	socklen &= sizeof(augmented_args->arg.saddr) - 1;
+
+	bpf_probe_read_user(&augmented_args->arg.saddr, socklen, sockaddr_arg);
+	augmented_args->arg.size = socklen;
+	augmented_args->arg.err = 0;
+
+	return augmented__output(args, augmented_args, len + socklen);
+}
+
+SEC("tp/syscalls/sys_enter_sendto")
+int sys_enter_sendto(struct syscall_enter_args *args)
+{
+	struct augmented_args_payload *augmented_args = augmented_args_payload();
+	const void *sockaddr_arg = (const void *)args->args[4];
+	unsigned int socklen = args->args[5];
+	unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs
+
+	if (augmented_args == NULL)
+		return 1; /* Failure: don't filter */
+
+	socklen &= sizeof(augmented_args->arg.saddr) - 1;
+
+	bpf_probe_read_user(&augmented_args->arg.saddr, socklen, sockaddr_arg);
+
+	return augmented__output(args, augmented_args, len + socklen);
+}
+
+SEC("tp/syscalls/sys_enter_open")
+int sys_enter_open(struct syscall_enter_args *args)
+{
+	struct augmented_args_payload *augmented_args = augmented_args_payload();
+	const void *filename_arg = (const void *)args->args[0];
+	unsigned int len = sizeof(augmented_args->args);
+
+	if (augmented_args == NULL)
+		return 1; /* Failure: don't filter */
+
+	len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
+
+	return augmented__output(args, augmented_args, len);
+}
+
+SEC("tp/syscalls/sys_enter_openat")
+int sys_enter_openat(struct syscall_enter_args *args)
+{
+	struct augmented_args_payload *augmented_args = augmented_args_payload();
+	const void *filename_arg = (const void *)args->args[1];
+	unsigned int len = sizeof(augmented_args->args);
+
+	if (augmented_args == NULL)
+		return 1; /* Failure: don't filter */
+
+	len += augmented_arg__read_str(&augmented_args->arg, filename_arg, sizeof(augmented_args->arg.value));
+
+	return augmented__output(args, augmented_args, len);
+}
+
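/*
 * Every single-string augmenter above follows the same shape: grab the
 * per-CPU scratch payload, append the user string right after the raw
 * sys_enter args, and emit the combined record via the bpf-output event.
 * As a hedged illustration only (not part of this patch -- the chdir hook
 * is hypothetical), a handler for another path-taking syscall would look
 * like this, reusing only the helpers already defined in this file:
 */
SEC("tp/syscalls/sys_enter_chdir")
int sys_enter_chdir(struct syscall_enter_args *args)
{
	struct augmented_args_payload *augmented_args = augmented_args_payload();
	const void *filename_arg = (const void *)args->args[0]; /* chdir(path): arg 0 */
	unsigned int len = sizeof(augmented_args->args);

	if (augmented_args == NULL)
		return 1; /* Failure: don't filter */

	/* append the NUL-terminated path after the raw args; len grows by the augmented size */
	len += augmented_arg__read_str(&augmented_args->arg, filename_arg,
				       sizeof(augmented_args->arg.value));

	return augmented__output(args, augmented_args, len);
}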
+SEC("tp/syscalls/sys_enter_rename") +int sys_enter_rename(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const void *oldpath_arg = (const void *)args->args[0], + *newpath_arg = (const void *)args->args[1]; + unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len; + + if (augmented_args == NULL) + return 1; /* Failure: don't filter */ + + len += 2 * sizeof(u64); // The overhead of size and err, just before the payload... + + oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value)); + augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64)); + len += augmented_args->arg.size; + + /* Every read from userspace is limited to value size */ + if (augmented_args->arg.size > sizeof(augmented_args->arg.value)) + return 1; /* Failure: don't filter */ + + struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size; + + newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value)); + arg2->size = newpath_len; + + len += newpath_len; + + return augmented__output(args, augmented_args, len); +} + +SEC("tp/syscalls/sys_enter_renameat2") +int sys_enter_renameat2(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const void *oldpath_arg = (const void *)args->args[1], + *newpath_arg = (const void *)args->args[3]; + unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len; + + if (augmented_args == NULL) + return 1; /* Failure: don't filter */ + + len += 2 * sizeof(u64); // The overhead of size and err, just before the payload... + + oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value)); + augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64)); + len += augmented_args->arg.size; + + /* Every read from userspace is limited to value size */ + if (augmented_args->arg.size > sizeof(augmented_args->arg.value)) + return 1; /* Failure: don't filter */ + + struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size; + + newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value)); + arg2->size = newpath_len; + + len += newpath_len; + + return augmented__output(args, augmented_args, len); +} + +#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ + +// we need just the start, get the size to then copy it +struct perf_event_attr_size { + __u32 type; + /* + * Size of the attr structure, for fwd/bwd compat. 
+ */ + __u32 size; +}; + +SEC("tp/syscalls/sys_enter_perf_event_open") +int sys_enter_perf_event_open(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read; + unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs + + if (augmented_args == NULL) + goto failure; + + if (bpf_probe_read_user(&augmented_args->arg.value, sizeof(*attr), attr) < 0) + goto failure; + + attr_read = (const struct perf_event_attr_size *)augmented_args->arg.value; + + __u32 size = attr_read->size; + + if (!size) + size = PERF_ATTR_SIZE_VER0; + + if (size > sizeof(augmented_args->arg.value)) + goto failure; + + // Now that we read attr->size and tested it against the size limits, read it completely + if (bpf_probe_read_user(&augmented_args->arg.value, size, attr) < 0) + goto failure; + + return augmented__output(args, augmented_args, len + size); +failure: + return 1; /* Failure: don't filter */ +} + +SEC("tp/syscalls/sys_enter_clock_nanosleep") +int sys_enter_clock_nanosleep(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const void *rqtp_arg = (const void *)args->args[2]; + unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs + __u32 size = sizeof(struct timespec64); + + if (augmented_args == NULL) + goto failure; + + if (size > sizeof(augmented_args->arg.value)) + goto failure; + + bpf_probe_read_user(&augmented_args->arg.value, size, rqtp_arg); + + return augmented__output(args, augmented_args, len + size); +failure: + return 1; /* Failure: don't filter */ +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int sys_enter_nanosleep(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const void *req_arg = (const void *)args->args[0]; + unsigned int len = sizeof(augmented_args->args); + __u32 size = sizeof(struct timespec64); + + if (augmented_args == NULL) + goto failure; + + if (size > sizeof(augmented_args->arg.value)) + goto failure; + + bpf_probe_read_user(&augmented_args->arg.value, size, req_arg); + + return augmented__output(args, augmented_args, len + size); +failure: + return 1; /* Failure: don't filter */ +} + +static pid_t getpid(void) +{ + return bpf_get_current_pid_tgid(); +} + +static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) +{ + return bpf_map_lookup_elem(pids, &pid) != NULL; +} + +static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) +{ + bool augmented, do_output = false; + int zero = 0, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value); + u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */ + s64 aug_size, size; + unsigned int nr, *beauty_map; + struct beauty_payload_enter *payload; + void *arg, *payload_offset; + + /* fall back to do predefined tail call */ + if (args == NULL) + return 1; + + /* use syscall number to get beauty_map entry */ + nr = (__u32)args->syscall_nr; + beauty_map = bpf_map_lookup_elem(&beauty_map_enter, &nr); + + /* set up payload for output */ + payload = bpf_map_lookup_elem(&beauty_payload_enter_map, &zero); + payload_offset = (void *)&payload->aug_args; + + if (beauty_map == NULL || payload == NULL) + return 1; + + /* copy the sys_enter header, which has the syscall_nr */ + 
__builtin_memcpy(&payload->args, args, sizeof(struct syscall_enter_args)); + + /* + * Determine what type of argument and how many bytes to read from user space, using the + * value in the beauty_map. This is the relation of parameter type and its corresponding + * value in the beauty map, and how many bytes we read eventually: + * + * string: 1 -> size of string + * struct: size of struct -> size of struct + * buffer: -1 * (index of paired len) -> value of paired len (maximum: TRACE_AUG_MAX_BUF) + */ + for (int i = 0; i < 6; i++) { + arg = (void *)args->args[i]; + augmented = false; + size = beauty_map[i]; + aug_size = size; /* size of the augmented data read from user space */ + + if (size == 0 || arg == NULL) + continue; + + if (size == 1) { /* string */ + aug_size = bpf_probe_read_user_str(((struct augmented_arg *)payload_offset)->value, value_size, arg); + /* minimum of 0 to pass the verifier */ + if (aug_size < 0) + aug_size = 0; + + augmented = true; + } else if (size > 0 && size <= value_size) { /* struct */ + if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg)) + augmented = true; + } else if ((int)size < 0 && size >= -6) { /* buffer */ + index = -(size + 1); + barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick. + index &= 7; // Satisfy the bounds checking with the verifier in some kernels. + aug_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index]; + + if (aug_size > 0) { + if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg)) + augmented = true; + } + } + + /* Augmented data size is limited to sizeof(augmented_arg->unnamed union with value field) */ + if (aug_size > value_size) + aug_size = value_size; + + /* write data to payload */ + if (augmented) { + int written = offsetof(struct augmented_arg, value) + aug_size; + + if (written < 0 || written > sizeof(struct augmented_arg)) + return 1; + + ((struct augmented_arg *)payload_offset)->size = aug_size; + output += written; + payload_offset += written; + do_output = true; + } + } + + if (!do_output || (sizeof(struct syscall_enter_args) + output) > sizeof(struct beauty_payload_enter)) + return 1; + + return augmented__beauty_output(ctx, payload, sizeof(struct syscall_enter_args) + output); +} + +SEC("tp/raw_syscalls/sys_enter") +int sys_enter(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args; + /* + * We start len, the amount of data that will be in the perf ring + * buffer, if this is not filtered out by one of pid_filter__has(), + * syscall->enabled, etc, with the non-augmented raw syscall payload, + * i.e. sizeof(augmented_args->args). + * + * We'll add to this as we add augmented syscalls right after that + * initial, non-augmented raw_syscalls:sys_enter payload. + */ + + if (pid_filter__has(&pids_filtered, getpid())) + return 0; + + augmented_args = augmented_args_payload(); + if (augmented_args == NULL) + return 1; + + bpf_probe_read_kernel(&augmented_args->args, sizeof(augmented_args->args), args); + + /* + * Jump to syscall specific augmenter, even if the default one, + * "!raw_syscalls:unaugmented" that will just return 1 to return the + * unaugmented tracepoint payload. 
+ */ + if (augment_sys_enter(args, &augmented_args->args)) + bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr); + + // If not found on the PROG_ARRAY syscalls map, then we're filtering it: + return 0; +} + +SEC("tp/raw_syscalls/sys_exit") +int sys_exit(struct syscall_exit_args *args) +{ + struct syscall_exit_args exit_args; + + if (pid_filter__has(&pids_filtered, getpid())) + return 0; + + bpf_probe_read_kernel(&exit_args, sizeof(exit_args), args); + /* + * Jump to syscall specific return augmenter, even if the default one, + * "!raw_syscalls:unaugmented" that will just return 1 to return the + * unaugmented tracepoint payload. + */ + bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr); + /* + * If not found on the PROG_ARRAY syscalls map, then we're filtering it: + */ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/util/bpf_skel/bench_uprobe.bpf.c b/tools/perf/util/bpf_skel/bench_uprobe.bpf.c new file mode 100644 index 000000000000..a01c7f791fcd --- /dev/null +++ b/tools/perf/util/bpf_skel/bench_uprobe.bpf.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2023 Red Hat +#include "vmlinux.h" +#include <bpf/bpf_tracing.h> + +unsigned int nr_uprobes; +unsigned int nr_uretprobes; + +SEC("uprobe") +int BPF_UPROBE(empty) +{ + return 0; +} + +SEC("uprobe") +int BPF_UPROBE(trace_printk) +{ + char fmt[] = "perf bench uprobe %u"; + + bpf_trace_printk(fmt, sizeof(fmt), ++nr_uprobes); + return 0; +} + +SEC("uretprobe") +int BPF_URETPROBE(empty_ret) +{ + return 0; +} + +SEC("uretprobe") +int BPF_URETPROBE(trace_printk_ret) +{ + char fmt[] = "perf bench uretprobe %u"; + + bpf_trace_printk(fmt, sizeof(fmt), ++nr_uretprobes); + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c index 6a438e0102c5..c2298a2decc9 100644 --- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c @@ -1,14 +1,12 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2021 Facebook // Copyright (c) 2021 Google +#include "bperf_cgroup.h" #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> -#define MAX_LEVELS 10 // max cgroup hierarchy level: arbitrary -#define MAX_EVENTS 32 // max events per cgroup: arbitrary - // NOTE: many of map and global data will be modified before loading // from the userspace (perf tool) using the skeleton helpers. 
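/*
 * The hunk below moves use_cgroup_v2 from .bss into .rodata (const
 * volatile), so its value is frozen at load time and the verifier can
 * dead-code-eliminate the unused cgroup v1/v2 branch. The perf tool must
 * therefore set it between skeleton open and load. A minimal sketch of
 * that userspace sequence, assuming the names generated by the usual
 * libbpf skeleton convention (error handling elided):
 *
 *	struct bperf_cgroup_bpf *skel = bperf_cgroup_bpf__open();
 *
 *	skel->rodata->use_cgroup_v2 = cgroup_is_v2("perf_event") ? 1 : 0;
 *
 *	bperf_cgroup_bpf__load(skel);	// rodata is read-only from here on
 */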
@@ -57,9 +55,9 @@ struct cgroup___old { const volatile __u32 num_events = 1; const volatile __u32 num_cpus = 1; +const volatile int use_cgroup_v2 = 0; int enabled = 0; -int use_cgroup_v2 = 0; int perf_subsys_id = -1; static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level) @@ -97,7 +95,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_subsys_id], cgroup); level = BPF_CORE_READ(cgrp, level); - for (cnt = 0; i < MAX_LEVELS; i++) { + for (cnt = 0; i < BPERF_CGROUP__MAX_LEVELS; i++) { __u64 cgrp_id; if (i > level) @@ -123,7 +121,7 @@ static inline int get_cgroup_v2_idx(__u32 *cgrps, int size) __u32 *elem; int cnt; - for (cnt = 0; i < MAX_LEVELS; i++) { + for (cnt = 0; i < BPERF_CGROUP__MAX_LEVELS; i++) { __u64 cgrp_id = bpf_get_current_ancestor_cgroup_id(i); if (cgrp_id == 0) @@ -148,17 +146,17 @@ static int bperf_cgroup_count(void) register int c = 0; struct bpf_perf_event_value val, delta, *prev_val, *cgrp_val; __u32 cpu = bpf_get_smp_processor_id(); - __u32 cgrp_idx[MAX_LEVELS]; + __u32 cgrp_idx[BPERF_CGROUP__MAX_LEVELS]; int cgrp_cnt; __u32 key, cgrp; long err; if (use_cgroup_v2) - cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, MAX_LEVELS); + cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, BPERF_CGROUP__MAX_LEVELS); else - cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, MAX_LEVELS); + cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, BPERF_CGROUP__MAX_LEVELS); - for ( ; idx < MAX_EVENTS; idx++) { + for ( ; idx < BPERF_CGROUP__MAX_EVENTS; idx++) { if (idx == num_events) break; @@ -186,7 +184,7 @@ static int bperf_cgroup_count(void) delta.enabled = val.enabled - prev_val->enabled; delta.running = val.running - prev_val->running; - for (c = 0; c < MAX_LEVELS; c++) { + for (c = 0; c < BPERF_CGROUP__MAX_LEVELS; c++) { if (c == cgrp_cnt) break; diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.h b/tools/perf/util/bpf_skel/bperf_cgroup.h new file mode 100644 index 000000000000..3fb84b19d39a --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf_cgroup.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Data structures shared between BPF and tools. */ +#ifndef __BPERF_CGROUP_H +#define __BPERF_CGROUP_H + +// These constants impact code size of bperf_cgroup.bpf.c that may result in BPF +// verifier issues. They are exposed to control the size and also to disable BPF +// counters when the number of user events is too large. 
+ +// max cgroup hierarchy level: arbitrary +#define BPERF_CGROUP__MAX_LEVELS 10 +// max events per cgroup: arbitrary +#define BPERF_CGROUP__MAX_EVENTS 128 + +#endif /* __BPERF_CGROUP_H */ diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c index f193998530d4..0595063139a3 100644 --- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c @@ -5,6 +5,8 @@ #include <bpf/bpf_tracing.h> #include "bperf_u.h" +#define MAX_ENTRIES 102400 + struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(key_size, sizeof(__u32)); @@ -22,25 +24,29 @@ struct { struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bperf_filter_value)); + __uint(max_entries, MAX_ENTRIES); + __uint(map_flags, BPF_F_NO_PREALLOC); } filter SEC(".maps"); enum bperf_filter_type type = 0; int enabled = 0; +int inherit; SEC("fexit/XXX") int BPF_PROG(fexit_XXX) { struct bpf_perf_event_value *diff_val, *accum_val; __u32 filter_key, zero = 0; - __u32 *accum_key; + __u32 accum_key; + struct bperf_filter_value *fval; if (!enabled) return 0; switch (type) { case BPERF_FILTER_GLOBAL: - accum_key = &zero; + accum_key = zero; goto do_add; case BPERF_FILTER_CPU: filter_key = bpf_get_smp_processor_id(); @@ -49,22 +55,34 @@ int BPF_PROG(fexit_XXX) filter_key = bpf_get_current_pid_tgid() & 0xffffffff; break; case BPERF_FILTER_TGID: - filter_key = bpf_get_current_pid_tgid() >> 32; + /* Use pid as the filter_key to exclude new task counts + * when inherit is disabled. Don't worry about the existing + * children in TGID losing their counts, bpf_counter has + * already added them to the filter map via perf_thread_map + * before this bpf prog runs. + */ + filter_key = inherit ? + bpf_get_current_pid_tgid() >> 32 : + bpf_get_current_pid_tgid() & 0xffffffff; break; default: return 0; } - accum_key = bpf_map_lookup_elem(&filter, &filter_key); - if (!accum_key) + fval = bpf_map_lookup_elem(&filter, &filter_key); + if (!fval) return 0; + accum_key = fval->accum_key; + if (fval->exited) + bpf_map_delete_elem(&filter, &filter_key); + do_add: diff_val = bpf_map_lookup_elem(&diff_readings, &zero); if (!diff_val) return 0; - accum_val = bpf_map_lookup_elem(&accum_readings, accum_key); + accum_val = bpf_map_lookup_elem(&accum_readings, &accum_key); if (!accum_val) return 0; @@ -75,4 +93,70 @@ do_add: return 0; } +/* The program is only used for PID or TGID filter types. */ +SEC("tp_btf/task_newtask") +int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags) +{ + __u32 parent_key, child_key; + struct bperf_filter_value *parent_fval; + struct bperf_filter_value child_fval = { 0 }; + + if (!enabled) + return 0; + + switch (type) { + case BPERF_FILTER_PID: + parent_key = bpf_get_current_pid_tgid() & 0xffffffff; + child_key = task->pid; + break; + case BPERF_FILTER_TGID: + parent_key = bpf_get_current_pid_tgid() >> 32; + child_key = task->tgid; + if (child_key == parent_key) + return 0; + break; + default: + return 0; + } + + /* Check if the current task is one of the target tasks to be counted */ + parent_fval = bpf_map_lookup_elem(&filter, &parent_key); + if (!parent_fval) + return 0; + + /* Start counting for the new task by adding it into filter map, + * inherit the accum key of its parent task so that they can be + * counted together. 
+ */ + child_fval.accum_key = parent_fval->accum_key; + child_fval.exited = 0; + bpf_map_update_elem(&filter, &child_key, &child_fval, BPF_NOEXIST); + + return 0; +} + +/* The program is only used for PID or TGID filter types. */ +SEC("tp_btf/sched_process_exit") +int BPF_PROG(on_exittask, struct task_struct *task) +{ + __u32 pid; + struct bperf_filter_value *fval; + + if (!enabled) + return 0; + + /* Stop counting for this task by removing it from filter map. + * For TGID type, if the pid can be found in the map, it means that + * this pid belongs to the leader task. After the task exits, the + * tgid of its child tasks (if any) will be 1, so the pid can be + * safely removed. + */ + pid = task->pid; + fval = bpf_map_lookup_elem(&filter, &pid); + if (fval) + fval->exited = 1; + + return 0; +} + char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/bpf_skel/bperf_u.h b/tools/perf/util/bpf_skel/bperf_u.h index 1ce0c2c905c1..4a4a753980be 100644 --- a/tools/perf/util/bpf_skel/bperf_u.h +++ b/tools/perf/util/bpf_skel/bperf_u.h @@ -11,4 +11,9 @@ enum bperf_filter_type { BPERF_FILTER_TGID, }; +struct bperf_filter_value { + __u32 accum_key; + __u8 exited; +}; + #endif /* __BPERF_STAT_U_H */ diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c index 9d01e3af7479..621e2022c8bc 100644 --- a/tools/perf/util/bpf_skel/func_latency.bpf.c +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c @@ -37,38 +37,104 @@ struct { int enabled = 0; -int has_cpu = 0; -int has_task = 0; -int use_nsec = 0; -SEC("kprobe/func") -int BPF_PROG(func_begin) +// stats +__s64 total; +__s64 count; +__s64 max; +__s64 min; + +const volatile int has_cpu = 0; +const volatile int has_task = 0; +const volatile int use_nsec = 0; +const volatile unsigned int bucket_range; +const volatile unsigned int min_latency; +const volatile unsigned int max_latency; +const volatile unsigned int bucket_num = NUM_BUCKET; + +static bool can_record(void) { - __u64 key, now; - - if (!enabled) - return 0; - - key = bpf_get_current_pid_tgid(); - if (has_cpu) { __u32 cpu = bpf_get_smp_processor_id(); __u8 *ok; ok = bpf_map_lookup_elem(&cpu_filter, &cpu); if (!ok) - return 0; + return false; } if (has_task) { - __u32 pid = key & 0xffffffff; + __u32 pid = bpf_get_current_pid_tgid(); __u8 *ok; ok = bpf_map_lookup_elem(&task_filter, &pid); if (!ok) - return 0; + return false; + } + return true; +} + +static void update_latency(__s64 delta) +{ + __u64 val = delta; + __u32 key = 0; + __u64 *hist; + __u64 cmp_base = use_nsec ? 1 : 1000; + + if (delta < 0) + return; + + if (bucket_range != 0) { + val = delta / cmp_base; + + if (min_latency > 0) { + if (val > min_latency) + val -= min_latency; + else + goto do_lookup; + } + + // Less than 1 unit (ms or ns), or, in the future, + // than the min latency desired. + if (val > 0) { // 1st entry: [ 1 unit .. 
bucket_range units ) + key = val / bucket_range + 1; + if (key >= bucket_num) + key = bucket_num - 1; + } + + goto do_lookup; + } + // calculate index using delta + for (key = 0; key < (bucket_num - 1); key++) { + if (delta < (cmp_base << key)) + break; } +do_lookup: + hist = bpf_map_lookup_elem(&latency, &key); + if (!hist) + return; + + __sync_fetch_and_add(hist, 1); + + __sync_fetch_and_add(&total, delta); // always in nsec + __sync_fetch_and_add(&count, 1); + + if (delta > max) + max = delta; + if (delta < min) + min = delta; +} + +SEC("kprobe/func") +int BPF_PROG(func_begin) +{ + __u64 key, now; + + if (!enabled || !can_record()) + return 0; + + key = bpf_get_current_pid_tgid(); now = bpf_ktime_get_ns(); // overwrite timestamp for nested functions @@ -81,7 +147,6 @@ int BPF_PROG(func_end) { __u64 tid; __u64 *start; - __u64 cmp_base = use_nsec ? 1 : 1000; if (!enabled) return 0; @@ -90,26 +155,44 @@ int BPF_PROG(func_end) start = bpf_map_lookup_elem(&functime, &tid); if (start) { - __s64 delta = bpf_ktime_get_ns() - *start; - __u32 key; - __u64 *hist; - + update_latency(bpf_ktime_get_ns() - *start); bpf_map_delete_elem(&functime, &tid); + } + + return 0; +} - if (delta < 0) - return 0; +SEC("raw_tp") +int BPF_PROG(event_begin) +{ + __u64 key, now; - // calculate index using delta - for (key = 0; key < (NUM_BUCKET - 1); key++) { - if (delta < (cmp_base << key)) - break; - } + if (!enabled || !can_record()) + return 0; - hist = bpf_map_lookup_elem(&latency, &key); - if (!hist) - return 0; + key = bpf_get_current_pid_tgid(); + now = bpf_ktime_get_ns(); - *hist += 1; + // overwrite timestamp for nested events + bpf_map_update_elem(&functime, &key, &now, BPF_ANY); + return 0; +} + +SEC("raw_tp") +int BPF_PROG(event_end) +{ + __u64 tid; + __u64 *start; + + if (!enabled) + return 0; + + tid = bpf_get_current_pid_tgid(); + + start = bpf_map_lookup_elem(&functime, &tid); + if (start) { + update_latency(bpf_ktime_get_ns() - *start); + bpf_map_delete_elem(&functime, &tid); } return 0; diff --git a/tools/perf/util/bpf_skel/kwork_top.bpf.c b/tools/perf/util/bpf_skel/kwork_top.bpf.c new file mode 100644 index 000000000000..6673386302e2 --- /dev/null +++ b/tools/perf/util/bpf_skel/kwork_top.bpf.c @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2022, Huawei + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +/* + * This should be in sync with "util/kwork.h" + */ +enum kwork_class_type { + KWORK_CLASS_IRQ, + KWORK_CLASS_SOFTIRQ, + KWORK_CLASS_WORKQUEUE, + KWORK_CLASS_SCHED, + KWORK_CLASS_MAX, +}; + +#define MAX_ENTRIES 102400 +#define MAX_NR_CPUS 4096 +#define PF_KTHREAD 0x00200000 +#define MAX_COMMAND_LEN 16 + +struct time_data { + __u64 timestamp; +}; + +struct work_data { + __u64 runtime; +}; + +struct task_data { + __u32 tgid; + __u32 is_kthread; + char comm[MAX_COMMAND_LEN]; +}; + +struct work_key { + __u32 type; + __u32 pid; + __u64 task_p; +}; + +struct task_key { + __u32 pid; + __u32 cpu; +}; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct time_data); +} kwork_top_task_time SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(key_size, sizeof(struct work_key)); + __uint(value_size, sizeof(struct time_data)); + __uint(max_entries, MAX_ENTRIES); +} kwork_top_irq_time SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct task_key)); 
+ __uint(value_size, sizeof(struct task_data)); + __uint(max_entries, MAX_ENTRIES); +} kwork_top_tasks SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(key_size, sizeof(struct work_key)); + __uint(value_size, sizeof(struct work_data)); + __uint(max_entries, MAX_ENTRIES); +} kwork_top_works SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u8)); + __uint(max_entries, MAX_NR_CPUS); +} kwork_top_cpu_filter SEC(".maps"); + +int enabled = 0; + +const volatile int has_cpu_filter = 0; + +__u64 from_timestamp = 0; +__u64 to_timestamp = 0; + +static __always_inline int cpu_is_filtered(__u32 cpu) +{ + __u8 *cpu_val; + + if (has_cpu_filter) { + cpu_val = bpf_map_lookup_elem(&kwork_top_cpu_filter, &cpu); + if (!cpu_val) + return 1; + } + + return 0; +} + +static __always_inline void update_task_info(struct task_struct *task, __u32 cpu) +{ + struct task_key key = { + .pid = task->pid, + .cpu = cpu, + }; + + if (!bpf_map_lookup_elem(&kwork_top_tasks, &key)) { + struct task_data data = { + .tgid = task->tgid, + .is_kthread = task->flags & PF_KTHREAD ? 1 : 0, + }; + BPF_CORE_READ_STR_INTO(&data.comm, task, comm); + + bpf_map_update_elem(&kwork_top_tasks, &key, &data, BPF_ANY); + } +} + +static __always_inline void update_work(struct work_key *key, __u64 delta) +{ + struct work_data *data; + + data = bpf_map_lookup_elem(&kwork_top_works, key); + if (data) { + data->runtime += delta; + } else { + struct work_data new_data = { + .runtime = delta, + }; + + bpf_map_update_elem(&kwork_top_works, key, &new_data, BPF_ANY); + } +} + +static void on_sched_out(struct task_struct *task, __u64 ts, __u32 cpu) +{ + __u64 delta; + struct time_data *pelem; + + pelem = bpf_task_storage_get(&kwork_top_task_time, task, NULL, 0); + if (pelem) + delta = ts - pelem->timestamp; + else + delta = ts - from_timestamp; + + struct work_key key = { + .type = KWORK_CLASS_SCHED, + .pid = task->pid, + .task_p = (__u64)task, + }; + + update_work(&key, delta); + update_task_info(task, cpu); +} + +static void on_sched_in(struct task_struct *task, __u64 ts) +{ + struct time_data *pelem; + + pelem = bpf_task_storage_get(&kwork_top_task_time, task, NULL, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (pelem) + pelem->timestamp = ts; +} + +SEC("tp_btf/sched_switch") +int on_switch(u64 *ctx) +{ + struct task_struct *prev, *next; + + prev = (struct task_struct *)ctx[1]; + next = (struct task_struct *)ctx[2]; + + if (!enabled) + return 0; + + __u32 cpu = bpf_get_smp_processor_id(); + + if (cpu_is_filtered(cpu)) + return 0; + + __u64 ts = bpf_ktime_get_ns(); + + on_sched_out(prev, ts, cpu); + on_sched_in(next, ts); + + return 0; +} + +SEC("tp_btf/irq_handler_entry") +int on_irq_handler_entry(u64 *cxt) +{ + struct task_struct *task; + + if (!enabled) + return 0; + + __u32 cpu = bpf_get_smp_processor_id(); + + if (cpu_is_filtered(cpu)) + return 0; + + __u64 ts = bpf_ktime_get_ns(); + + task = (struct task_struct *)bpf_get_current_task(); + if (!task) + return 0; + + struct work_key key = { + .type = KWORK_CLASS_IRQ, + .pid = BPF_CORE_READ(task, pid), + .task_p = (__u64)task, + }; + + struct time_data data = { + .timestamp = ts, + }; + + bpf_map_update_elem(&kwork_top_irq_time, &key, &data, BPF_ANY); + + return 0; +} + +SEC("tp_btf/irq_handler_exit") +int on_irq_handler_exit(u64 *cxt) +{ + __u64 delta; + struct task_struct *task; + struct time_data *pelem; + + if (!enabled) + return 0; + + __u32 cpu = bpf_get_smp_processor_id(); + + if (cpu_is_filtered(cpu)) + 
return 0; + + __u64 ts = bpf_ktime_get_ns(); + + task = (struct task_struct *)bpf_get_current_task(); + if (!task) + return 0; + + struct work_key key = { + .type = KWORK_CLASS_IRQ, + .pid = BPF_CORE_READ(task, pid), + .task_p = (__u64)task, + }; + + pelem = bpf_map_lookup_elem(&kwork_top_irq_time, &key); + if (pelem && pelem->timestamp != 0) + delta = ts - pelem->timestamp; + else + delta = ts - from_timestamp; + + update_work(&key, delta); + + return 0; +} + +SEC("tp_btf/softirq_entry") +int on_softirq_entry(u64 *cxt) +{ + struct task_struct *task; + + if (!enabled) + return 0; + + __u32 cpu = bpf_get_smp_processor_id(); + + if (cpu_is_filtered(cpu)) + return 0; + + __u64 ts = bpf_ktime_get_ns(); + + task = (struct task_struct *)bpf_get_current_task(); + if (!task) + return 0; + + struct work_key key = { + .type = KWORK_CLASS_SOFTIRQ, + .pid = BPF_CORE_READ(task, pid), + .task_p = (__u64)task, + }; + + struct time_data data = { + .timestamp = ts, + }; + + bpf_map_update_elem(&kwork_top_irq_time, &key, &data, BPF_ANY); + + return 0; +} + +SEC("tp_btf/softirq_exit") +int on_softirq_exit(u64 *cxt) +{ + __u64 delta; + struct task_struct *task; + struct time_data *pelem; + + if (!enabled) + return 0; + + __u32 cpu = bpf_get_smp_processor_id(); + + if (cpu_is_filtered(cpu)) + return 0; + + __u64 ts = bpf_ktime_get_ns(); + + task = (struct task_struct *)bpf_get_current_task(); + if (!task) + return 0; + + struct work_key key = { + .type = KWORK_CLASS_SOFTIRQ, + .pid = BPF_CORE_READ(task, pid), + .task_p = (__u64)task, + }; + + pelem = bpf_map_lookup_elem(&kwork_top_irq_time, &key); + if (pelem) + delta = ts - pelem->timestamp; + else + delta = ts - from_timestamp; + + update_work(&key, delta); + + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/bpf_skel/kwork_trace.bpf.c b/tools/perf/util/bpf_skel/kwork_trace.bpf.c index 063c124e0999..9ce9c8dddc4b 100644 --- a/tools/perf/util/bpf_skel/kwork_trace.bpf.c +++ b/tools/perf/util/bpf_skel/kwork_trace.bpf.c @@ -68,8 +68,9 @@ struct { } perf_kwork_name_filter SEC(".maps"); int enabled = 0; -int has_cpu_filter = 0; -int has_name_filter = 0; + +const volatile int has_cpu_filter = 0; +const volatile int has_name_filter = 0; static __always_inline int local_strncmp(const char *s1, unsigned int sz, const char *s2) @@ -79,7 +80,7 @@ static __always_inline int local_strncmp(const char *s1, for (i = 0; i < sz; i++) { ret = (unsigned char)s1[i] - (unsigned char)s2[i]; - if (ret || !s1[i] || !s2[i]) + if (ret || !s1[i]) break; } diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index 8d3cfbb3cc65..96e7d853b9ed 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -11,6 +11,12 @@ /* for collect_lock_syms(). 4096 was rejected by the verifier */ #define MAX_CPUS 1024 +/* for collect_zone_lock(). It should be more than the actual zones. */ +#define MAX_ZONES 10 + +/* for do_lock_delay(). Arbitrarily set to 1 million. 
*/ +#define MAX_LOOP (1U << 20) + /* lock contention flags from include/trace/events/lock.h */ #define LCB_F_SPIN (1U << 0) #define LCB_F_READ (1U << 1) @@ -19,13 +25,6 @@ #define LCB_F_PERCPU (1U << 4) #define LCB_F_MUTEX (1U << 5) -struct tstamp_data { - __u64 timestamp; - __u64 lock; - __u32 flags; - __s32 stack_id; -}; - /* callstack storage */ struct { __uint(type, BPF_MAP_TYPE_STACK_TRACE); @@ -34,6 +33,38 @@ struct { __uint(max_entries, MAX_ENTRIES); } stacks SEC(".maps"); +/* buffer for owner stacktrace */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, 1); +} stack_buf SEC(".maps"); + +/* a map for tracing owner stacktrace to owner stack id */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); // owner stacktrace + __uint(value_size, sizeof(__s32)); // owner stack id + __uint(max_entries, 1); +} owner_stacks SEC(".maps"); + +/* a map for tracing lock address to owner data */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); // lock address + __uint(value_size, sizeof(struct owner_tracing_data)); + __uint(max_entries, 1); +} owner_data SEC(".maps"); + +/* a map for contention_key (stores owner stack id) to contention data */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct contention_key)); + __uint(value_size, sizeof(struct contention_data)); + __uint(max_entries, 1); +} owner_stat SEC(".maps"); + /* maintain timestamp at the beginning of contention */ struct { __uint(type, BPF_MAP_TYPE_HASH); @@ -42,6 +73,14 @@ struct { __uint(max_entries, MAX_ENTRIES); } tstamp SEC(".maps"); +/* maintain per-CPU timestamp at the beginning of contention */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct tstamp_data)); + __uint(max_entries, 1); +} tstamp_cpu SEC(".maps"); + /* actual lock contention statistics */ struct { __uint(type, BPF_MAP_TYPE_HASH); @@ -92,6 +131,34 @@ struct { __uint(max_entries, 1); } addr_filter SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} cgroup_filter SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(long)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} slab_filter SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(long)); + __uint(value_size, sizeof(struct slab_cache_data)); + __uint(max_entries, 1); +} slab_caches SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, 1); +} lock_delays SEC(".maps"); + struct rw_semaphore___old { struct task_struct *owner; } __attribute__((preserve_access_index)); @@ -108,18 +175,32 @@ struct mm_struct___new { struct rw_semaphore mmap_lock; } __attribute__((preserve_access_index)); +extern struct kmem_cache *bpf_get_kmem_cache(u64 addr) __ksym __weak; + /* control flags */ -int enabled; -int has_cpu; -int has_task; -int has_type; -int has_addr; -int needs_callstack; -int stack_skip; -int lock_owner; +const volatile int has_cpu; +const volatile int has_task; +const volatile int has_type; +const volatile int has_addr; +const volatile int has_cgroup; +const volatile int has_slab; +const volatile int needs_callstack; +const volatile int stack_skip; +const volatile int lock_owner; 
+const volatile int use_cgroup_v2; +const volatile int max_stack; +const volatile int lock_delay; /* determine the key of lock stat */ -int aggr_mode; +const volatile int aggr_mode; + +int enabled; + +int perf_subsys_id = -1; + +__u64 end_ts; + +__u32 slab_cache_id; /* error stat */ int task_fail; @@ -130,6 +211,32 @@ int data_fail; int task_map_full; int data_map_full; +struct task_struct *bpf_task_from_pid(s32 pid) __ksym __weak; +void bpf_task_release(struct task_struct *p) __ksym __weak; + +static inline __u64 get_current_cgroup_id(void) +{ + struct task_struct *task; + struct cgroup *cgrp; + + if (use_cgroup_v2) + return bpf_get_current_cgroup_id(); + + task = bpf_get_current_task_btf(); + + if (perf_subsys_id == -1) { +#if __has_builtin(__builtin_preserve_enum_value) + perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id, + perf_event_cgrp_id); +#else + perf_subsys_id = perf_event_cgrp_id; +#endif + } + + cgrp = BPF_CORE_READ(task, cgroups, subsys[perf_subsys_id], cgroup); + return BPF_CORE_READ(cgrp, kn, id); +} + static inline int can_record(u64 *ctx) { if (has_cpu) { @@ -164,6 +271,26 @@ static inline int can_record(u64 *ctx) __u64 addr = ctx[0]; ok = bpf_map_lookup_elem(&addr_filter, &addr); + if (!ok && !has_slab) + return 0; + } + + if (has_cgroup) { + __u8 *ok; + __u64 cgrp = get_current_cgroup_id(); + + ok = bpf_map_lookup_elem(&cgroup_filter, &cgrp); + if (!ok) + return 0; + } + + if (has_slab && bpf_get_kmem_cache) { + __u8 *ok; + __u64 addr = ctx[0]; + long kmem_cache_addr; + + kmem_cache_addr = (long)bpf_get_kmem_cache(addr); + ok = bpf_map_lookup_elem(&slab_filter, &kmem_cache_addr); if (!ok) return 0; } @@ -238,6 +365,7 @@ static inline __u32 check_lock_type(__u64 lock, __u32 flags) struct task_struct *curr; struct mm_struct___old *mm_old; struct mm_struct___new *mm_new; + struct sighand_struct *sighand; switch (flags) { case LCB_F_READ: /* rwsem */ @@ -259,7 +387,9 @@ static inline __u32 check_lock_type(__u64 lock, __u32 flags) break; case LCB_F_SPIN: /* spinlock */ curr = bpf_get_current_task_btf(); - if (&curr->sighand->siglock == (void *)lock) + sighand = curr->sighand; + + if (sighand && &sighand->siglock == (void *)lock) return LCD_F_SIGHAND_LOCK; break; default: @@ -268,36 +398,212 @@ static inline __u32 check_lock_type(__u64 lock, __u32 flags) return 0; } -SEC("tp_btf/contention_begin") -int contention_begin(u64 *ctx) +static inline long delay_callback(__u64 idx, void *arg) +{ + __u64 target = *(__u64 *)arg; + + if (target <= bpf_ktime_get_ns()) + return 1; + + /* just to kill time */ + (void)bpf_get_prandom_u32(); + + return 0; +} + +static inline void do_lock_delay(__u64 duration) +{ + __u64 target = bpf_ktime_get_ns() + duration; + + bpf_loop(MAX_LOOP, delay_callback, &target, /*flags=*/0); +} + +static inline void check_lock_delay(__u64 lock) +{ + __u64 *delay; + + delay = bpf_map_lookup_elem(&lock_delays, &lock); + if (delay) + do_lock_delay(*delay); +} + +static inline struct tstamp_data *get_tstamp_elem(__u32 flags) { __u32 pid; struct tstamp_data *pelem; - if (!enabled || !can_record(ctx)) - return 0; + /* Use per-cpu array map for spinlock and rwlock */ + if ((flags & (LCB_F_SPIN | LCB_F_MUTEX)) == LCB_F_SPIN) { + __u32 idx = 0; + + pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx); + /* Do not update the element for nested locks */ + if (pelem && pelem->lock) + pelem = NULL; + return pelem; + } pid = bpf_get_current_pid_tgid(); pelem = bpf_map_lookup_elem(&tstamp, &pid); + /* Do not update the element for nested locks */ if (pelem && pelem->lock) - 
return 0;
+		return NULL;

	if (pelem == NULL) {
		struct tstamp_data zero = {};

-		bpf_map_update_elem(&tstamp, &pid, &zero, BPF_ANY);
+		if (bpf_map_update_elem(&tstamp, &pid, &zero, BPF_NOEXIST) < 0) {
+			__sync_fetch_and_add(&task_fail, 1);
+			return NULL;
+		}
+
		pelem = bpf_map_lookup_elem(&tstamp, &pid);
		if (pelem == NULL) {
			__sync_fetch_and_add(&task_fail, 1);
-			return 0;
+			return NULL;
		}
	}
+	return pelem;
+}
+
+static inline s32 get_owner_stack_id(u64 *stacktrace)
+{
+	s32 *id, new_id;
+	static s64 id_gen = 1;
+
+	id = bpf_map_lookup_elem(&owner_stacks, stacktrace);
+	if (id)
+		return *id;
+
+	new_id = (s32)__sync_fetch_and_add(&id_gen, 1);
+
+	bpf_map_update_elem(&owner_stacks, stacktrace, &new_id, BPF_NOEXIST);
+
+	id = bpf_map_lookup_elem(&owner_stacks, stacktrace);
+	if (id)
+		return *id;
+
+	return -1;
+}
+
+static inline void update_contention_data(struct contention_data *data, u64 duration, u32 count)
+{
+	__sync_fetch_and_add(&data->total_time, duration);
+	__sync_fetch_and_add(&data->count, count);
+
+	/* FIXME: need atomic operations */
+	if (data->max_time < duration)
+		data->max_time = duration;
+	if (data->min_time > duration)
+		data->min_time = duration;
+}
+
+static inline void update_owner_stat(u32 id, u64 duration, u32 flags)
+{
+	struct contention_key key = {
+		.stack_id = id,
+		.pid = 0,
+		.lock_addr_or_cgroup = 0,
+	};
+	struct contention_data *data = bpf_map_lookup_elem(&owner_stat, &key);
+
+	if (!data) {
+		struct contention_data first = {
+			.total_time = duration,
+			.max_time = duration,
+			.min_time = duration,
+			.count = 1,
+			.flags = flags,
+		};
+		bpf_map_update_elem(&owner_stat, &key, &first, BPF_NOEXIST);
+	} else {
+		update_contention_data(data, duration, 1);
+	}
+}
+
+SEC("tp_btf/contention_begin")
+int contention_begin(u64 *ctx)
+{
+	struct tstamp_data *pelem;
+
+	if (!enabled || !can_record(ctx))
+		return 0;
+
+	pelem = get_tstamp_elem(ctx[1]);
+	if (pelem == NULL)
+		return 0;

	pelem->timestamp = bpf_ktime_get_ns();
	pelem->lock = (__u64)ctx[0];
	pelem->flags = (__u32)ctx[1];

	if (needs_callstack) {
+		u32 i = 0;
+		u32 id = 0;
+		int owner_pid;
+		u64 *buf;
+		struct task_struct *task;
+		struct owner_tracing_data *otdata;
+
+		if (!lock_owner)
+			goto skip_owner;
+
+		task = get_lock_owner(pelem->lock, pelem->flags);
+		if (!task)
+			goto skip_owner;
+
+		owner_pid = BPF_CORE_READ(task, pid);
+
+		buf = bpf_map_lookup_elem(&stack_buf, &i);
+		if (!buf)
+			goto skip_owner;
+		for (i = 0; i < max_stack; i++)
+			buf[i] = 0x0;
+
+		if (!bpf_task_from_pid)
+			goto skip_owner;
+
+		task = bpf_task_from_pid(owner_pid);
+		if (!task)
+			goto skip_owner;
+
+		bpf_get_task_stack(task, buf, max_stack * sizeof(unsigned long), 0);
+		bpf_task_release(task);
+
+		otdata = bpf_map_lookup_elem(&owner_data, &pelem->lock);
+		id = get_owner_stack_id(buf);
+
+		/*
+		 * Contention has just begun, or, in a corner case, the `lock` is
+		 * owned by a process other than `owner_pid`. For the corner case
+		 * we treat it as an unexpected internal error and just ignore the
+		 * previous tracing record.
+		 */
+		if (!otdata || otdata->pid != owner_pid) {
+			struct owner_tracing_data first = {
+				.pid = owner_pid,
+				.timestamp = pelem->timestamp,
+				.count = 1,
+				.stack_id = id,
+			};
+			bpf_map_update_elem(&owner_data, &pelem->lock, &first, BPF_ANY);
+		}
+		/* Contention is ongoing and a new waiter joins */
+		else {
+			__sync_fetch_and_add(&otdata->count, 1);
+
+			/*
+			 * The owner is the same, but the stacktrace might have changed.
+			 * In this case we store/update `owner_stat` based on the current
+			 * owner stack id.
+ */ + if (id != otdata->stack_id) { + update_owner_stat(id, pelem->timestamp - otdata->timestamp, + pelem->flags); + + otdata->timestamp = pelem->timestamp; + otdata->stack_id = id; + } + } +skip_owner: pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP | stack_skip); if (pelem->stack_id < 0) @@ -330,27 +636,120 @@ int contention_begin(u64 *ctx) SEC("tp_btf/contention_end") int contention_end(u64 *ctx) { - __u32 pid; + __u32 pid = 0, idx = 0; struct tstamp_data *pelem; struct contention_key key = {}; struct contention_data *data; + __u64 timestamp; __u64 duration; + bool need_delete = false; if (!enabled) return 0; - pid = bpf_get_current_pid_tgid(); - pelem = bpf_map_lookup_elem(&tstamp, &pid); - if (!pelem || pelem->lock != ctx[0]) - return 0; + /* + * For spinlock and rwlock, it needs to get the timestamp for the + * per-cpu map. However, contention_end does not have the flags + * so it cannot know whether it reads percpu or hash map. + * + * Try per-cpu map first and check if there's active contention. + * If it is, do not read hash map because it cannot go to sleeping + * locks before releasing the spinning locks. + */ + pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx); + if (pelem && pelem->lock) { + if (pelem->lock != ctx[0]) + return 0; + } else { + pid = bpf_get_current_pid_tgid(); + pelem = bpf_map_lookup_elem(&tstamp, &pid); + if (!pelem || pelem->lock != ctx[0]) + return 0; + need_delete = true; + } - duration = bpf_ktime_get_ns() - pelem->timestamp; + timestamp = bpf_ktime_get_ns(); + duration = timestamp - pelem->timestamp; if ((__s64)duration < 0) { - bpf_map_delete_elem(&tstamp, &pid); __sync_fetch_and_add(&time_fail, 1); - return 0; + goto out; } + if (needs_callstack && lock_owner) { + struct owner_tracing_data *otdata = bpf_map_lookup_elem(&owner_data, &pelem->lock); + + if (!otdata) + goto skip_owner; + + /* Update `owner_stat` */ + update_owner_stat(otdata->stack_id, timestamp - otdata->timestamp, pelem->flags); + + /* No contention is occurring, delete `lock` entry in `owner_data` */ + if (otdata->count <= 1) + bpf_map_delete_elem(&owner_data, &pelem->lock); + /* + * Contention is still ongoing, with a new owner (current task). `owner_data` + * should be updated accordingly. + */ + else { + u32 i = 0; + s32 ret = (s32)ctx[1]; + u64 *buf; + + otdata->timestamp = timestamp; + __sync_fetch_and_add(&otdata->count, -1); + + buf = bpf_map_lookup_elem(&stack_buf, &i); + if (!buf) + goto skip_owner; + for (i = 0; i < (u32)max_stack; i++) + buf[i] = 0x0; + + /* + * `ret` has the return code of the lock function. + * If `ret` is negative, the current task terminates lock waiting without + * acquiring it. Owner is not changed, but we still need to update the owner + * stack. + */ + if (ret < 0) { + s32 id = 0; + struct task_struct *task; + + if (!bpf_task_from_pid) + goto skip_owner; + + task = bpf_task_from_pid(otdata->pid); + if (!task) + goto skip_owner; + + bpf_get_task_stack(task, buf, + max_stack * sizeof(unsigned long), 0); + bpf_task_release(task); + + id = get_owner_stack_id(buf); + + /* + * If owner stack is changed, update owner stack id for this lock. + */ + if (id != otdata->stack_id) + otdata->stack_id = id; + } + /* + * Otherwise, update tracing data with the current task, which is the new + * owner. + */ + else { + otdata->pid = pid; + /* + * We don't want to retrieve callstack here, since it is where the + * current task acquires the lock and provides no additional + * information. We simply assign -1 to invalidate it. 
+ */ + otdata->stack_id = -1; + } + } + } +skip_owner: switch (aggr_mode) { case LOCK_AGGR_CALLER: key.stack_id = pelem->stack_id; @@ -358,16 +757,22 @@ int contention_end(u64 *ctx) case LOCK_AGGR_TASK: if (lock_owner) key.pid = pelem->flags; - else + else { + if (!need_delete) + pid = bpf_get_current_pid_tgid(); key.pid = pid; + } if (needs_callstack) key.stack_id = pelem->stack_id; break; case LOCK_AGGR_ADDR: - key.lock_addr = pelem->lock; + key.lock_addr_or_cgroup = pelem->lock; if (needs_callstack) key.stack_id = pelem->stack_id; break; + case LOCK_AGGR_CGROUP: + key.lock_addr_or_cgroup = get_current_cgroup_id(); + break; default: /* should not happen */ return 0; @@ -376,9 +781,8 @@ int contention_end(u64 *ctx) data = bpf_map_lookup_elem(&lock_stat, &key); if (!data) { if (data_map_full) { - bpf_map_delete_elem(&tstamp, &pid); __sync_fetch_and_add(&data_fail, 1); - return 0; + goto out; } struct contention_data first = { @@ -390,34 +794,64 @@ int contention_end(u64 *ctx) }; int err; - if (aggr_mode == LOCK_AGGR_ADDR) - first.flags |= check_lock_type(pelem->lock, pelem->flags); + if (aggr_mode == LOCK_AGGR_ADDR) { + first.flags |= check_lock_type(pelem->lock, + pelem->flags & LCB_F_TYPE_MASK); + + /* Check if it's from a slab object */ + if (bpf_get_kmem_cache) { + struct kmem_cache *s; + struct slab_cache_data *d; + + s = bpf_get_kmem_cache(pelem->lock); + if (s != NULL) { + /* + * Save the ID of the slab cache in the flags + * (instead of full address) to reduce the + * space in the contention_data. + */ + d = bpf_map_lookup_elem(&slab_caches, &s); + if (d != NULL) + first.flags |= d->id; + } + } + } err = bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST); if (err < 0) { + if (err == -EEXIST) { + /* it lost the race, try to get it again */ + data = bpf_map_lookup_elem(&lock_stat, &key); + if (data != NULL) + goto found; + } if (err == -E2BIG) data_map_full = 1; __sync_fetch_and_add(&data_fail, 1); } - bpf_map_delete_elem(&tstamp, &pid); - return 0; + goto out; } - __sync_fetch_and_add(&data->total_time, duration); - __sync_fetch_and_add(&data->count, 1); +found: + update_contention_data(data, duration, 1); - /* FIXME: need atomic operations */ - if (data->max_time < duration) - data->max_time = duration; - if (data->min_time > duration) - data->min_time = duration; +out: + if (lock_delay) + check_lock_delay(pelem->lock); - bpf_map_delete_elem(&tstamp, &pid); + pelem->lock = 0; + if (need_delete) + bpf_map_delete_elem(&tstamp, &pid); return 0; } extern struct rq runqueues __ksym; +const volatile __u64 contig_page_data_addr; +const volatile __u64 node_data_addr; +const volatile int nr_nodes; +const volatile int sizeof_zone; + struct rq___old { raw_spinlock_t lock; } __attribute__((preserve_access_index)); @@ -426,6 +860,59 @@ struct rq___new { raw_spinlock_t __lock; } __attribute__((preserve_access_index)); +static void collect_zone_lock(void) +{ + __u64 nr_zones, zone_off; + __u64 lock_addr, lock_off; + __u32 lock_flag = LOCK_CLASS_ZONE_LOCK; + + zone_off = offsetof(struct pglist_data, node_zones); + lock_off = offsetof(struct zone, lock); + + if (contig_page_data_addr) { + struct pglist_data *contig_page_data; + + contig_page_data = (void *)(long)contig_page_data_addr; + nr_zones = BPF_CORE_READ(contig_page_data, nr_zones); + + for (int i = 0; i < MAX_ZONES; i++) { + __u64 zone_addr; + + if (i >= nr_zones) + break; + + zone_addr = contig_page_data_addr + (sizeof_zone * i) + zone_off; + lock_addr = zone_addr + lock_off; + + bpf_map_update_elem(&lock_syms, &lock_addr, 
&lock_flag, BPF_ANY); + } + } else if (nr_nodes > 0) { + struct pglist_data **node_data = (void *)(long)node_data_addr; + + for (int i = 0; i < nr_nodes; i++) { + struct pglist_data *pgdat = NULL; + int err; + + err = bpf_core_read(&pgdat, sizeof(pgdat), &node_data[i]); + if (err < 0 || pgdat == NULL) + break; + + nr_zones = BPF_CORE_READ(pgdat, nr_zones); + for (int k = 0; k < MAX_ZONES; k++) { + __u64 zone_addr; + + if (k >= nr_zones) + break; + + zone_addr = (__u64)(void *)pgdat + (sizeof_zone * k) + zone_off; + lock_addr = zone_addr + lock_off; + + bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY); + } + } + } +} + SEC("raw_tp/bpf_test_finish") int BPF_PROG(collect_lock_syms) { @@ -447,6 +934,55 @@ int BPF_PROG(collect_lock_syms) lock_flag = LOCK_CLASS_RQLOCK; bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY); } + + collect_zone_lock(); + + return 0; +} + +SEC("raw_tp/bpf_test_finish") +int BPF_PROG(end_timestamp) +{ + end_ts = bpf_ktime_get_ns(); + return 0; +} + +/* + * bpf_iter__kmem_cache added recently so old kernels don't have it in the + * vmlinux.h. But we cannot add it here since it will cause a compiler error + * due to redefinition of the struct on later kernels. + * + * So it uses a CO-RE trick to access the member only if it has the type. + * This will support both old and new kernels without compiler errors. + */ +struct bpf_iter__kmem_cache___new { + struct kmem_cache *s; +} __attribute__((preserve_access_index)); + +SEC("iter/kmem_cache") +int slab_cache_iter(void *ctx) +{ + struct kmem_cache *s = NULL; + struct slab_cache_data d; + const char *nameptr; + + if (bpf_core_type_exists(struct bpf_iter__kmem_cache)) { + struct bpf_iter__kmem_cache___new *iter = ctx; + + s = iter->s; + } + + if (s == NULL) + return 0; + + nameptr = s->name; + bpf_probe_read_kernel_str(d.name, sizeof(d.name), nameptr); + + d.id = ++slab_cache_id << LCB_F_SLAB_ID_SHIFT; + if (d.id >= LCB_F_SLAB_ID_END) + return 0; + + bpf_map_update_elem(&slab_caches, &s, &d, BPF_NOEXIST); return 0; } diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h index 260062a9f2ab..28c5e5aced7f 100644 --- a/tools/perf/util/bpf_skel/lock_data.h +++ b/tools/perf/util/bpf_skel/lock_data.h @@ -3,10 +3,24 @@ #ifndef UTIL_BPF_SKEL_LOCK_DATA_H #define UTIL_BPF_SKEL_LOCK_DATA_H +struct owner_tracing_data { + u32 pid; // Who has the lock. + u32 count; // How many waiters for this lock. + u64 timestamp; // The time while the owner acquires lock and contention is going on. 
+	s32 stack_id;	// Identifier for `owner_stat`; stored as the value in `owner_stacks`.
+};
+
+struct tstamp_data {
+	u64 timestamp;
+	u64 lock;
+	u32 flags;
+	s32 stack_id;
+};
+
 struct contention_key {
-	u32 stack_id;
+	s32 stack_id;
 	u32 pid;
-	u64 lock_addr;
+	u64 lock_addr_or_cgroup;
 };

 #define TASK_COMM_LEN 16
@@ -25,7 +39,15 @@ struct contention_task_data {
 #define LCD_F_MMAP_LOCK		(1U << 31)
 #define LCD_F_SIGHAND_LOCK	(1U << 30)

-#define LCB_F_MAX_FLAGS		(1U << 7)
+#define LCB_F_SLAB_ID_SHIFT	16
+#define LCB_F_SLAB_ID_START	(1U << 16)
+#define LCB_F_SLAB_ID_END	(1U << 26)
+#define LCB_F_SLAB_ID_MASK	0x03FF0000U
+
+#define LCB_F_TYPE_MAX		(1U << 7)
+#define LCB_F_TYPE_MASK		0x0000007FU
+
+#define SLAB_NAME_MAX 28

 struct contention_data {
 	u64 total_time;
@@ -39,11 +61,18 @@ enum lock_aggr_mode {
 	LOCK_AGGR_ADDR = 0,
 	LOCK_AGGR_TASK,
 	LOCK_AGGR_CALLER,
+	LOCK_AGGR_CGROUP,
 };

 enum lock_class_sym {
 	LOCK_CLASS_NONE,
 	LOCK_CLASS_RQLOCK,
+	LOCK_CLASS_ZONE_LOCK,
+};
+
+struct slab_cache_data {
+	u32 id;
+	char name[SLAB_NAME_MAX];
 };

 #endif /* UTIL_BPF_SKEL_LOCK_DATA_H */
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
index d877a0a9731f..72763bb8d1de 100644
--- a/tools/perf/util/bpf_skel/off_cpu.bpf.c
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -18,10 +18,19 @@
 #define MAX_STACKS 32
 #define MAX_ENTRIES 102400
+#define MAX_CPUS 4096
+#define MAX_OFFCPU_LEN 37
+
+// We have a 'struct stack' in vmlinux.h when building with GEN_VMLINUX_H=1
+struct __stack {
+	u64 array[MAX_STACKS];
+};
+
 struct tstamp_data {
 	__u32 stack_id;
 	__u32 state;
 	__u64 timestamp;
+	struct __stack stack;
 };

 struct offcpu_key {
@@ -39,6 +48,24 @@ struct {
 	__uint(max_entries, MAX_ENTRIES);
 } stacks SEC(".maps");

+struct offcpu_data {
+	u64 array[MAX_OFFCPU_LEN];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+	__uint(max_entries, MAX_CPUS);
+} offcpu_output SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct offcpu_data));
+	__uint(max_entries, 1);
+} offcpu_payload SEC(".maps");
+
 struct {
 	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
 	__uint(map_flags, BPF_F_NO_PREALLOC);
@@ -85,10 +112,11 @@ struct task_struct___old {
 } __attribute__((preserve_access_index));

 int enabled = 0;
-int has_cpu = 0;
-int has_task = 0;
-int has_cgroup = 0;
-int uses_tgid = 0;
+
+const volatile int has_cpu = 0;
+const volatile int has_task = 0;
+const volatile int has_cgroup = 0;
+const volatile int uses_tgid = 0;

 const volatile bool has_prev_state = false;
 const volatile bool needs_cgroup = false;
@@ -96,6 +124,8 @@ const volatile bool uses_cgroup_v1 = false;

 int perf_subsys_id = -1;

+__u64 offcpu_thresh_ns;
+
 /*
  * Old kernel used to call it task_struct->state and now it's '__state'.
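The LCB_F_SLAB_ID_* constants above reserve bits 16-25 of the contention flags for a slab-cache id while the low seven bits keep the lock type, which is how the ids generated by slab_cache_iter() later coexist with check_lock_type() results in one word. A small standalone sketch of the pack/unpack arithmetic; the masks are copied from lock_data.h, the sample values are made up:

#include <assert.h>
#include <stdio.h>

#define LCB_F_SLAB_ID_SHIFT	16
#define LCB_F_SLAB_ID_MASK	0x03FF0000U
#define LCB_F_TYPE_MASK		0x0000007FU

int main(void)
{
	unsigned int lock_type = 2;	/* hypothetical LCB_F_* type value */
	unsigned int slab_id = 5;	/* hypothetical slab cache id      */

	/* Pack: type in bits 0-6, slab id in bits 16-25. */
	unsigned int flags = (lock_type & LCB_F_TYPE_MASK) |
			     ((slab_id << LCB_F_SLAB_ID_SHIFT) & LCB_F_SLAB_ID_MASK);

	/* Unpack both fields again. */
	assert((flags & LCB_F_TYPE_MASK) == lock_type);
	assert(((flags & LCB_F_SLAB_ID_MASK) >> LCB_F_SLAB_ID_SHIFT) == slab_id);
	printf("flags=%#x\n", flags);
	return 0;
}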
* Use BPF CO-RE "ignored suffix rule" to deal with it like below: @@ -182,6 +212,47 @@ static inline int can_record(struct task_struct *t, int state) return 1; } +static inline int copy_stack(struct __stack *from, struct offcpu_data *to, int n) +{ + int len = 0; + + for (int i = 0; i < MAX_STACKS && from->array[i]; ++i, ++len) + to->array[n + 2 + i] = from->array[i]; + + return len; +} + +/** + * off_cpu_dump - dump off-cpu samples to ring buffer + * @data: payload for dumping off-cpu samples + * @key: off-cpu data + * @stack: stack trace of the task before being scheduled out + * + * If the threshold of off-cpu time is reached, acquire tid, period, callchain, and cgroup id + * information of the task, and dump it as a raw sample to perf ring buffer + */ +static int off_cpu_dump(void *ctx, struct offcpu_data *data, struct offcpu_key *key, + struct __stack *stack, __u64 delta) +{ + int n = 0, len = 0; + + data->array[n++] = (u64)key->tgid << 32 | key->pid; + data->array[n++] = delta; + + /* data->array[n] is callchain->nr (updated later) */ + data->array[n + 1] = PERF_CONTEXT_USER; + data->array[n + 2] = 0; + len = copy_stack(stack, data, n); + + /* update length of callchain */ + data->array[n] = len + 1; + n += len + 2; + + data->array[n++] = key->cgroup_id; + + return bpf_perf_event_output(ctx, &offcpu_output, BPF_F_CURRENT_CPU, data, n * sizeof(u64)); +} + static int off_cpu_stat(u64 *ctx, struct task_struct *prev, struct task_struct *next, int state) { @@ -206,6 +277,16 @@ static int off_cpu_stat(u64 *ctx, struct task_struct *prev, pelem->state = state; pelem->stack_id = stack_id; + /* + * If stacks are successfully collected by bpf_get_stackid(), collect them once more + * in task_storage for direct off-cpu sample dumping + */ + if (stack_id > 0 && bpf_get_stack(ctx, &pelem->stack, MAX_STACKS * sizeof(u64), BPF_F_USER_STACK)) { + /* + * This empty if block is used to avoid 'result unused warning' from bpf_get_stack(). + * If the collection fails, continue with the logic for the next task. + */ + } next: pelem = bpf_task_storage_get(&tstamp, next, NULL, 0); @@ -220,11 +301,19 @@ next: __u64 delta = ts - pelem->timestamp; __u64 *total; - total = bpf_map_lookup_elem(&off_cpu, &key); - if (total) - *total += delta; - else - bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY); + if (delta >= offcpu_thresh_ns) { + int zero = 0; + struct offcpu_data *data = bpf_map_lookup_elem(&offcpu_payload, &zero); + + if (data) + off_cpu_dump(ctx, data, &key, &pelem->stack, delta); + } else { + total = bpf_map_lookup_elem(&off_cpu, &key); + if (total) + *total += delta; + else + bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY); + } /* prevent to reuse the timestamp later */ pelem->timestamp = 0; diff --git a/tools/perf/util/bpf_skel/perf_version.h b/tools/perf/util/bpf_skel/perf_version.h new file mode 100644 index 000000000000..1ed5b2e59bf5 --- /dev/null +++ b/tools/perf/util/bpf_skel/perf_version.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef __PERF_VERSION_H__ +#define __PERF_VERSION_H__ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +/* + * This is used by tests/shell/record_bpf_metadata.sh + * to verify that BPF metadata generation works. + * + * PERF_VERSION is defined by a build rule at compile time. 
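off_cpu_dump() above serializes one off-CPU event into a flat array of u64 words: a tgid/pid word, the off-CPU period, a callchain whose count includes a leading PERF_CONTEXT_USER marker, and finally the cgroup id, which is why MAX_OFFCPU_LEN is 37 (2 + 1 + 1 + MAX_STACKS + 1). A userspace sketch of the same layout, with a hypothetical emit() standing in for bpf_perf_event_output():

#include <stdio.h>

#define MAX_STACKS 32
#define PERF_CONTEXT_USER ((unsigned long long)-512)

/* Hypothetical stand-in for bpf_perf_event_output(). */
static void emit(const unsigned long long *buf, int n)
{
	for (int i = 0; i < n; i++)
		printf("word[%2d] = %#llx\n", i, buf[i]);
}

static void dump_offcpu(unsigned int tgid, unsigned int pid,
			unsigned long long delta, unsigned long long cgroup_id,
			const unsigned long long *stack)
{
	unsigned long long data[MAX_STACKS + 5];	/* 37 words, as in the diff */
	int n = 0, len = 0;

	data[n++] = (unsigned long long)tgid << 32 | pid;
	data[n++] = delta;

	/* data[n] is callchain->nr, filled in once the length is known. */
	data[n + 1] = PERF_CONTEXT_USER;
	for (int i = 0; i < MAX_STACKS && stack[i]; i++, len++)
		data[n + 2 + i] = stack[i];

	data[n] = len + 1;	/* nr counts the context marker too */
	n += len + 2;

	data[n++] = cgroup_id;
	emit(data, n);
}

int main(void)
{
	unsigned long long stack[MAX_STACKS] = { 0x401000, 0x401234, 0 };

	dump_offcpu(100, 101, 5000000, 1, stack);
	return 0;
}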
+ */ +const char bpf_metadata_perf_version[] SEC(".rodata") = PERF_VERSION; + +#endif /* __PERF_VERSION_H__ */ diff --git a/tools/perf/util/bpf_skel/sample-filter.h b/tools/perf/util/bpf_skel/sample-filter.h index 2e96e1ab084a..683fec85e71e 100644 --- a/tools/perf/util/bpf_skel/sample-filter.h +++ b/tools/perf/util/bpf_skel/sample-filter.h @@ -1,7 +1,9 @@ #ifndef PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H #define PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H -#define MAX_FILTERS 64 +#define MAX_FILTERS 64 +#define MAX_IDX_HASH (16 * 1024) +#define MAX_EVT_HASH (1024 * 1024) /* supported filter operations */ enum perf_bpf_filter_op { @@ -14,14 +16,57 @@ enum perf_bpf_filter_op { PBF_OP_AND, PBF_OP_GROUP_BEGIN, PBF_OP_GROUP_END, + PBF_OP_DONE, +}; + +enum perf_bpf_filter_term { + /* No term is in use. */ + PBF_TERM_NONE = 0, + /* Terms that correspond to PERF_SAMPLE_xx values. */ + PBF_TERM_SAMPLE_START = PBF_TERM_NONE + 1, + PBF_TERM_IP = PBF_TERM_SAMPLE_START + 0, /* SAMPLE_IP = 1U << 0 */ + PBF_TERM_TID = PBF_TERM_SAMPLE_START + 1, /* SAMPLE_TID = 1U << 1 */ + PBF_TERM_TIME = PBF_TERM_SAMPLE_START + 2, /* SAMPLE_TIME = 1U << 2 */ + PBF_TERM_ADDR = PBF_TERM_SAMPLE_START + 3, /* SAMPLE_ADDR = 1U << 3 */ + __PBF_UNUSED_TERM4 = PBF_TERM_SAMPLE_START + 4, /* SAMPLE_READ = 1U << 4 */ + __PBF_UNUSED_TERM5 = PBF_TERM_SAMPLE_START + 5, /* SAMPLE_CALLCHAIN = 1U << 5 */ + PBF_TERM_ID = PBF_TERM_SAMPLE_START + 6, /* SAMPLE_ID = 1U << 6 */ + PBF_TERM_CPU = PBF_TERM_SAMPLE_START + 7, /* SAMPLE_CPU = 1U << 7 */ + PBF_TERM_PERIOD = PBF_TERM_SAMPLE_START + 8, /* SAMPLE_PERIOD = 1U << 8 */ + __PBF_UNUSED_TERM9 = PBF_TERM_SAMPLE_START + 9, /* SAMPLE_STREAM_ID = 1U << 9 */ + __PBF_UNUSED_TERM10 = PBF_TERM_SAMPLE_START + 10, /* SAMPLE_RAW = 1U << 10 */ + __PBF_UNUSED_TERM11 = PBF_TERM_SAMPLE_START + 11, /* SAMPLE_BRANCH_STACK = 1U << 11 */ + __PBF_UNUSED_TERM12 = PBF_TERM_SAMPLE_START + 12, /* SAMPLE_REGS_USER = 1U << 12 */ + __PBF_UNUSED_TERM13 = PBF_TERM_SAMPLE_START + 13, /* SAMPLE_STACK_USER = 1U << 13 */ + PBF_TERM_WEIGHT = PBF_TERM_SAMPLE_START + 14, /* SAMPLE_WEIGHT = 1U << 14 */ + PBF_TERM_DATA_SRC = PBF_TERM_SAMPLE_START + 15, /* SAMPLE_DATA_SRC = 1U << 15 */ + __PBF_UNUSED_TERM16 = PBF_TERM_SAMPLE_START + 16, /* SAMPLE_IDENTIFIER = 1U << 16 */ + PBF_TERM_TRANSACTION = PBF_TERM_SAMPLE_START + 17, /* SAMPLE_TRANSACTION = 1U << 17 */ + __PBF_UNUSED_TERM18 = PBF_TERM_SAMPLE_START + 18, /* SAMPLE_REGS_INTR = 1U << 18 */ + PBF_TERM_PHYS_ADDR = PBF_TERM_SAMPLE_START + 19, /* SAMPLE_PHYS_ADDR = 1U << 19 */ + __PBF_UNUSED_TERM20 = PBF_TERM_SAMPLE_START + 20, /* SAMPLE_AUX = 1U << 20 */ + PBF_TERM_CGROUP = PBF_TERM_SAMPLE_START + 21, /* SAMPLE_CGROUP = 1U << 21 */ + PBF_TERM_DATA_PAGE_SIZE = PBF_TERM_SAMPLE_START + 22, /* SAMPLE_DATA_PAGE_SIZE = 1U << 22 */ + PBF_TERM_CODE_PAGE_SIZE = PBF_TERM_SAMPLE_START + 23, /* SAMPLE_CODE_PAGE_SIZE = 1U << 23 */ + PBF_TERM_WEIGHT_STRUCT = PBF_TERM_SAMPLE_START + 24, /* SAMPLE_WEIGHT_STRUCT = 1U << 24 */ + PBF_TERM_SAMPLE_END = PBF_TERM_WEIGHT_STRUCT, + /* Terms computed from BPF helpers. */ + PBF_TERM_UID, + PBF_TERM_GID, }; /* BPF map entry for filtering */ struct perf_bpf_filter_entry { enum perf_bpf_filter_op op; __u32 part; /* sub-sample type info when it has multiple values */ - __u64 flags; /* perf sample type flags */ + enum perf_bpf_filter_term term; __u64 value; }; -#endif /* PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H */
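Userspace compiles a filter expression into an ordered array of perf_bpf_filter_entry records that the BPF side interprets until PBF_OP_DONE accepts the sample, so "period > 1000 && cpu == 2" becomes a three-entry op list. A simplified sketch of that scheme with illustrative stand-in enums, and with the group ops and lost-count bookkeeping omitted:

#include <stdio.h>
#include <stdint.h>

/* Illustrative stand-ins for the PBF_OP_* / PBF_TERM_* enums. */
enum op   { OP_EQ, OP_GT, OP_DONE };
enum term { TERM_CPU, TERM_PERIOD };

struct entry  { enum op op; enum term term; uint64_t value; };
struct sample { uint64_t cpu, period; };

static uint64_t get(const struct sample *s, enum term t)
{
	return t == TERM_CPU ? s->cpu : s->period;
}

/* Walk the op list the way perf_sample_filter() does: every
 * comparison must pass until OP_DONE accepts the sample. */
static int run(const struct entry *prog, const struct sample *s)
{
	for (;; prog++) {
		switch (prog->op) {
		case OP_EQ:   if (get(s, prog->term) != prog->value) return 0; break;
		case OP_GT:   if (get(s, prog->term) <= prog->value) return 0; break;
		case OP_DONE: return 1;
		}
	}
}

int main(void)
{
	/* "period > 1000 && cpu == 2" as an op list */
	const struct entry prog[] = {
		{ OP_GT, TERM_PERIOD, 1000 },
		{ OP_EQ, TERM_CPU,    2    },
		{ OP_DONE },
	};
	struct sample s = { .cpu = 2, .period = 4096 };

	printf("pass=%d\n", run(prog, &s));
	return 0;
}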
\ No newline at end of file +struct idx_hash_key { + __u64 evt_id; + __u32 tgid; + __u32 reserved; +}; + +#endif /* PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H */ diff --git a/tools/perf/util/bpf_skel/sample_filter.bpf.c b/tools/perf/util/bpf_skel/sample_filter.bpf.c index fb94f5280626..e5666d4c1722 100644 --- a/tools/perf/util/bpf_skel/sample_filter.bpf.c +++ b/tools/perf/util/bpf_skel/sample_filter.bpf.c @@ -9,13 +9,41 @@ /* BPF map that will be filled by user space */ struct filters { - __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(type, BPF_MAP_TYPE_HASH); __type(key, int); - __type(value, struct perf_bpf_filter_entry); - __uint(max_entries, MAX_FILTERS); + __type(value, struct perf_bpf_filter_entry[MAX_FILTERS]); + __uint(max_entries, 1); } filters SEC(".maps"); -int dropped; +/* + * An evsel has multiple instances for each CPU or task but we need a single + * id to be used as a key for the idx_hash. This hashmap would translate the + * instance's ID to a representative ID. + */ +struct event_hash { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u64); + __type(value, __u64); + __uint(max_entries, 1); +} event_hash SEC(".maps"); + +/* tgid/evtid to filter index */ +struct idx_hash { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct idx_hash_key); + __type(value, int); + __uint(max_entries, 1); +} idx_hash SEC(".maps"); + +/* tgid to filter index */ +struct lost_count { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); +} dropped SEC(".maps"); + +volatile const int use_idx_hash; void *bpf_cast_to_kern_ctx(void *) __ksym; @@ -48,31 +76,55 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx, { struct perf_sample_data___new *data = (void *)kctx->data; - if (!bpf_core_field_exists(data->sample_flags) || - (data->sample_flags & entry->flags) == 0) + if (!bpf_core_field_exists(data->sample_flags)) return 0; - switch (entry->flags) { - case PERF_SAMPLE_IP: +#define BUILD_CHECK_SAMPLE(x) \ + _Static_assert((1 << (PBF_TERM_##x - PBF_TERM_SAMPLE_START)) == PERF_SAMPLE_##x, \ + "Mismatched PBF term to sample bit " #x) + BUILD_CHECK_SAMPLE(IP); + BUILD_CHECK_SAMPLE(TID); + BUILD_CHECK_SAMPLE(TIME); + BUILD_CHECK_SAMPLE(ADDR); + BUILD_CHECK_SAMPLE(ID); + BUILD_CHECK_SAMPLE(CPU); + BUILD_CHECK_SAMPLE(PERIOD); + BUILD_CHECK_SAMPLE(WEIGHT); + BUILD_CHECK_SAMPLE(DATA_SRC); + BUILD_CHECK_SAMPLE(TRANSACTION); + BUILD_CHECK_SAMPLE(PHYS_ADDR); + BUILD_CHECK_SAMPLE(CGROUP); + BUILD_CHECK_SAMPLE(DATA_PAGE_SIZE); + BUILD_CHECK_SAMPLE(CODE_PAGE_SIZE); + BUILD_CHECK_SAMPLE(WEIGHT_STRUCT); +#undef BUILD_CHECK_SAMPLE + + /* For sample terms check the sample bit is set. 
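The BUILD_CHECK_SAMPLE asserts above pin down the invariant the runtime check relies on: each sample term sits exactly its PERF_SAMPLE bit position past PBF_TERM_SAMPLE_START, so a single shift recovers the flag bit. A tiny standalone check of that arithmetic for PERIOD, with the values copied from sample-filter.h and perf_event.h:

#include <assert.h>

#define PBF_TERM_SAMPLE_START	1		/* PBF_TERM_NONE + 1 */
#define PBF_TERM_PERIOD		(PBF_TERM_SAMPLE_START + 8)
#define PERF_SAMPLE_PERIOD	(1U << 8)

int main(void)
{
	unsigned int sample_flags = PERF_SAMPLE_PERIOD;	/* what the kernel recorded */
	unsigned int term = PBF_TERM_PERIOD;

	/* The enum layout makes this shift recover the PERF_SAMPLE_* bit. */
	assert((1U << (term - PBF_TERM_SAMPLE_START)) == PERF_SAMPLE_PERIOD);
	assert(sample_flags & (1U << (term - PBF_TERM_SAMPLE_START)));
	return 0;
}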
*/ + if (entry->term >= PBF_TERM_SAMPLE_START && entry->term <= PBF_TERM_SAMPLE_END && + (data->sample_flags & (1 << (entry->term - PBF_TERM_SAMPLE_START))) == 0) + return 0; + + switch (entry->term) { + case PBF_TERM_IP: return kctx->data->ip; - case PERF_SAMPLE_ID: + case PBF_TERM_ID: return kctx->data->id; - case PERF_SAMPLE_TID: + case PBF_TERM_TID: if (entry->part) return kctx->data->tid_entry.pid; else return kctx->data->tid_entry.tid; - case PERF_SAMPLE_CPU: + case PBF_TERM_CPU: return kctx->data->cpu_entry.cpu; - case PERF_SAMPLE_TIME: + case PBF_TERM_TIME: return kctx->data->time; - case PERF_SAMPLE_ADDR: + case PBF_TERM_ADDR: return kctx->data->addr; - case PERF_SAMPLE_PERIOD: + case PBF_TERM_PERIOD: return kctx->data->period; - case PERF_SAMPLE_TRANSACTION: + case PBF_TERM_TRANSACTION: return kctx->data->txn; - case PERF_SAMPLE_WEIGHT_STRUCT: + case PBF_TERM_WEIGHT_STRUCT: if (entry->part == 1) return kctx->data->weight.var1_dw; if (entry->part == 2) @@ -80,15 +132,17 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx, if (entry->part == 3) return kctx->data->weight.var3_w; /* fall through */ - case PERF_SAMPLE_WEIGHT: + case PBF_TERM_WEIGHT: return kctx->data->weight.full; - case PERF_SAMPLE_PHYS_ADDR: + case PBF_TERM_PHYS_ADDR: return kctx->data->phys_addr; - case PERF_SAMPLE_CODE_PAGE_SIZE: + case PBF_TERM_CGROUP: + return kctx->data->cgroup; + case PBF_TERM_CODE_PAGE_SIZE: return kctx->data->code_page_size; - case PERF_SAMPLE_DATA_PAGE_SIZE: + case PBF_TERM_DATA_PAGE_SIZE: return kctx->data->data_page_size; - case PERF_SAMPLE_DATA_SRC: + case PBF_TERM_DATA_SRC: if (entry->part == 1) return kctx->data->data_src.mem_op; if (entry->part == 2) @@ -110,13 +164,28 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx, if (entry->part == 8) { union perf_mem_data_src___new *data = (void *)&kctx->data->data_src; - if (bpf_core_field_exists(data->mem_hops)) + if (__builtin_preserve_field_info(data->mem_hops, BPF_FIELD_EXISTS)) return data->mem_hops; return 0; } /* return the whole word */ return kctx->data->data_src.val; + case PBF_TERM_UID: + return bpf_get_current_uid_gid() & 0xFFFFFFFF; + case PBF_TERM_GID: + return bpf_get_current_uid_gid() >> 32; + case PBF_TERM_NONE: + case __PBF_UNUSED_TERM4: + case __PBF_UNUSED_TERM5: + case __PBF_UNUSED_TERM9: + case __PBF_UNUSED_TERM10: + case __PBF_UNUSED_TERM11: + case __PBF_UNUSED_TERM12: + case __PBF_UNUSED_TERM13: + case __PBF_UNUSED_TERM16: + case __PBF_UNUSED_TERM18: + case __PBF_UNUSED_TERM20: default: break; } @@ -140,39 +209,66 @@ int perf_sample_filter(void *ctx) __u64 sample_data; int in_group = 0; int group_result = 0; - int i; + int i, k; + int *losts; kctx = bpf_cast_to_kern_ctx(ctx); - for (i = 0; i < MAX_FILTERS; i++) { - int key = i; /* needed for verifier :( */ + k = 0; - entry = bpf_map_lookup_elem(&filters, &key); - if (entry == NULL) - break; - sample_data = perf_get_sample(kctx, entry); + if (use_idx_hash) { + struct idx_hash_key key = { + .tgid = bpf_get_current_pid_tgid() >> 32, + }; + __u64 eid = kctx->event->id; + __u64 *key_id; + int *idx; - switch (entry->op) { + /* get primary_event_id */ + if (kctx->event->parent) + eid = kctx->event->parent->id; + + key_id = bpf_map_lookup_elem(&event_hash, &eid); + if (key_id == NULL) + goto drop; + + key.evt_id = *key_id; + + idx = bpf_map_lookup_elem(&idx_hash, &key); + if (idx) + k = *idx; + else + goto drop; + } + + entry = bpf_map_lookup_elem(&filters, &k); + if (entry == NULL) + goto drop; + + for (i = 0; i < MAX_FILTERS; 
i++) { + sample_data = perf_get_sample(kctx, &entry[i]); + + switch (entry[i].op) { case PBF_OP_EQ: - CHECK_RESULT(sample_data, ==, entry->value) + CHECK_RESULT(sample_data, ==, entry[i].value) break; case PBF_OP_NEQ: - CHECK_RESULT(sample_data, !=, entry->value) + CHECK_RESULT(sample_data, !=, entry[i].value) break; case PBF_OP_GT: - CHECK_RESULT(sample_data, >, entry->value) + CHECK_RESULT(sample_data, >, entry[i].value) break; case PBF_OP_GE: - CHECK_RESULT(sample_data, >=, entry->value) + CHECK_RESULT(sample_data, >=, entry[i].value) break; case PBF_OP_LT: - CHECK_RESULT(sample_data, <, entry->value) + CHECK_RESULT(sample_data, <, entry[i].value) break; case PBF_OP_LE: - CHECK_RESULT(sample_data, <=, entry->value) + CHECK_RESULT(sample_data, <=, entry[i].value) break; case PBF_OP_AND: - CHECK_RESULT(sample_data, &, entry->value) + CHECK_RESULT(sample_data, &, entry[i].value) break; case PBF_OP_GROUP_BEGIN: in_group = 1; @@ -183,13 +279,19 @@ int perf_sample_filter(void *ctx) goto drop; in_group = 0; break; + case PBF_OP_DONE: + /* no failures so far, accept it */ + return 1; } } /* generate sample data */ return 1; drop: - __sync_fetch_and_add(&dropped, 1); + losts = bpf_map_lookup_elem(&dropped, &k); + if (losts != NULL) + __sync_fetch_and_add(losts, 1); + return 0; } diff --git a/tools/perf/util/bpf_skel/syscall_summary.bpf.c b/tools/perf/util/bpf_skel/syscall_summary.bpf.c new file mode 100644 index 000000000000..1bcd066a5199 --- /dev/null +++ b/tools/perf/util/bpf_skel/syscall_summary.bpf.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Trace raw_syscalls tracepoints to collect system call statistics. + */ + +#include "vmlinux.h" +#include "syscall_summary.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +/* This is to calculate a delta between sys-enter and sys-exit for each thread */ +struct syscall_trace { + int nr; /* syscall number is only available at sys-enter */ + int unused; + u64 timestamp; +}; + +#define MAX_ENTRIES (128 * 1024) + +struct syscall_trace_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); /* tid */ + __type(value, struct syscall_trace); + __uint(max_entries, MAX_ENTRIES); +} syscall_trace_map SEC(".maps"); + +struct syscall_stats_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct syscall_key); + __type(value, struct syscall_stats); + __uint(max_entries, MAX_ENTRIES); +} syscall_stats_map SEC(".maps"); + +int enabled; /* controlled from userspace */ + +const volatile enum syscall_aggr_mode aggr_mode; +const volatile int use_cgroup_v2; + +int perf_subsys_id = -1; + +static inline __u64 get_current_cgroup_id(void) +{ + struct task_struct *task; + struct cgroup *cgrp; + + if (use_cgroup_v2) + return bpf_get_current_cgroup_id(); + + task = bpf_get_current_task_btf(); + + if (perf_subsys_id == -1) { +#if __has_builtin(__builtin_preserve_enum_value) + perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id, + perf_event_cgrp_id); +#else + perf_subsys_id = perf_event_cgrp_id; +#endif + } + + cgrp = BPF_CORE_READ(task, cgroups, subsys[perf_subsys_id], cgroup); + return BPF_CORE_READ(cgrp, kn, id); +} + +static void update_stats(int cpu_or_tid, u64 cgroup_id, int nr, s64 duration, + long ret) +{ + struct syscall_key key = { + .cpu_or_tid = cpu_or_tid, + .cgroup = cgroup_id, + .nr = nr, + }; + struct syscall_stats *stats; + + stats = bpf_map_lookup_elem(&syscall_stats_map, &key); + if (stats == NULL) { + struct syscall_stats zero = {}; + + bpf_map_update_elem(&syscall_stats_map, 
&key, &zero, BPF_NOEXIST); + stats = bpf_map_lookup_elem(&syscall_stats_map, &key); + if (stats == NULL) + return; + } + + __sync_fetch_and_add(&stats->count, 1); + if (ret < 0) + __sync_fetch_and_add(&stats->error, 1); + + if (duration > 0) { + __sync_fetch_and_add(&stats->total_time, duration); + __sync_fetch_and_add(&stats->squared_sum, duration * duration); + if (stats->max_time < duration) + stats->max_time = duration; + if (stats->min_time > duration || stats->min_time == 0) + stats->min_time = duration; + } + + return; +} + +SEC("tp_btf/sys_enter") +int sys_enter(u64 *ctx) +{ + int tid; + struct syscall_trace st; + + if (!enabled) + return 0; + + st.nr = ctx[1]; /* syscall number */ + st.unused = 0; + st.timestamp = bpf_ktime_get_ns(); + + tid = bpf_get_current_pid_tgid(); + bpf_map_update_elem(&syscall_trace_map, &tid, &st, BPF_ANY); + + return 0; +} + +SEC("tp_btf/sys_exit") +int sys_exit(u64 *ctx) +{ + int tid; + int key = 0; + u64 cgroup = 0; + long ret = ctx[1]; /* return value of the syscall */ + struct syscall_trace *st; + s64 delta; + + if (!enabled) + return 0; + + tid = bpf_get_current_pid_tgid(); + st = bpf_map_lookup_elem(&syscall_trace_map, &tid); + if (st == NULL) + return 0; + + if (aggr_mode == SYSCALL_AGGR_THREAD) + key = tid; + else if (aggr_mode == SYSCALL_AGGR_CGROUP) + cgroup = get_current_cgroup_id(); + else + key = bpf_get_smp_processor_id(); + + delta = bpf_ktime_get_ns() - st->timestamp; + update_stats(key, cgroup, st->nr, delta, ret); + + bpf_map_delete_elem(&syscall_trace_map, &tid); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/util/bpf_skel/syscall_summary.h b/tools/perf/util/bpf_skel/syscall_summary.h new file mode 100644 index 000000000000..72ccccb45925 --- /dev/null +++ b/tools/perf/util/bpf_skel/syscall_summary.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Data structures shared between BPF and tools. 
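update_stats() above accumulates count, total_time and squared_sum precisely so that userspace can recover the mean and standard deviation of syscall latency without keeping individual samples, via E[x^2] - E[x]^2. A sketch of that reduction, assuming the counters were read back from syscall_stats_map (the numbers are made up):

#include <math.h>
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical values read back for one syscall_key. */
	uint64_t count = 4;
	uint64_t total_time = 4000;		/* ns: e.g. 900+1000+1000+1100 */
	uint64_t squared_sum = 4020000;		/* sum of duration^2 */

	double mean = (double)total_time / count;
	double var  = (double)squared_sum / count - mean * mean;

	/* avg=1000.0 ns stddev=70.7 ns */
	printf("avg=%.1f ns stddev=%.1f ns\n", mean, sqrt(var));
	return 0;
}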
*/ +#ifndef UTIL_BPF_SKEL_SYSCALL_SUMMARY_H +#define UTIL_BPF_SKEL_SYSCALL_SUMMARY_H + +enum syscall_aggr_mode { + SYSCALL_AGGR_THREAD, + SYSCALL_AGGR_CPU, + SYSCALL_AGGR_CGROUP, +}; + +struct syscall_key { + u64 cgroup; + int cpu_or_tid; + int nr; +}; + +struct syscall_stats { + u64 total_time; + u64 squared_sum; + u64 max_time; + u64 min_time; + u32 count; + u32 error; +}; + +#endif /* UTIL_BPF_SKEL_SYSCALL_SUMMARY_H */ diff --git a/tools/perf/util/bpf_skel/vmlinux/.gitignore b/tools/perf/util/bpf_skel/vmlinux/.gitignore new file mode 100644 index 000000000000..49502c04183a --- /dev/null +++ b/tools/perf/util/bpf_skel/vmlinux/.gitignore @@ -0,0 +1 @@ +!vmlinux.h diff --git a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h index ab84a6e1da5e..a59ce912be18 100644 --- a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h +++ b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h @@ -15,11 +15,19 @@ typedef __u8 u8; typedef __u32 u32; +typedef __s32 s32; typedef __u64 u64; typedef __s64 s64; typedef int pid_t; +typedef __s64 time64_t; + +struct timespec64 { + time64_t tv_sec; + long int tv_nsec; +}; + enum cgroup_subsys_id { perf_event_cgrp_id = 8, }; @@ -163,10 +171,16 @@ struct perf_sample_data { u32 cpu; } cpu_entry; u64 phys_addr; + u64 cgroup; u64 data_page_size; u64 code_page_size; } __attribute__((__aligned__(64))) __attribute__((preserve_access_index)); +struct perf_event { + struct perf_event *parent; + u64 id; +} __attribute__((preserve_access_index)); + struct bpf_perf_event_data_kern { struct perf_sample_data *data; struct perf_event *event; @@ -181,4 +195,21 @@ struct bpf_perf_event_data_kern { */ struct rq {}; +struct kmem_cache { + const char *name; +} __attribute__((preserve_access_index)); + +struct bpf_iter__kmem_cache { + struct kmem_cache *s; +} __attribute__((preserve_access_index)); + +struct zone { + spinlock_t lock; +} __attribute__((preserve_access_index)); + +struct pglist_data { + struct zone node_zones[6]; /* value for all possible config */ + int nr_zones; +} __attribute__((preserve_access_index)); + #endif // __VMLINUX_H diff --git a/tools/perf/util/bpf_trace_augment.c b/tools/perf/util/bpf_trace_augment.c new file mode 100644 index 000000000000..56ed17534caa --- /dev/null +++ b/tools/perf/util/bpf_trace_augment.c @@ -0,0 +1,143 @@ +#include <bpf/libbpf.h> +#include <internal/xyarray.h> + +#include "util/debug.h" +#include "util/evlist.h" +#include "util/trace_augment.h" + +#include "bpf_skel/augmented_raw_syscalls.skel.h" + +static struct augmented_raw_syscalls_bpf *skel; +static struct evsel *bpf_output; + +int augmented_syscalls__prepare(void) +{ + struct bpf_program *prog; + char buf[128]; + int err; + + skel = augmented_raw_syscalls_bpf__open(); + if (!skel) { + pr_debug("Failed to open augmented syscalls BPF skeleton\n"); + return -errno; + } + + /* + * Disable attaching the BPF programs except for sys_enter and + * sys_exit that tail call into this as necessary. 
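The summary collector pairs the two raw_syscalls tracepoints through a tid-keyed map: sys_enter stashes {nr, timestamp}, and sys_exit looks the entry back up to compute the latency before deleting it. A single-threaded userspace analogue of that pairing, with a fixed array as a hypothetical stand-in for syscall_trace_map:

#include <stdio.h>

#define MAX_TIDS 1024

struct trace { int nr; unsigned long long ts; int live; };
static struct trace inflight[MAX_TIDS];	/* stand-in for syscall_trace_map */

static void on_sys_enter(int tid, int nr, unsigned long long now)
{
	inflight[tid % MAX_TIDS] = (struct trace){ .nr = nr, .ts = now, .live = 1 };
}

static void on_sys_exit(int tid, unsigned long long now)
{
	struct trace *st = &inflight[tid % MAX_TIDS];

	if (!st->live)
		return;		/* enter was never seen */
	printf("tid %d syscall %d took %llu ns\n", tid, st->nr, now - st->ts);
	st->live = 0;		/* bpf_map_delete_elem() analogue */
}

int main(void)
{
	on_sys_enter(42, 1 /* write */, 1000);
	on_sys_exit(42, 3500);
	return 0;
}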
+ */ + bpf_object__for_each_program(prog, skel->obj) { + if (prog != skel->progs.sys_enter && prog != skel->progs.sys_exit) + bpf_program__set_autoattach(prog, /*autoattach=*/false); + } + + err = augmented_raw_syscalls_bpf__load(skel); + if (err < 0) { + libbpf_strerror(err, buf, sizeof(buf)); + pr_debug("Failed to load augmented syscalls BPF skeleton: %s\n", buf); + return err; + } + + augmented_raw_syscalls_bpf__attach(skel); + return 0; +} + +int augmented_syscalls__create_bpf_output(struct evlist *evlist) +{ + int err = parse_event(evlist, "bpf-output/no-inherit=1,name=__augmented_syscalls__/"); + + if (err) { + pr_err("ERROR: Setup BPF output event failed: %d\n", err); + return err; + } + + bpf_output = evlist__last(evlist); + assert(evsel__name_is(bpf_output, "__augmented_syscalls__")); + + return 0; +} + +void augmented_syscalls__setup_bpf_output(void) +{ + struct perf_cpu cpu; + int i; + + if (bpf_output == NULL) + return; + + /* + * Set up the __augmented_syscalls__ BPF map to hold for each + * CPU the bpf-output event's file descriptor. + */ + perf_cpu_map__for_each_cpu(cpu, i, bpf_output->core.cpus) { + int mycpu = cpu.cpu; + + bpf_map__update_elem(skel->maps.__augmented_syscalls__, + &mycpu, sizeof(mycpu), + xyarray__entry(bpf_output->core.fd, + mycpu, 0), + sizeof(__u32), BPF_ANY); + } +} + +int augmented_syscalls__set_filter_pids(unsigned int nr, pid_t *pids) +{ + bool value = true; + int err = 0; + + if (skel == NULL) + return 0; + + for (size_t i = 0; i < nr; ++i) { + err = bpf_map__update_elem(skel->maps.pids_filtered, &pids[i], + sizeof(*pids), &value, sizeof(value), + BPF_ANY); + if (err) + break; + } + return err; +} + +int augmented_syscalls__get_map_fds(int *enter_fd, int *exit_fd, int *beauty_fd) +{ + if (skel == NULL) + return -1; + + *enter_fd = bpf_map__fd(skel->maps.syscalls_sys_enter); + *exit_fd = bpf_map__fd(skel->maps.syscalls_sys_exit); + *beauty_fd = bpf_map__fd(skel->maps.beauty_map_enter); + + if (*enter_fd < 0 || *exit_fd < 0 || *beauty_fd < 0) { + pr_err("Error: failed to get syscall or beauty map fd\n"); + return -1; + } + + return 0; +} + +struct bpf_program *augmented_syscalls__unaugmented(void) +{ + return skel->progs.syscall_unaugmented; +} + +struct bpf_program *augmented_syscalls__find_by_title(const char *name) +{ + struct bpf_program *pos; + const char *sec_name; + + if (skel->obj == NULL) + return NULL; + + bpf_object__for_each_program(pos, skel->obj) { + sec_name = bpf_program__section_name(pos); + if (sec_name && !strcmp(sec_name, name)) + return pos; + } + + return NULL; +} + +void augmented_syscalls__cleanup(void) +{ + augmented_raw_syscalls_bpf__destroy(skel); +} diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c index 378f16a24751..3712be067464 100644 --- a/tools/perf/util/branch.c +++ b/tools/perf/util/branch.c @@ -46,7 +46,7 @@ const char *branch_new_type_name(int new_type) "FAULT_DATA", "FAULT_INST", /* - * TODO: This switch should happen on 'session->header.env.arch' + * TODO: This switch should happen on 'perf_session__env(session)->arch' * instead, because an arm64 platform perf recording could be * opened for analysis on other platforms as well. 
*/ @@ -109,7 +109,7 @@ const char *get_branch_type(struct branch_entry *e) return branch_type_name(e->flags.type); } -void branch_type_stat_display(FILE *fp, struct branch_type_stat *st) +void branch_type_stat_display(FILE *fp, const struct branch_type_stat *st) { u64 total = 0; int i; @@ -171,7 +171,7 @@ static int count_str_scnprintf(int idx, const char *str, char *bf, int size) return scnprintf(bf, size, "%s%s", (idx) ? " " : " (", str); } -int branch_type_str(struct branch_type_stat *st, char *bf, int size) +int branch_type_str(const struct branch_type_stat *st, char *bf, int size) { int i, j = 0, printed = 0; u64 total = 0; diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index e41bfffe2217..7429530fa774 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -25,7 +25,8 @@ struct branch_flags { u64 spec:2; u64 new_type:4; u64 priv:3; - u64 reserved:31; + u64 not_taken:1; + u64 reserved:30; }; }; }; @@ -34,6 +35,7 @@ struct branch_info { struct addr_map_symbol from; struct addr_map_symbol to; struct branch_flags flags; + u64 branch_stack_cntr; char *srcline_from; char *srcline_to; }; @@ -86,8 +88,8 @@ void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags, const char *branch_type_name(int type); const char *branch_new_type_name(int new_type); const char *get_branch_type(struct branch_entry *e); -void branch_type_stat_display(FILE *fp, struct branch_type_stat *st); -int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize); +void branch_type_stat_display(FILE *fp, const struct branch_type_stat *st); +int branch_type_str(const struct branch_type_stat *st, char *bf, int bfsize); const char *branch_spec_desc(int spec); diff --git a/tools/perf/util/btf.c b/tools/perf/util/btf.c new file mode 100644 index 000000000000..bb163fe87767 --- /dev/null +++ b/tools/perf/util/btf.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arnaldo Carvalho de Melo <acme@redhat.com> + * + * Copyright (C) 2024, Red Hat, Inc + */ + +#include <bpf/btf.h> +#include <util/btf.h> +#include <string.h> + +const struct btf_member *__btf_type__find_member_by_name(struct btf *btf, + int type_id, const char *member_name) +{ + const struct btf_type *t = btf__type_by_id(btf, type_id); + const struct btf_member *m; + int i; + + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { + const char *current_member_name = btf__name_by_offset(btf, m->name_off); + + if (!strcmp(current_member_name, member_name)) + return m; + } + + return NULL; +} diff --git a/tools/perf/util/btf.h b/tools/perf/util/btf.h new file mode 100644 index 000000000000..05e6e5bf23d6 --- /dev/null +++ b/tools/perf/util/btf.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_UTIL_BTF +#define __PERF_UTIL_BTF 1 + +struct btf; +struct btf_member; + +const struct btf_member *__btf_type__find_member_by_name(struct btf *btf, + int type_id, const char *member_name); +#endif // __PERF_UTIL_BTF diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 36728222a5b4..fdb35133fde4 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -42,10 +42,20 @@ static bool no_buildid_cache; -int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, +static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data __maybe_unused) +{ + struct map *map = node->ms.map; + + if (map) + dso__set_hit(map__dso(map)); + + return 0; +} + +int build_id__mark_dso_hit(const struct perf_tool *tool __maybe_unused, union 
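Adding not_taken:1 to branch_flags while shrinking reserved from 31 to 30 bits keeps the bitfield at exactly 64 bits, so the union still overlays the raw u64 value cleanly. A compile-time check of that invariant; the earlier fields are reproduced from branch.h context that this hunk does not show:

#include <assert.h>
#include <stdint.h>

union branch_flags_sketch {
	uint64_t value;
	struct {
		/* field list assumed from branch.h, not shown in this hunk */
		uint64_t mispred:1, predicted:1, in_tx:1, abort:1,
			 cycles:16, type:4, spec:2, new_type:4, priv:3,
			 not_taken:1, reserved:30;
	};
};

/* 1+1+1+1+16+4+2+4+3+1+30 == 64, so the u64 overlay stays intact. */
static_assert(sizeof(union branch_flags_sketch) == sizeof(uint64_t),
	      "branch_flags must remain one 64-bit word");

int main(void) { return 0; }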
perf_event *event, struct perf_sample *sample, - struct evsel *evsel __maybe_unused, + struct evsel *evsel, struct machine *machine) { struct addr_location al; @@ -60,66 +70,39 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, addr_location__init(&al); if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) - map__dso(al.map)->hit = 1; + dso__set_hit(map__dso(al.map)); addr_location__exit(&al); - thread__put(thread); - return 0; -} -static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_sample *sample - __maybe_unused, - struct machine *machine) -{ - struct thread *thread = machine__findnew_thread(machine, - event->fork.pid, - event->fork.tid); - - dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, - event->fork.ppid, event->fork.ptid); + sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH, + /*symbols=*/false, mark_dso_hit_callback, /*data=*/NULL); - if (thread) { - machine__remove_thread(machine, thread); - thread__put(thread); - } + thread__put(thread); return 0; } -struct perf_tool build_id__mark_dso_hit_ops = { - .sample = build_id__mark_dso_hit, - .mmap = perf_event__process_mmap, - .mmap2 = perf_event__process_mmap2, - .fork = perf_event__process_fork, - .exit = perf_event__exit_del_thread, - .attr = perf_event__process_attr, - .build_id = perf_event__process_build_id, - .ordered_events = true, -}; - -int build_id__sprintf(const struct build_id *build_id, char *bf) +int build_id__snprintf(const struct build_id *build_id, char *bf, size_t bf_size) { - char *bid = bf; - const u8 *raw = build_id->data; - size_t i; - - bf[0] = 0x0; + size_t offs = 0; - for (i = 0; i < build_id->size; ++i) { - sprintf(bid, "%02x", *raw); - ++raw; - bid += 2; + if (build_id->size == 0) { + /* Ensure bf is always \0 terminated. 
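The snprintf-based rewrite formats the raw build-id bytes as lowercase hex while respecting the destination size, so a short buffer yields a truncated but still NUL-terminated string instead of the old sprintf overflow risk. A standalone sketch of the same loop:

#include <stdio.h>
#include <string.h>

/* Format `size` raw bytes as hex into bf, never writing past bf_size. */
static int bid_snprintf(char *bf, size_t bf_size,
			const unsigned char *data, size_t size)
{
	size_t offs = 0;

	if (size == 0) {	/* keep bf NUL terminated even when empty */
		if (bf_size > 0)
			bf[0] = '\0';
		return 0;
	}
	for (size_t i = 0; i < size && offs < bf_size; i++)
		offs += snprintf(bf + offs, bf_size - offs, "%02x", data[i]);

	return offs;
}

int main(void)
{
	unsigned char bid[] = { 0xde, 0xad, 0xbe, 0xef };
	char buf[16];

	bid_snprintf(buf, sizeof(buf), bid, sizeof(bid));
	printf("%s\n", buf);	/* prints: deadbeef */
	return 0;
}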
*/ + if (bf_size > 0) + bf[0] = '\0'; + return 0; } - return (bid - bf) + 1; + for (size_t i = 0; i < build_id->size && offs < bf_size; ++i) + offs += snprintf(bf + offs, bf_size - offs, "%02x", build_id->data[i]); + + return offs; } -int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) +int sysfs__snprintf_build_id(const char *root_dir, char *sbuild_id, size_t sbuild_id_size) { char notes[PATH_MAX]; - struct build_id bid; + struct build_id bid = { .size = 0, }; int ret; if (!root_dir) @@ -131,19 +114,19 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) if (ret < 0) return ret; - return build_id__sprintf(&bid, sbuild_id); + return build_id__snprintf(&bid, sbuild_id, sbuild_id_size); } -int filename__sprintf_build_id(const char *pathname, char *sbuild_id) +int filename__snprintf_build_id(const char *pathname, char *sbuild_id, size_t sbuild_id_size) { - struct build_id bid; + struct build_id bid = { .size = 0, }; int ret; ret = filename__read_build_id(pathname, &bid); if (ret < 0) return ret; - return build_id__sprintf(&bid, sbuild_id); + return build_id__snprintf(&bid, sbuild_id, sbuild_id_size); } /* asnprintf consolidates asprintf and snprintf */ @@ -244,9 +227,9 @@ static bool build_id_cache__valid_id(char *sbuild_id) return false; if (!strcmp(pathname, DSO__NAME_KALLSYMS)) - ret = sysfs__sprintf_build_id("/", real_sbuild_id); + ret = sysfs__snprintf_build_id("/", real_sbuild_id, sizeof(real_sbuild_id)); else if (pathname[0] == '/') - ret = filename__sprintf_build_id(pathname, real_sbuild_id); + ret = filename__snprintf_build_id(pathname, real_sbuild_id, sizeof(real_sbuild_id)); else ret = -EINVAL; /* Should we support other special DSO cache? */ if (ret >= 0) @@ -272,10 +255,10 @@ char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size, bool alloc = (bf == NULL); int ret; - if (!dso->has_build_id) + if (!dso__has_build_id(dso)) return NULL; - build_id__sprintf(&dso->bid, sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); linkname = build_id_cache__linkname(sbuild_id, NULL, 0); if (!linkname) return NULL; @@ -310,7 +293,7 @@ static int write_buildid(const char *name, size_t name_len, struct build_id *bid size_t len; len = name_len + 1; - len = PERF_ALIGN(len, NAME_ALIGN); + len = PERF_ALIGN(len, sizeof(u64)); memset(&b, 0, sizeof(b)); memcpy(&b.data, bid->data, bid->size); @@ -327,48 +310,56 @@ static int write_buildid(const char *name, size_t name_len, struct build_id *bid return write_padded(fd, name, name_len + 1, len); } -static int machine__write_buildid_table(struct machine *machine, - struct feat_fd *fd) +struct machine__write_buildid_table_cb_args { + struct machine *machine; + struct feat_fd *fd; + u16 kmisc, umisc; +}; + +static int machine__write_buildid_table_cb(struct dso *dso, void *data) { - int err = 0; - struct dso *pos; - u16 kmisc = PERF_RECORD_MISC_KERNEL, - umisc = PERF_RECORD_MISC_USER; + struct machine__write_buildid_table_cb_args *args = data; + const char *name; + size_t name_len; + bool in_kernel = false; - if (!machine__is_host(machine)) { - kmisc = PERF_RECORD_MISC_GUEST_KERNEL; - umisc = PERF_RECORD_MISC_GUEST_USER; - } + if (!dso__has_build_id(dso)) + return 0; + + if (!dso__hit(dso) && !dso__is_vdso(dso)) + return 0; - dsos__for_each_with_build_id(pos, &machine->dsos.head) { - const char *name; - size_t name_len; - bool in_kernel = false; + if (dso__is_vdso(dso)) { + name = dso__short_name(dso); + name_len = dso__short_name_len(dso); + } else if (dso__is_kcore(dso)) { + name = 
args->machine->mmap_name; + name_len = strlen(name); + } else { + name = dso__long_name(dso); + name_len = dso__long_name_len(dso); + } - if (!pos->hit && !dso__is_vdso(pos)) - continue; + in_kernel = dso__kernel(dso) || is_kernel_module(name, PERF_RECORD_MISC_CPUMODE_UNKNOWN); + return write_buildid(name, name_len, &dso__id(dso)->build_id, args->machine->pid, + in_kernel ? args->kmisc : args->umisc, args->fd); +} - if (dso__is_vdso(pos)) { - name = pos->short_name; - name_len = pos->short_name_len; - } else if (dso__is_kcore(pos)) { - name = machine->mmap_name; - name_len = strlen(name); - } else { - name = pos->long_name; - name_len = pos->long_name_len; - } +static int machine__write_buildid_table(struct machine *machine, struct feat_fd *fd) +{ + struct machine__write_buildid_table_cb_args args = { + .machine = machine, + .fd = fd, + .kmisc = PERF_RECORD_MISC_KERNEL, + .umisc = PERF_RECORD_MISC_USER, + }; - in_kernel = pos->kernel || - is_kernel_module(name, - PERF_RECORD_MISC_CPUMODE_UNKNOWN); - err = write_buildid(name, name_len, &pos->bid, machine->pid, - in_kernel ? kmisc : umisc, fd); - if (err) - break; + if (!machine__is_host(machine)) { + args.kmisc = PERF_RECORD_MISC_GUEST_KERNEL; + args.umisc = PERF_RECORD_MISC_GUEST_USER; } - return err; + return dsos__for_each_dso(&machine->dsos, machine__write_buildid_table_cb, &args); } int perf_session__write_buildid_table(struct perf_session *session, @@ -390,42 +381,6 @@ int perf_session__write_buildid_table(struct perf_session *session, return err; } -static int __dsos__hit_all(struct list_head *head) -{ - struct dso *pos; - - list_for_each_entry(pos, head, node) - pos->hit = true; - - return 0; -} - -static int machine__hit_all_dsos(struct machine *machine) -{ - return __dsos__hit_all(&machine->dsos.head); -} - -int dsos__hit_all(struct perf_session *session) -{ - struct rb_node *nd; - int err; - - err = machine__hit_all_dsos(&session->machines.host); - if (err) - return err; - - for (nd = rb_first_cached(&session->machines.guests); nd; - nd = rb_next(nd)) { - struct machine *pos = rb_entry(nd, struct machine, rb_node); - - err = machine__hit_all_dsos(pos); - if (err) - return err; - } - - return 0; -} - void disable_buildid_cache(void) { no_buildid_cache = true; @@ -560,7 +515,7 @@ char *build_id_cache__cachedir(const char *sbuild_id, const char *name, struct nsinfo *nsi, bool is_kallsyms, bool is_vdso) { - char *realname = (char *)name, *filename; + char *realname = NULL, *filename; bool slash = is_kallsyms || is_vdso; if (!slash) @@ -571,9 +526,7 @@ char *build_id_cache__cachedir(const char *sbuild_id, const char *name, sbuild_id ? 
"/" : "", sbuild_id ?: "") < 0) filename = NULL; - if (!slash) - free(realname); - + free(realname); return filename; } @@ -831,7 +784,7 @@ static int build_id_cache__add_b(const struct build_id *bid, { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(bid, sbuild_id); + build_id__snprintf(bid, sbuild_id, sizeof(sbuild_id)); return __build_id_cache__add_s(sbuild_id, name, nsi, is_kallsyms, is_vdso, proper_name, root_dir); @@ -903,14 +856,14 @@ static int filename__read_build_id_ns(const char *filename, static bool dso__build_id_mismatch(struct dso *dso, const char *name) { - struct build_id bid; + struct build_id bid = { .size = 0, }; bool ret = false; - mutex_lock(&dso->lock); - if (filename__read_build_id_ns(name, &bid, dso->nsinfo) >= 0) + mutex_lock(dso__lock(dso)); + if (filename__read_build_id_ns(name, &bid, dso__nsinfo(dso)) >= 0) ret = !dso__build_id_equal(dso, &bid); - mutex_unlock(&dso->lock); + mutex_unlock(dso__lock(dso)); return ret; } @@ -920,13 +873,13 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine, { bool is_kallsyms = dso__is_kallsyms(dso); bool is_vdso = dso__is_vdso(dso); - const char *name = dso->long_name; + const char *name = dso__long_name(dso); const char *proper_name = NULL; const char *root_dir = NULL; char *allocated_name = NULL; int ret = 0; - if (!dso->has_build_id) + if (!dso__has_build_id(dso) || !dso__hit(dso)) return 0; if (dso__is_kcore(dso)) { @@ -951,10 +904,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine, if (!is_kallsyms && dso__build_id_mismatch(dso, name)) goto out_free; - mutex_lock(&dso->lock); - ret = build_id_cache__add_b(&dso->bid, name, dso->nsinfo, + mutex_lock(dso__lock(dso)); + ret = build_id_cache__add_b(dso__bid(dso), name, dso__nsinfo(dso), is_kallsyms, is_vdso, proper_name, root_dir); - mutex_unlock(&dso->lock); + mutex_unlock(dso__lock(dso)); out_free: free(allocated_name); return ret; @@ -994,7 +947,7 @@ int perf_session__cache_build_ids(struct perf_session *session) static bool machine__read_build_ids(struct machine *machine, bool with_hits) { - return __dsos__read_build_ids(&machine->dsos.head, with_hits); + return dsos__read_build_ids(&machine->dsos, with_hits); } bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) @@ -1013,7 +966,10 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) void build_id__init(struct build_id *bid, const u8 *data, size_t size) { - WARN_ON(size > BUILD_ID_SIZE); + if (size > BUILD_ID_SIZE) { + pr_debug("Truncating build_id size from %zd\n", size); + size = BUILD_ID_SIZE; + } memcpy(bid->data, data, size); bid->size = size; } diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 4e3a1169379b..47e621cebe1b 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -13,20 +13,18 @@ struct build_id { u8 data[BUILD_ID_SIZE]; - size_t size; + u8 size; }; -struct nsinfo; - -extern struct perf_tool build_id__mark_dso_hit_ops; struct dso; struct feat_fd; +struct nsinfo; void build_id__init(struct build_id *bid, const u8 *data, size_t size); -int build_id__sprintf(const struct build_id *build_id, char *bf); +int build_id__snprintf(const struct build_id *build_id, char *bf, size_t bf_size); bool build_id__is_defined(const struct build_id *bid); -int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); -int filename__sprintf_build_id(const char *pathname, char *sbuild_id); +int sysfs__snprintf_build_id(const char *root_dir, char *sbuild_id, size_t 
sbuild_id_size); +int filename__snprintf_build_id(const char *pathname, char *sbuild_id, size_t sbuild_id_size); char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, size_t size); @@ -35,13 +33,11 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size, bool is_debug, bool is_kallsyms); -int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, +int build_id__mark_dso_hit(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct evsel *evsel, struct machine *machine); -int dsos__hit_all(struct perf_session *session); - -int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event, +int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct evsel *evsel, struct machine *machine); diff --git a/tools/perf/util/c++/Build b/tools/perf/util/c++/Build deleted file mode 100644 index 613ecfd76527..000000000000 --- a/tools/perf/util/c++/Build +++ /dev/null @@ -1,2 +0,0 @@ -perf-$(CONFIG_CLANGLLVM) += clang.o -perf-$(CONFIG_CLANGLLVM) += clang-test.o diff --git a/tools/perf/util/c++/clang-c.h b/tools/perf/util/c++/clang-c.h deleted file mode 100644 index d3731a876b6c..000000000000 --- a/tools/perf/util/c++/clang-c.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef PERF_UTIL_CLANG_C_H -#define PERF_UTIL_CLANG_C_H - -#include <stddef.h> /* for size_t */ - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef HAVE_LIBCLANGLLVM_SUPPORT -extern void perf_clang__init(void); -extern void perf_clang__cleanup(void); - -struct test_suite; -extern int test__clang_to_IR(struct test_suite *test, int subtest); -extern int test__clang_to_obj(struct test_suite *test, int subtest); - -extern int perf_clang__compile_bpf(const char *filename, - void **p_obj_buf, - size_t *p_obj_buf_sz); -#else - -#include <errno.h> -#include <linux/compiler.h> /* for __maybe_unused */ - -static inline void perf_clang__init(void) { } -static inline void perf_clang__cleanup(void) { } - -static inline int -perf_clang__compile_bpf(const char *filename __maybe_unused, - void **p_obj_buf __maybe_unused, - size_t *p_obj_buf_sz __maybe_unused) -{ - return -ENOTSUP; -} - -#endif - -#ifdef __cplusplus -} -#endif -#endif diff --git a/tools/perf/util/c++/clang-test.cpp b/tools/perf/util/c++/clang-test.cpp deleted file mode 100644 index a4683ca53697..000000000000 --- a/tools/perf/util/c++/clang-test.cpp +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "clang.h" -#include "clang-c.h" -extern "C" { -#include "../util.h" -} -#include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" - -#include <tests/llvm.h> -#include <string> - -class perf_clang_scope { -public: - explicit perf_clang_scope() {perf_clang__init();} - ~perf_clang_scope() {perf_clang__cleanup();} -}; - -static std::unique_ptr<llvm::Module> -__test__clang_to_IR(void) -{ - unsigned int kernel_version; - - if (fetch_kernel_version(&kernel_version, NULL, 0)) - return std::unique_ptr<llvm::Module>(nullptr); - - std::string cflag_kver("-DLINUX_VERSION_CODE=" + - std::to_string(kernel_version)); - - std::unique_ptr<llvm::Module> M = - perf::getModuleFromSource({cflag_kver.c_str()}, - "perf-test.c", - test_llvm__bpf_base_prog); - return M; -} - -extern "C" { -int test__clang_to_IR(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ - perf_clang_scope _scope; - - auto M = 
__test__clang_to_IR(); - if (!M) - return -1; - for (llvm::Function& F : *M) - if (F.getName() == "bpf_func__SyS_epoll_pwait") - return 0; - return -1; -} - -int test__clang_to_obj(struct test_suite *test __maybe_unused, - int subtest __maybe_unused) -{ - perf_clang_scope _scope; - - auto M = __test__clang_to_IR(); - if (!M) - return -1; - - auto Buffer = perf::getBPFObjectFromModule(&*M); - if (!Buffer) - return -1; - return 0; -} - -} diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp deleted file mode 100644 index 1aad7d6d34aa..000000000000 --- a/tools/perf/util/c++/clang.cpp +++ /dev/null @@ -1,225 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * llvm C frontend for perf. Support dynamically compile C file - * - * Inspired by clang example code: - * http://llvm.org/svn/llvm-project/cfe/trunk/examples/clang-interpreter/main.cpp - * - * Copyright (C) 2016 Wang Nan <wangnan0@huawei.com> - * Copyright (C) 2016 Huawei Inc. - */ - -#include "clang/Basic/Version.h" -#include "clang/CodeGen/CodeGenAction.h" -#include "clang/Frontend/CompilerInvocation.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" -#include "clang/Tooling/Tooling.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/Module.h" -#include "llvm/Option/Option.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/ManagedStatic.h" -#if CLANG_VERSION_MAJOR >= 14 -#include "llvm/MC/TargetRegistry.h" -#else -#include "llvm/Support/TargetRegistry.h" -#endif -#include "llvm/Support/TargetSelect.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include <memory> - -#include "clang.h" -#include "clang-c.h" - -namespace perf { - -static std::unique_ptr<llvm::LLVMContext> LLVMCtx; - -using namespace clang; - -static CompilerInvocation * -createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path, - DiagnosticsEngine& Diags) -{ - llvm::opt::ArgStringList CCArgs { - "-cc1", - "-triple", "bpf-pc-linux", - "-fsyntax-only", - "-O2", - "-nostdsysteminc", - "-nobuiltininc", - "-vectorize-loops", - "-vectorize-slp", - "-Wno-unused-value", - "-Wno-pointer-sign", - "-x", "c"}; - - CCArgs.append(CFlags.begin(), CFlags.end()); - CompilerInvocation *CI = tooling::newInvocation(&Diags, CCArgs -#if CLANG_VERSION_MAJOR >= 11 - ,/*BinaryName=*/nullptr -#endif - ); - - FrontendOptions& Opts = CI->getFrontendOpts(); - Opts.Inputs.clear(); - Opts.Inputs.emplace_back(Path, - FrontendOptions::getInputKindForExtension("c")); - return CI; -} - -static std::unique_ptr<llvm::Module> -getModuleFromSource(llvm::opt::ArgStringList CFlags, - StringRef Path, IntrusiveRefCntPtr<vfs::FileSystem> VFS) -{ - CompilerInstance Clang; - Clang.createDiagnostics(); - -#if CLANG_VERSION_MAJOR < 9 - Clang.setVirtualFileSystem(&*VFS); -#else - Clang.createFileManager(&*VFS); -#endif - -#if CLANG_VERSION_MAJOR < 4 - IntrusiveRefCntPtr<CompilerInvocation> CI = - createCompilerInvocation(std::move(CFlags), Path, - Clang.getDiagnostics()); - Clang.setInvocation(&*CI); -#else - std::shared_ptr<CompilerInvocation> CI( - createCompilerInvocation(std::move(CFlags), Path, - Clang.getDiagnostics())); - Clang.setInvocation(CI); -#endif - - std::unique_ptr<CodeGenAction> Act(new EmitLLVMOnlyAction(&*LLVMCtx)); - if (!Clang.ExecuteAction(*Act)) - return std::unique_ptr<llvm::Module>(nullptr); - - return Act->takeModule(); -} - -std::unique_ptr<llvm::Module> -getModuleFromSource(llvm::opt::ArgStringList CFlags, - StringRef Name, StringRef Content) -{ - 
using namespace vfs; - - llvm::IntrusiveRefCntPtr<OverlayFileSystem> OverlayFS( - new OverlayFileSystem(getRealFileSystem())); - llvm::IntrusiveRefCntPtr<InMemoryFileSystem> MemFS( - new InMemoryFileSystem(true)); - - /* - * pushOverlay helps setting working dir for MemFS. Must call - * before addFile. - */ - OverlayFS->pushOverlay(MemFS); - MemFS->addFile(Twine(Name), 0, llvm::MemoryBuffer::getMemBuffer(Content)); - - return getModuleFromSource(std::move(CFlags), Name, OverlayFS); -} - -std::unique_ptr<llvm::Module> -getModuleFromSource(llvm::opt::ArgStringList CFlags, StringRef Path) -{ - IntrusiveRefCntPtr<vfs::FileSystem> VFS(vfs::getRealFileSystem()); - return getModuleFromSource(std::move(CFlags), Path, VFS); -} - -std::unique_ptr<llvm::SmallVectorImpl<char>> -getBPFObjectFromModule(llvm::Module *Module) -{ - using namespace llvm; - - std::string TargetTriple("bpf-pc-linux"); - std::string Error; - const Target* Target = TargetRegistry::lookupTarget(TargetTriple, Error); - if (!Target) { - llvm::errs() << Error; - return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr); - } - - llvm::TargetOptions Opt; - TargetMachine *TargetMachine = - Target->createTargetMachine(TargetTriple, - "generic", "", - Opt, Reloc::Static); - - Module->setDataLayout(TargetMachine->createDataLayout()); - Module->setTargetTriple(TargetTriple); - - std::unique_ptr<SmallVectorImpl<char>> Buffer(new SmallVector<char, 0>()); - raw_svector_ostream ostream(*Buffer); - - legacy::PassManager PM; - bool NotAdded; - NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream -#if CLANG_VERSION_MAJOR >= 7 - , /*DwoOut=*/nullptr -#endif -#if CLANG_VERSION_MAJOR < 10 - , TargetMachine::CGFT_ObjectFile -#else - , llvm::CGFT_ObjectFile -#endif - ); - if (NotAdded) { - llvm::errs() << "TargetMachine can't emit a file of this type\n"; - return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr); - } - PM.run(*Module); - - return Buffer; -} - -} - -extern "C" { -void perf_clang__init(void) -{ - perf::LLVMCtx.reset(new llvm::LLVMContext()); - LLVMInitializeBPFTargetInfo(); - LLVMInitializeBPFTarget(); - LLVMInitializeBPFTargetMC(); - LLVMInitializeBPFAsmPrinter(); -} - -void perf_clang__cleanup(void) -{ - perf::LLVMCtx.reset(nullptr); - llvm::llvm_shutdown(); -} - -int perf_clang__compile_bpf(const char *filename, - void **p_obj_buf, - size_t *p_obj_buf_sz) -{ - using namespace perf; - - if (!p_obj_buf || !p_obj_buf_sz) - return -EINVAL; - - llvm::opt::ArgStringList CFlags; - auto M = getModuleFromSource(std::move(CFlags), filename); - if (!M) - return -EINVAL; - auto O = getBPFObjectFromModule(&*M); - if (!O) - return -EINVAL; - - size_t size = O->size_in_bytes(); - void *buffer; - - buffer = malloc(size); - if (!buffer) - return -ENOMEM; - memcpy(buffer, O->data(), size); - *p_obj_buf = buffer; - *p_obj_buf_sz = size; - return 0; -} -} diff --git a/tools/perf/util/c++/clang.h b/tools/perf/util/c++/clang.h deleted file mode 100644 index 6ce33e22f23c..000000000000 --- a/tools/perf/util/c++/clang.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef PERF_UTIL_CLANG_H -#define PERF_UTIL_CLANG_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/Option/Option.h" -#include <memory> - -namespace perf { - -using namespace llvm; - -std::unique_ptr<Module> -getModuleFromSource(opt::ArgStringList CFlags, - StringRef Name, StringRef Content); - -std::unique_ptr<Module> -getModuleFromSource(opt::ArgStringList CFlags, - StringRef Path); - 
-std::unique_ptr<llvm::SmallVectorImpl<char>> -getBPFObjectFromModule(llvm::Module *Module); - -} -#endif diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index aee937d14fbb..428e5350d7a2 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -275,9 +275,13 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) if (tok) { unsigned long size; - size = strtoul(tok, &name, 0); - if (size < (unsigned) sysctl__max_stack()) - param->max_stack = size; + if (!strncmp(tok, "defer", sizeof("defer"))) { + param->defer = true; + } else { + size = strtoul(tok, &name, 0); + if (size < (unsigned) sysctl__max_stack()) + param->max_stack = size; + } } break; @@ -314,6 +318,12 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) } while (0); free(buf); + + if (param->defer && param->record_mode != CALLCHAIN_FP) { + pr_err("callchain: deferred callchain only works with FP\n"); + return -EINVAL; + } + return ret; } @@ -586,12 +596,10 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) call = zalloc(sizeof(*call)); if (!call) { perror("not enough memory for the code path tree"); - return -1; + return -ENOMEM; } call->ip = cursor_node->ip; - call->ms = cursor_node->ms; - call->ms.map = map__get(call->ms.map); - call->ms.maps = maps__get(call->ms.maps); + map_symbol__copy(&call->ms, &cursor_node->ms); call->srcline = cursor_node->srcline; if (cursor_node->branch) { @@ -602,7 +610,15 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) * branch_from is set with value somewhere else * to imply it's "to" of a branch. */ - call->brtype_stat.branch_to = true; + if (!call->brtype_stat) { + call->brtype_stat = zalloc(sizeof(*call->brtype_stat)); + if (!call->brtype_stat) { + perror("not enough memory for the code path branch statistics"); + zfree(&call->brtype_stat); + return -ENOMEM; + } + } + call->brtype_stat->branch_to = true; if (cursor_node->branch_flags.predicted) call->predicted_count = 1; @@ -610,7 +626,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) if (cursor_node->branch_flags.abort) call->abort_count = 1; - branch_type_count(&call->brtype_stat, + branch_type_count(call->brtype_stat, &cursor_node->branch_flags, cursor_node->branch_from, cursor_node->ip); @@ -618,7 +634,8 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) /* * It's "from" of a branch */ - call->brtype_stat.branch_to = false; + if (call->brtype_stat && call->brtype_stat->branch_to) + call->brtype_stat->branch_to = false; call->cycles_count = cursor_node->branch_flags.cycles; call->iter_count = cursor_node->nr_loop_iter; @@ -650,8 +667,8 @@ add_child(struct callchain_node *parent, list_for_each_entry_safe(call, tmp, &new->val, list) { list_del_init(&call->list); - map__zput(call->ms.map); - maps__zput(call->ms.maps); + map_symbol__exit(&call->ms); + zfree(&call->brtype_stat); free(call); } free(new); @@ -762,7 +779,14 @@ static enum match_result match_chain(struct callchain_cursor_node *node, /* * It's "to" of a branch */ - cnode->brtype_stat.branch_to = true; + if (!cnode->brtype_stat) { + cnode->brtype_stat = zalloc(sizeof(*cnode->brtype_stat)); + if (!cnode->brtype_stat) { + perror("not enough memory for the code path branch statistics"); + return MATCH_ERROR; + } + } + cnode->brtype_stat->branch_to = true; if (node->branch_flags.predicted) cnode->predicted_count++; @@ -770,7 +794,7 @@ static enum match_result match_chain(struct callchain_cursor_node 
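The new "defer" token rides in the same comma-separated record-option list as the stack-dump size, and the validation at the end of parse_callchain_record() rejects it for any mode other than frame pointers. A minimal sketch of the expected behaviour; the exact option string "fp,defer" is an assumption based on the tokenizing above:

	#include <assert.h>

	static void check_defer_option(void)
	{
		struct callchain_param param = {};

		/* any record mode other than FP now fails with -EINVAL */
		assert(parse_callchain_record("fp,defer", &param) == 0);
		assert(param.defer && param.record_mode == CALLCHAIN_FP);
	}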
*node, if (node->branch_flags.abort) cnode->abort_count++; - branch_type_count(&cnode->brtype_stat, + branch_type_count(cnode->brtype_stat, &node->branch_flags, node->branch_from, node->ip); @@ -778,7 +802,8 @@ static enum match_result match_chain(struct callchain_cursor_node *node, /* * It's "from" of a branch */ - cnode->brtype_stat.branch_to = false; + if (cnode->brtype_stat && cnode->brtype_stat->branch_to) + cnode->brtype_stat->branch_to = false; cnode->cycles_count += node->branch_flags.cycles; cnode->iter_count += node->nr_loop_iter; cnode->iter_cycles += node->iter_cycles; @@ -1022,10 +1047,9 @@ merge_chain_branch(struct callchain_cursor *cursor, }; callchain_cursor_append(cursor, list->ip, &ms, false, NULL, 0, 0, 0, list->srcline); list_del_init(&list->list); - map__zput(ms.map); - maps__zput(ms.maps); - map__zput(list->ms.map); - maps__zput(list->ms.maps); + map_symbol__exit(&ms); + map_symbol__exit(&list->ms); + zfree(&list->brtype_stat); free(list); } @@ -1077,11 +1101,8 @@ int callchain_cursor_append(struct callchain_cursor *cursor, } node->ip = ip; - maps__zput(node->ms.maps); - map__zput(node->ms.map); - node->ms = *ms; - node->ms.maps = maps__get(ms->maps); - node->ms.map = map__get(ms->map); + map_symbol__exit(&node->ms); + map_symbol__copy(&node->ms, ms); node->branch = branch; node->nr_loop_iter = nr_loop_iter; node->iter_cycles = iter_cycles; @@ -1126,7 +1147,7 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, bool hide_unresolved) { - struct machine *machine = maps__machine(node->ms.maps); + struct machine *machine = node->ms.maps ? maps__machine(node->ms.maps) : NULL; maps__put(al->maps); al->maps = maps__get(node->ms.maps); @@ -1142,7 +1163,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * if (al->map == NULL) goto out; } - if (RC_CHK_ACCESS(al->maps) == RC_CHK_ACCESS(machine__kernel_maps(machine))) { + if (maps__equal(al->maps, machine__kernel_maps(machine))) { if (machine__is_host(machine)) { al->cpumode = PERF_RECORD_MISC_KERNEL; al->level = 'k'; @@ -1190,7 +1211,7 @@ char *callchain_list__sym_name(struct callchain_list *cl, if (show_dso) scnprintf(bf + printed, bfsize - printed, " %s", cl->ms.map ? 
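map_symbol__copy() and map_symbol__exit() centralize the reference juggling that was previously open-coded at every call site. The copy helper presumably amounts to the sequence it replaces; this is a sketch inferred from the removed lines, not the helper's actual source:

	static void map_symbol__copy_sketch(struct map_symbol *dst,
					    const struct map_symbol *src)
	{
		/* Take a reference on each counted member before copying. */
		dst->maps = maps__get(src->maps);
		dst->map = map__get(src->map);
		dst->sym = src->sym;
	}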
- map__dso(cl->ms.map)->short_name : + dso__short_name(map__dso(cl->ms.map)) : "unknown"); return bf; @@ -1339,7 +1360,7 @@ static int count_float_printf(int idx, const char *str, float value, static int branch_to_str(char *bf, int bfsize, u64 branch_count, u64 predicted_count, u64 abort_count, - struct branch_type_stat *brtype_stat) + const struct branch_type_stat *brtype_stat) { int printed, i = 0; @@ -1403,7 +1424,7 @@ static int counts_str_build(char *bf, int bfsize, u64 abort_count, u64 cycles_count, u64 iter_count, u64 iter_cycles, u64 from_count, - struct branch_type_stat *brtype_stat) + const struct branch_type_stat *brtype_stat) { int printed; @@ -1430,7 +1451,7 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, u64 abort_count, u64 cycles_count, u64 iter_count, u64 iter_cycles, u64 from_count, - struct branch_type_stat *brtype_stat) + const struct branch_type_stat *brtype_stat) { char str[256]; @@ -1447,11 +1468,14 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, int callchain_list_counts__printf_value(struct callchain_list *clist, FILE *fp, char *bf, int bfsize) { + static const struct branch_type_stat empty_brtype_stat = {}; + const struct branch_type_stat *brtype_stat; u64 branch_count, predicted_count; u64 abort_count, cycles_count; u64 iter_count, iter_cycles; u64 from_count; + brtype_stat = clist->brtype_stat ?: &empty_brtype_stat; branch_count = clist->branch_count; predicted_count = clist->predicted_count; abort_count = clist->abort_count; @@ -1463,7 +1487,7 @@ int callchain_list_counts__printf_value(struct callchain_list *clist, return callchain_counts_printf(fp, bf, bfsize, branch_count, predicted_count, abort_count, cycles_count, iter_count, iter_cycles, - from_count, &clist->brtype_stat); + from_count, brtype_stat); } static void free_callchain_node(struct callchain_node *node) @@ -1474,15 +1498,15 @@ static void free_callchain_node(struct callchain_node *node) list_for_each_entry_safe(list, tmp, &node->parent_val, list) { list_del_init(&list->list); - map__zput(list->ms.map); - maps__zput(list->ms.maps); + map_symbol__exit(&list->ms); + zfree(&list->brtype_stat); free(list); } list_for_each_entry_safe(list, tmp, &node->val, list) { list_del_init(&list->list); - map__zput(list->ms.map); - maps__zput(list->ms.maps); + map_symbol__exit(&list->ms); + zfree(&list->brtype_stat); free(list); } @@ -1546,7 +1570,7 @@ int callchain_node__make_parent_list(struct callchain_node *node) goto out; *new = *chain; new->has_children = false; - new->ms.map = map__get(new->ms.map); + map_symbol__copy(&new->ms, &chain->ms); list_add_tail(&new->list, &head); } parent = parent->parent; @@ -1567,8 +1591,8 @@ int callchain_node__make_parent_list(struct callchain_node *node) out: list_for_each_entry_safe(chain, new, &head, list) { list_del_init(&chain->list); - map__zput(chain->ms.map); - maps__zput(chain->ms.maps); + map_symbol__exit(&chain->ms); + zfree(&chain->brtype_stat); free(chain); } return -ENOMEM; @@ -1651,10 +1675,8 @@ void callchain_cursor_reset(struct callchain_cursor *cursor) cursor->nr = 0; cursor->last = &cursor->first; - for (node = cursor->first; node != NULL; node = node->next) { - map__zput(node->ms.map); - maps__zput(node->ms.maps); - } + for (node = cursor->first; node != NULL; node = node->next) + map_symbol__exit(&node->ms); } void callchain_param_setup(u64 sample_type, const char *arch) @@ -1781,3 +1803,73 @@ s64 callchain_avg_cycles(struct callchain_node *cnode) return cycles; } + +int sample__for_each_callchain_node(struct 
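Turning brtype_stat from an embedded struct into a pointer makes non-branch callchain entries cheaper, and the print path substitutes a shared zeroed instance rather than testing for NULL at every use. The pattern in isolation, mirroring what callchain_list_counts__printf_value() does above (the wrapper name is hypothetical):

	static const struct branch_type_stat empty_brtype_stat = {};

	static const struct branch_type_stat *
	brtype_stat_or_empty(const struct callchain_list *cl)
	{
		/* entries that carried no branch data share one zeroed copy */
		return cl->brtype_stat ?: &empty_brtype_stat;
	}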
thread *thread, struct evsel *evsel, + struct perf_sample *sample, int max_stack, + bool symbols, callchain_iter_fn cb, void *data) +{ + struct callchain_cursor *cursor = get_tls_callchain_cursor(); + int ret; + + if (!cursor) + return -ENOMEM; + + /* Fill in the callchain. */ + ret = __thread__resolve_callchain(thread, cursor, evsel, sample, + /*parent=*/NULL, /*root_al=*/NULL, + max_stack, symbols); + if (ret) + return ret; + + /* Switch from writing the callchain to reading it. */ + callchain_cursor_commit(cursor); + + while (1) { + struct callchain_cursor_node *node = callchain_cursor_current(cursor); + + if (!node) + break; + + ret = cb(node, data); + if (ret) + return ret; + + callchain_cursor_advance(cursor); + } + return 0; +} + +/* + * This function merges earlier samples (@sample_orig) waiting for deferred + * user callchains with the matching callchain record (@sample_callchain) + * which is delivered now. The @sample_orig->callchain should be released + * after use if ->deferred_callchain is set. + */ +int sample__merge_deferred_callchain(struct perf_sample *sample_orig, + struct perf_sample *sample_callchain) +{ + u64 nr_orig = sample_orig->callchain->nr - 1; + u64 nr_deferred = sample_callchain->callchain->nr; + struct ip_callchain *callchain; + + if (sample_orig->callchain->nr < 2) { + sample_orig->deferred_callchain = false; + return -EINVAL; + } + + callchain = calloc(1 + nr_orig + nr_deferred, sizeof(u64)); + if (callchain == NULL) { + sample_orig->deferred_callchain = false; + return -ENOMEM; + } + + callchain->nr = nr_orig + nr_deferred; + /* copy original including PERF_CONTEXT_USER_DEFERRED (but the cookie) */ + memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64)); + /* copy deferred user callchains */ + memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips, + nr_deferred * sizeof(u64)); + + sample_orig->callchain = callchain; + return 0; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index d2618a47deca..2a52af8c80ac 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -98,6 +98,7 @@ extern bool dwarf_callchain_users; struct callchain_param { bool enabled; + bool defer; enum perf_call_graph_mode record_mode; u32 dump_size; enum chain_mode mode; @@ -116,22 +117,22 @@ extern struct callchain_param callchain_param; extern struct callchain_param callchain_param_default; struct callchain_list { + struct list_head list; u64 ip; struct map_symbol ms; - struct /* for TUI */ { - bool unfolded; - bool has_children; - }; + const char *srcline; u64 branch_count; u64 from_count; - u64 predicted_count; - u64 abort_count; u64 cycles_count; u64 iter_count; u64 iter_cycles; - struct branch_type_stat brtype_stat; - const char *srcline; - struct list_head list; + struct branch_type_stat *brtype_stat; + u64 predicted_count; + u64 abort_count; + struct /* for TUI */ { + bool unfolded; + bool has_children; + }; }; /* @@ -311,4 +312,13 @@ u64 callchain_total_hits(struct hists *hists); s64 callchain_avg_cycles(struct callchain_node *cnode); +typedef int (*callchain_iter_fn)(struct callchain_cursor_node *node, void *data); + +int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel, + struct perf_sample *sample, int max_stack, + bool symbols, callchain_iter_fn cb, void *data); + +int sample__merge_deferred_callchain(struct perf_sample *sample_orig, + struct perf_sample *sample_callchain); + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/cap.c b/tools/perf/util/cap.c index 
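A worked size computation for the merge above, assuming an original callchain where the PERF_CONTEXT_USER_DEFERRED marker precedes a trailing cookie entry (the exact entry layout is an assumption based on the comments in the function):

	/*
	 * orig->callchain:    nr = 3, ips = { ip0, PERF_CONTEXT_USER_DEFERRED, cookie }
	 * deferred callchain: nr = 2, ips = { u0, u1 }
	 *
	 * nr_orig     = 3 - 1 = 2          (trailing cookie dropped)
	 * nr_deferred = 2
	 * calloc(1 + 2 + 2, sizeof(u64))   (one u64 for ->nr, four for ->ips)
	 *
	 * merged:     nr = 4, ips = { ip0, PERF_CONTEXT_USER_DEFERRED, u0, u1 }
	 */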
c3ba841bbf37..24a0ea7e6d97 100644 --- a/tools/perf/util/cap.c +++ b/tools/perf/util/cap.c @@ -3,27 +3,47 @@ * Capability utilities */ -#ifdef HAVE_LIBCAP_SUPPORT - #include "cap.h" -#include <stdbool.h> -#include <sys/capability.h> - -bool perf_cap__capable(cap_value_t cap) -{ - cap_flag_value_t val; - cap_t caps = cap_get_proc(); - - if (!caps) - return false; +#include "debug.h" +#include <errno.h> +#include <string.h> +#include <sys/syscall.h> +#include <unistd.h> - if (cap_get_flag(caps, cap, CAP_EFFECTIVE, &val) != 0) - val = CAP_CLEAR; +#define MAX_LINUX_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 - if (cap_free(caps) != 0) - return false; - - return val == CAP_SET; +bool perf_cap__capable(int cap, bool *used_root) +{ + struct __user_cap_header_struct header = { + .version = _LINUX_CAPABILITY_VERSION_3, + .pid = 0, + }; + struct __user_cap_data_struct data[MAX_LINUX_CAPABILITY_U32S] = {}; + __u32 cap_val; + + *used_root = false; + while (syscall(SYS_capget, &header, &data[0]) == -1) { + /* Retry, first attempt has set the header.version correctly. */ + if (errno == EINVAL && header.version != _LINUX_CAPABILITY_VERSION_3 && + header.version == _LINUX_CAPABILITY_VERSION_1) + continue; + + pr_debug2("capget syscall failed (%s - %d) fall back on root check\n", + strerror(errno), errno); + *used_root = true; + return geteuid() == 0; + } + + /* Extract the relevant capability bit. */ + if (cap >= 32) { + if (header.version == _LINUX_CAPABILITY_VERSION_3) { + cap_val = data[1].effective; + } else { + /* Capability beyond 32 is requested but only 32 are supported. */ + return false; + } + } else { + cap_val = data[0].effective; + } + return (cap_val & (1 << (cap & 0x1f))) != 0; } - -#endif /* HAVE_LIBCAP_SUPPORT */ diff --git a/tools/perf/util/cap.h b/tools/perf/util/cap.h index ae52878c0b2e..c1b8ac033ccc 100644 --- a/tools/perf/util/cap.h +++ b/tools/perf/util/cap.h @@ -4,25 +4,6 @@ #include <stdbool.h> #include <linux/capability.h> -#include <linux/compiler.h> - -#ifdef HAVE_LIBCAP_SUPPORT - -#include <sys/capability.h> - -bool perf_cap__capable(cap_value_t cap); - -#else - -#include <unistd.h> -#include <sys/types.h> - -static inline bool perf_cap__capable(int cap __maybe_unused) -{ - return geteuid() == 0; -} - -#endif /* HAVE_LIBCAP_SUPPORT */ /* For older systems */ #ifndef CAP_SYSLOG @@ -33,4 +14,11 @@ static inline bool perf_cap__capable(int cap __maybe_unused) #define CAP_PERFMON 38 #endif +#ifndef CAP_BPF +#define CAP_BPF 39 +#endif + +/* Query if a capability is supported, used_root is set if the fallback root check was used. 
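Callers of the reworked helper can now tell whether the answer came from capget(2) or from the euid fallback. A minimal sketch of a caller, using the CAP_PERFMON constant that cap.h defines for older headers:

	static bool can_profile(void)
	{
		bool used_root;
		bool ok = perf_cap__capable(CAP_PERFMON, &used_root);

		/* CAP_PERFMON is capability 38, i.e. bit 6 (38 & 0x1f) of
		 * data[1].effective under _LINUX_CAPABILITY_VERSION_3,
		 * per the indexing above. */
		if (!ok && used_root)
			pr_debug("capget unavailable, denied because euid != 0\n");
		return ok;
	}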
*/ +bool perf_cap__capable(int cap, bool *used_root); + #endif /* __PERF_CAP_H */ diff --git a/tools/perf/util/capstone.c b/tools/perf/util/capstone.c new file mode 100644 index 000000000000..be5fd44b1f9d --- /dev/null +++ b/tools/perf/util/capstone.c @@ -0,0 +1,471 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "capstone.h" +#include "annotate.h" +#include "addr_location.h" +#include "debug.h" +#include "disasm.h" +#include "dso.h" +#include "machine.h" +#include "map.h" +#include "namespaces.h" +#include "print_insn.h" +#include "symbol.h" +#include "thread.h" +#include <errno.h> +#include <fcntl.h> +#include <string.h> + +#ifdef HAVE_LIBCAPSTONE_SUPPORT +#include <capstone/capstone.h> +#endif + +#ifdef HAVE_LIBCAPSTONE_SUPPORT +static int capstone_init(struct machine *machine, csh *cs_handle, bool is64, + bool disassembler_style) +{ + cs_arch arch; + cs_mode mode; + + if (machine__is(machine, "x86_64") && is64) { + arch = CS_ARCH_X86; + mode = CS_MODE_64; + } else if (machine__normalized_is(machine, "x86")) { + arch = CS_ARCH_X86; + mode = CS_MODE_32; + } else if (machine__normalized_is(machine, "arm64")) { + arch = CS_ARCH_ARM64; + mode = CS_MODE_ARM; + } else if (machine__normalized_is(machine, "arm")) { + arch = CS_ARCH_ARM; + mode = CS_MODE_ARM + CS_MODE_V8; + } else if (machine__normalized_is(machine, "s390")) { + arch = CS_ARCH_SYSZ; + mode = CS_MODE_BIG_ENDIAN; + } else { + return -1; + } + + if (cs_open(arch, mode, cs_handle) != CS_ERR_OK) { + pr_warning_once("cs_open failed\n"); + return -1; + } + + if (machine__normalized_is(machine, "x86")) { + /* + * In case of using capstone_init while symbol__disassemble + * setting CS_OPT_SYNTAX_ATT depends if disassembler_style opts + * is set via annotation args + */ + if (disassembler_style) + cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); + /* + * Resolving address operands to symbols is implemented + * on x86 by investigating instruction details. + */ + cs_option(*cs_handle, CS_OPT_DETAIL, CS_OPT_ON); + } + + return 0; +} +#endif + +#ifdef HAVE_LIBCAPSTONE_SUPPORT +static size_t print_insn_x86(struct thread *thread, u8 cpumode, cs_insn *insn, + int print_opts, FILE *fp) +{ + struct addr_location al; + size_t printed = 0; + + if (insn->detail && insn->detail->x86.op_count == 1) { + cs_x86_op *op = &insn->detail->x86.operands[0]; + + addr_location__init(&al); + if (op->type == X86_OP_IMM && + thread__find_symbol(thread, cpumode, op->imm, &al)) { + printed += fprintf(fp, "%s ", insn[0].mnemonic); + printed += symbol__fprintf_symname_offs(al.sym, &al, fp); + if (print_opts & PRINT_INSN_IMM_HEX) + printed += fprintf(fp, " [%#" PRIx64 "]", op->imm); + addr_location__exit(&al); + return printed; + } + addr_location__exit(&al); + } + + printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str); + return printed; +} +#endif + + +ssize_t capstone__fprintf_insn_asm(struct machine *machine __maybe_unused, + struct thread *thread __maybe_unused, + u8 cpumode __maybe_unused, bool is64bit __maybe_unused, + const uint8_t *code __maybe_unused, + size_t code_size __maybe_unused, + uint64_t ip __maybe_unused, int *lenp __maybe_unused, + int print_opts __maybe_unused, FILE *fp __maybe_unused) +{ +#ifdef HAVE_LIBCAPSTONE_SUPPORT + size_t printed; + cs_insn *insn; + csh cs_handle; + size_t count; + int ret; + + /* TODO: Try to initiate capstone only once but need a proper place. 
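For readers unfamiliar with the library, the raw libcapstone sequence that capstone_init() and its callers wrap is small. A self-contained x86-64 illustration (the byte pattern is chosen arbitrarily for the example):

	#include <capstone/capstone.h>
	#include <inttypes.h>
	#include <stdio.h>

	int main(void)
	{
		/* 48 89 e5 = mov %rsp,%rbp ; c3 = ret */
		const uint8_t code[] = { 0x48, 0x89, 0xe5, 0xc3 };
		csh handle;
		cs_insn *insn;
		size_t count;

		if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle) != CS_ERR_OK)
			return 1;
		/* AT&T syntax, matching perf's default disassembler style */
		cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);

		count = cs_disasm(handle, code, sizeof(code), /*ip=*/0x1000,
				  /*count=*/0, &insn);
		for (size_t i = 0; i < count; i++)
			printf("%#" PRIx64 ":\t%s\t%s\n", insn[i].address,
			       insn[i].mnemonic, insn[i].op_str);

		cs_free(insn, count);
		cs_close(&handle);
		return 0;
	}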
*/ + ret = capstone_init(machine, &cs_handle, is64bit, true); + if (ret < 0) + return ret; + + count = cs_disasm(cs_handle, code, code_size, ip, 1, &insn); + if (count > 0) { + if (machine__normalized_is(machine, "x86")) + printed = print_insn_x86(thread, cpumode, &insn[0], print_opts, fp); + else + printed = fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str); + if (lenp) + *lenp = insn->size; + cs_free(insn, count); + } else { + printed = -1; + } + + cs_close(&cs_handle); + return printed; +#else + return -1; +#endif +} + +#ifdef HAVE_LIBCAPSTONE_SUPPORT +static void print_capstone_detail(cs_insn *insn, char *buf, size_t len, + struct annotate_args *args, u64 addr) +{ + int i; + struct map *map = args->ms.map; + struct symbol *sym; + + /* TODO: support more architectures */ + if (!arch__is(args->arch, "x86")) + return; + + if (insn->detail == NULL) + return; + + for (i = 0; i < insn->detail->x86.op_count; i++) { + cs_x86_op *op = &insn->detail->x86.operands[i]; + u64 orig_addr; + + if (op->type != X86_OP_MEM) + continue; + + /* only print RIP-based global symbols for now */ + if (op->mem.base != X86_REG_RIP) + continue; + + /* get the target address */ + orig_addr = addr + insn->size + op->mem.disp; + addr = map__objdump_2mem(map, orig_addr); + + if (dso__kernel(map__dso(map))) { + /* + * The kernel maps can be split into sections, let's + * find the map first and the search the symbol. + */ + map = maps__find(map__kmaps(map), addr); + if (map == NULL) + continue; + } + + /* convert it to map-relative address for search */ + addr = map__map_ip(map, addr); + + sym = map__find_symbol(map, addr); + if (sym == NULL) + continue; + + if (addr == sym->start) { + scnprintf(buf, len, "\t# %"PRIx64" <%s>", + orig_addr, sym->name); + } else { + scnprintf(buf, len, "\t# %"PRIx64" <%s+%#"PRIx64">", + orig_addr, sym->name, addr - sym->start); + } + break; + } +} +#endif + +#ifdef HAVE_LIBCAPSTONE_SUPPORT +struct find_file_offset_data { + u64 ip; + u64 offset; +}; + +/* This will be called for each PHDR in an ELF binary */ +static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg) +{ + struct find_file_offset_data *data = arg; + + if (start <= data->ip && data->ip < start + len) { + data->offset = pgoff + data->ip - start; + return 1; + } + return 0; +} +#endif + +int symbol__disassemble_capstone(const char *filename __maybe_unused, + struct symbol *sym __maybe_unused, + struct annotate_args *args __maybe_unused) +{ +#ifdef HAVE_LIBCAPSTONE_SUPPORT + struct annotation *notes = symbol__annotation(sym); + struct map *map = args->ms.map; + struct dso *dso = map__dso(map); + u64 start = map__rip_2objdump(map, sym->start); + u64 offset; + int i, count, free_count; + bool is_64bit = false; + bool needs_cs_close = false; + /* Malloc-ed buffer containing instructions read from disk. */ + u8 *code_buf = NULL; + /* Pointer to code to be disassembled. 
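The target of a RIP-relative memory operand is computed against the address of the *next* instruction, which is why print_capstone_detail() adds insn->size before the displacement. A worked example:

	/*
	 * 0x1000: mov 0x2f21(%rip),%rax    ; 7-byte instruction, disp = 0x2f21
	 *
	 *   orig_addr = addr + insn->size + op->mem.disp
	 *             = 0x1000 + 7 + 0x2f21
	 *             = 0x3f28
	 */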
*/ + const u8 *buf; + u64 buf_len; + csh handle; + cs_insn *insn = NULL; + char disasm_buf[512]; + struct disasm_line *dl; + bool disassembler_style = false; + + if (args->options->objdump_path) + return -1; + + buf = dso__read_symbol(dso, filename, map, sym, + &code_buf, &buf_len, &is_64bit); + if (buf == NULL) + return errno; + + /* add the function address and name */ + scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", + start, sym->name); + + args->offset = -1; + args->line = disasm_buf; + args->line_nr = 0; + args->fileloc = NULL; + args->ms.sym = sym; + + dl = disasm_line__new(args); + if (dl == NULL) + goto err; + + annotation_line__add(&dl->al, ¬es->src->source); + + if (!args->options->disassembler_style || + !strcmp(args->options->disassembler_style, "att")) + disassembler_style = true; + + if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0) + goto err; + + needs_cs_close = true; + + free_count = count = cs_disasm(handle, buf, buf_len, start, buf_len, &insn); + for (i = 0, offset = 0; i < count; i++) { + int printed; + + printed = scnprintf(disasm_buf, sizeof(disasm_buf), + " %-7s %s", + insn[i].mnemonic, insn[i].op_str); + print_capstone_detail(&insn[i], disasm_buf + printed, + sizeof(disasm_buf) - printed, args, + start + offset); + + args->offset = offset; + args->line = disasm_buf; + + dl = disasm_line__new(args); + if (dl == NULL) + goto err; + + annotation_line__add(&dl->al, ¬es->src->source); + + offset += insn[i].size; + } + + /* It failed in the middle: probably due to unknown instructions */ + if (offset != buf_len) { + struct list_head *list = ¬es->src->source; + + /* Discard all lines and fallback to objdump */ + while (!list_empty(list)) { + dl = list_first_entry(list, struct disasm_line, al.node); + + list_del_init(&dl->al.node); + disasm_line__free(dl); + } + count = -1; + } + +out: + if (needs_cs_close) { + cs_close(&handle); + if (free_count > 0) + cs_free(insn, free_count); + } + free(code_buf); + return count < 0 ? count : 0; + +err: + if (needs_cs_close) { + struct disasm_line *tmp; + + /* + * It probably failed in the middle of the above loop. + * Release any resources it might add. 
+ */ + list_for_each_entry_safe(dl, tmp, ¬es->src->source, al.node) { + list_del(&dl->al.node); + disasm_line__free(dl); + } + } + count = -1; + goto out; +#else + return -1; +#endif +} + +int symbol__disassemble_capstone_powerpc(const char *filename __maybe_unused, + struct symbol *sym __maybe_unused, + struct annotate_args *args __maybe_unused) +{ +#ifdef HAVE_LIBCAPSTONE_SUPPORT + struct annotation *notes = symbol__annotation(sym); + struct map *map = args->ms.map; + struct dso *dso = map__dso(map); + struct nscookie nsc; + u64 start = map__rip_2objdump(map, sym->start); + u64 end = map__rip_2objdump(map, sym->end); + u64 len = end - start; + u64 offset; + int i, fd, count; + bool is_64bit = false; + bool needs_cs_close = false; + u8 *buf = NULL; + struct find_file_offset_data data = { + .ip = start, + }; + csh handle; + char disasm_buf[512]; + struct disasm_line *dl; + u32 *line; + bool disassembler_style = false; + + if (args->options->objdump_path) + return -1; + + nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); + fd = open(filename, O_RDONLY); + nsinfo__mountns_exit(&nsc); + if (fd < 0) + return -1; + + if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data, + &is_64bit) == 0) + goto err; + + if (!args->options->disassembler_style || + !strcmp(args->options->disassembler_style, "att")) + disassembler_style = true; + + if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0) + goto err; + + needs_cs_close = true; + + buf = malloc(len); + if (buf == NULL) + goto err; + + count = pread(fd, buf, len, data.offset); + close(fd); + fd = -1; + + if ((u64)count != len) + goto err; + + line = (u32 *)buf; + + /* add the function address and name */ + scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", + start, sym->name); + + args->offset = -1; + args->line = disasm_buf; + args->line_nr = 0; + args->fileloc = NULL; + args->ms.sym = sym; + + dl = disasm_line__new(args); + if (dl == NULL) + goto err; + + annotation_line__add(&dl->al, ¬es->src->source); + + /* + * TODO: enable disassm for powerpc + * count = cs_disasm(handle, buf, len, start, len, &insn); + * + * For now, only binary code is saved in disassembled line + * to be used in "type" and "typeoff" sort keys. Each raw code + * is 32 bit instruction. So use "len/4" to get the number of + * entries. + */ + count = len/4; + + for (i = 0, offset = 0; i < count; i++) { + args->offset = offset; + sprintf(args->line, "%x", line[i]); + + dl = disasm_line__new(args); + if (dl == NULL) + break; + + annotation_line__add(&dl->al, ¬es->src->source); + + offset += 4; + } + + /* It failed in the middle */ + if (offset != len) { + struct list_head *list = ¬es->src->source; + + /* Discard all lines and fallback to objdump */ + while (!list_empty(list)) { + dl = list_first_entry(list, struct disasm_line, al.node); + + list_del_init(&dl->al.node); + disasm_line__free(dl); + } + count = -1; + } + +out: + if (needs_cs_close) + cs_close(&handle); + free(buf); + return count < 0 ? 
count : 0; + +err: + if (fd >= 0) + close(fd); + count = -1; + goto out; +#else + return -1; +#endif +} diff --git a/tools/perf/util/capstone.h b/tools/perf/util/capstone.h new file mode 100644 index 000000000000..0f030ea034b6 --- /dev/null +++ b/tools/perf/util/capstone.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_CAPSTONE_H +#define __PERF_CAPSTONE_H + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <linux/types.h> + +struct annotate_args; +struct machine; +struct symbol; +struct thread; + +ssize_t capstone__fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpumode, + bool is64bit, const uint8_t *code, size_t code_size, + uint64_t ip, int *lenp, int print_opts, FILE *fp); +int symbol__disassemble_capstone(const char *filename, struct symbol *sym, + struct annotate_args *args); +int symbol__disassemble_capstone_powerpc(const char *filename, struct symbol *sym, + struct annotate_args *args); + +#endif /* __PERF_CAPSTONE_H */ diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index bfb13306d82c..040eb75f0804 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -10,6 +10,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <sys/statfs.h> +#include <errno.h> #include <fcntl.h> #include <stdlib.h> #include <string.h> @@ -48,28 +49,36 @@ static int open_cgroup(const char *name) } #ifdef HAVE_FILE_HANDLE -int read_cgroup_id(struct cgroup *cgrp) +static u64 __read_cgroup_id(const char *path) { - char path[PATH_MAX + 1]; - char mnt[PATH_MAX + 1]; struct { struct file_handle fh; uint64_t cgroup_id; } handle; int mount_id; + handle.fh.handle_bytes = sizeof(handle.cgroup_id); + if (name_to_handle_at(AT_FDCWD, path, &handle.fh, &mount_id, 0) < 0) + return -1ULL; + + return handle.cgroup_id; +} + +int read_cgroup_id(struct cgroup *cgrp) +{ + char path[PATH_MAX + 1]; + char mnt[PATH_MAX + 1]; + if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event")) return -1; scnprintf(path, PATH_MAX, "%s/%s", mnt, cgrp->name); - handle.fh.handle_bytes = sizeof(handle.cgroup_id); - if (name_to_handle_at(AT_FDCWD, path, &handle.fh, &mount_id, 0) < 0) - return -1; - - cgrp->id = handle.cgroup_id; + cgrp->id = __read_cgroup_id(path); return 0; } +#else +static inline u64 __read_cgroup_id(const char *path __maybe_unused) { return -1ULL; } #endif /* HAVE_FILE_HANDLE */ #ifndef CGROUP2_SUPER_MAGIC @@ -106,7 +115,7 @@ static struct cgroup *evlist__find_cgroup(struct evlist *evlist, const char *str return NULL; } -static struct cgroup *cgroup__new(const char *name, bool do_open) +struct cgroup *cgroup__new(const char *name, bool do_open) { struct cgroup *cgroup = zalloc(sizeof(*cgroup)); @@ -405,8 +414,7 @@ static bool has_pattern_string(const char *str) return !!strpbrk(str, "{}[]()|*+?^$"); } -int evlist__expand_cgroup(struct evlist *evlist, const char *str, - struct rblist *metric_events, bool open_cgroup) +int evlist__expand_cgroup(struct evlist *evlist, const char *str, bool open_cgroup) { struct evlist *orig_list, *tmp_list; struct evsel *pos, *evsel, *leader; @@ -432,12 +440,8 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, evlist__splice_list_tail(orig_list, &evlist->core.entries); evlist->core.nr_entries = 0; - if (metric_events) { - orig_metric_events = *metric_events; - rblist__init(metric_events); - } else { - rblist__init(&orig_metric_events); - } + orig_metric_events = evlist->metric_events; + metricgroup__rblist_init(&evlist->metric_events); if 
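The cgroup id that __read_cgroup_id() extracts is simply the 64-bit value the kernel encodes into the file handle for a cgroup directory, so the same lookup works outside perf too. A minimal standalone sketch (the path is only an example):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdint.h>

	static uint64_t cgroup_id(const char *path)
	{
		struct {
			struct file_handle fh;
			uint64_t cgroup_id;
		} handle = { .fh.handle_bytes = sizeof(uint64_t) };
		int mount_id;

		if (name_to_handle_at(AT_FDCWD, path, &handle.fh, &mount_id, 0) < 0)
			return UINT64_MAX;	/* mirrors the -1ULL sentinel above */
		return handle.cgroup_id;
	}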
(has_pattern_string(str)) prefix_len = match_cgroups(str); @@ -457,13 +461,15 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, name = cn->name + prefix_len; if (name[0] == '/' && name[1]) name++; + + /* the cgroup can go away in the meantime */ cgrp = cgroup__new(name, open_cgroup); if (cgrp == NULL) - goto out_err; + continue; leader = NULL; evlist__for_each_entry(orig_list, pos) { - evsel = evsel__clone(pos); + evsel = evsel__clone(/*dest=*/NULL, pos); if (evsel == NULL) goto out_err; @@ -480,12 +486,10 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, cgroup__put(cgrp); nr_cgroups++; - if (metric_events) { - if (metricgroup__copy_metric_events(tmp_list, cgrp, - metric_events, - &orig_metric_events) < 0) - goto out_err; - } + if (metricgroup__copy_metric_events(tmp_list, cgrp, + &evlist->metric_events, + &orig_metric_events) < 0) + goto out_err; evlist__splice_list_tail(evlist, &tmp_list->core.entries); tmp_list->core.nr_entries = 0; @@ -502,7 +506,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, out_err: evlist__delete(orig_list); evlist__delete(tmp_list); - rblist__exit(&orig_metric_events); + metricgroup__rblist_exit(&orig_metric_events); release_cgroup_list(); return ret; @@ -562,6 +566,11 @@ struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id, return cgrp; } +struct cgroup *__cgroup__find(struct rb_root *root, uint64_t id) +{ + return __cgroup__findnew(root, id, /*create=*/false, /*path=*/NULL); +} + struct cgroup *cgroup__find(struct perf_env *env, uint64_t id) { struct cgroup *cgrp; @@ -587,3 +596,35 @@ void perf_env__purge_cgroups(struct perf_env *env) } up_write(&env->cgroups.lock); } + +void read_all_cgroups(struct rb_root *root) +{ + char mnt[PATH_MAX]; + struct cgroup_name *cn; + int prefix_len; + + if (cgroupfs_find_mountpoint(mnt, sizeof(mnt), "perf_event")) + return; + + /* cgroup_name will have a full path, skip the root directory */ + prefix_len = strlen(mnt); + + /* collect all cgroups in the cgroup_list */ + if (nftw(mnt, add_cgroup_name, 20, 0) < 0) + return; + + list_for_each_entry(cn, &cgroup_list, list) { + const char *name; + u64 cgrp_id; + + /* cgroup_name might have a full path, skip the prefix */ + name = cn->name + prefix_len; + if (name[0] == '\0') + name = "/"; + + cgrp_id = __read_cgroup_id(cn->name); + __cgroup__findnew(root, cgrp_id, /*create=*/true, name); + } + + release_cgroup_list(); +} diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index 12256b78608c..7b1bda22878c 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -26,9 +26,9 @@ void cgroup__put(struct cgroup *cgroup); struct evlist; struct rblist; +struct cgroup *cgroup__new(const char *name, bool do_open); struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name); -int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups, - struct rblist *metric_events, bool open_cgroup); +int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups, bool open_cgroup); void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup); @@ -37,6 +37,7 @@ int parse_cgroups(const struct option *opt, const char *str, int unset); struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id, const char *path); struct cgroup *cgroup__find(struct perf_env *env, uint64_t id); +struct cgroup *__cgroup__find(struct rb_root *root, uint64_t id); void perf_env__purge_cgroups(struct perf_env *env); @@ -49,6 +50,9 @@ static inline int read_cgroup_id(struct cgroup 
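Consumers pair the new bulk reader with the exported rbtree lookup: snapshot every cgroup once, then resolve ids as samples arrive. A minimal sketch based on the declarations above:

	struct rb_root cgroups = RB_ROOT;

	read_all_cgroups(&cgroups);	/* once, up front */

	static const char *cgroup_name_of(struct rb_root *root, uint64_t id)
	{
		struct cgroup *cgrp = __cgroup__find(root, id);

		return cgrp ? cgrp->name : "<unknown>";
	}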
*cgrp __maybe_unused) } #endif /* HAVE_FILE_HANDLE */ +/* read all cgroups in the system and save them in the rbtree */ +void read_all_cgroups(struct rb_root *root); + int cgroup_is_v2(const char *subsys); #endif /* __CGROUP_H__ */ diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index bffbdd216a6a..e51f0a676a22 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -93,34 +93,6 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) return r; } -/* - * This function splits the buffer by newlines and colors the lines individually. - * - * Returns 0 on success. - */ -int color_fwrite_lines(FILE *fp, const char *color, - size_t count, const char *buf) -{ - if (!*color) - return fwrite(buf, count, 1, fp) != 1; - - while (count) { - char *p = memchr(buf, '\n', count); - - if (p != buf && (fputs(color, fp) < 0 || - fwrite(buf, p ? (size_t)(p - buf) : count, 1, fp) != 1 || - fputs(PERF_COLOR_RESET, fp) < 0)) - return -1; - if (!p) - return 0; - if (fputc('\n', fp) < 0) - return -1; - count -= p + 1 - buf; - buf = p + 1; - } - return 0; -} - const char *get_percent_color(double percent) { const char *color = PERF_COLOR_NORMAL; diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 01f7bed21c9b..0319546decca 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -2,6 +2,7 @@ #ifndef __PERF_COLOR_H #define __PERF_COLOR_H +#include <linux/compiler.h> #include <stdio.h> #include <stdarg.h> @@ -22,27 +23,22 @@ #define MIN_GREEN 0.5 #define MIN_RED 5.0 +#define PERF_COLOR_DELETE_LINE "\033[A\33[2K\r" /* * This variable stores the value of color.ui */ extern int perf_use_color_default; -/* - * Use this instead of perf_default_config if you need the value of color.ui. - */ -int perf_color_default_config(const char *var, const char *value, void *cb); - int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty); int color_vsnprintf(char *bf, size_t size, const char *color, const char *fmt, va_list args); int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); -int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); -int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...); -int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); +int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) __printf(3, 4); +int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...) __printf(4, 5); int value_color_snprintf(char *bf, size_t size, const char *fmt, double value); -int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...); -int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...); +int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...) __printf(3, 4); +int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...) 
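The __printf() annotations added to color.h let the compiler type-check format strings at every call site. In the tools headers the macro expands to the standard GCC attribute:

	#include <stdio.h>

	#define __printf(a, b) __attribute__((format(printf, a, b)))

	int color_fprintf(FILE *fp, const char *color, const char *fmt, ...)
		__printf(3, 4);

	/* The compiler can now flag mismatched call sites, e.g.
	 *   color_fprintf(fp, PERF_COLOR_RED, "%s", 42);
	 * draws a -Wformat warning instead of compiling silently. */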
__printf(3, 4); int percent_color_fprintf(FILE *fp, const char *fmt, double percent); const char *get_percent_color(double percent); diff --git a/tools/perf/util/color_config.c b/tools/perf/util/color_config.c index dc09ba7cb31e..301031ddc025 100644 --- a/tools/perf/util/color_config.c +++ b/tools/perf/util/color_config.c @@ -35,14 +35,3 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) } return 0; } - -int perf_color_default_config(const char *var, const char *value, - void *cb __maybe_unused) -{ - if (!strcmp(var, "color.ui")) { - perf_use_color_default = perf_config_colorbool(var, value, -1); - return 0; - } - - return 0; -} diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index afb8d4fd2644..9880247a2c33 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -1,108 +1,192 @@ // SPDX-License-Identifier: GPL-2.0 #include "comm.h" #include <errno.h> -#include <stdlib.h> -#include <stdio.h> #include <string.h> +#include <internal/rc_check.h> #include <linux/refcount.h> -#include <linux/rbtree.h> #include <linux/zalloc.h> +#include <tools/libc_compat.h> // reallocarray + #include "rwsem.h" -struct comm_str { - char *str; - struct rb_node rb_node; +DECLARE_RC_STRUCT(comm_str) { refcount_t refcnt; + char str[]; }; -/* Should perhaps be moved to struct machine */ -static struct rb_root comm_str_root; -static struct rw_semaphore comm_str_lock = {.lock = PTHREAD_RWLOCK_INITIALIZER,}; +static struct comm_strs { + struct rw_semaphore lock; + struct comm_str **strs; + int num_strs; + int capacity; +} _comm_strs; + +static void comm_strs__remove_if_last(struct comm_str *cs); + +static void comm_strs__init(void) + NO_THREAD_SAFETY_ANALYSIS /* Inherently single threaded due to pthread_once. */ +{ + init_rwsem(&_comm_strs.lock); + _comm_strs.capacity = 16; + _comm_strs.num_strs = 0; + _comm_strs.strs = calloc(16, sizeof(*_comm_strs.strs)); +} + +static struct comm_strs *comm_strs__get(void) +{ + static pthread_once_t comm_strs_type_once = PTHREAD_ONCE_INIT; + + pthread_once(&comm_strs_type_once, comm_strs__init); + + return &_comm_strs; +} + +static refcount_t *comm_str__refcnt(struct comm_str *cs) +{ + return &RC_CHK_ACCESS(cs)->refcnt; +} + +static const char *comm_str__str(const struct comm_str *cs) +{ + return &RC_CHK_ACCESS(cs)->str[0]; +} static struct comm_str *comm_str__get(struct comm_str *cs) { - if (cs && refcount_inc_not_zero(&cs->refcnt)) - return cs; + struct comm_str *result; + + if (RC_CHK_GET(result, cs)) + refcount_inc_not_zero(comm_str__refcnt(cs)); - return NULL; + return result; } static void comm_str__put(struct comm_str *cs) { - if (cs && refcount_dec_and_test(&cs->refcnt)) { - down_write(&comm_str_lock); - rb_erase(&cs->rb_node, &comm_str_root); - up_write(&comm_str_lock); - zfree(&cs->str); - free(cs); + if (!cs) + return; + + if (refcount_dec_and_test(comm_str__refcnt(cs))) { + RC_CHK_FREE(cs); + } else { + if (refcount_read(comm_str__refcnt(cs)) == 1) + comm_strs__remove_if_last(cs); + + RC_CHK_PUT(cs); } } -static struct comm_str *comm_str__alloc(const char *str) +static struct comm_str *comm_str__new(const char *str) { - struct comm_str *cs; + struct comm_str *result = NULL; + RC_STRUCT(comm_str) *cs; - cs = zalloc(sizeof(*cs)); - if (!cs) - return NULL; - - cs->str = strdup(str); - if (!cs->str) { - free(cs); - return NULL; + cs = malloc(sizeof(*cs) + strlen(str) + 1); + if (ADD_RC_CHK(result, cs)) { + refcount_set(comm_str__refcnt(result), 1); + strcpy(&cs->str[0], str); } + return result; +} - 
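The comm_str rework above replaces the separately strdup-ed string with a flexible array member, so one allocation holds both the refcount header and the text. The allocation pattern in isolation, with the RC_CHK debugging wrappers elided for clarity:

	struct comm_str_plain {
		refcount_t refcnt;
		char str[];		/* string storage tails the header */
	};

	static struct comm_str_plain *comm_str_plain__new(const char *name)
	{
		struct comm_str_plain *cs = malloc(sizeof(*cs) + strlen(name) + 1);

		if (cs) {
			refcount_set(&cs->refcnt, 1);
			strcpy(cs->str, name);	/* one allocation, no strdup */
		}
		return cs;
	}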
refcount_set(&cs->refcnt, 1); +static int comm_str__search(const void *_key, const void *_member) +{ + const char *key = _key; + const struct comm_str *member = *(const struct comm_str * const *)_member; - return cs; + return strcmp(key, comm_str__str(member)); } -static -struct comm_str *__comm_str__findnew(const char *str, struct rb_root *root) +static void comm_strs__remove_if_last(struct comm_str *cs) { - struct rb_node **p = &root->rb_node; - struct rb_node *parent = NULL; - struct comm_str *iter, *new; - int cmp; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct comm_str, rb_node); - - /* - * If we race with comm_str__put, iter->refcnt is 0 - * and it will be removed within comm_str__put call - * shortly, ignore it in this search. - */ - cmp = strcmp(str, iter->str); - if (!cmp && comm_str__get(iter)) - return iter; - - if (cmp < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; + struct comm_strs *comm_strs = comm_strs__get(); + + down_write(&comm_strs->lock); + /* + * Are there only references from the array, if so remove the array + * reference under the write lock so that we don't race with findnew. + */ + if (refcount_read(comm_str__refcnt(cs)) == 1) { + struct comm_str **entry; + + entry = bsearch(comm_str__str(cs), comm_strs->strs, comm_strs->num_strs, + sizeof(struct comm_str *), comm_str__search); + comm_str__put(*entry); + for (int i = entry - comm_strs->strs; i < comm_strs->num_strs - 1; i++) + comm_strs->strs[i] = comm_strs->strs[i + 1]; + comm_strs->num_strs--; } + up_write(&comm_strs->lock); +} - new = comm_str__alloc(str); - if (!new) - return NULL; +static struct comm_str *__comm_strs__find(struct comm_strs *comm_strs, const char *str) + SHARED_LOCKS_REQUIRED(comm_strs->lock) +{ + struct comm_str **result; - rb_link_node(&new->rb_node, parent, p); - rb_insert_color(&new->rb_node, root); + result = bsearch(str, comm_strs->strs, comm_strs->num_strs, sizeof(struct comm_str *), + comm_str__search); - return new; + if (!result) + return NULL; + + return comm_str__get(*result); } -static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root) +static struct comm_str *comm_strs__findnew(const char *str) { - struct comm_str *cs; + struct comm_strs *comm_strs = comm_strs__get(); + struct comm_str *result; - down_write(&comm_str_lock); - cs = __comm_str__findnew(str, root); - up_write(&comm_str_lock); + if (!comm_strs) + return NULL; - return cs; + down_read(&comm_strs->lock); + result = __comm_strs__find(comm_strs, str); + up_read(&comm_strs->lock); + if (result) + return result; + + down_write(&comm_strs->lock); + result = __comm_strs__find(comm_strs, str); + if (!result) { + if (comm_strs->num_strs == comm_strs->capacity) { + struct comm_str **tmp; + + tmp = reallocarray(comm_strs->strs, + comm_strs->capacity + 16, + sizeof(*comm_strs->strs)); + if (!tmp) { + up_write(&comm_strs->lock); + return NULL; + } + comm_strs->strs = tmp; + comm_strs->capacity += 16; + } + result = comm_str__new(str); + if (result) { + int low = 0, high = comm_strs->num_strs - 1; + int insert = comm_strs->num_strs; /* Default to inserting at the end. 
*/ + + while (low <= high) { + int mid = low + (high - low) / 2; + int cmp = strcmp(comm_str__str(comm_strs->strs[mid]), str); + + if (cmp < 0) { + low = mid + 1; + } else { + high = mid - 1; + insert = mid; + } + } + memmove(&comm_strs->strs[insert + 1], &comm_strs->strs[insert], + (comm_strs->num_strs - insert) * sizeof(struct comm_str *)); + comm_strs->num_strs++; + comm_strs->strs[insert] = result; + } + } + up_write(&comm_strs->lock); + return comm_str__get(result); } struct comm *comm__new(const char *str, u64 timestamp, bool exec) @@ -115,7 +199,7 @@ struct comm *comm__new(const char *str, u64 timestamp, bool exec) comm->start = timestamp; comm->exec = exec; - comm->comm_str = comm_str__findnew(str, &comm_str_root); + comm->comm_str = comm_strs__findnew(str); if (!comm->comm_str) { free(comm); return NULL; @@ -128,7 +212,7 @@ int comm__override(struct comm *comm, const char *str, u64 timestamp, bool exec) { struct comm_str *new, *old = comm->comm_str; - new = comm_str__findnew(str, &comm_str_root); + new = comm_strs__findnew(str); if (!new) return -ENOMEM; @@ -149,5 +233,5 @@ void comm__free(struct comm *comm) const char *comm__str(const struct comm *comm) { - return comm->comm_str->str; + return comm_str__str(comm->comm_str); } diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h index 0cd3369af2a4..6cfecfca16f2 100644 --- a/tools/perf/util/compress.h +++ b/tools/perf/util/compress.h @@ -3,6 +3,10 @@ #define PERF_COMPRESS_H #include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <sys/types.h> +#include <linux/compiler.h> #ifdef HAVE_ZSTD_SUPPORT #include <zstd.h> #endif @@ -13,14 +17,33 @@ bool gzip_is_compressed(const char *input); #endif #ifdef HAVE_LZMA_SUPPORT +int lzma_decompress_stream_to_file(FILE *input, int output_fd); int lzma_decompress_to_file(const char *input, int output_fd); bool lzma_is_compressed(const char *input); +#else +static inline +int lzma_decompress_stream_to_file(FILE *input __maybe_unused, + int output_fd __maybe_unused) +{ + return -1; +} +static inline +int lzma_decompress_to_file(const char *input __maybe_unused, + int output_fd __maybe_unused) +{ + return -1; +} +static inline int lzma_is_compressed(const char *input __maybe_unused) +{ + return false; +} #endif struct zstd_data { #ifdef HAVE_ZSTD_SUPPORT ZSTD_CStream *cstream; ZSTD_DStream *dstream; + int comp_level; #endif }; @@ -29,7 +52,7 @@ struct zstd_data { int zstd_init(struct zstd_data *data, int level); int zstd_fini(struct zstd_data *data); -size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, +ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, void *src, size_t src_size, size_t max_record_size, size_t process_header(void *record, size_t increment)); @@ -48,7 +71,7 @@ static inline int zstd_fini(struct zstd_data *data __maybe_unused) } static inline -size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, +ssize_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, void *dst __maybe_unused, size_t dst_size __maybe_unused, void *src __maybe_unused, size_t src_size __maybe_unused, size_t max_record_size __maybe_unused, diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 46f144c46827..e0219bc6330a 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -13,13 +13,13 @@ #include <sys/param.h> #include "cache.h" #include "callchain.h" +#include "header.h" #include <subcmd/exec-cmd.h> #include "util/event.h" /* 
proc_map_timeout */ #include "util/hist.h" /* perf_hist_config */ -#include "util/llvm-utils.h" /* perf_llvm_config */ #include "util/stat.h" /* perf_stat__set_big_num */ #include "util/evsel.h" /* evsel__hw_names, evsel__use_bpf_counters */ -#include "util/srcline.h" /* addr2line_timeout_ms */ +#include "util/addr2line.h" /* addr2line_timeout_ms */ #include "build-id.h" #include "debug.h" #include "config.h" @@ -35,6 +35,23 @@ #define DEBUG_CACHE_DIR ".debug" +#define METRIC_ONLY_LEN 20 + +static struct stats walltime_nsecs_stats; + +struct perf_stat_config stat_config = { + .aggr_mode = AGGR_GLOBAL, + .aggr_level = MAX_CACHE_LVL + 1, + .scale = true, + .unit_width = 4, /* strlen("unit") */ + .run_count = 1, + .metric_only_len = METRIC_ONLY_LEN, + .walltime_nsecs_stats = &walltime_nsecs_stats, + .big_num = true, + .ctl_fd = -1, + .ctl_fd_ack = -1, + .iostat_run = false, +}; char buildid_dir[MAXPATHLEN]; /* root dir for buildid, binary cache */ @@ -456,6 +473,16 @@ static int perf_ui_config(const char *var, const char *value) return 0; } +void perf_stat__set_big_num(int set) +{ + stat_config.big_num = (set != 0); +} + +static void perf_stat__set_no_csv_summary(int set) +{ + stat_config.no_csv_summary = (set != 0); +} + static int perf_stat_config(const char *var, const char *value) { if (!strcmp(var, "stat.big-num")) @@ -486,9 +513,6 @@ int perf_default_config(const char *var, const char *value, if (strstarts(var, "call-graph.")) return perf_callchain_config(var, value); - if (strstarts(var, "llvm.")) - return perf_llvm_config(var, value); - if (strstarts(var, "buildid.")) return perf_buildid_config(var, value); @@ -833,12 +857,6 @@ void perf_config__exit(void) config_set = NULL; } -void perf_config__refresh(void) -{ - perf_config__exit(); - perf_config__init(); -} - static void perf_config_item__delete(struct perf_config_item *item) { zfree(&item->name); @@ -916,6 +934,7 @@ void set_buildid_dir(const char *dir) struct perf_config_scan_data { const char *name; const char *fmt; + const char *value; va_list args; int ret; }; @@ -943,3 +962,24 @@ int perf_config_scan(const char *name, const char *fmt, ...) return d.ret; } + +static int perf_config_get_cb(const char *var, const char *value, void *data) +{ + struct perf_config_scan_data *d = data; + + if (!strcmp(var, d->name)) + d->value = value; + + return 0; +} + +const char *perf_config_get(const char *name) +{ + struct perf_config_scan_data d = { + .name = name, + .value = NULL, + }; + + perf_config(perf_config_get_cb, &d); + return d.value; +} diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index 2e5e808928a5..987b47cf54c3 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -30,6 +30,7 @@ typedef int (*config_fn_t)(const char *, const char *, void *); int perf_default_config(const char *, const char *, void *); int perf_config(config_fn_t fn, void *); int perf_config_scan(const char *name, const char *fmt, ...) 
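The new perf_config_get() accessor reads a single key without registering a callback. A minimal sketch, using a key name taken from the stat config handling above:

	/* returns the raw string value, or NULL when the key is unset */
	const char *val = perf_config_get("stat.big-num");

	if (val)
		pr_debug("stat.big-num = %s\n", val);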
__scanf(2, 3); +const char *perf_config_get(const char *name); int perf_config_set(struct perf_config_set *set, config_fn_t fn, void *data); int perf_config_int(int *dest, const char *, const char *); @@ -48,7 +49,7 @@ void perf_config_set__delete(struct perf_config_set *set); int perf_config_set__collect(struct perf_config_set *set, const char *file_name, const char *var, const char *value); void perf_config__exit(void); -void perf_config__refresh(void); +int perf_config__set_variable(const char *var, const char *value); /** * perf_config_sections__for_each - iterate thru all the sections diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 0e090e8bc334..a80845038a5e 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -67,19 +67,23 @@ static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_m struct perf_cpu_map *map; map = perf_cpu_map__empty_new(data->cpus_data.nr); - if (map) { - unsigned i; - - for (i = 0; i < data->cpus_data.nr; i++) { - /* - * Special treatment for -1, which is not real cpu number, - * and we need to use (int) -1 to initialize map[i], - * otherwise it would become 65535. - */ - if (data->cpus_data.cpu[i] == (u16) -1) - RC_CHK_ACCESS(map)->map[i].cpu = -1; - else - RC_CHK_ACCESS(map)->map[i].cpu = (int) data->cpus_data.cpu[i]; + if (!map) + return NULL; + + for (unsigned int i = 0; i < data->cpus_data.nr; i++) { + /* + * Special treatment for -1, which is not real cpu number, + * and we need to use (int) -1 to initialize map[i], + * otherwise it would become 65535. + */ + if (data->cpus_data.cpu[i] == (u16) -1) { + RC_CHK_ACCESS(map)->map[i].cpu = -1; + } else if (data->cpus_data.cpu[i] < INT16_MAX) { + RC_CHK_ACCESS(map)->map[i].cpu = (int16_t) data->cpus_data.cpu[i]; + } else { + pr_err("Invalid cpumap entry %u\n", data->cpus_data.cpu[i]); + perf_cpu_map__put(map); + return NULL; } } @@ -106,8 +110,15 @@ static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_ int cpu; perf_record_cpu_map_data__read_one_mask(data, i, local_copy); - for_each_set_bit(cpu, local_copy, 64) - RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i; + for_each_set_bit(cpu, local_copy, 64) { + if (cpu + cpus_per_i < INT16_MAX) { + RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i; + } else { + pr_err("Invalid cpumap entry %d\n", cpu + cpus_per_i); + perf_cpu_map__put(map); + return NULL; + } + } } return map; @@ -127,8 +138,15 @@ static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map RC_CHK_ACCESS(map)->map[i++].cpu = -1; for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu; - i++, cpu++) - RC_CHK_ACCESS(map)->map[i].cpu = cpu; + i++, cpu++) { + if (cpu < INT16_MAX) { + RC_CHK_ACCESS(map)->map[i].cpu = cpu; + } else { + pr_err("Invalid cpumap entry %d\n", cpu); + perf_cpu_map__put(map); + return NULL; + } + } return map; } @@ -180,8 +198,6 @@ struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr) cpus->nr = nr; for (i = 0; i < nr; i++) cpus->map[i] = aggr_cpu_id__empty(); - - refcount_set(&cpus->refcnt, 1); } return cpus; @@ -222,6 +238,8 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer) return a->socket - b->socket; else if (a->die != b->die) return a->die - b->die; + else if (a->cluster != b->cluster) + return a->cluster - b->cluster; else if (a->cache_lvl != b->cache_lvl) return a->cache_lvl - b->cache_lvl; else if (a->cache != b->cache) @@ -293,7 +311,7 @@ struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data) 
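The (u16) -1 special case exists because the on-disk cpu arrays are u16 while the in-memory map now stores int16_t. A worked illustration of the two casts the bounds checks above are guarding:

	/*
	 * u16 raw = (u16) -1;     -> 65535, the "no cpu" sentinel on disk
	 * (int16_t) raw           -> -1 again, but only for this value;
	 *
	 * every other raw value must fall below INT16_MAX (32767), otherwise
	 * the new checks reject the map instead of silently truncating.
	 */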
die = cpu__get_die_id(cpu); /* There is no die_id on legacy system. */ - if (die == -1) + if (die < 0) die = 0; /* @@ -309,6 +327,30 @@ struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data) return id; } +int cpu__get_cluster_id(struct perf_cpu cpu) +{ + int value, ret = cpu__get_topology_int(cpu.cpu, "cluster_id", &value); + + return ret ?: value; +} + +struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data) +{ + int cluster = cpu__get_cluster_id(cpu); + struct aggr_cpu_id id; + + /* There is no cluster_id on legacy system. */ + if (cluster < 0) + cluster = 0; + + id = aggr_cpu_id__die(cpu, data); + if (aggr_cpu_id__is_empty(&id)) + return id; + + id.cluster = cluster; + return id; +} + int cpu__get_core_id(struct perf_cpu cpu) { int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value); @@ -320,8 +362,8 @@ struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data) struct aggr_cpu_id id; int core = cpu__get_core_id(cpu); - /* aggr_cpu_id__die returns a struct with socket and die set. */ - id = aggr_cpu_id__die(cpu, data); + /* aggr_cpu_id__die returns a struct with socket die, and cluster set. */ + id = aggr_cpu_id__cluster(cpu, data); if (aggr_cpu_id__is_empty(&id)) return id; @@ -403,7 +445,7 @@ static void set_max_cpu_num(void) { const char *mnt; char path[PATH_MAX]; - int ret = -1; + int max, ret = -1; /* set up default */ max_cpu_num.cpu = 4096; @@ -420,10 +462,12 @@ static void set_max_cpu_num(void) goto out; } - ret = get_max_num(path, &max_cpu_num.cpu); + ret = get_max_num(path, &max); if (ret) goto out; + max_cpu_num.cpu = max; + /* get the highest present cpu number for a sparse allocation */ ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt); if (ret >= PATH_MAX) { @@ -431,8 +475,14 @@ static void set_max_cpu_num(void) goto out; } - ret = get_max_num(path, &max_present_cpu_num.cpu); + ret = get_max_num(path, &max); + if (!ret && max > INT16_MAX) { + pr_err("Read out of bounds max cpus of %d\n", max); + ret = -1; + } + if (!ret) + max_present_cpu_num.cpu = (int16_t)max; out: if (ret) pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu); @@ -582,7 +632,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size) #define COMMA first ? 
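With the new cluster level, the aggregation ids compose strictly top-down: each helper delegates to the next-coarser level and then fills in its own field. In outline:

	/*
	 * aggr_cpu_id__core(cpu)
	 *   -> aggr_cpu_id__cluster(cpu)   sets .cluster (0 on legacy systems)
	 *        -> aggr_cpu_id__die(cpu)  sets .die     (0 on legacy systems)
	 *             -> ...               sets .socket, per the existing code
	 * and finally sets .core itself.
	 */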
"" : "," for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) { - struct perf_cpu cpu = { .cpu = INT_MAX }; + struct perf_cpu cpu = { .cpu = INT16_MAX }; bool last = i == perf_cpu_map__nr(map); if (!last) @@ -629,13 +679,18 @@ static char hex_char(unsigned char val) size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size) { - int i, cpu; + int idx; char *ptr = buf; unsigned char *bitmap; - struct perf_cpu last_cpu = perf_cpu_map__cpu(map, perf_cpu_map__nr(map) - 1); + struct perf_cpu c, last_cpu = perf_cpu_map__max(map); + + if (buf == NULL || size == 0) + return 0; - if (buf == NULL) + if (last_cpu.cpu < 0) { + buf[0] = '\0'; return 0; + } bitmap = zalloc(last_cpu.cpu / 8 + 1); if (bitmap == NULL) { @@ -643,12 +698,10 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size) return 0; } - for (i = 0; i < perf_cpu_map__nr(map); i++) { - cpu = perf_cpu_map__cpu(map, i).cpu; - bitmap[cpu / 8] |= 1 << (cpu % 8); - } + perf_cpu_map__for_each_cpu_skip_any(c, idx, map) + bitmap[c.cpu / 8] |= 1 << (c.cpu % 8); - for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) { + for (int cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) { unsigned char bits = bitmap[cpu / 8]; if (cpu % 8) @@ -672,9 +725,9 @@ struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ static struct perf_cpu_map *online; if (!online) - online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */ + online = perf_cpu_map__new_online_cpus(); /* from /sys/devices/system/cpu/online */ - return online; + return perf_cpu_map__get(online); } bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b) @@ -683,6 +736,7 @@ bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b a->node == b->node && a->socket == b->socket && a->die == b->die && + a->cluster == b->cluster && a->cache_lvl == b->cache_lvl && a->cache == b->cache && a->core == b->core && @@ -695,6 +749,7 @@ bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a) a->node == -1 && a->socket == -1 && a->die == -1 && + a->cluster == -1 && a->cache_lvl == -1 && a->cache == -1 && a->core == -1 && @@ -708,6 +763,7 @@ struct aggr_cpu_id aggr_cpu_id__empty(void) .node = -1, .socket = -1, .die = -1, + .cluster = -1, .cache_lvl = -1, .cache = -1, .core = -1, diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 9df2aeb34d3d..ee0f6139b04a 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -5,7 +5,6 @@ #include <stdbool.h> #include <stdio.h> #include <perf/cpumap.h> -#include <linux/refcount.h> /** Identify where counts are aggregated, -1 implies not to aggregate. */ struct aggr_cpu_id { @@ -20,6 +19,8 @@ struct aggr_cpu_id { int socket; /** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */ int die; + /** The cluster id as read from /sys/devices/system/cpu/cpuX/topology/cluster_id */ + int cluster; /** The cache level as read from /sys/devices/system/cpu/cpuX/cache/indexY/level */ int cache_lvl; /** @@ -35,7 +36,6 @@ struct aggr_cpu_id { /** A collection of aggr_cpu_id values, the "built" version is sorted and uniqued. */ struct cpu_aggr_map { - refcount_t refcnt; /** Number of valid entries. */ int nr; /** The entries. 
@@ -87,6 +87,11 @@ int cpu__get_socket_id(struct perf_cpu cpu);
  */
 int cpu__get_die_id(struct perf_cpu cpu);
 /**
+ * cpu__get_cluster_id - Returns the cluster id as read from
+ * /sys/devices/system/cpu/cpuX/topology/cluster_id for the given CPU
+ */
+int cpu__get_cluster_id(struct perf_cpu cpu);
+/**
  * cpu__get_core_id - Returns the core id as read from
  * /sys/devices/system/cpu/cpuX/topology/core_id for the given CPU.
  */
@@ -127,9 +132,15 @@ struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data);
  */
 struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data);
 /**
- * aggr_cpu_id__core - Create an aggr_cpu_id with the core, die and socket
- * populated with the core, die and socket for cpu. The function signature is
- * compatible with aggr_cpu_id_get_t.
+ * aggr_cpu_id__cluster - Create an aggr_cpu_id with cluster, die and socket
+ * populated with the cluster, die and socket for cpu. The function signature
+ * is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data);
+/**
+ * aggr_cpu_id__core - Create an aggr_cpu_id with the core, cluster, die and
+ * socket populated with the core, cluster, die and socket for cpu. The
+ * function signature is compatible with aggr_cpu_id_get_t.
  */
 struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data);
 /**
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index 81cfc85f4668..8bbeb2dc76fd 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -267,7 +267,7 @@ struct cpu_topology *cpu_topology__new(void)
 	ncpus = cpu__max_present_cpu().cpu;
 
 	/* build online CPU map */
-	map = perf_cpu_map__new(NULL);
+	map = perf_cpu_map__new_online_cpus();
 	if (map == NULL) {
 		pr_debug("failed to get system cpumap\n");
 		return NULL;
diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build
index 216cb17a3322..27550db2aa4c 100644
--- a/tools/perf/util/cs-etm-decoder/Build
+++ b/tools/perf/util/cs-etm-decoder/Build
@@ -1 +1 @@
-perf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o
+perf-util-y += cs-etm-decoder.o
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index e917985bbbe6..3050fe212666 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -41,7 +41,7 @@ const u32 INSTR_PER_NS = 10;
 
 struct cs_etm_decoder {
 	void *data;
-	void (*packet_printer)(const char *msg);
+	void (*packet_printer)(const char *msg, void *data);
 	bool suppress_printing;
 	dcd_tree_handle_t dcd_tree;
 	cs_etm_mem_cb_type mem_access;
@@ -202,7 +202,7 @@ static void cs_etm_decoder__print_str_cb(const void *p_context,
 	const struct cs_etm_decoder *decoder = p_context;
 
 	if (p_context && str_len && !decoder->suppress_printing)
-		decoder->packet_printer(msg);
+		decoder->packet_printer(msg, decoder->data);
 }
 
 static int
@@ -388,7 +388,8 @@ cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
 }
 
 static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
+cs_etm_decoder__buffer_packet(struct cs_etm_queue *etmq,
+			      struct cs_etm_packet_queue *packet_queue,
 			      const u8 trace_chan_id,
 			      enum cs_etm_sample_type sample_type)
 {
@@ -398,7 +399,7 @@ cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
 	if (packet_queue->packet_count >= CS_ETM_PACKET_MAX_BUFFER - 1)
 		return OCSD_RESP_FATAL_SYS_ERR;
 
-	if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
+	if (cs_etm__get_cpu(etmq, trace_chan_id, &cpu) < 0)
 		return OCSD_RESP_FATAL_SYS_ERR;
 
 	et = packet_queue->tail;
@@ -436,7 +437,7 @@ cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq,
 	int ret = 0;
 	struct cs_etm_packet *packet;
 
-	ret = cs_etm_decoder__buffer_packet(packet_queue, trace_chan_id,
+	ret = cs_etm_decoder__buffer_packet(etmq, packet_queue, trace_chan_id,
 					    CS_ETM_RANGE);
 	if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
 		return ret;
@@ -496,7 +497,8 @@ out:
 }
 
 static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
+cs_etm_decoder__buffer_discontinuity(struct cs_etm_queue *etmq,
+				     struct cs_etm_packet_queue *queue,
 				     const uint8_t trace_chan_id)
 {
 	/*
@@ -504,18 +506,19 @@ cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
 	 * reset time statistics.
 	 */
 	cs_etm_decoder__reset_timestamp(queue);
-	return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+	return cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
 					     CS_ETM_DISCONTINUITY);
 }
 
 static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
+cs_etm_decoder__buffer_exception(struct cs_etm_queue *etmq,
+				 struct cs_etm_packet_queue *queue,
 				 const ocsd_generic_trace_elem *elem,
 				 const uint8_t trace_chan_id)
 {
 	int ret = 0;
 	struct cs_etm_packet *packet;
 
-	ret = cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+	ret = cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
 					    CS_ETM_EXCEPTION);
 	if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
 		return ret;
@@ -527,10 +530,11 @@ cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
 }
 
 static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue,
+cs_etm_decoder__buffer_exception_ret(struct cs_etm_queue *etmq,
+				     struct cs_etm_packet_queue *queue,
 				     const uint8_t trace_chan_id)
 {
-	return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+	return cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
 					     CS_ETM_EXCEPTION_RET);
 }
 
@@ -584,6 +588,7 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
 	const ocsd_generic_trace_elem *elem)
 {
 	ocsd_datapath_resp_t resp = OCSD_RESP_CONT;
+	ocsd_gen_trc_elem_t type;
 	struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context;
 	struct cs_etm_queue *etmq = decoder->data;
 	struct cs_etm_packet_queue *packet_queue;
@@ -593,52 +598,29 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
 	if (!packet_queue)
 		return OCSD_RESP_FATAL_SYS_ERR;
 
-	switch (elem->elem_type) {
-	case OCSD_GEN_TRC_ELEM_UNKNOWN:
-		break;
-	case OCSD_GEN_TRC_ELEM_EO_TRACE:
-	case OCSD_GEN_TRC_ELEM_NO_SYNC:
-	case OCSD_GEN_TRC_ELEM_TRACE_ON:
-		resp = cs_etm_decoder__buffer_discontinuity(packet_queue,
+	type = elem->elem_type;
+
+	if (type == OCSD_GEN_TRC_ELEM_EO_TRACE ||
+	    type == OCSD_GEN_TRC_ELEM_NO_SYNC ||
+	    type == OCSD_GEN_TRC_ELEM_TRACE_ON)
+		resp = cs_etm_decoder__buffer_discontinuity(etmq, packet_queue,
 							    trace_chan_id);
-		break;
-	case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
+	else if (type == OCSD_GEN_TRC_ELEM_INSTR_RANGE)
 		resp = cs_etm_decoder__buffer_range(etmq, packet_queue, elem,
 						    trace_chan_id);
-		break;
-	case OCSD_GEN_TRC_ELEM_EXCEPTION:
-		resp = cs_etm_decoder__buffer_exception(packet_queue, elem,
+	else if (type == OCSD_GEN_TRC_ELEM_EXCEPTION)
+		resp = cs_etm_decoder__buffer_exception(etmq, packet_queue, elem,
 							trace_chan_id);
-		break;
-	case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
-		resp = cs_etm_decoder__buffer_exception_ret(packet_queue,
+	else if (type == OCSD_GEN_TRC_ELEM_EXCEPTION_RET)
+		resp = cs_etm_decoder__buffer_exception_ret(etmq, packet_queue,
 							    trace_chan_id);
-		break;
-	case OCSD_GEN_TRC_ELEM_TIMESTAMP:
+	else if (type == OCSD_GEN_TRC_ELEM_TIMESTAMP)
 		resp = cs_etm_decoder__do_hard_timestamp(etmq, elem,
 							 trace_chan_id,
 							 indx);
-		break;
-	case OCSD_GEN_TRC_ELEM_PE_CONTEXT:
+	else if (type == OCSD_GEN_TRC_ELEM_PE_CONTEXT)
 		resp = cs_etm_decoder__set_tid(etmq, packet_queue,
 					       elem, trace_chan_id);
-		break;
-	/* Unused packet types */
-	case OCSD_GEN_TRC_ELEM_I_RANGE_NOPATH:
-	case OCSD_GEN_TRC_ELEM_ADDR_NACC:
-	case OCSD_GEN_TRC_ELEM_CYCLE_COUNT:
-	case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN:
-	case OCSD_GEN_TRC_ELEM_EVENT:
-	case OCSD_GEN_TRC_ELEM_SWTRACE:
-	case OCSD_GEN_TRC_ELEM_CUSTOM:
-	case OCSD_GEN_TRC_ELEM_SYNC_MARKER:
-	case OCSD_GEN_TRC_ELEM_MEMTRANS:
-#if (OCSD_VER_NUM >= 0x010400)
-	case OCSD_GEN_TRC_ELEM_INSTRUMENTATION:
-#endif
-	default:
-		break;
-	}
 
 	return resp;
 }
@@ -680,14 +662,15 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
 		return -1;
 	}
 
-	/* if the CPU has no trace ID associated, no decoder needed */
-	if (csid == CORESIGHT_TRACE_ID_UNUSED_VAL)
-		return 0;
-
 	if (d_params->operation == CS_ETM_OPERATION_DECODE) {
+		int decode_flags = OCSD_CREATE_FLG_FULL_DECODER;
+#ifdef OCSD_OPFLG_N_UNCOND_DIR_BR_CHK
+		decode_flags |= OCSD_OPFLG_N_UNCOND_DIR_BR_CHK | OCSD_OPFLG_CHK_RANGE_CONTINUE |
+				ETM4_OPFLG_PKTDEC_AA64_OPCODE_CHK;
+#endif
 		if (ocsd_dt_create_decoder(decoder->dcd_tree,
 					   decoder->decoder_name,
-					   OCSD_CREATE_FLG_FULL_DECODER,
+					   decode_flags,
 					   trace_config, &csid))
 			return -1;
 
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 272c2efe78ee..12c782fa6db2 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -60,7 +60,7 @@ struct cs_etm_trace_params {
 
 struct cs_etm_decoder_params {
 	int operation;
-	void (*packet_printer)(const char *msg);
+	void (*packet_printer)(const char *msg, void *data);
 	cs_etm_mem_cb_type mem_acc_cb;
 	bool formatted;
 	bool fsyncs;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 1419b40dfbe8..25d56e0f1c07 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -6,10 +6,11 @@
  * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
  */
 
+#include <linux/kernel.h>
+#include <linux/bitfield.h>
 #include <linux/bitops.h>
 #include <linux/coresight-pmu.h>
 #include <linux/err.h>
-#include <linux/kernel.h>
 #include <linux/log2.h>
 #include <linux/types.h>
 #include <linux/zalloc.h>
@@ -96,28 +97,43 @@ struct cs_etm_traceid_queue {
 	struct cs_etm_packet_queue packet_queue;
 };
 
+enum cs_etm_format {
+	UNSET,
+	FORMATTED,
+	UNFORMATTED
+};
+
 struct cs_etm_queue {
 	struct cs_etm_auxtrace *etm;
 	struct cs_etm_decoder *decoder;
 	struct auxtrace_buffer *buffer;
 	unsigned int queue_nr;
 	u8 pending_timestamp_chan_id;
+	enum cs_etm_format format;
 	u64 offset;
 	const unsigned char *buf;
 	size_t buf_len, buf_used;
 	/* Conversion between traceID and index in traceid_queues array */
 	struct intlist *traceid_queues_list;
 	struct cs_etm_traceid_queue **traceid_queues;
+	/* Conversion between traceID and metadata pointers */
+	struct intlist *traceid_list;
+	/*
+	 * Same as traceid_list, but traceid_list may be a reference to another
+	 * queue's list which has a matching sink ID.
+	 */
+	struct intlist *own_traceid_list;
+	u32 sink_id;
 };
 
-/* RB tree for quick conversion between traceID and metadata pointers */
-static struct intlist *traceid_list;
-
 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
 					   pid_t tid);
 static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
+static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
+static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
+static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
 
 /* PTMs ETMIDR [11:8] set to b0011 */
 #define ETMIDR_PTM_VERSION 0x00000300
@@ -132,6 +148,7 @@ static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
 	(queue_nr << 16 | trace_chan_id)
 #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
 #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
+#define SINK_UNSET ((u32) -1)
 
 static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
 {
@@ -143,12 +160,12 @@ static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
 	return CS_ETM_PROTO_ETMV3;
 }
 
-static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
+static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
 {
 	struct int_node *inode;
 	u64 *metadata;
 
-	inode = intlist__find(traceid_list, trace_chan_id);
+	inode = intlist__find(etmq->traceid_list, trace_chan_id);
 	if (!inode)
 		return -EINVAL;
 
@@ -157,12 +174,12 @@ static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
 	return 0;
 }
 
-int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
+int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
 {
 	struct int_node *inode;
 	u64 *metadata;
 
-	inode = intlist__find(traceid_list, trace_chan_id);
+	inode = intlist__find(etmq->traceid_list, trace_chan_id);
 	if (!inode)
 		return -EINVAL;
 
@@ -214,26 +231,171 @@ enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
 	return etmq->etm->pid_fmt;
 }
 
-static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
+static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
+					u8 trace_chan_id, u64 *cpu_metadata)
 {
-	struct int_node *inode;
-
 	/* Get an RB node for this CPU */
-	inode = intlist__findnew(traceid_list, trace_chan_id);
+	struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);
 
 	/* Something went wrong, no need to continue */
 	if (!inode)
 		return -ENOMEM;
 
+	/* Disallow re-mapping a different traceID to metadata pair. */
+	if (inode->priv) {
+		u64 *curr_cpu_data = inode->priv;
+		u8 curr_chan_id;
+		int err;
+
+		if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
+			/*
+			 * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
+			 * are expected (but not supported) in per-thread mode,
+			 * rather than signifying an error.
+			 */
+			if (etmq->etm->per_thread_decoding)
+				pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
+			else
+				pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
+
+			return -EINVAL;
+		}
+
+		/* check that the mapped ID matches */
+		err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
+		if (err)
+			return err;
+
+		if (curr_chan_id != trace_chan_id) {
+			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
+			return -EINVAL;
+		}
+
+		/* Skip re-adding the same mappings if everything matched */
+		return 0;
+	}
+
+	/* Not one we've seen before, associate the traceID with the metadata pointer */
+	inode->priv = cpu_metadata;
+
+	return 0;
+}
+
+static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
+{
+	if (etm->per_thread_decoding)
+		return etm->queues.queue_array[0].priv;
+	else
+		return etm->queues.queue_array[cpu].priv;
+}
+
+static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
+				   u64 *cpu_metadata)
+{
+	struct cs_etm_queue *etmq;
+
+	/*
+	 * If the queue is unformatted then only save one mapping in the
+	 * queue associated with that CPU so only one decoder is made.
+	 */
+	etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);
+	if (etmq->format == UNFORMATTED)
+		return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
+						    cpu_metadata);
+
 	/*
-	 * The node for that CPU should not be taken.
-	 * Back out if that's the case.
+	 * Otherwise, version 0 trace IDs are global so save them into every
+	 * queue.
 	 */
-	if (inode->priv)
+	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
+		int ret;
+
+		etmq = etm->queues.queue_array[i].priv;
+		ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
+						   cpu_metadata);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
+				       u64 hw_id)
+{
+	int err;
+	u64 *cpu_data;
+	u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
+
+	cpu_data = get_cpu_data(etm, cpu);
+	if (cpu_data == NULL)
 		return -EINVAL;
 
-	/* All good, associate the traceID with the metadata pointer */
-	inode->priv = cpu_metadata;
+	err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
+	if (err)
+		return err;
+
+	/*
+	 * if we are picking up the association from the packet, need to plug
+	 * the correct trace ID into the metadata for setting up decoders later.
+	 */
+	return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
+}
+
+static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu,
+					 u64 hw_id)
+{
+	struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
+	int ret;
+	u64 *cpu_data;
+	u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
+	u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
+
+	/*
+	 * Check sink id hasn't changed in per-cpu mode. In per-thread mode,
+	 * let it pass for now until an actual overlapping trace ID is hit. In
+	 * most cases IDs won't overlap even if the sink changes.
+	 */
+	if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
+	    etmq->sink_id != sink_id) {
+		pr_err("CS_ETM: mismatch between sink IDs\n");
+		return -EINVAL;
+	}
+
+	etmq->sink_id = sink_id;
+
+	/* Find which other queues use this sink and link their ID maps */
+	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
+		struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;
+
+		/* Different sinks, skip */
+		if (other_etmq->sink_id != etmq->sink_id)
+			continue;
+
+		/* Already linked, skip */
+		if (other_etmq->traceid_list == etmq->traceid_list)
+			continue;
+
+		/* At the point of first linking, this one should be empty */
+		if (!intlist__empty(etmq->traceid_list)) {
+			pr_err("CS_ETM: Can't link populated trace ID lists\n");
+			return -EINVAL;
+		}
+
+		etmq->own_traceid_list = NULL;
+		intlist__delete(etmq->traceid_list);
+		etmq->traceid_list = other_etmq->traceid_list;
+		break;
+	}
+
+	cpu_data = get_cpu_data(etm, cpu);
+	ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
+	if (ret)
+		return ret;
+
+	ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
+	if (ret)
+		return ret;
 
 	return 0;
 }
@@ -260,7 +422,6 @@ static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
 
 /*
  * update metadata trace ID from the value found in the AUX_HW_INFO packet.
- * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
 */
 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
 {
@@ -282,33 +443,31 @@ static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
 }
 
 /*
- * FIELD_GET (linux/bitfield.h) not available outside kernel code,
- * and the header contains too many dependencies to just copy over,
- * so roll our own based on the original
- */
-#define __bf_shf(x) (__builtin_ffsll(x) - 1)
-#define FIELD_GET(_mask, _reg) \
-	({ \
-		(typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \
-	})
-
-/*
- * Get a metadata for a specific cpu from an array.
+ * Get a metadata index for a specific cpu from an array.
  *
 */
-static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
+static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
 {
 	int i;
-	u64 *metadata = NULL;
 
 	for (i = 0; i < etm->num_cpu; i++) {
 		if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
-			metadata = etm->metadata[i];
-			break;
+			return i;
 		}
 	}
 
-	return metadata;
+	return -1;
+}
+
+/*
+ * Get a metadata for a specific cpu from an array.
+ *
+ */
+static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
+{
+	int idx = get_cpu_data_idx(etm, cpu);
+
+	return (idx != -1) ? etm->metadata[idx] : NULL;
 }
 
 /*
@@ -323,21 +482,20 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
 {
 	struct cs_etm_auxtrace *etm;
 	struct perf_sample sample;
-	struct int_node *inode;
 	struct evsel *evsel;
-	u64 *cpu_data;
 	u64 hw_id;
 	int cpu, version, err;
-	u8 trace_chan_id, curr_chan_id;
 
 	/* extract and parse the HW ID */
 	hw_id = event->aux_output_hw_id.hw_id;
-	version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
-	trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
+	version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
 
 	/* check that we can handle this version */
-	if (version > CS_AUX_HW_ID_CURR_VERSION)
+	if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
+		pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
+		       version);
 		return -EINVAL;
+	}
 
 	/* get access to the etm metadata */
 	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
@@ -348,52 +506,26 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
 	evsel = evlist__event2evsel(session->evlist, event);
 	if (!evsel)
 		return -EINVAL;
+
+	perf_sample__init(&sample, /*all=*/false);
 	err = evsel__parse_sample(evsel, event, &sample);
 	if (err)
-		return err;
+		goto out;
 
 	cpu = sample.cpu;
 	if (cpu == -1) {
 		/* no CPU in the sample - possibly recorded with an old version of perf */
 		pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
-		return -EINVAL;
+		err = -EINVAL;
+		goto out;
 	}
 
-	/* See if the ID is mapped to a CPU, and it matches the current CPU */
-	inode = intlist__find(traceid_list, trace_chan_id);
-	if (inode) {
-		cpu_data = inode->priv;
-		if ((int)cpu_data[CS_ETM_CPU] != cpu) {
-			pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
-			return -EINVAL;
-		}
-
-		/* check that the mapped ID matches */
-		err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
-		if (err)
-			return err;
-
-		if (curr_chan_id != trace_chan_id) {
-			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
-			return -EINVAL;
-		}
-
-		/* mapped and matched - return OK */
-		return 0;
+	if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) {
+		err = cs_etm__process_trace_id_v0(etm, cpu, hw_id);
+		goto out;
 	}
 
-	cpu_data = get_cpu_data(etm, cpu);
-	if (cpu_data == NULL)
-		return err;
-
-	/* not one we've seen before - lets map it */
-	err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
-	if (err)
-		return err;
-
-	/*
-	 * if we are picking up the association from the packet, need to plug
-	 * the correct trace ID into the metadata for setting up decoders later.
-	 */
-	err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
+	err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
+out:
+	perf_sample__exit(&sample);
 	return err;
 }
 
@@ -637,80 +769,79 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
 	}
 }
 
-static void cs_etm__packet_dump(const char *pkt_string)
+static void cs_etm__packet_dump(const char *pkt_string, void *data)
 {
 	const char *color = PERF_COLOR_BLUE;
 	int len = strlen(pkt_string);
+	struct cs_etm_queue *etmq = data;
+	char queue_nr[64];
+
+	if (verbose)
+		snprintf(queue_nr, sizeof(queue_nr), "Qnr:%u; ", etmq->queue_nr);
+	else
+		queue_nr[0] = '\0';
 
 	if (len && (pkt_string[len-1] == '\n'))
-		color_fprintf(stdout, color, " %s", pkt_string);
+		color_fprintf(stdout, color, " %s%s", queue_nr, pkt_string);
 	else
-		color_fprintf(stdout, color, " %s\n", pkt_string);
+		color_fprintf(stdout, color, " %s%s\n", queue_nr, pkt_string);
 
 	fflush(stdout);
 }
 
 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
-					  struct cs_etm_auxtrace *etm, int idx,
-					  u32 etmidr)
+					  u64 *metadata, u32 etmidr)
 {
-	u64 **metadata = etm->metadata;
-
-	t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
-	t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR];
-	t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR];
+	t_params->protocol = cs_etm__get_v7_protocol_version(etmidr);
+	t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR];
+	t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR];
 }
 
 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
-					  struct cs_etm_auxtrace *etm, int idx)
+					  u64 *metadata)
 {
-	u64 **metadata = etm->metadata;
-
-	t_params[idx].protocol = CS_ETM_PROTO_ETMV4i;
-	t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
-	t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
-	t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
-	t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
-	t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
-	t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
+	t_params->protocol = CS_ETM_PROTO_ETMV4i;
+	t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0];
+	t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1];
+	t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2];
+	t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8];
+	t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR];
+	t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR];
 }
 
 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
-					struct cs_etm_auxtrace *etm, int idx)
+					u64 *metadata)
 {
-	u64 **metadata = etm->metadata;
-
-	t_params[idx].protocol = CS_ETM_PROTO_ETE;
-	t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETE_TRCIDR0];
-	t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETE_TRCIDR1];
-	t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETE_TRCIDR2];
-	t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETE_TRCIDR8];
-	t_params[idx].ete.reg_configr = metadata[idx][CS_ETE_TRCCONFIGR];
-	t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETE_TRCTRACEIDR];
-	t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH];
+	t_params->protocol = CS_ETM_PROTO_ETE;
+	t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0];
+	t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1];
+	t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2];
+	t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8];
+	t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR];
+	t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR];
+	t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH];
 }
 
 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
-				     struct cs_etm_auxtrace *etm,
-				     int decoders)
+				     struct cs_etm_queue *etmq)
 {
-	int i;
-	u32 etmidr;
-	u64 architecture;
+	struct int_node *inode;
 
-	for (i = 0; i < decoders; i++) {
-		architecture = etm->metadata[i][CS_ETM_MAGIC];
+	intlist__for_each_entry(inode, etmq->traceid_list) {
+		u64 *metadata = inode->priv;
+		u64 architecture = metadata[CS_ETM_MAGIC];
+		u32 etmidr;
 
 		switch (architecture) {
 		case __perf_cs_etmv3_magic:
-			etmidr = etm->metadata[i][CS_ETM_ETMIDR];
-			cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr);
+			etmidr = metadata[CS_ETM_ETMIDR];
+			cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr);
 			break;
 		case __perf_cs_etmv4_magic:
-			cs_etm__set_trace_param_etmv4(t_params, etm, i);
+			cs_etm__set_trace_param_etmv4(t_params++, metadata);
 			break;
 		case __perf_cs_ete_magic:
-			cs_etm__set_trace_param_ete(t_params, etm, i);
+			cs_etm__set_trace_param_ete(t_params++, metadata);
 			break;
 		default:
 			return -EINVAL;
@@ -722,8 +853,7 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
 
 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
 					struct cs_etm_queue *etmq,
-					enum cs_etm_decoder_operation mode,
-					bool formatted)
+					enum cs_etm_decoder_operation mode)
 {
 	int ret = -EINVAL;
 
@@ -733,7 +863,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
 	d_params->packet_printer = cs_etm__packet_dump;
 	d_params->operation = mode;
 	d_params->data = etmq;
-	d_params->formatted = formatted;
+	d_params->formatted = etmq->format == FORMATTED;
 	d_params->fsyncs = false;
 	d_params->hsyncs = false;
 	d_params->frame_aligned = true;
@@ -772,7 +902,7 @@ static void cs_etm__dump_event(struct cs_etm_queue *etmq,
 }
 
 static int cs_etm__flush_events(struct perf_session *session,
-				struct perf_tool *tool)
+				const struct perf_tool *tool)
 {
 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
 						   struct cs_etm_auxtrace,
@@ -834,6 +964,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
 
 static void cs_etm__free_queue(void *priv)
 {
+	struct int_node *inode, *tmp;
 	struct cs_etm_queue *etmq = priv;
 
 	if (!etmq)
@@ -841,6 +972,16 @@ static void cs_etm__free_queue(void *priv)
 
 	cs_etm_decoder__free(etmq->decoder);
 	cs_etm__free_traceid_queues(etmq);
+
+	if (etmq->own_traceid_list) {
+		/* First remove all traceID/metadata nodes for the RB tree */
+		intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
+			intlist__remove(etmq->own_traceid_list, inode);
+
+		/* Then the RB tree itself */
+		intlist__delete(etmq->own_traceid_list);
+	}
+
 	free(etmq);
 }
 
@@ -863,19 +1004,12 @@ static void cs_etm__free_events(struct perf_session *session)
 static void cs_etm__free(struct perf_session *session)
 {
 	int i;
-	struct int_node *inode, *tmp;
 	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
 						   struct cs_etm_auxtrace,
 						   auxtrace);
 	cs_etm__free_events(session);
 	session->auxtrace = NULL;
 
-	/* First remove all traceID/metadata nodes for the RB tree */
-	intlist__for_each_entry_safe(inode, tmp, traceid_list)
-		intlist__remove(traceid_list, inode);
-	/* Then the RB tree itself */
-	intlist__delete(traceid_list);
-
 	for (i = 0; i < aux->num_cpu; i++)
 		zfree(&aux->metadata[i]);
 
@@ -997,7 +1131,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
 	if (!dso)
 		goto out;
 
-	if (dso->data.status == DSO_DATA_STATUS_ERROR &&
+	if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
 	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
 		goto out;
 
@@ -1011,11 +1145,11 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
 	if (len <= 0) {
 		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
 				 " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
-		if (!dso->auxtrace_warned) {
+		if (!dso__auxtrace_warned(dso)) {
 			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
-				address,
-				dso->long_name ? dso->long_name : "Unknown");
-			dso->auxtrace_warned = true;
+			       address,
+			       dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
+			dso__set_auxtrace_warned(dso);
 		}
 		goto out;
 	}
@@ -1025,19 +1159,9 @@ out:
 	return ret;
 }
 
-static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
-						bool formatted)
+static struct cs_etm_queue *cs_etm__alloc_queue(void)
 {
-	struct cs_etm_decoder_params d_params;
-	struct cs_etm_trace_params *t_params = NULL;
-	struct cs_etm_queue *etmq;
-	/*
-	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
-	 * needed.
-	 */
-	int decoders = formatted ? etm->num_cpu : 1;
-
-	etmq = zalloc(sizeof(*etmq));
+	struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));
 
 	if (!etmq)
 		return NULL;
@@ -1045,42 +1169,17 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
 	if (!etmq->traceid_queues_list)
 		goto out_free;
 
-	/* Use metadata to fill in trace parameters for trace decoder */
-	t_params = zalloc(sizeof(*t_params) * decoders);
-
-	if (!t_params)
-		goto out_free;
-
-	if (cs_etm__init_trace_params(t_params, etm, decoders))
-		goto out_free;
-
-	/* Set decoder parameters to decode trace packets */
-	if (cs_etm__init_decoder_params(&d_params, etmq,
-					dump_trace ? CS_ETM_OPERATION_PRINT :
-						     CS_ETM_OPERATION_DECODE,
-					formatted))
-		goto out_free;
-
-	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
-					    t_params);
-
-	if (!etmq->decoder)
-		goto out_free;
-
 	/*
-	 * Register a function to handle all memory accesses required by
-	 * the trace decoder library.
+	 * Create an RB tree for traceID-metadata tuple. Since the conversion
+	 * has to be made for each packet that gets decoded, optimizing access
+	 * in anything other than a sequential array is worth doing.
 	 */
-	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
-					      0x0L, ((u64) -1L),
-					      cs_etm__mem_access))
-		goto out_free_decoder;
+	etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
+	if (!etmq->traceid_list)
+		goto out_free;
 
-	zfree(&t_params);
 	return etmq;
 
-out_free_decoder:
-	cs_etm_decoder__free(etmq->decoder);
out_free:
 	intlist__delete(etmq->traceid_queues_list);
 	free(etmq);
@@ -1090,15 +1189,14 @@ out_free:
 
 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
 			       struct auxtrace_queue *queue,
-			       unsigned int queue_nr,
-			       bool formatted)
+			       unsigned int queue_nr)
 {
 	struct cs_etm_queue *etmq = queue->priv;
 
-	if (list_empty(&queue->head) || etmq)
+	if (etmq)
 		return 0;
 
-	etmq = cs_etm__alloc_queue(etm, formatted);
+	etmq = cs_etm__alloc_queue();
 
 	if (!etmq)
 		return -ENOMEM;
@@ -1106,7 +1204,9 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
 	queue->priv = etmq;
 	etmq->etm = etm;
 	etmq->queue_nr = queue_nr;
+	queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
 	etmq->offset = 0;
+	etmq->sink_id = SINK_UNSET;
 
 	return 0;
 }
@@ -1250,8 +1350,12 @@ static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
 
 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
 {
-	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
-	if (packet->sample_type == CS_ETM_DISCONTINUITY)
+	/*
+	 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
+	 * appear in samples.
+	 */
+	if (packet->sample_type == CS_ETM_DISCONTINUITY ||
+	    packet->sample_type == CS_ETM_EXCEPTION)
 		return 0;
 
 	return packet->start_addr;
@@ -1463,8 +1567,9 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
 	int ret = 0;
 	struct cs_etm_auxtrace *etm = etmq->etm;
 	union perf_event *event = tidq->event_buf;
-	struct perf_sample sample = {.ip = 0,};
+	struct perf_sample sample;
 
+	perf_sample__init(&sample, /*all=*/true);
 	event->sample.header.type = PERF_RECORD_SAMPLE;
 	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
 	event->sample.header.size = sizeof(struct perf_event_header);
@@ -1501,6 +1606,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
 			"CS ETM Trace: failed to deliver instruction event, error %d\n",
 			ret);
 
+	perf_sample__exit(&sample);
 	return ret;
 }
 
@@ -1578,35 +1684,6 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
 	return ret;
 }
 
-struct cs_etm_synth {
-	struct perf_tool dummy_tool;
-	struct perf_session *session;
-};
-
-static int cs_etm__event_synth(struct perf_tool *tool,
-			       union perf_event *event,
-			       struct perf_sample *sample __maybe_unused,
-			       struct machine *machine __maybe_unused)
-{
-	struct cs_etm_synth *cs_etm_synth =
-		      container_of(tool, struct cs_etm_synth, dummy_tool);
-
-	return perf_session__deliver_synth_event(cs_etm_synth->session,
-						 event, NULL);
-}
-
-static int cs_etm__synth_event(struct perf_session *session,
-			       struct perf_event_attr *attr, u64 id)
-{
-	struct cs_etm_synth cs_etm_synth;
-
-	memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
-	cs_etm_synth.session = session;
-
-	return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
-					   &id, cs_etm__event_synth);
-}
-
 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
 				struct perf_session *session)
 {
@@ -1649,16 +1726,13 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
 	attr.read_format = evsel->core.attr.read_format;
 
 	/* create new id val to be a fixed offset from evsel id */
-	id = evsel->core.id[0] + 1000000000;
-
-	if (!id)
-		id = 1;
+	id = auxtrace_synth_id_range_start(evsel);
 
 	if (etm->synth_opts.branches) {
 		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
 		attr.sample_period = 1;
 		attr.sample_type |= PERF_SAMPLE_ADDR;
-		err = cs_etm__synth_event(session, &attr, id);
+		err = perf_session__deliver_synth_attr_event(session, &attr, id);
 		if (err)
 			return err;
 		etm->branches_sample_type = attr.sample_type;
@@ -1681,7 +1755,7 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
 		attr.sample_period = etm->synth_opts.period;
 		etm->instructions_sample_period = attr.sample_period;
-		err = cs_etm__synth_event(session, &attr, id);
+		err = perf_session__deliver_synth_attr_event(session, &attr, id);
 		if (err)
 			return err;
 		etm->instructions_sample_type = attr.sample_type;
@@ -2235,7 +2309,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
 					     PERF_IP_FLAG_TRACE_END;
 		break;
 	case CS_ETM_EXCEPTION:
-		ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
+		ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
 		if (ret)
 			return ret;
 
@@ -2422,12 +2496,6 @@ static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
 
 		/* Ignore return value */
 		cs_etm__process_traceid_queue(etmq, tidq);
-
-		/*
-		 * Generate an instruction sample with the remaining
-		 * branchstack entries.
-		 */
-		cs_etm__flush(etmq, tidq);
 	}
 }
 
@@ -2570,7 +2638,7 @@ static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
 
 	while (1) {
 		if (!etm->heap.heap_cnt)
-			goto out;
+			break;
 
 		/* Take the entry at the top of the min heap */
 		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
@@ -2653,6 +2721,23 @@ refetch:
 		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
 	}
 
+	for (i = 0; i < etm->queues.nr_queues; i++) {
+		struct int_node *inode;
+
+		etmq = etm->queues.queue_array[i].priv;
+		if (!etmq)
+			continue;
+
+		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
+			int idx = (int)(intptr_t)inode->priv;
+
+			/* Flush any remaining branch stack entries */
+			tidq = etmq->traceid_queues[idx];
+			ret = cs_etm__end_block(etmq, tidq);
+			if (ret)
+				return ret;
+		}
+	}
 out:
 	return ret;
 }
@@ -2723,7 +2808,7 @@ static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
 static int cs_etm__process_event(struct perf_session *session,
 				 union perf_event *event,
 				 struct perf_sample *sample,
-				 struct perf_tool *tool)
+				 const struct perf_tool *tool)
 {
 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
 						   struct cs_etm_auxtrace,
@@ -2793,7 +2878,7 @@ static void dump_queued_data(struct cs_etm_auxtrace *etm,
 
 static int cs_etm__process_auxtrace_event(struct perf_session *session,
 					  union perf_event *event,
-					  struct perf_tool *tool __maybe_unused)
+					  const struct perf_tool *tool __maybe_unused)
 {
 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
 						   struct cs_etm_auxtrace,
@@ -2819,17 +2904,6 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
 		if (err)
 			return err;
 
-		/*
-		 * Knowing if the trace is formatted or not requires a lookup of
-		 * the aux record so only works in non-piped mode where data is
-		 * queued in cs_etm__queue_aux_records(). Always assume
-		 * formatted in piped mode (true).
-		 */
-		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
-					  idx, true);
-		if (err)
-			return err;
-
 		if (dump_trace)
 			if (auxtrace_buffer__get_data(buffer, fd)) {
 				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
@@ -2946,8 +3020,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
 	struct perf_record_auxtrace *auxtrace_event;
 	union perf_event auxtrace_fragment;
 	__u64 aux_offset, aux_size;
-	__u32 idx;
-	bool formatted;
+	enum cs_etm_format format;
 
 	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
 						   struct cs_etm_auxtrace,
@@ -3013,6 +3086,8 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
 	if (aux_offset >= auxtrace_event->offset &&
 	    aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
+		struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv;
+
 		/*
 		 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
 		 * based on the sizes of the aux event, and queue that fragment.
@@ -3029,10 +3104,14 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
 		if (err)
 			return err;
 
-		idx = auxtrace_event->idx;
-		formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
-		return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
-					   idx, formatted);
+		format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
+			 UNFORMATTED : FORMATTED;
+		if (etmq->format != UNSET && format != etmq->format) {
+			pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
+			return -EINVAL;
+		}
+		etmq->format = format;
+		return 0;
 	}
 
 	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
@@ -3078,9 +3157,10 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf
 	evsel = evlist__event2evsel(session->evlist, event);
 	if (!evsel)
 		return -EINVAL;
+
+	perf_sample__init(&sample, /*all=*/false);
 	ret = evsel__parse_sample(evsel, event, &sample);
 	if (ret)
-		return ret;
+		goto out;
 
 	/*
	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
 	 */
@@ -3095,7 +3175,7 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf
 			 * 1 ('not found')
 			 */
 			if (ret != 1)
-				return ret;
+				goto out;
 		}
 	}
 
@@ -3105,7 +3185,10 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf
 	 */
 	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
 	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
-	return 0;
+	ret = 0;
+out:
+	perf_sample__exit(&sample);
+	return ret;
 }
 
 static int cs_etm__queue_aux_records(struct perf_session *session)
@@ -3158,7 +3241,8 @@ static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
 }
 
 /* map trace ids to correct metadata block, from information in metadata */
-static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
+static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
+					  u64 **metadata)
 {
 	u64 cs_etm_magic;
 	u8 trace_chan_id;
@@ -3180,7 +3264,7 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
 			/* unknown magic number */
 			return -EINVAL;
 		}
-		err = cs_etm__map_trace_id(trace_chan_id, metadata[i]);
+		err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
 		if (err)
 			return err;
 	}
@@ -3188,30 +3272,85 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
 }
 
 /*
- * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
- * unused value to reduce the number of unneeded decoders created.
+ * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
+ * (formatted or not) packets to create the decoders.
 */
-static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
+static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
 {
-	u64 cs_etm_magic;
-	int i;
+	struct cs_etm_decoder_params d_params;
+	struct cs_etm_trace_params *t_params;
+	int decoders = intlist__nr_entries(etmq->traceid_list);
 
-	for (i = 0; i < num_cpu; i++) {
-		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
-		switch (cs_etm_magic) {
-		case __perf_cs_etmv3_magic:
-			if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
-				metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
-			break;
-		case __perf_cs_etmv4_magic:
-		case __perf_cs_ete_magic:
-			if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
-				metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
-			break;
-		default:
-			/* unknown magic number */
-			return -EINVAL;
-		}
+	if (decoders == 0)
+		return 0;
+
+	/*
+	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
+	 * needed.
+	 */
+	if (etmq->format == UNFORMATTED)
+		assert(decoders == 1);
+
+	/* Use metadata to fill in trace parameters for trace decoder */
+	t_params = zalloc(sizeof(*t_params) * decoders);
+
+	if (!t_params)
+		goto out_free;
+
+	if (cs_etm__init_trace_params(t_params, etmq))
+		goto out_free;
+
+	/* Set decoder parameters to decode trace packets */
+	if (cs_etm__init_decoder_params(&d_params, etmq,
+					dump_trace ? CS_ETM_OPERATION_PRINT :
+						     CS_ETM_OPERATION_DECODE))
+		goto out_free;
+
+	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
+					    t_params);
+
+	if (!etmq->decoder)
+		goto out_free;
+
+	/*
+	 * Register a function to handle all memory accesses required by
+	 * the trace decoder library.
+	 */
+	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
+					      0x0L, ((u64) -1L),
+					      cs_etm__mem_access))
+		goto out_free_decoder;
+
+	zfree(&t_params);
+	return 0;
+
+out_free_decoder:
+	cs_etm_decoder__free(etmq->decoder);
+out_free:
+	zfree(&t_params);
+	return -EINVAL;
+}
+
+static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
+{
+	struct auxtrace_queues *queues = &etm->queues;
+
+	for (unsigned int i = 0; i < queues->nr_queues; i++) {
+		bool empty = list_empty(&queues->queue_array[i].head);
+		struct cs_etm_queue *etmq = queues->queue_array[i].priv;
+		int ret;
+
+		/*
+		 * Don't create decoders for empty queues, mainly because
+		 * etmq->format is unknown for empty queues.
+		 */
+		assert(empty || etmq->format != UNSET);
+		if (empty)
+			continue;
+
+		ret = cs_etm__create_queue_decoders(etmq);
+		if (ret)
+			return ret;
 	}
 	return 0;
 }
@@ -3225,30 +3364,19 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
 	int event_header_size = sizeof(struct perf_event_header);
 	int total_size = auxtrace_info->header.size;
 	int priv_size = 0;
-	int num_cpu;
+	int num_cpu, max_cpu = 0;
 	int err = 0;
 	int aux_hw_id_found;
-	int i, j;
+	int i;
 	u64 *ptr = NULL;
 	u64 **metadata = NULL;
 
-	/*
-	 * Create an RB tree for traceID-metadata tuple. Since the conversion
-	 * has to be made for each packet that gets decoded, optimizing access
-	 * in anything other than a sequential array is worth doing.
-	 */
-	traceid_list = intlist__new(NULL);
-	if (!traceid_list)
-		return -ENOMEM;
-
 	/* First the global part */
 	ptr = (u64 *) auxtrace_info->priv;
 	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
 	metadata = zalloc(sizeof(*metadata) * num_cpu);
-	if (!metadata) {
-		err = -ENOMEM;
-		goto err_free_traceid_list;
-	}
+	if (!metadata)
+		return -ENOMEM;
 
 	/* Start parsing after the common part of the header */
 	i = CS_HEADER_VERSION_MAX;
@@ -3259,7 +3387,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
 	 * required by the trace decoder to properly decode the trace due
	 * to its highly compressed nature.
 	 */
-	for (j = 0; j < num_cpu; j++) {
+	for (int j = 0; j < num_cpu; j++) {
 		if (ptr[i] == __perf_cs_etmv3_magic) {
 			metadata[j] =
 				cs_etm__create_meta_blk(ptr, &i,
@@ -3283,6 +3411,9 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
 			err = -ENOMEM;
 			goto err_free_metadata;
 		}
+
+		if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
+			max_cpu = metadata[j][CS_ETM_CPU];
 	}
 
 	/*
@@ -3312,10 +3443,16 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
 	 */
 	etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
 
-	err = auxtrace_queues__init(&etm->queues);
+	err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
 	if (err)
 		goto err_free_etm;
 
+	for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
+		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
+		if (err)
+			goto err_free_queues;
+	}
+
 	if (session->itrace_synth_opts->set) {
 		etm->synth_opts = *session->itrace_synth_opts;
 	} else {
@@ -3332,12 +3469,27 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
 	etm->metadata = metadata;
 	etm->auxtrace_type = auxtrace_info->type;
 
-	/* Use virtual timestamps if all ETMs report ts_source = 1 */
-	etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
+	if (etm->synth_opts.use_timestamp)
+		/*
+		 * Prior to Armv8.4, Arm CPUs don't support the FEAT_TRF
+		 * feature, so the decoder cannot know whether the traced
+		 * timestamp is the same as the kernel time.
+		 *
+		 * A user with knowledge of the platform can specify the
+		 * itrace option 'T' to tell the decoder to forcibly use the
+		 * traced timestamp as the kernel time.
+		 */
+		etm->has_virtual_ts = true;
+	else
+		/* Use virtual timestamps if all ETMs report ts_source = 1 */
+		etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
 
 	if (!etm->has_virtual_ts)
 		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
-			    "The time field of the samples will not be set accurately.\n\n");
+			    "The time field of the samples will not be set accurately.\n"
+			    "For Arm CPUs prior to Armv8.4 or without FEAT_TRF support,\n"
+			    "you can specify the itrace option 'T' for timestamp decoding\n"
+			    "if the Coresight timestamp on the platform is the same as the kernel time.\n\n");
 
 	etm->auxtrace.process_event = cs_etm__process_event;
 	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
@@ -3364,12 +3516,16 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
 	if (err)
 		goto err_free_queues;
 
+	err = cs_etm__queue_aux_records(session);
+	if (err)
+		goto err_free_queues;
+
 	/*
 	 * Map Trace ID values to CPU metadata.
 	 *
-	 * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
-	 * files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
-	 * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
+	 * Trace metadata will always contain Trace ID values from the legacy algorithm
+	 * in case it's read by a version of Perf that doesn't know about HW_ID packets
+	 * or the kernel doesn't emit them.
 	 *
 	 * The updated kernel drivers that use AUX_HW_ID to send Trace IDs will attempt to use
 	 * the same IDs as the old algorithm as far as is possible, unless there are clashes
 	 * in which case a different value will be used.
 	 *
 	 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
 	 * those packets. If they are there then the values will be mapped and plugged into
-	 * the metadata. We then set any remaining metadata values with the used flag to a
-	 * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
+	 * the metadata and decoders are only created for each mapping received.
 	 *
 	 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
-	 * then we map Trace ID values to CPU directly from the metadata - clearing any unused
-	 * flags if present.
+	 * then we map Trace ID values to CPU directly from the metadata and create decoders
+	 * for all mappings.
 	 */
 
-	/* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
+	/* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
 	aux_hw_id_found = 0;
 	err = perf_session__peek_events(session, session->header.data_offset,
 					session->header.data_size,
@@ -3394,17 +3549,14 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
 	if (err)
 		goto err_free_queues;
 
-	/* if HW ID found then clear any unused metadata ID values */
-	if (aux_hw_id_found)
-		err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
-	/* otherwise, this is a file with metadata values only, map from metadata */
-	else
-		err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
-
-	if (err)
-		goto err_free_queues;
+	/* if no HW ID found this is a file with metadata values only, map from metadata */
+	if (!aux_hw_id_found) {
+		err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
+		if (err)
+			goto err_free_queues;
+	}
 
-	err = cs_etm__queue_aux_records(session);
+	err = cs_etm__create_decoders(etm);
 	if (err)
 		goto err_free_queues;
 
@@ -3418,10 +3570,8 @@ err_free_etm:
 	zfree(&etm);
err_free_metadata:
 	/* No need to check @metadata[j], free(NULL) is supported */
-	for (j = 0; j < num_cpu; j++)
+	for (int j = 0; j < num_cpu; j++)
 		zfree(&metadata[j]);
 	zfree(&metadata);
-err_free_traceid_list:
-	intlist__delete(traceid_list);
 	return err;
 }
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 7cca37887917..a8caeea720aa 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -230,19 +230,9 @@ struct cs_etm_packet_queue {
 /* CoreSight trace ID is currently the bottom 7 bits of the value */
 #define CORESIGHT_TRACE_ID_VAL_MASK	GENMASK(6, 0)
 
-/*
- * perf record will set the legacy meta data values as unused initially.
- * This allows perf report to manage the decoders created when dynamic
- * allocation in operation.
- */
-#define CORESIGHT_TRACE_ID_UNUSED_FLAG	BIT(31)
-
-/* Value to set for unused trace ID values */
-#define CORESIGHT_TRACE_ID_UNUSED_VAL	0x7F
-
 int cs_etm__process_auxtrace_info(union perf_event *event,
 				  struct perf_session *session);
-struct perf_event_attr *cs_etm_get_default_config(struct perf_pmu *pmu);
+void cs_etm_get_default_config(const struct perf_pmu *pmu, struct perf_event_attr *attr);
 
 enum cs_etm_pid_fmt {
 	CS_ETM_PIDFMT_NONE,
@@ -252,7 +242,7 @@ enum cs_etm_pid_fmt {
 
 #ifdef HAVE_CSTRACE_SUPPORT
 #include <opencsd/ocsd_if_types.h>
-int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
+int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu);
 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq);
 int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
 			    u8 trace_chan_id, ocsd_ex_level el);
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 2b732bccabad..3d2e437e1354 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -36,7 +36,7 @@
 #include "util/sample.h"
 
 #ifdef HAVE_LIBTRACEEVENT
-#include <traceevent/event-parse.h>
+#include <event-parse.h>
 #endif
 
 #define pr_N(n, fmt, ...) \
@@ -426,8 +426,9 @@ static int add_tracepoint_values(struct ctf_writer *cw,
 				 struct evsel *evsel,
 				 struct perf_sample *sample)
 {
-	struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
-	struct tep_format_field *fields = evsel->tp_format->format.fields;
+	const struct tep_event *tp_format = evsel__tp_format(evsel);
+	struct tep_format_field *common_fields = tp_format->format.common_fields;
+	struct tep_format_field *fields = tp_format->format.fields;
 	int ret;
 
 	ret = add_tracepoint_fields_values(cw, event_class, event,
@@ -792,7 +793,7 @@ static bool is_flush_needed(struct ctf_stream *cs)
 	return cs->count >= STREAM_FLUSH_COUNT;
 }
 
-static int process_sample_event(struct perf_tool *tool,
+static int process_sample_event(const struct perf_tool *tool,
 				union perf_event *_event,
 				struct perf_sample *sample,
 				struct evsel *evsel,
@@ -871,7 +872,7 @@ do { \
 } while(0)
 
 #define __FUNC_PROCESS_NON_SAMPLE(_name, body) \
-static int process_##_name##_event(struct perf_tool *tool, \
+static int process_##_name##_event(const struct perf_tool *tool, \
 				   union perf_event *_event, \
 				   struct perf_sample *sample, \
 				   struct machine *machine) \
@@ -1064,8 +1065,9 @@ static int add_tracepoint_types(struct ctf_writer *cw,
 				struct evsel *evsel,
 				struct bt_ctf_event_class *class)
 {
-	struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
-	struct tep_format_field *fields = evsel->tp_format->format.fields;
+	const struct tep_event *tp_format = evsel__tp_format(evsel);
+	struct tep_format_field *common_fields = tp_format ? tp_format->format.common_fields : NULL;
+	struct tep_format_field *fields = tp_format ? tp_format->format.fields : NULL;
 	int ret;
 
 	ret = add_tracepoint_fields_types(cw, common_fields, class);
@@ -1336,14 +1338,14 @@ static void cleanup_events(struct perf_session *session)
 static int setup_streams(struct ctf_writer *cw, struct perf_session *session)
 {
 	struct ctf_stream **stream;
-	struct perf_header *ph = &session->header;
+	struct perf_env *env = perf_session__env(session);
 	int ncpus;
 
 	/*
	 * Try to get the number of cpus used in the data file,
 	 * if not present fallback to the MAX_CPUS.
 	 */
-	ncpus = ph->env.nr_cpus_avail ?: MAX_CPUS;
+	ncpus = env->nr_cpus_avail ?: MAX_CPUS;
 
 	stream = zalloc(sizeof(*stream) * ncpus);
 	if (!stream) {
@@ -1369,7 +1371,7 @@ static void free_streams(struct ctf_writer *cw)
 static int ctf_writer__setup_env(struct ctf_writer *cw,
 				 struct perf_session *session)
 {
-	struct perf_header *header = &session->header;
+	struct perf_env *env = perf_session__env(session);
 	struct bt_ctf_writer *writer = cw->writer;
 
 #define ADD(__n, __v) \
@@ -1378,11 +1380,11 @@ do { \
 		return -1; \
 } while (0)
 
-	ADD("host",    header->env.hostname);
+	ADD("host",    env->hostname);
 	ADD("sysname", "Linux");
-	ADD("release", header->env.os_release);
-	ADD("version", header->env.version);
-	ADD("machine", header->env.arch);
+	ADD("release", env->os_release);
+	ADD("version", env->version);
+	ADD("machine", env->arch);
 	ADD("domain", "kernel");
 	ADD("tracer_name", "perf");
 
@@ -1399,7 +1401,7 @@ static int ctf_writer__setup_clock(struct ctf_writer *cw,
 	int64_t offset = 0;
 
 	if (tod) {
-		struct perf_env *env = &session->header.env;
+		struct perf_env *env = perf_session__env(session);
 
 		if (!env->clock.enabled) {
" @@ -1607,25 +1609,23 @@ int bt_convert__perf2ctf(const char *input, const char *path, .mode = PERF_DATA_MODE_READ, .force = opts->force, }; - struct convert c = { - .tool = { - .sample = process_sample_event, - .mmap = perf_event__process_mmap, - .mmap2 = perf_event__process_mmap2, - .comm = perf_event__process_comm, - .exit = perf_event__process_exit, - .fork = perf_event__process_fork, - .lost = perf_event__process_lost, - .tracing_data = perf_event__process_tracing_data, - .build_id = perf_event__process_build_id, - .namespaces = perf_event__process_namespaces, - .ordered_events = true, - .ordering_requires_timestamps = true, - }, - }; + struct convert c = {}; struct ctf_writer *cw = &c.writer; int err; + perf_tool__init(&c.tool, /*ordered_events=*/true); + c.tool.sample = process_sample_event; + c.tool.mmap = perf_event__process_mmap; + c.tool.mmap2 = perf_event__process_mmap2; + c.tool.comm = perf_event__process_comm; + c.tool.exit = perf_event__process_exit; + c.tool.fork = perf_event__process_fork; + c.tool.lost = perf_event__process_lost; + c.tool.tracing_data = perf_event__process_tracing_data; + c.tool.build_id = perf_event__process_build_id; + c.tool.namespaces = perf_event__process_namespaces; + c.tool.ordering_requires_timestamps = true; + if (opts->all) { c.tool.comm = process_comm_event; c.tool.exit = process_exit_event; diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c index 5bb3c2ba95ca..9dc1e184cf3c 100644 --- a/tools/perf/util/data-convert-json.c +++ b/tools/perf/util/data-convert-json.c @@ -28,7 +28,7 @@ #include "util/tool.h" #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif struct convert_json { @@ -118,7 +118,7 @@ static void output_json_key_format(FILE *out, bool comma, int depth, va_end(args); } -static void output_sample_callchain_entry(struct perf_tool *tool, +static void output_sample_callchain_entry(const struct perf_tool *tool, u64 ip, struct addr_location *al) { struct convert_json *c = container_of(tool, struct convert_json, tool); @@ -134,7 +134,7 @@ static void output_sample_callchain_entry(struct perf_tool *tool, output_json_key_string(out, false, 5, "symbol", al->sym->name); if (dso) { - const char *dso_name = dso->short_name; + const char *dso_name = dso__short_name(dso); if (dso_name && strlen(dso_name) > 0) { fputc(',', out); @@ -146,7 +146,7 @@ static void output_sample_callchain_entry(struct perf_tool *tool, output_json_format(out, false, 4, "}"); } -static int process_sample_event(struct perf_tool *tool, +static int process_sample_event(const struct perf_tool *tool, union perf_event *event __maybe_unused, struct perf_sample *sample, struct evsel *evsel __maybe_unused, @@ -230,12 +230,12 @@ static int process_sample_event(struct perf_tool *tool, #ifdef HAVE_LIBTRACEEVENT if (sample->raw_data) { - int i; - struct tep_format_field **fields; + struct tep_event *tp_format = evsel__tp_format(evsel); + struct tep_format_field **fields = tp_format ? 
tep_event_fields(tp_format) : NULL; - fields = tep_event_fields(evsel->tp_format); if (fields) { - i = 0; + int i = 0; + while (fields[i]) { struct trace_seq s; @@ -257,7 +257,8 @@ static int process_sample_event(struct perf_tool *tool, static void output_headers(struct perf_session *session, struct convert_json *c) { struct stat st; - struct perf_header *header = &session->header; + const struct perf_header *header = &session->header; + const struct perf_env *env = perf_session__env(session); int ret; int fd = perf_data__fd(session->data); int i; @@ -280,30 +281,32 @@ static void output_headers(struct perf_session *session, struct convert_json *c) output_json_key_format(out, true, 2, "data-size", "%" PRIu64, header->data_size); output_json_key_format(out, true, 2, "feat-offset", "%" PRIu64, header->feat_offset); - output_json_key_string(out, true, 2, "hostname", header->env.hostname); - output_json_key_string(out, true, 2, "os-release", header->env.os_release); - output_json_key_string(out, true, 2, "arch", header->env.arch); + output_json_key_string(out, true, 2, "hostname", env->hostname); + output_json_key_string(out, true, 2, "os-release", env->os_release); + output_json_key_string(out, true, 2, "arch", env->arch); + + if (env->cpu_desc) + output_json_key_string(out, true, 2, "cpu-desc", env->cpu_desc); - output_json_key_string(out, true, 2, "cpu-desc", header->env.cpu_desc); - output_json_key_string(out, true, 2, "cpuid", header->env.cpuid); - output_json_key_format(out, true, 2, "nrcpus-online", "%u", header->env.nr_cpus_online); - output_json_key_format(out, true, 2, "nrcpus-avail", "%u", header->env.nr_cpus_avail); + output_json_key_string(out, true, 2, "cpuid", env->cpuid); + output_json_key_format(out, true, 2, "nrcpus-online", "%u", env->nr_cpus_online); + output_json_key_format(out, true, 2, "nrcpus-avail", "%u", env->nr_cpus_avail); - if (header->env.clock.enabled) { + if (env->clock.enabled) { output_json_key_format(out, true, 2, "clockid", - "%u", header->env.clock.clockid); + "%u", env->clock.clockid); output_json_key_format(out, true, 2, "clock-time", - "%" PRIu64, header->env.clock.clockid_ns); + "%" PRIu64, env->clock.clockid_ns); output_json_key_format(out, true, 2, "real-time", - "%" PRIu64, header->env.clock.tod_ns); + "%" PRIu64, env->clock.tod_ns); } - output_json_key_string(out, true, 2, "perf-version", header->env.version); + output_json_key_string(out, true, 2, "perf-version", env->version); output_json_key_format(out, true, 2, "cmdline", "["); - for (i = 0; i < header->env.nr_cmdline; i++) { + for (i = 0; i < env->nr_cmdline; i++) { output_json_delimiters(out, i != 0, 3); - output_json_string(c->out, header->env.cmdline_argv[i]); + output_json_string(c->out, env->cmdline_argv[i]); } output_json_format(out, false, 2, "]"); } @@ -314,39 +317,36 @@ int bt_convert__perf2json(const char *input_name, const char *output_name, struct perf_session *session; int fd; int ret = -1; - struct convert_json c = { - .tool = { - .sample = process_sample_event, - .mmap = perf_event__process_mmap, - .mmap2 = perf_event__process_mmap2, - .comm = perf_event__process_comm, - .namespaces = perf_event__process_namespaces, - .cgroup = perf_event__process_cgroup, - .exit = perf_event__process_exit, - .fork = perf_event__process_fork, - .lost = perf_event__process_lost, -#ifdef HAVE_LIBTRACEEVENT - .tracing_data = perf_event__process_tracing_data, -#endif - .build_id = perf_event__process_build_id, - .id_index = perf_event__process_id_index, - .auxtrace_info = 
perf_event__process_auxtrace_info, - .auxtrace = perf_event__process_auxtrace, - .event_update = perf_event__process_event_update, - .ordered_events = true, - .ordering_requires_timestamps = true, - }, .first = true, .events_count = 0, }; - struct perf_data data = { .mode = PERF_DATA_MODE_READ, .path = input_name, .force = opts->force, }; + perf_tool__init(&c.tool, /*ordered_events=*/true); + c.tool.sample = process_sample_event; + c.tool.mmap = perf_event__process_mmap; + c.tool.mmap2 = perf_event__process_mmap2; + c.tool.comm = perf_event__process_comm; + c.tool.namespaces = perf_event__process_namespaces; + c.tool.cgroup = perf_event__process_cgroup; + c.tool.exit = perf_event__process_exit; + c.tool.fork = perf_event__process_fork; + c.tool.lost = perf_event__process_lost; +#ifdef HAVE_LIBTRACEEVENT + c.tool.tracing_data = perf_event__process_tracing_data; +#endif + c.tool.build_id = perf_event__process_build_id; + c.tool.id_index = perf_event__process_id_index; + c.tool.auxtrace_info = perf_event__process_auxtrace_info; + c.tool.auxtrace = perf_event__process_auxtrace; + c.tool.event_update = perf_event__process_event_update; + c.tool.ordering_requires_timestamps = true; + if (opts->all) { pr_err("--all is currently unsupported for JSON output.\n"); goto err; @@ -377,8 +377,7 @@ int bt_convert__perf2json(const char *input_name, const char *output_name, fprintf(stderr, "Error creating perf session!\n"); goto err_fclose; } - - if (symbol__init(&session->header.env) < 0) { + if (symbol__init(perf_session__env(session)) < 0) { fprintf(stderr, "Symbol init error!\n"); goto err_session_delete; } diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index fc16299c915f..164eb45a0b36 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -17,6 +17,7 @@ #include "util.h" // rm_rf_perf_data() #include "debug.h" #include "header.h" +#include "rlimit.h" #include <internal/lib.h> static void close_dir(struct perf_data_file *files, int nr) @@ -35,6 +36,7 @@ void perf_data__close_dir(struct perf_data *data) int perf_data__create_dir(struct perf_data *data, int nr) { + enum rlimit_action set_rlimit = NO_CHANGE; struct perf_data_file *files = NULL; int i, ret; @@ -54,11 +56,21 @@ int perf_data__create_dir(struct perf_data *data, int nr) goto out_err; } +retry_open: ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR); if (ret < 0) { + /* + * If using parallel threads to collect data, + * perf record needs at least 6 fds per CPU. + * When we run out of them try to increase the limits. 
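+ * rlimit__increase_nofile() returns true only while it can still
+ * raise the RLIMIT_NOFILE soft limit, so this retry loop is bounded;
+ * set_rlimit is reset after a successful open so the next file may
+ * bump the limit again if needed.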
+ */ + if (errno == EMFILE && rlimit__increase_nofile(&set_rlimit)) + goto retry_open; + ret = -errno; goto out_err; } + set_rlimit = NO_CHANGE; file->fd = ret; } @@ -146,26 +158,6 @@ out_err: return ret; } -int perf_data__update_dir(struct perf_data *data) -{ - int i; - - if (WARN_ON(!data->is_dir)) - return -EINVAL; - - for (i = 0; i < data->dir.nr; i++) { - struct perf_data_file *file = &data->dir.files[i]; - struct stat st; - - if (fstat(file->fd, &st)) - return -1; - - file->size = st.st_size; - } - - return 0; -} - static bool check_pipe(struct perf_data *data) { struct stat st; @@ -192,7 +184,12 @@ static bool check_pipe(struct perf_data *data) data->file.fd = fd; data->use_stdio = false; } - } else { + + /* + * When is_pipe and data->file.fd is given, use given fd + * instead of STDIN_FILENO or STDOUT_FILENO + */ + } else if (data->file.fd <= 0) { data->file.fd = fd; } } @@ -401,7 +398,7 @@ ssize_t perf_data_file__write(struct perf_data_file *file, } ssize_t perf_data__write(struct perf_data *data, - void *buf, size_t size) + void *buf, size_t size) { if (data->use_stdio) { if (fwrite(buf, size, 1, data->file.fptr) == 1) @@ -412,14 +409,12 @@ ssize_t perf_data__write(struct perf_data *data, } int perf_data__switch(struct perf_data *data, - const char *postfix, - size_t pos, bool at_exit, - char **new_filepath) + const char *postfix, + size_t pos, bool at_exit, + char **new_filepath) { int ret; - if (check_pipe(data)) - return -EINVAL; if (perf_data__is_read(data)) return -EINVAL; diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index effcc195d7e9..1438e32e0451 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -80,7 +80,7 @@ int perf_data__open(struct perf_data *data); void perf_data__close(struct perf_data *data); ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size); ssize_t perf_data__write(struct perf_data *data, - void *buf, size_t size); + void *buf, size_t size); ssize_t perf_data_file__write(struct perf_data_file *file, void *buf, size_t size); /* @@ -91,13 +91,12 @@ ssize_t perf_data_file__write(struct perf_data_file *file, * Return value is fd of new output. 
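+ * Returns a negative error code on failure, e.g. -EINVAL when the
+ * data is opened in read mode.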
*/ int perf_data__switch(struct perf_data *data, - const char *postfix, - size_t pos, bool at_exit, char **new_filepath); + const char *postfix, + size_t pos, bool at_exit, char **new_filepath); int perf_data__create_dir(struct perf_data *data, int nr); int perf_data__open_dir(struct perf_data *data); void perf_data__close_dir(struct perf_data *data); -int perf_data__update_dir(struct perf_data *data); unsigned long perf_data__size(struct perf_data *data); int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz); bool has_kcore_dir(const char *path); diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index b9fb71ab7a73..8f52e8cefcf3 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -146,10 +146,10 @@ int db_export__comm_thread(struct db_export *dbe, struct comm *comm, int db_export__dso(struct db_export *dbe, struct dso *dso, struct machine *machine) { - if (dso->db_id) + if (dso__db_id(dso)) return 0; - dso->db_id = ++dbe->dso_last_db_id; + dso__set_db_id(dso, ++dbe->dso_last_db_id); if (dbe->export_dso) return dbe->export_dso(dbe, dso, machine); @@ -181,10 +181,10 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al, if (al->map) { struct dso *dso = map__dso(al->map); - err = db_export__dso(dbe, dso, maps__machine(al->maps)); + err = db_export__dso(dbe, dso, maps__machine(thread__maps(al->thread))); if (err) return err; - *dso_db_id = dso->db_id; + *dso_db_id = dso__db_id(dso); if (!al->sym) { al->sym = symbol__new(al->addr, 0, 0, 0, "unknown"); @@ -253,9 +253,10 @@ static struct call_path *call_path_from_sample(struct db_export *dbe, */ addr_location__init(&al); al.sym = node->ms.sym; - al.map = node->ms.map; - al.maps = thread__maps(thread); + al.map = map__get(node->ms.map); + al.maps = maps__get(thread__maps(thread)); al.addr = node->ip; + al.thread = thread__get(thread); if (al.map && !al.sym) al.sym = dso__find_symbol(map__dso(al.map), al.addr); @@ -358,14 +359,18 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, }; struct thread *main_thread; struct comm *comm = NULL; - struct machine *machine; + struct machine *machine = NULL; int err; + if (thread__maps(thread)) + machine = maps__machine(thread__maps(thread)); + if (!machine) + return -1; + err = db_export__evsel(dbe, evsel); if (err) return err; - machine = maps__machine(al->maps); err = db_export__machine(dbe, machine); if (err) return err; diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 88378c4c5dd9..1dfa4d0eec4d 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -14,11 +14,19 @@ #ifdef HAVE_BACKTRACE_SUPPORT #include <execinfo.h> #endif +#include "addr_location.h" #include "color.h" -#include "event.h" #include "debug.h" +#include "env.h" +#include "event.h" +#include "machine.h" +#include "map.h" #include "print_binary.h" +#include "srcline.h" +#include "symbol.h" +#include "synthetic-events.h" #include "target.h" +#include "thread.h" #include "trace-event.h" #include "ui/helpline.h" #include "ui/ui.h" @@ -27,23 +35,34 @@ #include <linux/ctype.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #else #define LIBTRACEEVENT_VERSION 0 #endif int verbose; +int debug_kmaps; int debug_peo_args; bool dump_trace = false, quiet = false; int debug_ordered_events; static int redirect_to_stderr; int debug_data_convert; -static FILE *debug_file; +static FILE *_debug_file; bool debug_display_time; +int debug_type_profile; + +FILE 
*debug_file(void) +{ + if (!_debug_file) { + debug_set_file(stderr); + pr_warning_once("debug_file not set"); + } + return _debug_file; +} void debug_set_file(FILE *file) { - debug_file = file; + _debug_file = file; } void debug_set_display_time(bool set) @@ -78,8 +97,8 @@ int veprintf(int level, int var, const char *fmt, va_list args) if (use_browser >= 1 && !redirect_to_stderr) { ui_helpline__vshow(fmt, args); } else { - ret = fprintf_time(debug_file); - ret += vfprintf(debug_file, fmt, args); + ret = fprintf_time(debug_file()); + ret += vfprintf(debug_file(), fmt, args); } } @@ -107,9 +126,8 @@ static int veprintf_time(u64 t, const char *fmt, va_list args) nsecs -= secs * NSEC_PER_SEC; usecs = nsecs / NSEC_PER_USEC; - ret = fprintf(stderr, "[%13" PRIu64 ".%06" PRIu64 "] ", - secs, usecs); - ret += vfprintf(stderr, fmt, args); + ret = fprintf(debug_file(), "[%13" PRIu64 ".%06" PRIu64 "] ", secs, usecs); + ret += vfprintf(debug_file(), fmt, args); return ret; } @@ -221,6 +239,8 @@ static struct sublevel_option debug_opts[] = { { .name = "stderr", .value_ptr = &redirect_to_stderr}, { .name = "data-convert", .value_ptr = &debug_data_convert }, { .name = "perf-event-open", .value_ptr = &debug_peo_args }, + { .name = "kmaps", .value_ptr = &debug_kmaps }, + { .name = "type-profile", .value_ptr = &debug_type_profile }, { .name = NULL, } }; @@ -259,6 +279,8 @@ int perf_quiet_option(void) /* For debug variables that are used as bool types, set to 0. */ redirect_to_stderr = 0; debug_peo_args = 0; + debug_kmaps = 0; + debug_type_profile = 0; return 0; } @@ -284,21 +306,66 @@ void perf_debug_setup(void) libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper); } +void __dump_stack(FILE *file, void **stackdump, size_t stackdump_size) +{ + /* TODO: async safety. printf, malloc, etc. aren't safe inside a signal handler. */ + pid_t pid = getpid(); + struct machine *machine; + struct thread *thread = NULL; + struct perf_env host_env; + + perf_env__init(&host_env); + machine = machine__new_live(&host_env, /*kernel_maps=*/false, pid); + + if (machine) + thread = machine__find_thread(machine, pid, pid); + +#ifdef HAVE_BACKTRACE_SUPPORT + if (!machine || !thread) { + /* + * Backtrace functions are async signal safe. Fall back on them + * if machine/thread creation fails. + */ + backtrace_symbols_fd(stackdump, stackdump_size, fileno(file)); + machine__delete(machine); + perf_env__exit(&host_env); + return; + } +#endif + + for (size_t i = 0; i < stackdump_size; i++) { + struct addr_location al; + u64 addr = (u64)(uintptr_t)stackdump[i]; + bool printed = false; + + addr_location__init(&al); + if (thread && thread__find_map(thread, PERF_RECORD_MISC_USER, addr, &al)) { + al.sym = map__find_symbol(al.map, al.addr); + if (al.sym) { + fprintf(file, " #%zd %p in %s ", i, stackdump[i], al.sym->name); + printed = true; + } + } + if (!printed) + fprintf(file, " #%zd %p ", i, stackdump[i]); + + map__fprintf_srcline(al.map, al.addr, "", file); + fprintf(file, "\n"); + addr_location__exit(&al); + } + thread__put(thread); + machine__delete(machine); + perf_env__exit(&host_env); +} + /* Obtain a backtrace and print it to stdout. 
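Frames are symbolized via __dump_stack() when a live machine can be set up.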
*/ #ifdef HAVE_BACKTRACE_SUPPORT void dump_stack(void) { - void *array[16]; - size_t size = backtrace(array, ARRAY_SIZE(array)); - char **strings = backtrace_symbols(array, size); - size_t i; - - printf("Obtained %zd stack frames.\n", size); - - for (i = 0; i < size; i++) - printf("%s\n", strings[i]); + void *stackdump[32]; + size_t size = backtrace(stackdump, ARRAY_SIZE(stackdump)); - free(strings); + __dump_stack(stdout, stackdump, size); } #else void dump_stack(void) {} diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index f99468a7f681..6b737e195ce1 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -9,10 +9,12 @@ #include <linux/compiler.h> extern int verbose; +extern int debug_kmaps; extern int debug_peo_args; extern bool quiet, dump_trace; extern int debug_ordered_events; extern int debug_data_convert; +extern int debug_type_profile; #ifndef pr_fmt #define pr_fmt(fmt) fmt @@ -77,11 +79,13 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5) int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); +FILE *debug_file(void); void debug_set_file(FILE *file); void debug_set_display_time(bool set); void perf_debug_setup(void); int perf_quiet_option(void); +void __dump_stack(FILE *file, void **stackdump, size_t stackdump_size); void dump_stack(void); void sighandler_dump_stack(int sig); diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c new file mode 100644 index 000000000000..4a559b3e8cdc --- /dev/null +++ b/tools/perf/util/debuginfo.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * DWARF debug information handling code. Copied from probe-finder.c. + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/zalloc.h> + +#include "build-id.h" +#include "dso.h" +#include "debug.h" +#include "debuginfo.h" +#include "symbol.h" + +#ifdef HAVE_DEBUGINFOD_SUPPORT +#include <elfutils/debuginfod.h> +#endif + +/* Dwarf FL wrappers */ +static char *debuginfo_path; /* Currently dummy */ + +static const Dwfl_Callbacks offline_callbacks = { + .find_debuginfo = dwfl_standard_find_debuginfo, + .debuginfo_path = &debuginfo_path, + + .section_address = dwfl_offline_section_address, + + /* We use this table for core files too. 
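dwfl_build_id_find_elf() resolves the ELF image by build-id, so the same callbacks also serve core files.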
*/ + .find_elf = dwfl_build_id_find_elf, +}; + +/* Get a Dwarf from offline image */ +static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, + const char *path) +{ + GElf_Addr dummy; + int fd; + + fd = open(path, O_RDONLY); + if (fd < 0) + return fd; + + dbg->dwfl = dwfl_begin(&offline_callbacks); + if (!dbg->dwfl) + goto error; + + dwfl_report_begin(dbg->dwfl); + dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd); + if (!dbg->mod) + goto error; + + dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias); + if (!dbg->dbg) + goto error; + + dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy); + + dwfl_report_end(dbg->dwfl, NULL, NULL); + + return 0; +error: + if (dbg->dwfl) + dwfl_end(dbg->dwfl); + else + close(fd); + memset(dbg, 0, sizeof(*dbg)); + + return -ENOENT; +} + +static struct debuginfo *__debuginfo__new(const char *path) +{ + struct debuginfo *dbg = zalloc(sizeof(*dbg)); + if (!dbg) + return NULL; + + if (debuginfo__init_offline_dwarf(dbg, path) < 0) + zfree(&dbg); + if (dbg) + pr_debug("Open Debuginfo file: %s\n", path); + return dbg; +} + +enum dso_binary_type distro_dwarf_types[] = { + DSO_BINARY_TYPE__FEDORA_DEBUGINFO, + DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, + DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__NOT_FOUND, +}; + +struct debuginfo *debuginfo__new(const char *path) +{ + enum dso_binary_type *type; + char buf[PATH_MAX], nil = '\0'; + struct dso *dso; + struct debuginfo *dinfo = NULL; + struct build_id bid = { .size = 0}; + + /* Try to open distro debuginfo files */ + dso = dso__new(path); + if (!dso) + goto out; + + /* + * Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO. Don't block + * in case the path isn't for a regular file.
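+ * filename__read_build_id() returns the number of build-id bytes
+ * read, so a positive value means a usable build-id was found.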
+ */ + assert(!dso__has_build_id(dso)); + if (filename__read_build_id(path, &bid) > 0) + dso__set_build_id(dso, &bid); + + for (type = distro_dwarf_types; + !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND; + type++) { + if (dso__read_binary_type_filename(dso, *type, &nil, + buf, PATH_MAX) < 0) + continue; + dinfo = __debuginfo__new(buf); + } + dso__put(dso); + +out: + if (dinfo) + return dinfo; + + /* if failed to open all distro debuginfo, open given binary */ + symbol__join_symfs(buf, path); + return __debuginfo__new(buf); +} + +void debuginfo__delete(struct debuginfo *dbg) +{ + if (dbg) { + if (dbg->dwfl) + dwfl_end(dbg->dwfl); + free(dbg); + } +} + +/* For the kernel module, we need a special code to get a DIE */ +int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, + bool adjust_offset) +{ + int n, i; + Elf32_Word shndx; + Elf_Scn *scn; + Elf *elf; + GElf_Shdr mem, *shdr; + const char *p; + + elf = dwfl_module_getelf(dbg->mod, &dbg->bias); + if (!elf) + return -EINVAL; + + /* Get the number of relocations */ + n = dwfl_module_relocations(dbg->mod); + if (n < 0) + return -ENOENT; + /* Search the relocation related .text section */ + for (i = 0; i < n; i++) { + p = dwfl_module_relocation_info(dbg->mod, i, &shndx); + if (strcmp(p, ".text") == 0) { + /* OK, get the section header */ + scn = elf_getscn(elf, shndx); + if (!scn) + return -ENOENT; + shdr = gelf_getshdr(scn, &mem); + if (!shdr) + return -ENOENT; + *offs = shdr->sh_addr; + if (adjust_offset) + *offs -= shdr->sh_offset; + } + } + return 0; +} + +#ifdef HAVE_DEBUGINFOD_SUPPORT +int get_source_from_debuginfod(const char *raw_path, + const char *sbuild_id, char **new_path) +{ + debuginfod_client *c = debuginfod_begin(); + const char *p = raw_path; + int fd; + + if (!c) + return -ENOMEM; + + fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id, + 0, p, new_path); + pr_debug("Search %s from debuginfod -> %d\n", p, fd); + if (fd >= 0) + close(fd); + debuginfod_end(c); + if (fd < 0) { + pr_debug("Failed to find %s in debuginfod (%s)\n", + raw_path, sbuild_id); + return -ENOENT; + } + pr_debug("Got a source %s\n", *new_path); + + return 0; +} +#endif /* HAVE_DEBUGINFOD_SUPPORT */ diff --git a/tools/perf/util/debuginfo.h b/tools/perf/util/debuginfo.h new file mode 100644 index 000000000000..a52d69932815 --- /dev/null +++ b/tools/perf/util/debuginfo.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _PERF_DEBUGINFO_H +#define _PERF_DEBUGINFO_H + +#include <errno.h> +#include <linux/compiler.h> + +#ifdef HAVE_LIBDW_SUPPORT + +#include "dwarf-aux.h" + +/* debug information structure */ +struct debuginfo { + Dwarf *dbg; + Dwfl_Module *mod; + Dwfl *dwfl; + Dwarf_Addr bias; + const unsigned char *build_id; +}; + +/* This also tries to open distro debuginfo */ +struct debuginfo *debuginfo__new(const char *path); +void debuginfo__delete(struct debuginfo *dbg); + +int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, + bool adjust_offset); + +#else /* HAVE_LIBDW_SUPPORT */ + +/* dummy debug information structure */ +struct debuginfo { +}; + +static inline struct debuginfo *debuginfo__new(const char *path __maybe_unused) +{ + return NULL; +} + +static inline void debuginfo__delete(struct debuginfo *dbg __maybe_unused) +{ +} + +typedef void Dwarf_Addr; + +static inline int debuginfo__get_text_offset(struct debuginfo *dbg __maybe_unused, + Dwarf_Addr *offs __maybe_unused, + bool adjust_offset __maybe_unused) +{ + return -EINVAL; +} + +#endif /* HAVE_LIBDW_SUPPORT */ + +#ifdef 
HAVE_DEBUGINFOD_SUPPORT +int get_source_from_debuginfod(const char *raw_path, const char *sbuild_id, + char **new_path); +#else /* HAVE_DEBUGINFOD_SUPPORT */ +static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused, + const char *sbuild_id __maybe_unused, + char **new_path __maybe_unused) +{ + return -ENOTSUP; +} +#endif /* HAVE_DEBUGINFOD_SUPPORT */ + +#endif /* _PERF_DEBUGINFO_H */ diff --git a/tools/perf/util/demangle-cxx.h b/tools/perf/util/demangle-cxx.h index 26b5b66c0b4e..9359937a881a 100644 --- a/tools/perf/util/demangle-cxx.h +++ b/tools/perf/util/demangle-cxx.h @@ -2,6 +2,8 @@ #ifndef __PERF_DEMANGLE_CXX #define __PERF_DEMANGLE_CXX 1 +#include <stdbool.h> + #ifdef __cplusplus extern "C" { #endif diff --git a/tools/perf/util/demangle-rust-v0.c b/tools/perf/util/demangle-rust-v0.c new file mode 100644 index 000000000000..19924d85407d --- /dev/null +++ b/tools/perf/util/demangle-rust-v0.c @@ -0,0 +1,2042 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// The contents of this file come from the Rust rustc-demangle library, hosted +// in the <https://github.com/rust-lang/rustc-demangle> repository, licensed +// under "Apache-2.0 OR MIT". For copyright details, see +// <https://github.com/rust-lang/rustc-demangle/blob/main/README.md>. +// Please note that the file should be kept as close as possible to upstream. + +// Code for demangling Rust symbols. This code is mostly +// a line-by-line translation of the Rust code in `rustc-demangle`. + +// you can find the latest version of this code in https://github.com/rust-lang/rustc-demangle + +#include <stdint.h> +#include <stddef.h> +#include <string.h> +#include <stdbool.h> +#include <sys/param.h> +#include <stdio.h> + +#include "demangle-rust-v0.h" + +#if defined(__GNUC__) || defined(__clang__) +#define NODISCARD __attribute__((warn_unused_result)) +#else +#define NODISCARD +#endif + +#define MAX_DEPTH 500 + +typedef enum { + DemangleOk, + DemangleInvalid, + DemangleRecursed, + DemangleBug, +} demangle_status; + +struct demangle_v0 { + const char *mangled; + size_t mangled_len; +}; + +struct demangle_legacy { + const char *mangled; + size_t mangled_len; + size_t elements; +}; + +// private version of memrchr to avoid _GNU_SOURCE +static void *demangle_memrchr(const void *s, int c, size_t n) { + const uint8_t *s_ = s; + for (; n != 0; n--) { + if (s_[n-1] == c) { + return (void*)&s_[n-1]; + } + } + return NULL; +} + + +static bool unicode_iscontrol(uint32_t ch) { + // this is *technically* a unicode table, but + // some unicode properties are simpler than you might think + return ch < 0x20 || (ch >= 0x7f && ch < 0xa0); +} + +// "good enough" tables, the only consequence is that when printing +// *constant strings*, some characters are printed as `\u{abcd}` rather than themselves. +// +// I'm leaving these here to allow easily replacing them with actual +// tables if desired. +static bool unicode_isprint(uint32_t ch) { + if (ch < 0x20) { + return false; + } + if (ch < 0x7f) { + return true; + } + return false; +} + +static bool unicode_isgraphemextend(uint32_t ch) { + (void)ch; + return false; +} + +static bool str_isascii(const char *s, size_t s_len) { + for (size_t i = 0; i < s_len; i++) { + if (s[i] & 0x80) { + return false; + } + } + + return true; +} + +typedef enum { + PunycodeOk, + PunycodeError +} punycode_status; + +struct parser { + // the parser assumes that `sym` has a safe "terminating byte". It might be NUL, + // but it might also be something else if a symbol is "truncated". 
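+ // next is the parser's cursor into sym; depth counts recursive
+ // parses and is bounded by MAX_DEPTH so hostile symbols cannot
+ // exhaust the stack.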
+ const char *sym; + size_t sym_len; + size_t next; + uint32_t depth; +}; + +struct printer { + demangle_status status; // if status == 0 parser is valid + struct parser parser; + char *out; // NULL for no output [in which case out_len is not decremented] + size_t out_len; + uint32_t bound_lifetime_depth; + bool alternate; +}; + +static NODISCARD overflow_status printer_print_path(struct printer *printer, bool in_value); +static NODISCARD overflow_status printer_print_type(struct printer *printer); +static NODISCARD overflow_status printer_print_const(struct printer *printer, bool in_value); + +static NODISCARD demangle_status try_parse_path(struct parser *parser) { + struct printer printer = { + DemangleOk, + *parser, + NULL, + SIZE_MAX, + 0, + false + }; + overflow_status ignore = printer_print_path(&printer, false); // can't fail since no output + (void)ignore; + *parser = printer.parser; + return printer.status; +} + +NODISCARD static demangle_status rust_demangle_v0_demangle(const char *s, size_t s_len, struct demangle_v0 *res, const char **rest) { + if (s_len > strlen(s)) { + // s_len only exists to shorten the string, this is not a buffer API + return DemangleInvalid; + } + + const char *inner; + size_t inner_len; + if (s_len >= 2 && !strncmp(s, "_R", strlen("_R"))) { + inner = s+2; + inner_len = s_len - 2; + } else if (s_len >= 1 && !strncmp(s, "R", strlen("R"))) { + // On Windows, dbghelp strips leading underscores, so we accept "R..." + // form too. + inner = s+1; + inner_len = s_len - 1; + } else if (s_len >= 3 && !strncmp(s, "__R", strlen("__R"))) { + // On OSX, symbols are prefixed with an extra _ + inner = s+3; + inner_len = s_len - 3; + } else { + return DemangleInvalid; + } + + // Paths always start with uppercase characters. + if (*inner < 'A' || *inner > 'Z') { + return DemangleInvalid; + } + + if (!str_isascii(inner, inner_len)) { + return DemangleInvalid; + } + + struct parser parser = { inner, inner_len, 0, 0 }; + + demangle_status status = try_parse_path(&parser); + if (status != DemangleOk) return status; + char next = parser.sym[parser.next]; + + // Instantiating crate (paths always start with uppercase characters). 
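+ // rustc may append a second path naming the instantiating crate
+ // (e.g. for generic instantiations); it is validated here but not
+ // printed.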
+ if (parser.next < parser.sym_len && next >= 'A' && next <= 'Z') { + status = try_parse_path(&parser); + if (status != DemangleOk) return status; + } + + res->mangled = inner; + res->mangled_len = inner_len; + if (rest) { + *rest = parser.sym + parser.next; + } + + return DemangleOk; +} + +// This might require `len` to be up to 3 characters bigger than the real output len in case of utf-8 +NODISCARD static overflow_status rust_demangle_v0_display_demangle(struct demangle_v0 res, char *out, size_t len, bool alternate) { + struct printer printer = { + DemangleOk, + { + res.mangled, + res.mangled_len, + 0, + 0 + }, + out, + len, + 0, + alternate + }; + if (printer_print_path(&printer, true) == OverflowOverflow) { + return OverflowOverflow; + } + if (printer.out_len < OVERFLOW_MARGIN) { + return OverflowOverflow; + } + *printer.out = '\0'; + return OverflowOk; +} + +static size_t code_to_utf8(unsigned char *buffer, uint32_t code) +{ + if (code <= 0x7F) { + buffer[0] = code; + return 1; + } + if (code <= 0x7FF) { + buffer[0] = 0xC0 | (code >> 6); /* 110xxxxx */ + buffer[1] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + return 2; + } + if (code <= 0xFFFF) { + buffer[0] = 0xE0 | (code >> 12); /* 1110xxxx */ + buffer[1] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ + buffer[2] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + return 3; + } + if (code <= 0x10FFFF) { + buffer[0] = 0xF0 | (code >> 18); /* 11110xxx */ + buffer[1] = 0x80 | ((code >> 12) & 0x3F); /* 10xxxxxx */ + buffer[2] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ + buffer[3] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + return 4; + } + return 0; +} + + +// return length of char at byte, or SIZE_MAX if invalid. buf should have 4 valid characters +static NODISCARD size_t utf8_next_char(uint8_t *s, uint32_t *ch) { + uint8_t byte = *s; + // UTF8-1 = %x00-7F + // UTF8-2 = %xC2-DF UTF8-tail + // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / + // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) + // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / + // %xF4 %x80-8F 2( UTF8-tail ) + if (byte < 0x80) { + *ch = byte; + return 1; + } else if (byte < 0xc2) { + return SIZE_MAX; + } else if (byte < 0xe0) { + if (s[1] >= 0x80 && s[1] < 0xc0) { + *ch = ((byte&0x1f)<<6) + (s[1] & 0x3f); + return 2; + } + return SIZE_MAX; + } if (byte < 0xf0) { + if (!(s[1] >= 0x80 && s[1] < 0xc0) || !(s[2] >= 0x80 && s[2] < 0xc0)) { + return SIZE_MAX; // basic validation + } + if (byte == 0xe0 && s[1] < 0xa0) { + return SIZE_MAX; // overshort + } + if (byte == 0xed && s[1] >= 0xa0) { + return SIZE_MAX; // surrogate + } + *ch = ((byte&0x0f)<<12) + ((s[1] & 0x3f)<<6) + (s[2] & 0x3f); + return 3; + } else if (byte < 0xf5) { + if (!(s[1] >= 0x80 && s[1] < 0xc0) || !(s[2] >= 0x80 && s[2] < 0xc0) || !(s[3] >= 0x80 && s[3] < 0xc0)) { + return SIZE_MAX; // basic validation + } + if (byte == 0xf0 && s[1] < 0x90) { + return SIZE_MAX; // overshort + } + if (byte == 0xf4 && s[1] >= 0x90) { + return SIZE_MAX; // over max + } + *ch = ((byte&0x07)<<18) + ((s[1] & 0x3f)<<12) + ((s[2] & 0x3f)<<6) + (s[3]&0x3f); + return 4; + } else { + return SIZE_MAX; + } +} + +static NODISCARD bool validate_char(uint32_t n) { + return ((n ^ 0xd800) - 0x800) < 0x110000 - 0x800; +} + +#define SMALL_PUNYCODE_LEN 128 + +static NODISCARD punycode_status punycode_decode(const char *start, size_t ascii_len, const char *punycode_start, size_t punycode_len, uint32_t (*out_)[SMALL_PUNYCODE_LEN], size_t *out_len) { + uint32_t *out = *out_; + + if (punycode_len == 0) { + return PunycodeError; + } + 
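+ // Standard RFC 3492 decoder (base 36, tmin 1, tmax 26, skew 38,
+ // damp 700). Classic example: ascii part "bcher" plus punycode
+ // part "kva" decodes to "bücher".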
+ if (ascii_len > SMALL_PUNYCODE_LEN) { + return PunycodeError; + } + for (size_t i = 0; i < ascii_len; i++) { + out[i] = start[i]; + } + size_t len = ascii_len; + + size_t base = 36, t_min = 1, t_max = 26, skew = 38, damp = 700, bias = 72, i = 0, n = 0x80; + for (;;) { + size_t delta = 0, w = 1, k = 0; + for (;;) { + k += base; + size_t biased = k < bias ? 0 : k - bias; + size_t t = MIN(MAX(biased, t_min), t_max); + size_t d; + if (punycode_len == 0) { + return PunycodeError; + } + char nx = *punycode_start++; + punycode_len--; + if ('a' <= nx && nx <= 'z') { + d = nx - 'a'; + } else if ('0' <= nx && nx <= '9') { + d = 26 + (nx - '0'); + } else { + return PunycodeError; + } + if (w == 0 || d > SIZE_MAX / w || d*w > SIZE_MAX - delta) { + return PunycodeError; + } + delta += d * w; + if (d < t) { + break; + } + if (base < t || w == 0 || (base - t) > SIZE_MAX / w) { + return PunycodeError; + } + w *= (base - t); + } + + len += 1; + if (i > SIZE_MAX - delta) { + return PunycodeError; + } + i += delta; + if (n > SIZE_MAX - i / len) { + return PunycodeError; + } + n += i / len; + i %= len; + + // char validation + if (n > UINT32_MAX || !validate_char((uint32_t)n)) { + return PunycodeError; + } + + // insert new character + if (len > SMALL_PUNYCODE_LEN) { + return PunycodeError; + } + memmove(out + i + 1, out + i, (len - i - 1) * sizeof(uint32_t)); + out[i] = (uint32_t)n; + + // start i index at incremented position + i++; + + // If there are no more deltas, decoding is complete. + if (punycode_len == 0) { + *out_len = len; + return PunycodeOk; + } + + // Perform bias adaptation. + delta /= damp; + damp = 2; + + delta += delta / len; + k = 0; + while (delta > ((base - t_min) * t_max) / 2) { + delta /= base - t_min; + k += base; + } + bias = k + ((base - t_min + 1) * delta) / (delta + skew); + } +} + +struct ident { + const char *ascii_start; + size_t ascii_len; + const char *punycode_start; + size_t punycode_len; +}; + +static NODISCARD overflow_status display_ident(const char *ascii_start, size_t ascii_len, const char *punycode_start, size_t punycode_len, uint8_t *out, size_t *out_len) { + uint32_t outbuf[SMALL_PUNYCODE_LEN]; + + size_t wide_len; + size_t out_buflen = *out_len; + + if (punycode_len == 0) { + if (ascii_len > out_buflen) { + return OverflowOverflow; + } + memcpy(out, ascii_start, ascii_len); + *out_len = ascii_len; + } else if (punycode_decode(ascii_start, ascii_len, punycode_start, punycode_len, &outbuf, &wide_len) == PunycodeOk) { + size_t narrow_len = 0; + for (size_t i = 0; i < wide_len; i++) { + if (out_buflen - narrow_len < 4) { + return OverflowOverflow; + } + unsigned char *pos = &out[narrow_len]; + narrow_len += code_to_utf8(pos, outbuf[i]); + } + *out_len = narrow_len; + } else { + size_t narrow_len = 0; + if (out_buflen < strlen("punycode{")) { + return OverflowOverflow; + } + memcpy(out, "punycode{", strlen("punycode{")); + narrow_len = strlen("punycode{"); + if (ascii_len > 0) { + if (out_buflen - narrow_len < ascii_len || out_buflen - narrow_len - ascii_len < 1) { + return OverflowOverflow; + } + memcpy(out + narrow_len, ascii_start, ascii_len); + narrow_len += ascii_len; + out[narrow_len] = '-'; + narrow_len++; + } + if (out_buflen - narrow_len < punycode_len || out_buflen - narrow_len - punycode_len < 1) { + return OverflowOverflow; + } + memcpy(out + narrow_len, punycode_start, punycode_len); + narrow_len += punycode_len; + out[narrow_len] = '}'; + narrow_len++; + *out_len = narrow_len; + } + + return OverflowOk; +} + +static NODISCARD bool try_parse_uint(const 
char *buf, size_t len, uint64_t *result) { + size_t cur = 0; + for(;cur < len && buf[cur] == '0';cur++); + uint64_t result_val = 0; + if (len - cur > 16) return false; + for(;cur < len;cur++) { + char c = buf[cur]; + result_val <<= 4; + if ('0' <= c && c <= '9') { + result_val += c - '0'; + } else if ('a' <= c && c <= 'f') { + result_val += 10 + (c - 'a'); + } else { + return false; + } + } + *result = result_val; + return true; +} + +static NODISCARD bool dinibble2int(const char *buf, uint8_t *result) { + uint8_t result_val = 0; + for (int i = 0; i < 2; i++) { + char c = buf[i]; + result_val <<= 4; + if ('0' <= c && c <= '9') { + result_val += c - '0'; + } else if ('a' <= c && c <= 'f') { + result_val += 10 + (c - 'a'); + } else { + return false; + } + } + *result = result_val; + return true; +} + + +typedef enum { + NtsOk = 0, + NtsOverflow = 1, + NtsInvalid = 2 +} nibbles_to_string_status; + +// '\u{10ffff}', +margin +#define ESCAPED_SIZE 12 + +static NODISCARD size_t char_to_string(uint32_t ch, uint8_t quote, bool first, char (*buf)[ESCAPED_SIZE]) { + // encode the character + char *escaped_buf = *buf; + escaped_buf[0] = '\\'; + size_t escaped_len = 2; + switch (ch) { + case '\0': + escaped_buf[1] = '0'; + break; + case '\t': + escaped_buf[1] = 't'; + break; + case '\r': + escaped_buf[1] = 'r'; + break; + case '\n': + escaped_buf[1] = 'n'; + break; + case '\\': + escaped_buf[1] = '\\'; + break; + default: + if (ch == quote) { + escaped_buf[1] = ch; + } else if (!unicode_isprint(ch) || (first && unicode_isgraphemextend(ch))) { + int hexlen = snprintf(escaped_buf, ESCAPED_SIZE, "\\u{%x}", (unsigned int)ch); + if (hexlen < 0) { + return 0; // (snprintf shouldn't fail!) + } + escaped_len = hexlen; + } else { + // printable character + escaped_buf[0] = ch; + escaped_len = 1; + } + break; + } + + return escaped_len; +} + +// convert nibbles to a single/double-quoted string +static NODISCARD nibbles_to_string_status nibbles_to_string(const char *buf, size_t len, uint8_t *out, size_t *out_len) { + uint8_t quote = '"'; + bool first = true; + + if ((len % 2) != 0) { + return NtsInvalid; // odd number of nibbles + } + + size_t cur_out_len = 0; + + // write starting quote + if (out != NULL) { + cur_out_len = *out_len; + if (cur_out_len == 0) { + return NtsOverflow; + } + *out++ = quote; + cur_out_len--; + } + + uint8_t conv_buf[4] = {0}; + size_t conv_buf_len = 0; + while (len > 1 || conv_buf_len > 0) { + while (len > 1 && conv_buf_len < sizeof(conv_buf)) { + if (!dinibble2int(buf, &conv_buf[conv_buf_len])) { + return NtsInvalid; + } + conv_buf_len++; + buf += 2; + len -= 2; + } + + // conv_buf is full here if possible, process 1 UTF-8 character + uint32_t ch = 0; + size_t consumed = utf8_next_char(conv_buf, &ch); + if (consumed > conv_buf_len) { + // either SIZE_MAX (invalid UTF-8) or finished input buffer and + // there are still bytes remaining, in both cases invalid + return NtsInvalid; + } + + // "consume" the character + memmove(conv_buf, conv_buf+consumed, conv_buf_len-consumed); + conv_buf_len -= consumed; + + char escaped_buf[ESCAPED_SIZE]; + size_t escaped_len = char_to_string(ch, '"', first, &escaped_buf); + if (out != NULL) { + if (cur_out_len < escaped_len) { + return NtsOverflow; + } + memcpy(out, escaped_buf, escaped_len); + out += escaped_len; + cur_out_len -= escaped_len; + } + first = false; + } + + // write ending quote + if (out != NULL) { + if (cur_out_len == 0) { + return NtsOverflow; + } + *out++ = quote; + cur_out_len--; + *out_len -= cur_out_len; // subtract remaining 
space to get used space + } + + return NtsOk; +} + +static const char* basic_type(uint8_t tag) { + switch(tag) { + case 'b': + return "bool"; + case 'c': + return "char"; + case 'e': + return "str"; + case 'u': + return "()"; + case 'a': + return "i8"; + case 's': + return "i16"; + case 'l': + return "i32"; + case 'x': + return "i64"; + case 'n': + return "i128"; + case 'i': + return "isize"; + case 'h': + return "u8"; + case 't': + return "u16"; + case 'm': + return "u32"; + case 'y': + return "u64"; + case 'o': + return "u128"; + case 'j': + return "usize"; + case 'f': + return "f32"; + case 'd': + return "f64"; + case 'z': + return "!"; + case 'p': + return "_"; + case 'v': + return "..."; + default: + return NULL; + } +} + +static NODISCARD demangle_status parser_push_depth(struct parser *parser) { + parser->depth++; + if (parser->depth > MAX_DEPTH) { + return DemangleRecursed; + } else { + return DemangleOk; + } +} + +static demangle_status parser_pop_depth(struct parser *parser) { + parser->depth--; + return DemangleOk; +} + +static uint8_t parser_peek(struct parser const *parser) { + if (parser->next == parser->sym_len) { + return 0; // add a "pseudo nul terminator" to avoid peeking past the end of a symbol + } else { + return parser->sym[parser->next]; + } +} + +static bool parser_eat(struct parser *parser, uint8_t ch) { + if (parser_peek(parser) == ch) { + if (ch != 0) { // safety: make sure we don't skip past the NUL terminator + parser->next++; + } + return true; + } else { + return false; + } +} + +static uint8_t parser_next(struct parser *parser) { + // don't advance after end of input, and return an imaginary NUL terminator + if (parser->next == parser->sym_len) { + return 0; + } else { + return parser->sym[parser->next++]; + } +} + +static NODISCARD demangle_status parser_ch(struct parser *parser, uint8_t *next) { + // don't advance after end of input + if (parser->next == parser->sym_len) { + return DemangleInvalid; + } else { + *next = parser->sym[parser->next++]; + return DemangleOk; + } +} + +struct buf { + const char *start; + size_t len; +}; + +static NODISCARD demangle_status parser_hex_nibbles(struct parser *parser, struct buf *buf) { + size_t start = parser->next; + for (;;) { + uint8_t ch = parser_next(parser); + if (ch == '_') { + break; + } + if (!(('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f'))) { + return DemangleInvalid; + } + } + buf->start = parser->sym + start; + buf->len = parser->next - start - 1; // skip final _ + return DemangleOk; +} + +static NODISCARD demangle_status parser_digit_10(struct parser *parser, uint8_t *out) { + uint8_t ch = parser_peek(parser); + if ('0' <= ch && ch <= '9') { + *out = ch - '0'; + parser->next++; + return DemangleOk; + } else { + return DemangleInvalid; + } +} + +static NODISCARD demangle_status parser_digit_62(struct parser *parser, uint64_t *out) { + uint8_t ch = parser_peek(parser); + if ('0' <= ch && ch <= '9') { + *out = ch - '0'; + parser->next++; + return DemangleOk; + } else if ('a' <= ch && ch <= 'z') { + *out = 10 + (ch - 'a'); + parser->next++; + return DemangleOk; + } else if ('A' <= ch && ch <= 'Z') { + *out = 10 + 26 + (ch - 'A'); + parser->next++; + return DemangleOk; + } else { + return DemangleInvalid; + } +} + +static NODISCARD demangle_status parser_integer_62(struct parser *parser, uint64_t *out) { + if (parser_eat(parser, '_')) { + *out = 0; + return DemangleOk; + } + + uint64_t x = 0; + demangle_status status; + while (!parser_eat(parser, '_')) { + uint64_t d; + if ((status = 
parser_digit_62(parser, &d)) != DemangleOk) { + return status; + } + if (x > UINT64_MAX / 62) { + return DemangleInvalid; + } + x *= 62; + if (x > UINT64_MAX - d) { + return DemangleInvalid; + } + x += d; + } + if (x == UINT64_MAX) { + return DemangleInvalid; + } + *out = x + 1; + return DemangleOk; +} + +static NODISCARD demangle_status parser_opt_integer_62(struct parser *parser, uint8_t tag, uint64_t *out) { + if (!parser_eat(parser, tag)) { + *out = 0; + return DemangleOk; + } + + demangle_status status; + if ((status = parser_integer_62(parser, out)) != DemangleOk) { + return status; + } + if (*out == UINT64_MAX) { + return DemangleInvalid; + } + *out = *out + 1; + return DemangleOk; +} + +static NODISCARD demangle_status parser_disambiguator(struct parser *parser, uint64_t *out) { + return parser_opt_integer_62(parser, 's', out); +} + +typedef uint8_t parser_namespace_type; + +static NODISCARD demangle_status parser_namespace(struct parser *parser, parser_namespace_type *out) { + uint8_t next = parser_next(parser); + if ('A' <= next && next <= 'Z') { + *out = next; + return DemangleOk; + } else if ('a' <= next && next <= 'z') { + *out = 0; + return DemangleOk; + } else { + return DemangleInvalid; + } +} + +static NODISCARD demangle_status parser_backref(struct parser *parser, struct parser *out) { + size_t start = parser->next; + if (start == 0) { + return DemangleBug; + } + size_t s_start = start - 1; + uint64_t i; + demangle_status status = parser_integer_62(parser, &i); + if (status != DemangleOk) { + return status; + } + if (i >= s_start) { + return DemangleInvalid; + } + struct parser res = { + .sym = parser->sym, + .sym_len = parser->sym_len, + .next = (size_t)i, + .depth = parser->depth + }; + status = parser_push_depth(&res); + if (status != DemangleOk) { + return status; + } + *out = res; + return DemangleOk; +} + +static NODISCARD demangle_status parser_ident(struct parser *parser, struct ident *out) { + bool is_punycode = parser_eat(parser, 'u'); + size_t len; + uint8_t d; + demangle_status status = parser_digit_10(parser, &d); + len = d; + if (status != DemangleOk) { + return status; + } + if (len) { + for (;;) { + status = parser_digit_10(parser, &d); + if (status != DemangleOk) { + break; + } + if (len > SIZE_MAX / 10) { + return DemangleInvalid; + } + len *= 10; + if (len > SIZE_MAX - d) { + return DemangleInvalid; + } + len += d; + } + } + + // Skip past the optional `_` separator. 
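+ // The grammar makes the separator mandatory when the identifier
+ // itself begins with a digit or '_', keeping the length prefix
+ // unambiguous; here it is simply skipped when present.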
+ parser_eat(parser, '_'); + + size_t start = parser->next; + if (parser->sym_len - parser->next < len) { + return DemangleInvalid; + } + parser->next += len; + + const char *ident = &parser->sym[start]; + + if (is_punycode) { + const char *underscore = demangle_memrchr(ident, '_', (size_t)len); + if (underscore == NULL) { + *out = (struct ident){ + .ascii_start="", + .ascii_len=0, + .punycode_start=ident, + .punycode_len=len + }; + } else { + size_t ascii_len = underscore - ident; + // ascii_len <= len - 1 since `_` is in the first len bytes + size_t punycode_len = len - 1 - ascii_len; + *out = (struct ident){ + .ascii_start=ident, + .ascii_len=ascii_len, + .punycode_start=underscore + 1, + .punycode_len=punycode_len + }; + } + if (out->punycode_len == 0) { + return DemangleInvalid; + } + return DemangleOk; + } else { + *out = (struct ident) { + .ascii_start=ident, + .ascii_len=(size_t)len, + .punycode_start="", + .punycode_len=0, + }; + return DemangleOk; + } +} + +#define INVALID_SYNTAX "{invalid syntax}" + +static const char *demangle_error_message(demangle_status status) { + switch (status) { + case DemangleInvalid: + return INVALID_SYNTAX; + case DemangleBug: + return "{bug}"; + case DemangleRecursed: + return "{recursion limit reached}"; + default: + return "{unknown error}"; + } +} + +#define PRINT(print_fn) \ + do { \ + if ((print_fn) == OverflowOverflow) { \ + return OverflowOverflow; \ + } \ + } while(0) + +#define PRINT_CH(printer, s) PRINT(printer_print_ch((printer), (s))) +#define PRINT_STR(printer, s) PRINT(printer_print_str((printer), (s))) +#define PRINT_U64(printer, s) PRINT(printer_print_u64((printer), (s))) +#define PRINT_IDENT(printer, s) PRINT(printer_print_ident((printer), (s))) + +#define INVALID(printer) \ + do { \ + PRINT_STR((printer), INVALID_SYNTAX); \ + (printer)->status = DemangleInvalid; \ + return OverflowOk; \ + } while(0) + +#define PARSE(printer, method, ...) 
\ + do { \ + if ((printer)->status != DemangleOk) { \ + PRINT_STR((printer), "?"); \ + return OverflowOk; \ + } else { \ + demangle_status _parse_status = method(&(printer)->parser, ## __VA_ARGS__); \ + if (_parse_status != DemangleOk) { \ + PRINT_STR((printer), demangle_error_message(_parse_status)); \ + (printer)->status = _parse_status; \ + return OverflowOk; \ + } \ + } \ + } while(0) + +#define PRINT_SEP_LIST(printer, body, sep) \ + do { \ + size_t _sep_list_i; \ + PRINT_SEP_LIST_COUNT(printer, _sep_list_i, body, sep); \ + } while(0) + +#define PRINT_SEP_LIST_COUNT(printer, count, body, sep) \ + do { \ + count = 0; \ + while ((printer)->status == DemangleOk && !printer_eat((printer), 'E')) { \ + if (count > 0) { PRINT_STR(printer, sep); } \ + body; \ + count++; \ + } \ + } while(0) + +static bool printer_eat(struct printer *printer, uint8_t b) { + if (printer->status != DemangleOk) { + return false; + } + + return parser_eat(&printer->parser, b); +} + +static void printer_pop_depth(struct printer *printer) { + if (printer->status == DemangleOk) { + parser_pop_depth(&printer->parser); + } +} + +static NODISCARD overflow_status printer_print_buf(struct printer *printer, const char *start, size_t len) { + if (printer->out == NULL) { + return OverflowOk; + } + if (printer->out_len < len) { + return OverflowOverflow; + } + + memcpy(printer->out, start, len); + printer->out += len; + printer->out_len -= len; + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_str(struct printer *printer, const char *buf) { + return printer_print_buf(printer, buf, strlen(buf)); +} + +static NODISCARD overflow_status printer_print_ch(struct printer *printer, char ch) { + return printer_print_buf(printer, &ch, 1); +} + +static NODISCARD overflow_status printer_print_u64(struct printer *printer, uint64_t n) { + char buf[32] = {0}; + sprintf(buf, "%llu", (unsigned long long)n); // printing uint64 uses 21 < 32 chars + return printer_print_str(printer, buf); +} + +static NODISCARD overflow_status printer_print_ident(struct printer *printer, struct ident *ident) { + if (printer->out == NULL) { + return OverflowOk; + } + + size_t out_len = printer->out_len; + overflow_status status; + if ((status = display_ident(ident->ascii_start, ident->ascii_len, ident->punycode_start, ident->punycode_len, (uint8_t*)printer->out, &out_len)) != OverflowOk) { + return status; + } + printer->out += out_len; + printer->out_len -= out_len; + return OverflowOk; +} + +typedef overflow_status (*printer_fn)(struct printer *printer); +typedef overflow_status (*backref_fn)(struct printer *printer, bool *arg); + +static NODISCARD overflow_status printer_print_backref(struct printer *printer, backref_fn func, bool *arg) { + struct parser backref; + PARSE(printer, parser_backref, &backref); + + if (printer->out == NULL) { + return OverflowOk; + } + + struct parser orig_parser = printer->parser; + demangle_status orig_status = printer->status; // fixme not sure this is needed match for Ok on the Rust side + printer->parser = backref; + printer->status = DemangleOk; + overflow_status status = func(printer, arg); + printer->parser = orig_parser; + printer->status = orig_status; + + return status; +} + +static NODISCARD overflow_status printer_print_lifetime_from_index(struct printer *printer, uint64_t lt) { + // Bound lifetimes aren't tracked when skipping printing. 
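+ // lt is a De Bruijn-style index counted from the innermost binder:
+ // depths 0..25 print as 'a'..'z', deeper binders as '_N'.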
+ if (printer->out == NULL) { + return OverflowOk; + } + + PRINT_STR(printer, "'"); + if (lt == 0) { + PRINT_STR(printer, "_"); + return OverflowOk; + } + + if (printer->bound_lifetime_depth < lt) { + INVALID(printer); + } else { + uint64_t depth = printer->bound_lifetime_depth - lt; + if (depth < 26) { + PRINT_CH(printer, 'a' + depth); + } else { + PRINT_STR(printer, "_"); + PRINT_U64(printer, depth); + } + + return OverflowOk; + } +} + +static NODISCARD overflow_status printer_in_binder(struct printer *printer, printer_fn func) { + uint64_t bound_lifetimes; + PARSE(printer, parser_opt_integer_62, 'G', &bound_lifetimes); + + // Don't track bound lifetimes when skipping printing. + if (printer->out == NULL) { + return func(printer); + } + + if (bound_lifetimes > 0) { + PRINT_STR(printer, "for<"); + for (uint64_t i = 0; i < bound_lifetimes; i++) { + if (i > 0) { + PRINT_STR(printer, ", "); + } + printer->bound_lifetime_depth++; + PRINT(printer_print_lifetime_from_index(printer, 1)); + } + PRINT_STR(printer, "> "); + } + + overflow_status r = func(printer); + printer->bound_lifetime_depth -= bound_lifetimes; + + return r; +} + +static NODISCARD overflow_status printer_print_generic_arg(struct printer *printer) { + if (printer_eat(printer, 'L')) { + uint64_t lt; + PARSE(printer, parser_integer_62, <); + return printer_print_lifetime_from_index(printer, lt); + } else if (printer_eat(printer, 'K')) { + return printer_print_const(printer, false); + } else { + return printer_print_type(printer); + } +} + +static NODISCARD overflow_status printer_print_generic_args(struct printer *printer) { + PRINT_STR(printer, "<"); + PRINT_SEP_LIST(printer, PRINT(printer_print_generic_arg(printer)), ", "); + PRINT_STR(printer, ">"); + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_path_out_of_value(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_path(printer, false); +} + +static NODISCARD overflow_status printer_print_path_in_value(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_path(printer, true); +} + +static NODISCARD overflow_status printer_print_path(struct printer *printer, bool in_value) { + PARSE(printer, parser_push_depth); + uint8_t tag; + PARSE(printer, parser_ch, &tag); + + overflow_status st; + uint64_t dis; + struct ident name; + parser_namespace_type ns; + char *orig_out; + + switch(tag) { + case 'C': + PARSE(printer, parser_disambiguator, &dis); + PARSE(printer, parser_ident, &name); + + PRINT_IDENT(printer, &name); + + if (printer->out != NULL && !printer->alternate && dis != 0) { + PRINT_STR(printer, "["); + char buf[24] = {0}; + sprintf(buf, "%llx", (unsigned long long)dis); + PRINT_STR(printer, buf); + PRINT_STR(printer, "]"); + } + break; + case 'N': + PARSE(printer, parser_namespace, &ns); + if ((st = printer_print_path(printer, in_value)) != OverflowOk) { + return st; + } + + // HACK(eddyb) if the parser is already marked as having errored, + // `parse!` below will print a `?` without its preceding `::` + // (because printing the `::` is skipped in certain conditions, + // i.e. a lowercase namespace with an empty identifier), + // so in order to get `::?`, the `::` has to be printed here. 
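+ // (A lowercase namespace with an empty name prints no segment at
+ // all, hence the explicit "::" fallback here.)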
+ if (printer->status != DemangleOk) { + PRINT_STR(printer, "::"); + } + + PARSE(printer, parser_disambiguator, &dis); + PARSE(printer, parser_ident, &name); + // Special namespace, like closures and shims + if (ns) { + PRINT_STR(printer, "::{"); + if (ns == 'C') { + PRINT_STR(printer, "closure"); + } else if (ns == 'S') { + PRINT_STR(printer, "shim"); + } else { + PRINT_CH(printer, ns); + } + if (name.ascii_len != 0 || name.punycode_len != 0) { + PRINT_STR(printer, ":"); + PRINT_IDENT(printer, &name); + } + PRINT_STR(printer, "#"); + PRINT_U64(printer, dis); + PRINT_STR(printer, "}"); + } else { + // Implementation-specific/unspecified namespaces + if (name.ascii_len != 0 || name.punycode_len != 0) { + PRINT_STR(printer, "::"); + PRINT_IDENT(printer, &name); + } + } + break; + case 'M': + case 'X': + // for impls, ignore the impls own path + PARSE(printer, parser_disambiguator, &dis); + orig_out = printer->out; + printer->out = NULL; + PRINT(printer_print_path(printer, false)); + printer->out = orig_out; + + // fallthru + case 'Y': + PRINT_STR(printer, "<"); + PRINT(printer_print_type(printer)); + if (tag != 'M') { + PRINT_STR(printer, " as "); + PRINT(printer_print_path(printer, false)); + } + PRINT_STR(printer, ">"); + break; + case 'I': + PRINT(printer_print_path(printer, in_value)); + if (in_value) { + PRINT_STR(printer, "::"); + } + PRINT(printer_print_generic_args(printer)); + break; + case 'B': + PRINT(printer_print_backref(printer, in_value ? printer_print_path_in_value : printer_print_path_out_of_value, NULL)); + break; + default: + INVALID(printer); + break; + } + + printer_pop_depth(printer); + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_const_uint(struct printer *printer, uint8_t tag) { + struct buf hex; + PARSE(printer, parser_hex_nibbles, &hex); + + uint64_t val; + if (try_parse_uint(hex.start, hex.len, &val)) { + PRINT_U64(printer, val); + } else { + PRINT_STR(printer, "0x"); + PRINT(printer_print_buf(printer, hex.start, hex.len)); + } + + if (printer->out != NULL && !printer->alternate) { + const char *ty = basic_type(tag); + if (/* safety */ ty != NULL) { + PRINT_STR(printer, ty); + } + } + + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_const_str_literal(struct printer *printer) { + struct buf hex; + PARSE(printer, parser_hex_nibbles, &hex); + + size_t out_len = SIZE_MAX; + nibbles_to_string_status nts_status = nibbles_to_string(hex.start, hex.len, NULL, &out_len); + switch (nts_status) { + case NtsOk: + if (printer->out != NULL) { + out_len = printer->out_len; + nts_status = nibbles_to_string(hex.start, hex.len, (uint8_t*)printer->out, &out_len); + if (nts_status != NtsOk) { + return OverflowOverflow; + } + printer->out += out_len; + printer->out_len -= out_len; + } + return OverflowOk; + case NtsOverflow: + // technically if there is a string of size `SIZE_MAX/6` whose escaped version overflows + // SIZE_MAX but has an invalid char, this will be a "fake" overflow. In practice, + // that is not going to happen and a fuzzer will not generate strings of this length. 
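+ // Erring on the side of overflow is safe: printing aborts with
+ // OverflowOverflow instead of emitting a truncated string.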
+ return OverflowOverflow; + case NtsInvalid: + default: + INVALID(printer); + } +} + +static NODISCARD overflow_status printer_print_const_struct(struct printer *printer) { + uint64_t dis; + struct ident name; + PARSE(printer, parser_disambiguator, &dis); + PARSE(printer, parser_ident, &name); + PRINT_IDENT(printer, &name); + PRINT_STR(printer, ": "); + return printer_print_const(printer, true); +} + +static NODISCARD overflow_status printer_print_const_out_of_value(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_const(printer, false); +} + +static NODISCARD overflow_status printer_print_const_in_value(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_const(printer, true); +} + +static NODISCARD overflow_status printer_print_const(struct printer *printer, bool in_value) { + uint8_t tag; + + PARSE(printer, parser_ch, &tag); + PARSE(printer, parser_push_depth); + + struct buf hex; + uint64_t val; + size_t count; + + bool opened_brace = false; +#define OPEN_BRACE_IF_OUTSIDE_EXPR \ + do { if (!in_value) { \ + opened_brace = true; \ + PRINT_STR(printer, "{"); \ + } } while(0) + + switch(tag) { + case 'p': + PRINT_STR(printer, "_"); + break; + // Primitive leaves with hex-encoded values (see `basic_type`). + case 'a': + case 's': + case 'l': + case 'x': + case 'n': + case 'i': + if (printer_eat(printer, 'n')) { + PRINT_STR(printer, "-"); + } + /* fallthrough */ + case 'h': + case 't': + case 'm': + case 'y': + case 'o': + case 'j': + PRINT(printer_print_const_uint(printer, tag)); + break; + case 'b': + PARSE(printer, parser_hex_nibbles, &hex); + if (try_parse_uint(hex.start, hex.len, &val)) { + if (val == 0) { + PRINT_STR(printer, "false"); + } else if (val == 1) { + PRINT_STR(printer, "true"); + } else { + INVALID(printer); + } + } else { + INVALID(printer); + } + break; + case 'c': + PARSE(printer, parser_hex_nibbles, &hex); + if (try_parse_uint(hex.start, hex.len, &val) + && val < UINT32_MAX + && validate_char((uint32_t)val)) + { + char escaped_buf[ESCAPED_SIZE]; + size_t escaped_size = char_to_string((uint32_t)val, '\'', true, &escaped_buf); + + PRINT_STR(printer, "'"); + PRINT(printer_print_buf(printer, escaped_buf, escaped_size)); + PRINT_STR(printer, "'"); + } else { + INVALID(printer); + } + break; + case 'e': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "*"); + PRINT(printer_print_const_str_literal(printer)); + break; + case 'R': + case 'Q': + if (tag == 'R' && printer_eat(printer, 'e')) { + PRINT(printer_print_const_str_literal(printer)); + } else { + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "&"); + if (tag != 'R') { + PRINT_STR(printer, "mut "); + } + PRINT(printer_print_const(printer, true)); + } + break; + case 'A': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "["); + PRINT_SEP_LIST(printer, PRINT(printer_print_const(printer, true)), ", "); + PRINT_STR(printer, "]"); + break; + case 'T': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "("); + PRINT_SEP_LIST_COUNT(printer, count, PRINT(printer_print_const(printer, true)), ", "); + if (count == 1) { + PRINT_STR(printer, ","); + } + PRINT_STR(printer, ")"); + break; + case 'V': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT(printer_print_path(printer, true)); + PARSE(printer, parser_ch, &tag); + switch(tag) { + case 'U': + break; + case 'T': + PRINT_STR(printer, "("); + PRINT_SEP_LIST(printer, PRINT(printer_print_const(printer, true)), ", "); + PRINT_STR(printer, ")"); + break; + case 'S': + PRINT_STR(printer, " { "); + PRINT_SEP_LIST(printer, 
PRINT(printer_print_const_struct(printer)), ", "); + PRINT_STR(printer, " }"); + break; + default: + INVALID(printer); + } + break; + case 'B': + PRINT(printer_print_backref(printer, in_value ? printer_print_const_in_value : printer_print_const_out_of_value, NULL)); + break; + default: + INVALID(printer); + } +#undef OPEN_BRACE_IF_OUTSIDE_EXPR + + if (opened_brace) { + PRINT_STR(printer, "}"); + } + printer_pop_depth(printer); + + return OverflowOk; +} + +/// A trait in a trait object may have some "existential projections" +/// (i.e. associated type bindings) after it, which should be printed +/// in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`. +/// To this end, this method will keep the `<...>` of an 'I' path +/// open, by omitting the `>`, and return `Ok(true)` in that case. +static NODISCARD overflow_status printer_print_maybe_open_generics(struct printer *printer, bool *open) { + if (printer_eat(printer, 'B')) { + // NOTE(eddyb) the closure may not run if printing is being skipped, + // but in that case the returned boolean doesn't matter. + *open = false; + return printer_print_backref(printer, printer_print_maybe_open_generics, open); + } else if(printer_eat(printer, 'I')) { + PRINT(printer_print_path(printer, false)); + PRINT_STR(printer, "<"); + PRINT_SEP_LIST(printer, PRINT(printer_print_generic_arg(printer)), ", "); + *open = true; + return OverflowOk; + } else { + PRINT(printer_print_path(printer, false)); + *open = false; + return OverflowOk; + } +} + +static NODISCARD overflow_status printer_print_dyn_trait(struct printer *printer) { + bool open; + PRINT(printer_print_maybe_open_generics(printer, &open)); + + while (printer_eat(printer, 'p')) { + if (!open) { + PRINT_STR(printer, "<"); + open = true; + } else { + PRINT_STR(printer, ", "); + } + + struct ident name; + PARSE(printer, parser_ident, &name); + + PRINT_IDENT(printer, &name); + PRINT_STR(printer, " = "); + PRINT(printer_print_type(printer)); + } + + if (open) { + PRINT_STR(printer, ">"); + } + + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_object_bounds(struct printer *printer) { + PRINT_SEP_LIST(printer, PRINT(printer_print_dyn_trait(printer)), " + "); + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_function_type(struct printer *printer) { + bool is_unsafe = printer_eat(printer, 'U'); + const char *abi; + size_t abi_len; + if (printer_eat(printer, 'K')) { + if (printer_eat(printer, 'C')) { + abi = "C"; + abi_len = 1; + } else { + struct ident abi_ident; + PARSE(printer, parser_ident, &abi_ident); + if (abi_ident.ascii_len == 0 || abi_ident.punycode_len != 0) { + INVALID(printer); + } + abi = abi_ident.ascii_start; + abi_len = abi_ident.ascii_len; + } + } else { + abi = NULL; + abi_len = 0; + } + + if (is_unsafe) { + PRINT_STR(printer, "unsafe "); + } + + if (abi != NULL) { + PRINT_STR(printer, "extern \""); + + // replace _ with - + while (abi_len > 0) { + const char *minus = memchr(abi, '_', abi_len); + if (minus == NULL) { + PRINT(printer_print_buf(printer, (const char*)abi, abi_len)); + break; + } else { + size_t space_to_minus = minus - abi; + PRINT(printer_print_buf(printer, (const char*)abi, space_to_minus)); + PRINT_STR(printer, "-"); + abi = minus + 1; + abi_len -= (space_to_minus + 1); + } + } + + PRINT_STR(printer, "\" "); + } + + PRINT_STR(printer, "fn("); + PRINT_SEP_LIST(printer, PRINT(printer_print_type(printer)), ", "); + PRINT_STR(printer, ")"); + + if (printer_eat(printer, 'u')) { + // Skip printing the return type if it's 'u', i.e. 
`()`.
+ } else {
+ PRINT_STR(printer, " -> ");
+ PRINT(printer_print_type(printer));
+ }
+
+ return OverflowOk;
+}
+
+static NODISCARD overflow_status printer_print_type_backref(struct printer *printer, bool *_arg) {
+ (void)_arg;
+ return printer_print_type(printer);
+}
+
+static NODISCARD overflow_status printer_print_type(struct printer *printer) {
+ uint8_t tag;
+ PARSE(printer, parser_ch, &tag);
+
+ const char *basic_ty = basic_type(tag);
+ if (basic_ty) {
+ return printer_print_str(printer, basic_ty);
+ }
+
+ uint64_t count;
+ uint64_t lt;
+
+ PARSE(printer, parser_push_depth);
+ switch (tag) {
+ case 'R':
+ case 'Q':
+ PRINT_STR(printer, "&");
+ if (printer_eat(printer, 'L')) {
+ PARSE(printer, parser_integer_62, &lt);
+ if (lt != 0) {
+ PRINT(printer_print_lifetime_from_index(printer, lt));
+ PRINT_STR(printer, " ");
+ }
+ }
+ if (tag != 'R') {
+ PRINT_STR(printer, "mut ");
+ }
+ PRINT(printer_print_type(printer));
+ break;
+ case 'P':
+ case 'O':
+ PRINT_STR(printer, "*");
+ if (tag != 'P') {
+ PRINT_STR(printer, "mut ");
+ } else {
+ PRINT_STR(printer, "const ");
+ }
+ PRINT(printer_print_type(printer));
+ break;
+ case 'A':
+ case 'S':
+ PRINT_STR(printer, "[");
+ PRINT(printer_print_type(printer));
+ if (tag == 'A') {
+ PRINT_STR(printer, "; ");
+ PRINT(printer_print_const(printer, true));
+ }
+ PRINT_STR(printer, "]");
+ break;
+ case 'T':
+ PRINT_STR(printer, "(");
+ PRINT_SEP_LIST_COUNT(printer, count, PRINT(printer_print_type(printer)), ", ");
+ if (count == 1) {
+ PRINT_STR(printer, ",");
+ }
+ PRINT_STR(printer, ")");
+ break;
+ case 'F':
+ PRINT(printer_in_binder(printer, printer_print_function_type));
+ break;
+ case 'D':
+ PRINT_STR(printer, "dyn ");
+ PRINT(printer_in_binder(printer, printer_print_object_bounds));
+
+ if (!printer_eat(printer, 'L')) {
+ INVALID(printer);
+ }
+ PARSE(printer, parser_integer_62, &lt);
+
+ if (lt != 0) {
+ PRINT_STR(printer, " + ");
+ PRINT(printer_print_lifetime_from_index(printer, lt));
+ }
+ break;
+ case 'B':
+ PRINT(printer_print_backref(printer, printer_print_type_backref, NULL));
+ break;
+ default:
+ // Go back to the tag, so `print_path` also sees it.
+ if (printer->status == DemangleOk && /* safety */ printer->parser.next > 0) {
+ printer->parser.next--;
+ }
+ PRINT(printer_print_path(printer, false));
+ }
+
+ printer_pop_depth(printer);
+ return OverflowOk;
+}
+
+NODISCARD static demangle_status rust_demangle_legacy_demangle(const char *s, size_t s_len, struct demangle_legacy *res, const char **rest)
+{
+ if (s_len > strlen(s)) {
+ // s_len only exists to shorten the string, this is not a buffer API
+ return DemangleInvalid;
+ }
+
+ const char *inner;
+ size_t inner_len;
+ if (s_len >= 3 && !strncmp(s, "_ZN", 3)) {
+ inner = s + 3;
+ inner_len = s_len - 3;
+ } else if (s_len >= 2 && !strncmp(s, "ZN", 2)) {
+ // On Windows, dbghelp strips leading underscores, so we accept "ZN...E"
+ // form too.
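+ // e.g. the path foo::bar can therefore arrive as "_ZN3foo3barE" (ELF),
+ // "ZN3foo3barE" (after dbghelp) or "__ZN3foo3barE" (OSX, handled below).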
+ inner = s + 2; + inner_len = s_len - 2; + } else if (s_len >= 4 && !strncmp(s, "__ZN", 4)) { + // On OSX, symbols are prefixed with an extra _ + inner = s + 4; + inner_len = s_len - 4; + } else { + return DemangleInvalid; + } + + if (!str_isascii(inner, inner_len)) { + return DemangleInvalid; + } + + size_t elements = 0; + const char *chars = inner; + size_t chars_len = inner_len; + if (chars_len == 0) { + return DemangleInvalid; + } + char c; + while ((c = *chars) != 'E') { + // Decode an identifier element's length + if (c < '0' || c > '9') { + return DemangleInvalid; + } + size_t len = 0; + while (c >= '0' && c <= '9') { + size_t d = c - '0'; + if (len > SIZE_MAX / 10) { + return DemangleInvalid; + } + len *= 10; + if (len > SIZE_MAX - d) { + return DemangleInvalid; + } + len += d; + + chars++; + chars_len--; + if (chars_len == 0) { + return DemangleInvalid; + } + c = *chars; + } + + // Advance by the length + if (chars_len <= len) { + return DemangleInvalid; + } + chars += len; + chars_len -= len; + elements++; + } + *res = (struct demangle_legacy) { inner, inner_len, elements }; + *rest = chars + 1; + return DemangleOk; +} + +static bool is_rust_hash(const char *s, size_t len) { + if (len == 0 || s[0] != 'h') { + return false; + } + + for (size_t i = 1; i < len; i++) { + if (!((s[i] >= '0' && s[i] <= '9') || (s[i] >= 'a' && s[i] <= 'f') || (s[i] >= 'A' && s[i] <= 'F'))) { + return false; + } + } + + return true; +} + +NODISCARD static overflow_status rust_demangle_legacy_display_demangle(struct demangle_legacy res, char *out, size_t len, bool alternate) +{ + struct printer printer = { + // not actually using the parser part of the printer, just keeping it to share the format functions + DemangleOk, + { NULL }, + out, + len, + 0, + alternate + }; + const char *inner = res.mangled; + for (size_t element = 0; element < res.elements; element++) { + size_t i = 0; + const char *rest; + for (rest = inner; rest < res.mangled + res.mangled_len && *rest >= '0' && *rest <= '9'; rest++) { + i *= 10; + i += *rest - '0'; + } + if ((size_t)(res.mangled + res.mangled_len - rest) < i) { + // safety: shouldn't reach this place if the input string is validated. bail out. 
+ // safety: we know rest <= res.mangled + res.mangled_len from the for-loop above
+ break;
+ }
+
+ size_t len = i;
+ inner = rest + len;
+
+ // From here on, inner contains a pointer to the next element, rest[:len] to the current one
+ if (alternate && element + 1 == res.elements && is_rust_hash(rest, i)) {
+ break;
+ }
+ if (element != 0) {
+ PRINT_STR(&printer, "::");
+ }
+
+ if (len >= 2 && !strncmp(rest, "_$", 2)) {
+ rest++;
+ len--;
+ }
+
+ while (len > 0) {
+ if (rest[0] == '.') {
+ if (len >= 2 && rest[1] == '.') {
+ PRINT_STR(&printer, "::");
+ rest += 2;
+ len -= 2;
+ } else {
+ PRINT_STR(&printer, ".");
+ rest += 1;
+ len -= 1;
+ }
+ } else if (rest[0] == '$') {
+ const char *escape = memchr(rest + 1, '$', len - 1);
+ if (escape == NULL) {
+ break;
+ }
+ const char *escape_start = rest + 1;
+ size_t escape_len = escape - (rest + 1);
+
+ size_t next_len = len - (escape + 1 - rest);
+ const char *next_rest = escape + 1;
+
+ char ch;
+ if ((escape_len == 2 && escape_start[0] == 'S' && escape_start[1] == 'P')) {
+ ch = '@';
+ } else if ((escape_len == 2 && escape_start[0] == 'B' && escape_start[1] == 'P')) {
+ ch = '*';
+ } else if ((escape_len == 2 && escape_start[0] == 'R' && escape_start[1] == 'F')) {
+ ch = '&';
+ } else if ((escape_len == 2 && escape_start[0] == 'L' && escape_start[1] == 'T')) {
+ ch = '<';
+ } else if ((escape_len == 2 && escape_start[0] == 'G' && escape_start[1] == 'T')) {
+ ch = '>';
+ } else if ((escape_len == 2 && escape_start[0] == 'L' && escape_start[1] == 'P')) {
+ ch = '(';
+ } else if ((escape_len == 2 && escape_start[0] == 'R' && escape_start[1] == 'P')) {
+ ch = ')';
+ } else if ((escape_len == 1 && escape_start[0] == 'C')) {
+ ch = ',';
+ } else {
+ if (escape_len > 1 && escape_start[0] == 'u') {
+ escape_start++;
+ escape_len--;
+ uint64_t val;
+ if (try_parse_uint(escape_start, escape_len, &val)
+ && val < UINT32_MAX
+ && validate_char((uint32_t)val))
+ {
+ if (!unicode_iscontrol(val)) {
+ uint8_t wchr[4];
+ size_t wchr_len = code_to_utf8(wchr, (uint32_t)val);
+ PRINT(printer_print_buf(&printer, (const char*)wchr, wchr_len));
+ len = next_len;
+ rest = next_rest;
+ continue;
+ }
+ }
+ }
+ break; // print the rest of this element raw
+ }
+ PRINT_CH(&printer, ch);
+ len = next_len;
+ rest = next_rest;
+ } else {
+ size_t j = 0;
+ for (;j < len && rest[j] != '$' && rest[j] != '.';j++);
+ if (j == len) {
+ break;
+ }
+ PRINT(printer_print_buf(&printer, rest, j));
+ rest += j;
+ len -= j;
+ }
+ }
+ PRINT(printer_print_buf(&printer, rest, len));
+ }
+
+ if (printer.out_len < OVERFLOW_MARGIN) {
+ return OverflowOverflow;
+ }
+ *printer.out = '\0';
+ return OverflowOk;
+}
+
+static bool is_symbol_like(const char *s, size_t len) {
+ // rust-demangle definition of symbol-like: control characters and space are not symbol-like, all else is
+ for (size_t i = 0; i < len; i++) {
+ char ch = s[i];
+ if (!(ch >= 0x21 && ch <= 0x7e)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void rust_demangle_demangle(const char *s, struct demangle *res)
+{
+ // During ThinLTO LLVM may import and rename internal symbols, so strip out
+ // those endings first as they're one of the last manglings applied to symbol
+ // names.
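+ // e.g. "_ZN3foo3barE.llvm.9D1C9369@@16" is demangled as if it were
+ // plain "_ZN3foo3barE".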
+ const char *llvm = ".llvm."; + const char *found_llvm = strstr(s, llvm); + size_t s_len = strlen(s); + if (found_llvm) { + const char *all_hex_ptr = found_llvm + strlen(".llvm."); + bool all_hex = true; + for (;*all_hex_ptr;all_hex_ptr++) { + if (!(('0' <= *all_hex_ptr && *all_hex_ptr <= '9') || + ('A' <= *all_hex_ptr && *all_hex_ptr <= 'F') || + *all_hex_ptr == '@')) { + all_hex = false; + break; + } + } + + if (all_hex) { + s_len = found_llvm - s; + } + } + + const char *suffix; + struct demangle_legacy legacy; + demangle_status st = rust_demangle_legacy_demangle(s, s_len, &legacy, &suffix); + if (st == DemangleOk) { + *res = (struct demangle) { + .style=DemangleStyleLegacy, + .mangled=legacy.mangled, + .mangled_len=legacy.mangled_len, + .elements=legacy.elements, + .original=s, + .original_len=s_len, + .suffix=suffix, + .suffix_len=s_len - (suffix - s), + }; + } else { + struct demangle_v0 v0; + st = rust_demangle_v0_demangle(s, s_len, &v0, &suffix); + if (st == DemangleOk) { + *res = (struct demangle) { + .style=DemangleStyleV0, + .mangled=v0.mangled, + .mangled_len=v0.mangled_len, + .elements=0, + .original=s, + .original_len=s_len, + .suffix=suffix, + .suffix_len=s_len - (suffix - s), + }; + } else { + *res = (struct demangle) { + .style=DemangleStyleUnknown, + .mangled=NULL, + .mangled_len=0, + .elements=0, + .original=s, + .original_len=s_len, + .suffix=s, + .suffix_len=0, + }; + } + } + + // Output like LLVM IR adds extra period-delimited words. See if + // we are in that case and save the trailing words if so. + if (res->suffix_len) { + if (res->suffix[0] == '.' && is_symbol_like(res->suffix, res->suffix_len)) { + // Keep the suffix + } else { + // Reset the suffix and invalidate the demangling + res->style = DemangleStyleUnknown; + res->suffix_len = 0; + } + } +} + +bool rust_demangle_is_known(struct demangle *res) { + return res->style != DemangleStyleUnknown; +} + +overflow_status rust_demangle_display_demangle(struct demangle const *res, char *out, size_t len, bool alternate) { + size_t original_len = res->original_len; + size_t out_len; + switch (res->style) { + case DemangleStyleUnknown: + if (len < original_len) { + return OverflowOverflow; + } else { + memcpy(out, res->original, original_len); + out += original_len; + len -= original_len; + break; + } + break; + case DemangleStyleLegacy: { + struct demangle_legacy legacy = { + res->mangled, + res->mangled_len, + res->elements + }; + if (rust_demangle_legacy_display_demangle(legacy, out, len, alternate) == OverflowOverflow) { + return OverflowOverflow; + } + out_len = strlen(out); + out += out_len; + len -= out_len; + break; + } + case DemangleStyleV0: { + struct demangle_v0 v0 = { + res->mangled, + res->mangled_len + }; + if (rust_demangle_v0_display_demangle(v0, out, len, alternate) == OverflowOverflow) { + return OverflowOverflow; + } + out_len = strlen(out); + out += out_len; + len -= out_len; + break; + } + } + size_t suffix_len = res->suffix_len; + if (len < suffix_len || len - suffix_len < OVERFLOW_MARGIN) { + return OverflowOverflow; + } + memcpy(out, res->suffix, suffix_len); + out[suffix_len] = 0; + return OverflowOk; +} diff --git a/tools/perf/util/demangle-rust-v0.h b/tools/perf/util/demangle-rust-v0.h new file mode 100644 index 000000000000..d0092818610a --- /dev/null +++ b/tools/perf/util/demangle-rust-v0.h @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// The contents of this file come from the Rust rustc-demangle library, hosted +// in the <https://github.com/rust-lang/rustc-demangle> 
repository, licensed
+// under "Apache-2.0 OR MIT". For copyright details, see
+// <https://github.com/rust-lang/rustc-demangle/blob/main/README.md>.
+// Please note that the file should be kept as close as possible to upstream.
+
+#ifndef _H_DEMANGLE_V0_H
+#define _H_DEMANGLE_V0_H
+
+#ifdef __cplusplus
extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#if defined(__GNUC__) || defined(__clang__)
+#define DEMANGLE_NODISCARD __attribute__((warn_unused_result))
+#else
+#define DEMANGLE_NODISCARD
+#endif
+
+typedef enum {
+ OverflowOk,
+ OverflowOverflow
+} overflow_status;
+
+enum demangle_style {
+ DemangleStyleUnknown = 0,
+ DemangleStyleLegacy,
+ DemangleStyleV0,
+};
+
+// Not using a union here to make the struct easier to copy-paste if needed.
+struct demangle {
+ enum demangle_style style;
+ // points to the "mangled" part of the name,
+ // not including `ZN` or `R` prefixes.
+ const char *mangled;
+ size_t mangled_len;
+ // In DemangleStyleLegacy, this is the number of path elements
+ size_t elements;
+ // while it's called "original", it will not contain `.llvm.9D1C9369@@16` suffixes
+ // that are to be ignored.
+ const char *original;
+ size_t original_len;
+ // Contains the part after the mangled name that is to be output,
+ // which can be `.exit.i.i` suffixes LLVM sometimes adds.
+ const char *suffix;
+ size_t suffix_len;
+};
+
+// If fewer than OVERFLOW_MARGIN bytes of the output buffer would remain free,
+// the demangler returns `OverflowOverflow` even if the output itself fits.
+#define OVERFLOW_MARGIN 4
+
+/// Demangle a C string that refers to a Rust symbol and put the intermediate demangling result in `res`.
+/// Beware that `res` contains references into `s`. If `s` is modified (or free'd) before calling
+/// `rust_demangle_display_demangle`, the behavior is undefined.
+///
+/// Use `rust_demangle_display_demangle` to convert it to an actual string.
+void rust_demangle_demangle(const char *s, struct demangle *res);
+
+/// Write the string in a `struct demangle` into a buffer.
+///
+/// Return `OverflowOk` if the output buffer was sufficiently big, `OverflowOverflow` if it wasn't.
+/// This function is `O(n)` in the length of the input + *output* [$], but the demangled form of a symbol can
+/// be exponentially[$$] large; therefore it is recommended to have a sane bound (`rust-demangle`
+/// uses 1,000,000 bytes) on `len`.
+///
+/// If `alternate` is true, the less verbose alternate formatting (Rust `{:#}`) is used, which does not show
+/// symbol hashes and types of constant ints.
+///
+/// [$] It's `O(n * MAX_DEPTH)`, but `MAX_DEPTH` is a constant 300 and therefore it's `O(n)`
+/// [$$] Technically, bounded by `O(n^MAX_DEPTH)`, but this is practically exponential.
+DEMANGLE_NODISCARD overflow_status rust_demangle_display_demangle(struct demangle const *res, char *out, size_t len, bool alternate);
+
+/// Returns true if `res` refers to a known valid Rust demangling style, false if it's an unknown style.
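+/// (The known styles are DemangleStyleLegacy and DemangleStyleV0; for
+/// DemangleStyleUnknown, `rust_demangle_display_demangle` simply copies the
+/// original string through.)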
+bool rust_demangle_is_known(struct demangle *res); + +#undef DEMANGLE_NODISCARD + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/perf/util/demangle-rust.c b/tools/perf/util/demangle-rust.c deleted file mode 100644 index a659fc69f73a..000000000000 --- a/tools/perf/util/demangle-rust.c +++ /dev/null @@ -1,269 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <string.h> -#include "debug.h" - -#include "demangle-rust.h" - -/* - * Mangled Rust symbols look like this: - * - * _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a - * - * The original symbol is: - * - * <std::sys::fd::FileDesc as core::ops::Drop>::drop - * - * The last component of the path is a 64-bit hash in lowercase hex, prefixed - * with "h". Rust does not have a global namespace between crates, an illusion - * which Rust maintains by using the hash to distinguish things that would - * otherwise have the same symbol. - * - * Any path component not starting with a XID_Start character is prefixed with - * "_". - * - * The following escape sequences are used: - * - * "," => $C$ - * "@" => $SP$ - * "*" => $BP$ - * "&" => $RF$ - * "<" => $LT$ - * ">" => $GT$ - * "(" => $LP$ - * ")" => $RP$ - * " " => $u20$ - * "'" => $u27$ - * "[" => $u5b$ - * "]" => $u5d$ - * "~" => $u7e$ - * - * A double ".." means "::" and a single "." means "-". - * - * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$ - */ - -static const char *hash_prefix = "::h"; -static const size_t hash_prefix_len = 3; -static const size_t hash_len = 16; - -static bool is_prefixed_hash(const char *start); -static bool looks_like_rust(const char *sym, size_t len); -static bool unescape(const char **in, char **out, const char *seq, char value); - -/* - * INPUT: - * sym: symbol that has been through BFD-demangling - * - * This function looks for the following indicators: - * - * 1. The hash must consist of "h" followed by 16 lowercase hex digits. - * - * 2. As a sanity check, the hash must use between 5 and 15 of the 16 possible - * hex digits. This is true of 99.9998% of hashes so once in your life you - * may see a false negative. The point is to notice path components that - * could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In - * this case a false positive (non-Rust symbol has an important path - * component removed because it looks like a Rust hash) is worse than a - * false negative (the rare Rust symbol is not demangled) so this sets the - * balance in favor of false negatives. - * - * 3. There must be no characters other than a-zA-Z0-9 and _.:$ - * - * 4. There must be no unrecognized $-sign sequences. - * - * 5. There must be no sequence of three or more dots in a row ("..."). - */ -bool -rust_is_mangled(const char *sym) -{ - size_t len, len_without_hash; - - if (!sym) - return false; - - len = strlen(sym); - if (len <= hash_prefix_len + hash_len) - /* Not long enough to contain "::h" + hash + something else */ - return false; - - len_without_hash = len - (hash_prefix_len + hash_len); - if (!is_prefixed_hash(sym + len_without_hash)) - return false; - - return looks_like_rust(sym, len_without_hash); -} - -/* - * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex - * digits must comprise between 5 and 15 (inclusive) distinct digits. 
- */ -static bool is_prefixed_hash(const char *str) -{ - const char *end; - bool seen[16]; - size_t i; - int count; - - if (strncmp(str, hash_prefix, hash_prefix_len)) - return false; - str += hash_prefix_len; - - memset(seen, false, sizeof(seen)); - for (end = str + hash_len; str < end; str++) - if (*str >= '0' && *str <= '9') - seen[*str - '0'] = true; - else if (*str >= 'a' && *str <= 'f') - seen[*str - 'a' + 10] = true; - else - return false; - - /* Count how many distinct digits seen */ - count = 0; - for (i = 0; i < 16; i++) - if (seen[i]) - count++; - - return count >= 5 && count <= 15; -} - -static bool looks_like_rust(const char *str, size_t len) -{ - const char *end = str + len; - - while (str < end) - switch (*str) { - case '$': - if (!strncmp(str, "$C$", 3)) - str += 3; - else if (!strncmp(str, "$SP$", 4) - || !strncmp(str, "$BP$", 4) - || !strncmp(str, "$RF$", 4) - || !strncmp(str, "$LT$", 4) - || !strncmp(str, "$GT$", 4) - || !strncmp(str, "$LP$", 4) - || !strncmp(str, "$RP$", 4)) - str += 4; - else if (!strncmp(str, "$u20$", 5) - || !strncmp(str, "$u27$", 5) - || !strncmp(str, "$u5b$", 5) - || !strncmp(str, "$u5d$", 5) - || !strncmp(str, "$u7e$", 5)) - str += 5; - else - return false; - break; - case '.': - /* Do not allow three or more consecutive dots */ - if (!strncmp(str, "...", 3)) - return false; - /* Fall through */ - case 'a' ... 'z': - case 'A' ... 'Z': - case '0' ... '9': - case '_': - case ':': - str++; - break; - default: - return false; - } - - return true; -} - -/* - * INPUT: - * sym: symbol for which rust_is_mangled(sym) returns true - * - * The input is demangled in-place because the mangled name is always longer - * than the demangled one. - */ -void -rust_demangle_sym(char *sym) -{ - const char *in; - char *out; - const char *end; - - if (!sym) - return; - - in = sym; - out = sym; - end = sym + strlen(sym) - (hash_prefix_len + hash_len); - - while (in < end) - switch (*in) { - case '$': - if (!(unescape(&in, &out, "$C$", ',') - || unescape(&in, &out, "$SP$", '@') - || unescape(&in, &out, "$BP$", '*') - || unescape(&in, &out, "$RF$", '&') - || unescape(&in, &out, "$LT$", '<') - || unescape(&in, &out, "$GT$", '>') - || unescape(&in, &out, "$LP$", '(') - || unescape(&in, &out, "$RP$", ')') - || unescape(&in, &out, "$u20$", ' ') - || unescape(&in, &out, "$u27$", '\'') - || unescape(&in, &out, "$u5b$", '[') - || unescape(&in, &out, "$u5d$", ']') - || unescape(&in, &out, "$u7e$", '~'))) { - pr_err("demangle-rust: unexpected escape sequence"); - goto done; - } - break; - case '_': - /* - * If this is the start of a path component and the next - * character is an escape sequence, ignore the - * underscore. The mangler inserts an underscore to make - * sure the path component begins with a XID_Start - * character. - */ - if ((in == sym || in[-1] == ':') && in[1] == '$') - in++; - else - *out++ = *in++; - break; - case '.': - if (in[1] == '.') { - /* ".." becomes "::" */ - *out++ = ':'; - *out++ = ':'; - in += 2; - } else { - /* "." becomes "-" */ - *out++ = '-'; - in++; - } - break; - case 'a' ... 'z': - case 'A' ... 'Z': - case '0' ... 
'9':
- case ':':
- *out++ = *in++;
- break;
- default:
- pr_err("demangle-rust: unexpected character '%c' in symbol\n",
- *in);
- goto done;
- }
-
-done:
- *out = '\0';
-}
-
-static bool unescape(const char **in, char **out, const char *seq, char value)
-{
- size_t len = strlen(seq);
-
- if (strncmp(*in, seq, len))
- return false;
-
- **out = value;
-
- *in += len;
- *out += 1;
-
- return true;
-}
diff --git a/tools/perf/util/demangle-rust.h b/tools/perf/util/demangle-rust.h
deleted file mode 100644
index 2fca618b1aa5..000000000000
--- a/tools/perf/util/demangle-rust.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __PERF_DEMANGLE_RUST
-#define __PERF_DEMANGLE_RUST 1
-
-bool rust_is_mangled(const char *str);
-void rust_demangle_sym(char *str);
-
-#endif /* __PERF_DEMANGLE_RUST */
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
new file mode 100644
index 000000000000..50b9433f3f8e
--- /dev/null
+++ b/tools/perf/util/disasm.c
@@ -0,0 +1,1747 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <libgen.h>
+#include <regex.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/string.h>
+#include <subcmd/run-command.h>
+
+#include "annotate.h"
+#include "annotate-data.h"
+#include "build-id.h"
+#include "capstone.h"
+#include "debug.h"
+#include "disasm.h"
+#include "dso.h"
+#include "dwarf-regs.h"
+#include "env.h"
+#include "evsel.h"
+#include "libbfd.h"
+#include "llvm.h"
+#include "map.h"
+#include "maps.h"
+#include "namespaces.h"
+#include "srcline.h"
+#include "symbol.h"
+#include "util.h"
+
+static regex_t file_lineno;
+
+/* These can be referred to from the arch-dependent code */
+static struct ins_ops call_ops;
+static struct ins_ops dec_ops;
+static struct ins_ops jump_ops;
+static struct ins_ops mov_ops;
+static struct ins_ops nop_ops;
+static struct ins_ops lock_ops;
+static struct ins_ops ret_ops;
+static struct ins_ops load_store_ops;
+static struct ins_ops arithmetic_ops;
+
+static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name);
+static int call__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops, int max_ins_name);
+
+static void ins__sort(struct arch *arch);
+static int disasm_line__parse(char *line, const char **namep, char **rawp);
+static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args);
+
+static __attribute__((constructor)) void symbol__init_regexpr(void)
+{
+ regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED);
+}
+
+static int arch__grow_instructions(struct arch *arch)
+{
+ struct ins *new_instructions;
+ size_t new_nr_allocated;
+
+ if (arch->nr_instructions_allocated == 0 && arch->instructions)
+ goto grow_from_non_allocated_table;
+
+ new_nr_allocated = arch->nr_instructions_allocated + 128;
+ new_instructions = realloc(arch->instructions, new_nr_allocated * sizeof(struct ins));
+ if (new_instructions == NULL)
+ return -1;
+
+out_update_instructions:
+ arch->instructions = new_instructions;
+ arch->nr_instructions_allocated = new_nr_allocated;
+ return 0;
+
+grow_from_non_allocated_table:
+ new_nr_allocated = arch->nr_instructions + 128;
+ new_instructions = calloc(new_nr_allocated, sizeof(struct ins));
+ if (new_instructions == NULL)
+ return -1;
+
+ memcpy(new_instructions, arch->instructions, arch->nr_instructions * sizeof(struct ins));
+ goto out_update_instructions;
+}
+
+static int arch__associate_ins_ops(struct arch*
arch, const char *name, struct ins_ops *ops) +{ + struct ins *ins; + + if (arch->nr_instructions == arch->nr_instructions_allocated && + arch__grow_instructions(arch)) + return -1; + + ins = &arch->instructions[arch->nr_instructions]; + ins->name = strdup(name); + if (!ins->name) + return -1; + + ins->ops = ops; + arch->nr_instructions++; + + ins__sort(arch); + return 0; +} + +#include "arch/arc/annotate/instructions.c" +#include "arch/arm/annotate/instructions.c" +#include "arch/arm64/annotate/instructions.c" +#include "arch/csky/annotate/instructions.c" +#include "arch/loongarch/annotate/instructions.c" +#include "arch/mips/annotate/instructions.c" +#include "arch/x86/annotate/instructions.c" +#include "arch/powerpc/annotate/instructions.c" +#include "arch/riscv64/annotate/instructions.c" +#include "arch/s390/annotate/instructions.c" +#include "arch/sparc/annotate/instructions.c" + +static struct arch architectures[] = { + { + .name = "arc", + .init = arc__annotate_init, + }, + { + .name = "arm", + .init = arm__annotate_init, + }, + { + .name = "arm64", + .init = arm64__annotate_init, + }, + { + .name = "csky", + .init = csky__annotate_init, + }, + { + .name = "mips", + .init = mips__annotate_init, + .objdump = { + .comment_char = '#', + }, + }, + { + .name = "x86", + .init = x86__annotate_init, + .instructions = x86__instructions, + .nr_instructions = ARRAY_SIZE(x86__instructions), + .insn_suffix = "bwlq", + .objdump = { + .comment_char = '#', + .register_char = '%', + .memory_ref_char = '(', + .imm_char = '$', + }, +#ifdef HAVE_LIBDW_SUPPORT + .update_insn_state = update_insn_state_x86, +#endif + }, + { + .name = "powerpc", + .init = powerpc__annotate_init, +#ifdef HAVE_LIBDW_SUPPORT + .update_insn_state = update_insn_state_powerpc, +#endif + }, + { + .name = "riscv64", + .init = riscv64__annotate_init, + }, + { + .name = "s390", + .init = s390__annotate_init, + .objdump = { + .comment_char = '#', + }, + }, + { + .name = "sparc", + .init = sparc__annotate_init, + .objdump = { + .comment_char = '#', + }, + }, + { + .name = "loongarch", + .init = loongarch__annotate_init, + .objdump = { + .comment_char = '#', + }, + }, +}; + +static int arch__key_cmp(const void *name, const void *archp) +{ + const struct arch *arch = archp; + + return strcmp(name, arch->name); +} + +static int arch__cmp(const void *a, const void *b) +{ + const struct arch *aa = a; + const struct arch *ab = b; + + return strcmp(aa->name, ab->name); +} + +static void arch__sort(void) +{ + const int nmemb = ARRAY_SIZE(architectures); + + qsort(architectures, nmemb, sizeof(struct arch), arch__cmp); +} + +struct arch *arch__find(const char *name) +{ + const int nmemb = ARRAY_SIZE(architectures); + static bool sorted; + + if (!sorted) { + arch__sort(); + sorted = true; + } + + return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); +} + +bool arch__is(struct arch *arch, const char *name) +{ + return !strcmp(arch->name, name); +} + +static void ins_ops__delete(struct ins_operands *ops) +{ + if (ops == NULL) + return; + zfree(&ops->source.raw); + zfree(&ops->source.name); + zfree(&ops->target.raw); + zfree(&ops->target.name); +} + +static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw); +} + +static int ins__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + if (ins->ops->scnprintf) + return ins->ops->scnprintf(ins, bf, 
size, ops, max_ins_name); + + return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); +} + +bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2) +{ + if (!arch || !arch->ins_is_fused) + return false; + + return arch->ins_is_fused(arch, ins1, ins2); +} + +static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, + struct disasm_line *dl __maybe_unused) +{ + char *endptr, *tok, *name; + struct map *map = ms->map; + struct addr_map_symbol target = { + .ms = { .map = map, }, + }; + + ops->target.addr = strtoull(ops->raw, &endptr, 16); + + name = strchr(endptr, '<'); + if (name == NULL) + goto indirect_call; + + name++; + + if (arch->objdump.skip_functions_char && + strchr(name, arch->objdump.skip_functions_char)) + return -1; + + tok = strchr(name, '>'); + if (tok == NULL) + return -1; + + *tok = '\0'; + ops->target.name = strdup(name); + *tok = '>'; + + if (ops->target.name == NULL) + return -1; +find_target: + target.addr = map__objdump_2mem(map, ops->target.addr); + + if (maps__find_ams(ms->maps, &target) == 0 && + map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) + ops->target.sym = target.ms.sym; + + return 0; + +indirect_call: + tok = strchr(endptr, '*'); + if (tok != NULL) { + endptr++; + + /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx). + * Do not parse such instruction. */ + if (strstr(endptr, "(%r") == NULL) + ops->target.addr = strtoull(endptr, NULL, 16); + } + goto find_target; +} + +static int call__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + if (ops->target.sym) + return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); + + if (ops->target.addr == 0) + return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); + + if (ops->target.name) + return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.name); + + return scnprintf(bf, size, "%-*s *%" PRIx64, max_ins_name, ins->name, ops->target.addr); +} + +static struct ins_ops call_ops = { + .parse = call__parse, + .scnprintf = call__scnprintf, +}; + +bool ins__is_call(const struct ins *ins) +{ + return ins->ops == &call_ops || ins->ops == &s390_call_ops || ins->ops == &loongarch_call_ops; +} + +/* + * Prevents from matching commas in the comment section, e.g.: + * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast + * + * and skip comma as part of function arguments, e.g.: + * 1d8b4ac <linemap_lookup(line_maps const*, unsigned int)+0xcc> + */ +static inline const char *validate_comma(const char *c, struct ins_operands *ops) +{ + if (ops->jump.raw_comment && c > ops->jump.raw_comment) + return NULL; + + if (ops->jump.raw_func_start && c > ops->jump.raw_func_start) + return NULL; + + return c; +} + +static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, + struct disasm_line *dl __maybe_unused) +{ + struct map *map = ms->map; + struct symbol *sym = ms->sym; + struct addr_map_symbol target = { + .ms = { .map = map, }, + }; + const char *c = strchr(ops->raw, ','); + u64 start, end; + + ops->jump.raw_comment = strchr(ops->raw, arch->objdump.comment_char); + ops->jump.raw_func_start = strchr(ops->raw, '<'); + + c = validate_comma(c, ops); + + /* + * Examples of lines to parse for the _cpp_lex_token@@Base + * function: + * + * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92> + * 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72> + * + 
* The first is a jump to an offset inside the same function, + * the second is to another function, i.e. that 0xa72 is an + * offset in the cpp_named_operator2name@@base function. + */ + /* + * skip over possible up to 2 operands to get to address, e.g.: + * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0> + */ + if (c != NULL) { + c++; + ops->target.addr = strtoull(c, NULL, 16); + if (!ops->target.addr) { + c = strchr(c, ','); + c = validate_comma(c, ops); + if (c != NULL) { + c++; + ops->target.addr = strtoull(c, NULL, 16); + } + } + } else { + ops->target.addr = strtoull(ops->raw, NULL, 16); + } + + target.addr = map__objdump_2mem(map, ops->target.addr); + start = map__unmap_ip(map, sym->start); + end = map__unmap_ip(map, sym->end); + + ops->target.outside = target.addr < start || target.addr > end; + + /* + * FIXME: things like this in _cpp_lex_token (gcc's cc1 program): + + cpp_named_operator2name@@Base+0xa72 + + * Point to a place that is after the cpp_named_operator2name + * boundaries, i.e. in the ELF symbol table for cc1 + * cpp_named_operator2name is marked as being 32-bytes long, but it in + * fact is much larger than that, so we seem to need a symbols__find() + * routine that looks for >= current->start and < next_symbol->start, + * possibly just for C++ objects? + * + * For now lets just make some progress by marking jumps to outside the + * current function as call like. + * + * Actual navigation will come next, with further understanding of how + * the symbol searching and disassembly should be done. + */ + if (maps__find_ams(ms->maps, &target) == 0 && + map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr) + ops->target.sym = target.ms.sym; + + if (!ops->target.outside) { + ops->target.offset = target.addr - start; + ops->target.offset_avail = true; + } else { + ops->target.offset_avail = false; + } + + return 0; +} + +static int jump__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + const char *c; + + if (!ops->target.addr || ops->target.offset < 0) + return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); + + if (ops->target.outside && ops->target.sym != NULL) + return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name); + + c = strchr(ops->raw, ','); + c = validate_comma(c, ops); + + if (c != NULL) { + const char *c2 = strchr(c + 1, ','); + + c2 = validate_comma(c2, ops); + /* check for 3-op insn */ + if (c2 != NULL) + c = c2; + c++; + + /* mirror arch objdump's space-after-comma style */ + if (*c == ' ') + c++; + } + + return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name, + ins->name, c ? c - ops->raw : 0, ops->raw, + ops->target.offset); +} + +static void jump__delete(struct ins_operands *ops __maybe_unused) +{ + /* + * The ops->jump.raw_comment and ops->jump.raw_func_start belong to the + * raw string, don't free them. 
+ */ +} + +static struct ins_ops jump_ops = { + .free = jump__delete, + .parse = jump__parse, + .scnprintf = jump__scnprintf, +}; + +bool ins__is_jump(const struct ins *ins) +{ + return ins->ops == &jump_ops || ins->ops == &loongarch_jump_ops; +} + +static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) +{ + char *endptr, *name, *t; + + if (strstr(raw, "(%rip)") == NULL) + return 0; + + *addrp = strtoull(comment, &endptr, 16); + if (endptr == comment) + return 0; + name = strchr(endptr, '<'); + if (name == NULL) + return -1; + + name++; + + t = strchr(name, '>'); + if (t == NULL) + return 0; + + *t = '\0'; + *namep = strdup(name); + *t = '>'; + + return 0; +} + +static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, + struct disasm_line *dl __maybe_unused) +{ + ops->locked.ops = zalloc(sizeof(*ops->locked.ops)); + if (ops->locked.ops == NULL) + return 0; + + if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0) + goto out_free_ops; + + ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name, 0); + + if (ops->locked.ins.ops == NULL) + goto out_free_ops; + + if (ops->locked.ins.ops->parse && + ops->locked.ins.ops->parse(arch, ops->locked.ops, ms, NULL) < 0) + goto out_free_ops; + + return 0; + +out_free_ops: + zfree(&ops->locked.ops); + return 0; +} + +static int lock__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + int printed; + + if (ops->locked.ins.ops == NULL) + return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name); + + printed = scnprintf(bf, size, "%-*s ", max_ins_name, ins->name); + return printed + ins__scnprintf(&ops->locked.ins, bf + printed, + size - printed, ops->locked.ops, max_ins_name); +} + +static void lock__delete(struct ins_operands *ops) +{ + struct ins *ins = &ops->locked.ins; + + if (ins->ops && ins->ops->free) + ins->ops->free(ops->locked.ops); + else + ins_ops__delete(ops->locked.ops); + + zfree(&ops->locked.ops); + zfree(&ops->locked.ins.name); + zfree(&ops->target.raw); + zfree(&ops->target.name); +} + +static struct ins_ops lock_ops = { + .free = lock__delete, + .parse = lock__parse, + .scnprintf = lock__scnprintf, +}; + +/* + * Check if the operand has more than one registers like x86 SIB addressing: + * 0x1234(%rax, %rbx, 8) + * + * But it doesn't care segment selectors like %gs:0x5678(%rcx), so just check + * the input string after 'memory_ref_char' if exists. + */ +static bool check_multi_regs(struct arch *arch, const char *op) +{ + int count = 0; + + if (arch->objdump.register_char == 0) + return false; + + if (arch->objdump.memory_ref_char) { + op = strchr(op, arch->objdump.memory_ref_char); + if (op == NULL) + return false; + } + + while ((op = strchr(op, arch->objdump.register_char)) != NULL) { + count++; + op++; + } + + return count > 1; +} + +static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused, + struct disasm_line *dl __maybe_unused) +{ + char *s = strchr(ops->raw, ','), *target, *comment, prev; + + if (s == NULL) + return -1; + + *s = '\0'; + + /* + * x86 SIB addressing has something like 0x8(%rax, %rcx, 1) + * then it needs to have the closing parenthesis. 
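+ * e.g. "mov 0x8(%rax,%rcx,1),%rdx" must be split at the comma after
+ * the ')', not at the commas inside the parentheses.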
+ */ + if (strchr(ops->raw, '(')) { + *s = ','; + s = strchr(ops->raw, ')'); + if (s == NULL || s[1] != ',') + return -1; + *++s = '\0'; + } + + ops->source.raw = strdup(ops->raw); + *s = ','; + + if (ops->source.raw == NULL) + return -1; + + ops->source.multi_regs = check_multi_regs(arch, ops->source.raw); + + target = skip_spaces(++s); + comment = strchr(s, arch->objdump.comment_char); + + if (comment != NULL) + s = comment - 1; + else + s = strchr(s, '\0') - 1; + + while (s > target && isspace(s[0])) + --s; + s++; + prev = *s; + *s = '\0'; + + ops->target.raw = strdup(target); + *s = prev; + + if (ops->target.raw == NULL) + goto out_free_source; + + ops->target.multi_regs = check_multi_regs(arch, ops->target.raw); + + if (comment == NULL) + return 0; + + comment = skip_spaces(comment); + comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); + comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); + + return 0; + +out_free_source: + zfree(&ops->source.raw); + return -1; +} + +static int mov__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + return scnprintf(bf, size, "%-*s %s,%s", max_ins_name, ins->name, + ops->source.name ?: ops->source.raw, + ops->target.name ?: ops->target.raw); +} + +static struct ins_ops mov_ops = { + .parse = mov__parse, + .scnprintf = mov__scnprintf, +}; + +#define PPC_22_30(R) (((R) >> 1) & 0x1ff) +#define MINUS_EXT_XO_FORM 234 +#define SUB_EXT_XO_FORM 232 +#define ADD_ZERO_EXT_XO_FORM 202 +#define SUB_ZERO_EXT_XO_FORM 200 + +static int arithmetic__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, + ops->raw); +} + +/* + * Sets the fields: multi_regs and "mem_ref". + * "mem_ref" is set for ops->source which is later used to + * fill the objdump->memory_ref-char field. This ops is currently + * used by powerpc and since binary instruction code is used to + * extract opcode, regs and offset, no other parsing is needed here. + * + * Dont set multi regs for 4 cases since it has only one operand + * for source: + * - Add to Minus One Extended XO-form ( Ex: addme, addmeo ) + * - Subtract From Minus One Extended XO-form ( Ex: subfme ) + * - Add to Zero Extended XO-form ( Ex: addze, addzeo ) + * - Subtract From Zero Extended XO-form ( Ex: subfze ) + */ +static int arithmetic__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, + struct map_symbol *ms __maybe_unused, struct disasm_line *dl) +{ + int opcode = PPC_OP(dl->raw.raw_insn); + + ops->source.mem_ref = false; + if (opcode == 31) { + if ((opcode != MINUS_EXT_XO_FORM) && (opcode != SUB_EXT_XO_FORM) \ + && (opcode != ADD_ZERO_EXT_XO_FORM) && (opcode != SUB_ZERO_EXT_XO_FORM)) + ops->source.multi_regs = true; + } + + ops->target.mem_ref = false; + ops->target.multi_regs = false; + + return 0; +} + +static struct ins_ops arithmetic_ops = { + .parse = arithmetic__parse, + .scnprintf = arithmetic__scnprintf, +}; + +static int load_store__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, + ops->raw); +} + +/* + * Sets the fields: multi_regs and "mem_ref". + * "mem_ref" is set for ops->source which is later used to + * fill the objdump->memory_ref-char field. 
This ops is currently + * used by powerpc and since binary instruction code is used to + * extract opcode, regs and offset, no other parsing is needed here + */ +static int load_store__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, + struct map_symbol *ms __maybe_unused, struct disasm_line *dl __maybe_unused) +{ + ops->source.mem_ref = true; + ops->source.multi_regs = false; + /* opcode 31 is of X form */ + if (PPC_OP(dl->raw.raw_insn) == 31) + ops->source.multi_regs = true; + + ops->target.mem_ref = false; + ops->target.multi_regs = false; + + return 0; +} + +static struct ins_ops load_store_ops = { + .parse = load_store__parse, + .scnprintf = load_store__scnprintf, +}; + +static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused, + struct disasm_line *dl __maybe_unused) +{ + char *target, *comment, *s, prev; + + target = s = ops->raw; + + while (s[0] != '\0' && !isspace(s[0])) + ++s; + prev = *s; + *s = '\0'; + + ops->target.raw = strdup(target); + *s = prev; + + if (ops->target.raw == NULL) + return -1; + + comment = strchr(s, arch->objdump.comment_char); + if (comment == NULL) + return 0; + + comment = skip_spaces(comment); + comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); + + return 0; +} + +static int dec__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, + ops->target.name ?: ops->target.raw); +} + +static struct ins_ops dec_ops = { + .parse = dec__parse, + .scnprintf = dec__scnprintf, +}; + +static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size, + struct ins_operands *ops __maybe_unused, int max_ins_name) +{ + return scnprintf(bf, size, "%-*s", max_ins_name, "nop"); +} + +static struct ins_ops nop_ops = { + .scnprintf = nop__scnprintf, +}; + +static struct ins_ops ret_ops = { + .scnprintf = ins__raw_scnprintf, +}; + +static bool ins__is_nop(const struct ins *ins) +{ + return ins->ops == &nop_ops; +} + +bool ins__is_ret(const struct ins *ins) +{ + return ins->ops == &ret_ops; +} + +bool ins__is_lock(const struct ins *ins) +{ + return ins->ops == &lock_ops; +} + +static int ins__key_cmp(const void *name, const void *insp) +{ + const struct ins *ins = insp; + + return strcmp(name, ins->name); +} + +static int ins__cmp(const void *a, const void *b) +{ + const struct ins *ia = a; + const struct ins *ib = b; + + return strcmp(ia->name, ib->name); +} + +static void ins__sort(struct arch *arch) +{ + const int nmemb = arch->nr_instructions; + + qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp); +} + +static struct ins_ops *__ins__find(struct arch *arch, const char *name, struct disasm_line *dl) +{ + struct ins *ins; + const int nmemb = arch->nr_instructions; + + if (arch__is(arch, "powerpc")) { + /* + * For powerpc, identify the instruction ops + * from the opcode using raw_insn. 
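+ * check_ppc_insn() keys off the binary encoding in dl->raw.raw_insn,
+ * so the name-based bsearch below only runs when it returns NULL.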
+ */ + struct ins_ops *ops; + + ops = check_ppc_insn(dl); + if (ops) + return ops; + } + + if (!arch->sorted_instructions) { + ins__sort(arch); + arch->sorted_instructions = true; + } + + ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); + if (ins) + return ins->ops; + + if (arch->insn_suffix) { + char tmp[32]; + char suffix; + size_t len = strlen(name); + + if (len == 0 || len >= sizeof(tmp)) + return NULL; + + suffix = name[len - 1]; + if (strchr(arch->insn_suffix, suffix) == NULL) + return NULL; + + strcpy(tmp, name); + tmp[len - 1] = '\0'; /* remove the suffix and check again */ + + ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); + } + return ins ? ins->ops : NULL; +} + +struct ins_ops *ins__find(struct arch *arch, const char *name, struct disasm_line *dl) +{ + struct ins_ops *ops = __ins__find(arch, name, dl); + + if (!ops && arch->associate_instruction_ops) + ops = arch->associate_instruction_ops(arch, name); + + return ops; +} + +static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms) +{ + dl->ins.ops = ins__find(arch, dl->ins.name, dl); + + if (!dl->ins.ops) + return; + + if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms, dl) < 0) + dl->ins.ops = NULL; +} + +static int disasm_line__parse(char *line, const char **namep, char **rawp) +{ + char tmp, *name = skip_spaces(line); + + if (name[0] == '\0') + return -1; + + *rawp = name + 1; + + while ((*rawp)[0] != '\0' && !isspace((*rawp)[0])) + ++*rawp; + + tmp = (*rawp)[0]; + (*rawp)[0] = '\0'; + *namep = strdup(name); + + if (*namep == NULL) + goto out; + + (*rawp)[0] = tmp; + *rawp = strim(*rawp); + + return 0; + +out: + return -1; +} + +/* + * Parses the result captured from symbol__disassemble_* + * Example, line read from DSO file in powerpc: + * line: 38 01 81 e8 + * opcode: fetched from arch specific get_opcode_insn + * rawp_insn: e8810138 + * + * rawp_insn is used later to extract the reg/offset fields + */ +#define PPC_OP(op) (((op) >> 26) & 0x3F) +#define RAW_BYTES 11 + +static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args) +{ + char *line = dl->al.line; + const char **namep = &dl->ins.name; + char **rawp = &dl->ops.raw; + char *tmp_raw_insn, *name_raw_insn = skip_spaces(line); + char *name = skip_spaces(name_raw_insn + RAW_BYTES); + int disasm = 0; + int ret = 0; + + if (args->options->disassembler_used) + disasm = 1; + + if (name_raw_insn[0] == '\0') + return -1; + + if (disasm) + ret = disasm_line__parse(name, namep, rawp); + else + *namep = ""; + + tmp_raw_insn = strndup(name_raw_insn, 11); + if (tmp_raw_insn == NULL) + return -1; + + remove_spaces(tmp_raw_insn); + + sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn); + if (disasm) + dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn); + + return ret; +} + +static void annotation_line__init(struct annotation_line *al, + struct annotate_args *args, + int nr) +{ + al->offset = args->offset; + al->line = strdup(args->line); + al->line_nr = args->line_nr; + al->fileloc = args->fileloc; + al->data_nr = nr; +} + +static void annotation_line__exit(struct annotation_line *al) +{ + zfree_srcline(&al->path); + zfree(&al->line); + zfree(&al->cycles); + zfree(&al->br_cntr); +} + +static size_t disasm_line_size(int nr) +{ + struct annotation_line *al; + + return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr)); +} + +/* + * Allocating the disasm annotation line data with + * following structure: + * + * 
------------------------------------------- + * struct disasm_line | struct annotation_line + * ------------------------------------------- + * + * We have 'struct annotation_line' member as last member + * of 'struct disasm_line' to have an easy access. + */ +struct disasm_line *disasm_line__new(struct annotate_args *args) +{ + struct disasm_line *dl = NULL; + struct annotation *notes = symbol__annotation(args->ms.sym); + int nr = notes->src->nr_events; + + dl = zalloc(disasm_line_size(nr)); + if (!dl) + return NULL; + + annotation_line__init(&dl->al, args, nr); + if (dl->al.line == NULL) + goto out_delete; + + if (args->offset != -1) { + if (arch__is(args->arch, "powerpc")) { + if (disasm_line__parse_powerpc(dl, args) < 0) + goto out_free_line; + } else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) + goto out_free_line; + + disasm_line__init_ins(dl, args->arch, &args->ms); + } + + return dl; + +out_free_line: + zfree(&dl->al.line); +out_delete: + free(dl); + return NULL; +} + +void disasm_line__free(struct disasm_line *dl) +{ + if (dl->ins.ops && dl->ins.ops->free) + dl->ins.ops->free(&dl->ops); + else + ins_ops__delete(&dl->ops); + zfree(&dl->ins.name); + annotation_line__exit(&dl->al); + free(dl); +} + +int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name) +{ + if (raw || !dl->ins.ops) + return scnprintf(bf, size, "%-*s %s", max_ins_name, dl->ins.name, dl->ops.raw); + + return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); +} + +/* + * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw) + * which looks like following + * + * 0000000000415500 <_init>: + * 415500: sub $0x8,%rsp + * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0x2f8> + * 41550b: test %rax,%rax + * 41550e: je 415515 <_init+0x15> + * 415510: callq 416e70 <__gmon_start__@plt> + * 415515: add $0x8,%rsp + * 415519: retq + * + * it will be parsed and saved into struct disasm_line as + * <offset> <name> <ops.raw> + * + * The offset will be a relative offset from the start of the symbol and -1 + * means that it's not a disassembly line so should be treated differently. + * The ops.raw part will be parsed further according to type of the instruction. + */ +static int symbol__parse_objdump_line(struct symbol *sym, + struct annotate_args *args, + char *parsed_line, int *line_nr, char **fileloc) +{ + struct map *map = args->ms.map; + struct annotation *notes = symbol__annotation(sym); + struct disasm_line *dl; + char *tmp; + s64 line_ip, offset = -1; + regmatch_t match[2]; + + /* /filename:linenr ? Save line number and ignore. */ + if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { + *line_nr = atoi(parsed_line + match[1].rm_so); + free(*fileloc); + *fileloc = strdup(parsed_line); + return 0; + } + + /* Process hex address followed by ':'. 
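+ * e.g. " 415504: mov 0x2f5ad5(%rip),%rax ..." yields line_ip 0x415504,
+ * which is stored as an offset relative to the objdump address of
+ * sym->start.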
*/
+ line_ip = strtoull(parsed_line, &tmp, 16);
+ if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') {
+ u64 start = map__rip_2objdump(map, sym->start),
+ end = map__rip_2objdump(map, sym->end);
+
+ offset = line_ip - start;
+ if ((u64)line_ip < start || (u64)line_ip >= end)
+ offset = -1;
+ else
+ parsed_line = tmp + 1;
+ }
+
+ args->offset = offset;
+ args->line = parsed_line;
+ args->line_nr = *line_nr;
+ args->fileloc = *fileloc;
+ args->ms.sym = sym;
+
+ dl = disasm_line__new(args);
+ (*line_nr)++;
+
+ if (dl == NULL)
+ return -1;
+
+ if (!disasm_line__has_local_offset(dl)) {
+ dl->ops.target.offset = dl->ops.target.addr -
+ map__rip_2objdump(map, sym->start);
+ dl->ops.target.offset_avail = true;
+ }
+
+ /* kcore has no symbols, so add the call target symbol */
+ if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) {
+ struct addr_map_symbol target = {
+ .addr = dl->ops.target.addr,
+ .ms = { .map = map, },
+ };
+
+ if (!maps__find_ams(args->ms.maps, &target) &&
+ target.ms.sym->start == target.al_addr)
+ dl->ops.target.sym = target.ms.sym;
+ }
+
+ annotation_line__add(&dl->al, &notes->src->source);
+ return 0;
+}
+
+static void delete_last_nop(struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct list_head *list = &notes->src->source;
+ struct disasm_line *dl;
+
+ while (!list_empty(list)) {
+ dl = list_entry(list->prev, struct disasm_line, al.node);
+
+ if (dl->ins.ops) {
+ if (!ins__is_nop(&dl->ins))
+ return;
+ } else {
+ if (!strstr(dl->al.line, " nop ") &&
+ !strstr(dl->al.line, " nopl ") &&
+ !strstr(dl->al.line, " nopw "))
+ return;
+ }
+
+ list_del_init(&dl->al.node);
+ disasm_line__free(dl);
+ }
+}
+
+int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen)
+{
+ struct dso *dso = map__dso(ms->map);
+
+ BUG_ON(buflen == 0);
+
+ if (errnum >= 0) {
+ str_error_r(errnum, buf, buflen);
+ return 0;
+ }
+
+ switch (errnum) {
+ case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: {
+ char bf[SBUILD_ID_SIZE + 15] = " with build id ";
+ char *build_id_msg = NULL;
+
+ if (dso__has_build_id(dso)) {
+ build_id__snprintf(dso__bid(dso), bf + 15, sizeof(bf) - 15);
+ build_id_msg = bf;
+ }
+ scnprintf(buf, buflen,
+ "No vmlinux file%s\nwas found in the path.\n\n"
+ "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n"
+ "Please use:\n\n"
+ " perf buildid-cache -vu vmlinux\n\n"
+ "or:\n\n"
+ " --vmlinux vmlinux\n", build_id_msg ?: "");
+ }
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF:
+ scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation");
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP:
+ scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions.");
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING:
+ scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization.");
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE:
+ scnprintf(buf, buflen, "Invalid BPF file: %s.", dso__long_name(dso));
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF:
+ scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.",
+ dso__long_name(dso));
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE:
+ scnprintf(buf, buflen, "Couldn't determine the file %s type.", dso__long_name(dso));
+ break;
+ default:
+ scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum);
+ break;
+ }
+
+ return 0;
+}
+
+static int
dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size) +{ + char linkname[PATH_MAX]; + char *build_id_filename; + char *build_id_path = NULL; + char *pos; + int len; + + if (dso__symtab_type(dso) == DSO_BINARY_TYPE__KALLSYMS && + !dso__is_kcore(dso)) + return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX; + + build_id_filename = dso__build_id_filename(dso, NULL, 0, false); + if (build_id_filename) { + __symbol__join_symfs(filename, filename_size, build_id_filename); + free(build_id_filename); + } else { + if (dso__has_build_id(dso)) + return ENOMEM; + goto fallback; + } + + build_id_path = strdup(filename); + if (!build_id_path) + return ENOMEM; + + /* + * old style build-id cache has name of XX/XXXXXXX.. while + * new style has XX/XXXXXXX../{elf,kallsyms,vdso}. + * extract the build-id part of dirname in the new style only. + */ + pos = strrchr(build_id_path, '/'); + if (pos && strlen(pos) < SBUILD_ID_SIZE - 2) + dirname(build_id_path); + + if (dso__is_kcore(dso)) + goto fallback; + + len = readlink(build_id_path, linkname, sizeof(linkname) - 1); + if (len < 0) + goto fallback; + + linkname[len] = '\0'; + if (strstr(linkname, DSO__NAME_KALLSYMS) || + access(filename, R_OK)) { +fallback: + /* + * If we don't have build-ids or the build-id file isn't in the + * cache, or is just a kallsyms file, well, lets hope that this + * DSO is the same as when 'perf record' ran. + */ + if (dso__kernel(dso) && dso__long_name(dso)[0] == '/') + snprintf(filename, filename_size, "%s", dso__long_name(dso)); + else + __symbol__join_symfs(filename, filename_size, dso__long_name(dso)); + + mutex_lock(dso__lock(dso)); + if (access(filename, R_OK) && errno == ENOENT && dso__nsinfo(dso)) { + char *new_name = dso__filename_with_chroot(dso, filename); + if (new_name) { + strlcpy(filename, new_name, filename_size); + free(new_name); + } + } + mutex_unlock(dso__lock(dso)); + } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { + dso__set_binary_type(dso, DSO_BINARY_TYPE__BUILD_ID_CACHE); + } + + free(build_id_path); + return 0; +} + +static int symbol__disassemble_raw(char *filename, struct symbol *sym, + struct annotate_args *args) +{ + struct annotation *notes = symbol__annotation(sym); + struct map *map = args->ms.map; + struct dso *dso = map__dso(map); + u64 start = map__rip_2objdump(map, sym->start); + u64 end = map__rip_2objdump(map, sym->end); + u64 len = end - start; + u64 offset; + int i, count; + u8 *buf = NULL; + char disasm_buf[512]; + struct disasm_line *dl; + u32 *line; + + /* Return if objdump is specified explicitly */ + if (args->options->objdump_path) + return -1; + + pr_debug("Reading raw instruction from : %s using dso__data_read_offset\n", filename); + + buf = malloc(len); + if (buf == NULL) + goto err; + + count = dso__data_read_offset(dso, NULL, sym->start, buf, len); + + line = (u32 *)buf; + + if ((u64)count != len) + goto err; + + /* add the function address and name */ + scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", + start, sym->name); + + args->offset = -1; + args->line = disasm_buf; + args->line_nr = 0; + args->fileloc = NULL; + args->ms.sym = sym; + + dl = disasm_line__new(args); + if (dl == NULL) + goto err; + + annotation_line__add(&dl->al, ¬es->src->source); + + /* Each raw instruction is 4 byte */ + count = len/4; + + for (i = 0, offset = 0; i < count; i++) { + args->offset = offset; + sprintf(args->line, "%x", line[i]); + dl = disasm_line__new(args); + if (dl == NULL) + break; + + annotation_line__add(&dl->al, ¬es->src->source); + offset += 
4; + } + + /* It failed in the middle */ + if (offset != len) { + struct list_head *list = ¬es->src->source; + + /* Discard all lines and fallback to objdump */ + while (!list_empty(list)) { + dl = list_first_entry(list, struct disasm_line, al.node); + + list_del_init(&dl->al.node); + disasm_line__free(dl); + } + count = -1; + } + +out: + free(buf); + return count < 0 ? count : 0; + +err: + count = -1; + goto out; +} + +/* + * Possibly create a new version of line with tabs expanded. Returns the + * existing or new line, storage is updated if a new line is allocated. If + * allocation fails then NULL is returned. + */ +char *expand_tabs(char *line, char **storage, size_t *storage_len) +{ + size_t i, src, dst, len, new_storage_len, num_tabs; + char *new_line; + size_t line_len = strlen(line); + + for (num_tabs = 0, i = 0; i < line_len; i++) + if (line[i] == '\t') + num_tabs++; + + if (num_tabs == 0) + return line; + + /* + * Space for the line and '\0', less the leading and trailing + * spaces. Each tab may introduce 7 additional spaces. + */ + new_storage_len = line_len + 1 + (num_tabs * 7); + + new_line = malloc(new_storage_len); + if (new_line == NULL) { + pr_err("Failure allocating memory for tab expansion\n"); + return NULL; + } + + /* + * Copy regions starting at src and expand tabs. If there are two + * adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces + * are inserted. + */ + for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) { + if (line[i] == '\t') { + len = i - src; + memcpy(&new_line[dst], &line[src], len); + dst += len; + new_line[dst++] = ' '; + while (dst % 8 != 0) + new_line[dst++] = ' '; + src = i + 1; + num_tabs--; + } + } + + /* Expand the last region. */ + len = line_len - src; + memcpy(&new_line[dst], &line[src], len); + dst += len; + new_line[dst] = '\0'; + + free(*storage); + *storage = new_line; + *storage_len = new_storage_len; + return new_line; +} + +static int symbol__disassemble_bpf_image(struct symbol *sym, struct annotate_args *args) +{ + struct annotation *notes = symbol__annotation(sym); + struct disasm_line *dl; + + args->offset = -1; + args->line = strdup("to be implemented"); + args->line_nr = 0; + args->fileloc = NULL; + dl = disasm_line__new(args); + if (dl) + annotation_line__add(&dl->al, ¬es->src->source); + + zfree(&args->line); + return 0; +} + +static int symbol__disassemble_objdump(const char *filename, struct symbol *sym, + struct annotate_args *args) +{ + struct annotation_options *opts = &annotate_opts; + struct map *map = args->ms.map; + struct dso *dso = map__dso(map); + char *command; + FILE *file; + int lineno = 0; + char *fileloc = NULL; + int nline; + char *line; + size_t line_len; + const char *objdump_argv[] = { + "/bin/sh", + "-c", + NULL, /* Will be the objdump command to run. */ + "--", + NULL, /* Will be the symfs path. */ + NULL, + }; + struct child_process objdump_process; + int err; + + if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) + return symbol__disassemble_bpf_libbfd(sym, args); + + if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) + return symbol__disassemble_bpf_image(sym, args); + + err = asprintf(&command, + "%s %s%s --start-address=0x%016" PRIx64 + " --stop-address=0x%016" PRIx64 + " %s -d %s %s %s %c%s%c %s%s -C \"$1\"", + opts->objdump_path ?: "objdump", + opts->disassembler_style ? "-M " : "", + opts->disassembler_style ?: "", + map__rip_2objdump(map, sym->start), + map__rip_2objdump(map, sym->end), + opts->show_linenr ? "-l" : "", + opts->show_asm_raw ? 
"" : "--no-show-raw-insn", + opts->annotate_src ? "-S" : "", + opts->prefix ? "--prefix " : "", + opts->prefix ? '"' : ' ', + opts->prefix ?: "", + opts->prefix ? '"' : ' ', + opts->prefix_strip ? "--prefix-strip=" : "", + opts->prefix_strip ?: ""); + + if (err < 0) { + pr_err("Failure allocating memory for the command to run\n"); + return err; + } + + pr_debug("Executing: %s\n", command); + + objdump_argv[2] = command; + objdump_argv[4] = filename; + + /* Create a pipe to read from for stdout */ + memset(&objdump_process, 0, sizeof(objdump_process)); + objdump_process.argv = objdump_argv; + objdump_process.out = -1; + objdump_process.err = -1; + objdump_process.no_stderr = 1; + if (start_command(&objdump_process)) { + pr_err("Failure starting to run %s\n", command); + err = -1; + goto out_free_command; + } + + file = fdopen(objdump_process.out, "r"); + if (!file) { + pr_err("Failure creating FILE stream for %s\n", command); + /* + * If we were using debug info should retry with + * original binary. + */ + err = -1; + goto out_close_stdout; + } + + /* Storage for getline. */ + line = NULL; + line_len = 0; + + nline = 0; + while (!feof(file)) { + const char *match; + char *expanded_line; + + if (getline(&line, &line_len, file) < 0 || !line) + break; + + /* Skip lines containing "filename:" */ + match = strstr(line, filename); + if (match && match[strlen(filename)] == ':') + continue; + + expanded_line = strim(line); + expanded_line = expand_tabs(expanded_line, &line, &line_len); + if (!expanded_line) + break; + + /* + * The source code line number (lineno) needs to be kept in + * across calls to symbol__parse_objdump_line(), so that it + * can associate it with the instructions till the next one. + * See disasm_line__new() and struct disasm_line::line_nr. + */ + if (symbol__parse_objdump_line(sym, args, expanded_line, + &lineno, &fileloc) < 0) + break; + nline++; + } + free(line); + free(fileloc); + + err = finish_command(&objdump_process); + if (err) + pr_err("Error running %s\n", command); + + if (nline == 0) { + err = -1; + pr_err("No output from %s\n", command); + } + + /* + * kallsyms does not have symbol sizes so there may a nop at the end. + * Remove it. 
+ */ + if (dso__is_kcore(dso)) + delete_last_nop(sym); + + fclose(file); + +out_close_stdout: + close(objdump_process.out); + +out_free_command: + free(command); + return err; +} + +int symbol__disassemble(struct symbol *sym, struct annotate_args *args) +{ + struct annotation_options *options = args->options; + struct map *map = args->ms.map; + struct dso *dso = map__dso(map); + char symfs_filename[PATH_MAX]; + bool delete_extract = false; + struct kcore_extract kce; + bool decomp = false; + int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); + + if (err) + return err; + + pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, + symfs_filename, sym->name, map__unmap_ip(map, sym->start), + map__unmap_ip(map, sym->end)); + + pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name); + + if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { + return SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE; + } else if (dso__is_kcore(dso)) { + kce.addr = map__rip_2objdump(map, sym->start); + kce.kcore_filename = symfs_filename; + kce.len = sym->end - sym->start; + kce.offs = sym->start; + + if (!kcore_extract__create(&kce)) { + delete_extract = true; + strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename)); + } + } else if (dso__needs_decompress(dso)) { + char tmp[KMOD_DECOMP_LEN]; + + if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0) + return -1; + + decomp = true; + strcpy(symfs_filename, tmp); + } + + /* + * For powerpc data type profiling, use the dso__data_read_offset to + * read raw instruction directly and interpret the binary code to + * understand instructions and register fields. For sort keys as type + * and typeoff, disassemble to mnemonic notation is not required in + * case of powerpc. + */ + if (arch__is(args->arch, "powerpc")) { + extern const char *sort_order; + + if (sort_order && !strstr(sort_order, "sym")) { + err = symbol__disassemble_raw(symfs_filename, sym, args); + if (err == 0) + goto out_remove_tmp; + + err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args); + if (err == 0) + goto out_remove_tmp; + } + } + + /* FIXME: LLVM and CAPSTONE should support source code */ + if (options->annotate_src && !options->hide_src_code) { + err = symbol__disassemble_objdump(symfs_filename, sym, args); + if (err == 0) + goto out_remove_tmp; + } + + err = -1; + for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) { + enum perf_disassembler dis = options->disassemblers[i]; + + switch (dis) { + case PERF_DISASM_LLVM: + args->options->disassembler_used = PERF_DISASM_LLVM; + err = symbol__disassemble_llvm(symfs_filename, sym, args); + break; + case PERF_DISASM_CAPSTONE: + args->options->disassembler_used = PERF_DISASM_CAPSTONE; + err = symbol__disassemble_capstone(symfs_filename, sym, args); + break; + case PERF_DISASM_OBJDUMP: + args->options->disassembler_used = PERF_DISASM_OBJDUMP; + err = symbol__disassemble_objdump(symfs_filename, sym, args); + break; + case PERF_DISASM_UNKNOWN: /* End of disassemblers. 
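+ * options->disassemblers is tried in array order until one succeeds
+ * (err == 0); reaching this entry means every configured disassembler
+ * has been attempted.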
*/ + default: + args->options->disassembler_used = PERF_DISASM_UNKNOWN; + goto out_remove_tmp; + } + if (err == 0) + pr_debug("Disassembled with %s\n", perf_disassembler__strs[dis]); + } +out_remove_tmp: + if (decomp) + unlink(symfs_filename); + + if (delete_extract) + kcore_extract__delete(&kce); + + return err; +} diff --git a/tools/perf/util/disasm.h b/tools/perf/util/disasm.h new file mode 100644 index 000000000000..d2cb555e4a3b --- /dev/null +++ b/tools/perf/util/disasm.h @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __PERF_UTIL_DISASM_H +#define __PERF_UTIL_DISASM_H + +#include "map_symbol.h" + +#ifdef HAVE_LIBDW_SUPPORT +#include "dwarf-aux.h" +#endif + +struct annotation_options; +struct disasm_line; +struct ins; +struct evsel; +struct symbol; +struct data_loc_info; +struct type_state; +struct disasm_line; + +struct arch { + const char *name; + struct ins *instructions; + size_t nr_instructions; + size_t nr_instructions_allocated; + struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const char *name); + bool sorted_instructions; + bool initialized; + const char *insn_suffix; + void *priv; + unsigned int model; + unsigned int family; + int (*init)(struct arch *arch, char *cpuid); + bool (*ins_is_fused)(struct arch *arch, const char *ins1, + const char *ins2); + struct { + char comment_char; + char skip_functions_char; + char register_char; + char memory_ref_char; + char imm_char; + } objdump; +#ifdef HAVE_LIBDW_SUPPORT + void (*update_insn_state)(struct type_state *state, + struct data_loc_info *dloc, Dwarf_Die *cu_die, + struct disasm_line *dl); +#endif + /** @e_machine: ELF machine associated with arch. */ + unsigned int e_machine; + /** @e_flags: Optional ELF flags associated with arch. */ + unsigned int e_flags; +}; + +struct ins { + const char *name; + struct ins_ops *ops; +}; + +struct ins_operands { + char *raw; + struct { + char *raw; + char *name; + struct symbol *sym; + u64 addr; + s64 offset; + bool offset_avail; + bool outside; + bool multi_regs; + bool mem_ref; + } target; + union { + struct { + char *raw; + char *name; + u64 addr; + bool multi_regs; + bool mem_ref; + } source; + struct { + struct ins ins; + struct ins_operands *ops; + } locked; + struct { + char *raw_comment; + char *raw_func_start; + } jump; + }; +}; + +struct ins_ops { + void (*free)(struct ins_operands *ops); + int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, + struct disasm_line *dl); + int (*scnprintf)(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name); +}; + +struct annotate_args { + struct arch *arch; + struct map_symbol ms; + struct annotation_options *options; + s64 offset; + char *line; + int line_nr; + char *fileloc; +}; + +struct arch *arch__find(const char *name); +bool arch__is(struct arch *arch, const char *name); + +struct ins_ops *ins__find(struct arch *arch, const char *name, struct disasm_line *dl); + +bool ins__is_call(const struct ins *ins); +bool ins__is_jump(const struct ins *ins); +bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); +bool ins__is_ret(const struct ins *ins); +bool ins__is_lock(const struct ins *ins); + +struct disasm_line *disasm_line__new(struct annotate_args *args); +void disasm_line__free(struct disasm_line *dl); + +int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, + bool raw, int max_ins_name); + +int symbol__disassemble(struct symbol *sym, struct annotate_args *args); + +char *expand_tabs(char *line, char **storage, 
size_t *storage_len); + +#endif /* __PERF_UTIL_DISASM_H */ diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c index 46f74b2344db..c0afcbd954f8 100644 --- a/tools/perf/util/dlfilter.c +++ b/tools/perf/util/dlfilter.c @@ -10,6 +10,8 @@ #include <subcmd/exec-cmd.h> #include <linux/zalloc.h> #include <linux/build_bug.h> +#include <linux/kernel.h> +#include <linux/string.h> #include "debug.h" #include "event.h" @@ -31,13 +33,13 @@ static void al_to_d_al(struct addr_location *al, struct perf_dlfilter_al *d_al) if (al->map) { struct dso *dso = map__dso(al->map); - if (symbol_conf.show_kernel_path && dso->long_name) - d_al->dso = dso->long_name; + if (symbol_conf.show_kernel_path && dso__long_name(dso)) + d_al->dso = dso__long_name(dso); else - d_al->dso = dso->name; - d_al->is_64_bit = dso->is_64_bit; - d_al->buildid_size = dso->bid.size; - d_al->buildid = dso->bid.data; + d_al->dso = dso__name(dso); + d_al->is_64_bit = dso__is_64_bit(dso); + d_al->buildid_size = dso__bid(dso)->size; + d_al->buildid = dso__bid(dso)->data; } else { d_al->dso = NULL; d_al->is_64_bit = 0; @@ -50,8 +52,10 @@ static void al_to_d_al(struct addr_location *al, struct perf_dlfilter_al *d_al) d_al->sym_end = sym->end; if (al->addr < sym->end) d_al->symoff = al->addr - sym->start; - else + else if (al->map) d_al->symoff = al->addr - map__start(al->map) - sym->start; + else + d_al->symoff = 0; d_al->sym_binding = sym->binding; } else { d_al->sym = NULL; @@ -63,6 +67,7 @@ static void al_to_d_al(struct addr_location *al, struct perf_dlfilter_al *d_al) d_al->addr = al->addr; d_al->comm = NULL; d_al->filtered = 0; + d_al->priv = NULL; } static struct addr_location *get_al(struct dlfilter *d) @@ -151,6 +156,11 @@ static char **dlfilter__args(void *ctx, int *dlargc) return d->dlargv; } +static bool has_priv(struct perf_dlfilter_al *d_al_p) +{ + return d_al_p->size >= offsetof(struct perf_dlfilter_al, priv) + sizeof(d_al_p->priv); +} + static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlfilter_al *d_al_p) { struct dlfilter *d = (struct dlfilter *)ctx; @@ -166,6 +176,7 @@ static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlf if (!thread) return -1; + addr_location__init(&al); thread__find_symbol_fb(thread, d->sample->cpumode, address, &al); al_to_d_al(&al, &d_al); @@ -176,9 +187,31 @@ static __s32 dlfilter__resolve_address(void *ctx, __u64 address, struct perf_dlf memcpy(d_al_p, &d_al, min((size_t)sz, sizeof(d_al))); d_al_p->size = sz; + if (has_priv(d_al_p)) + d_al_p->priv = memdup(&al, sizeof(al)); + else /* Avoid leak for v0 API */ + addr_location__exit(&al); + return 0; } +static void dlfilter__al_cleanup(void *ctx __maybe_unused, struct perf_dlfilter_al *d_al_p) +{ + struct addr_location *al; + + /* Ensure backward compatibility */ + if (!has_priv(d_al_p) || !d_al_p->priv) + return; + + al = d_al_p->priv; + + d_al_p->priv = NULL; + + addr_location__exit(al); + + free(al); +} + static const __u8 *dlfilter__insn(void *ctx, __u32 *len) { struct dlfilter *d = (struct dlfilter *)ctx; @@ -201,7 +234,8 @@ static const __u8 *dlfilter__insn(void *ctx, __u32 *len) struct machine *machine = maps__machine(thread__maps(al->thread)); if (machine) - script_fetch_insn(d->sample, al->thread, machine); + script_fetch_insn(d->sample, al->thread, machine, + /*native_arch=*/true); } } @@ -251,13 +285,21 @@ static struct perf_event_attr *dlfilter__attr(void *ctx) return &d->evsel->core.attr; } +static __s32 code_read(__u64 ip, struct map *map, struct machine *machine, void *buf, 
__u32 len) +{ + u64 offset = map__map_ip(map, ip); + + if (ip + len >= map__end(map)) + len = map__end(map) - ip; + + return dso__data_read_offset(map__dso(map), machine, offset, buf, len); +} + static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len) { struct dlfilter *d = (struct dlfilter *)ctx; struct addr_location *al; struct addr_location a; - struct map *map; - u64 offset; __s32 ret; if (!d->ctx_valid) @@ -267,27 +309,17 @@ static __s32 dlfilter__object_code(void *ctx, __u64 ip, void *buf, __u32 len) if (!al) return -1; - map = al->map; - - if (map && ip >= map__start(map) && ip < map__end(map) && + if (al->map && ip >= map__start(al->map) && ip < map__end(al->map) && machine__kernel_ip(d->machine, ip) == machine__kernel_ip(d->machine, d->sample->ip)) - goto have_map; + return code_read(ip, al->map, d->machine, buf, len); addr_location__init(&a); + thread__find_map_fb(al->thread, d->sample->cpumode, ip, &a); - if (!a.map) { - ret = -1; - goto out; - } + ret = a.map ? code_read(ip, a.map, d->machine, buf, len) : -1; - map = a.map; -have_map: - offset = map__map_ip(map, ip); - if (ip + len >= map__end(map)) - len = map__end(map) - ip; - ret = dso__data_read_offset(map__dso(map), d->machine, offset, buf, len); -out: addr_location__exit(&a); + return ret; } @@ -296,6 +328,7 @@ static const struct perf_dlfilter_fns perf_dlfilter_fns = { .resolve_addr = dlfilter__resolve_addr, .args = dlfilter__args, .resolve_address = dlfilter__resolve_address, + .al_cleanup = dlfilter__al_cleanup, .insn = dlfilter__insn, .srcline = dlfilter__srcline, .attr = dlfilter__attr, @@ -480,6 +513,7 @@ int dlfilter__do_filter_event(struct dlfilter *d, d->d_addr_al = &d_addr_al; d_sample.size = sizeof(d_sample); + d_sample.p_stage_cyc = sample->weight3; d_ip_al.size = 0; /* To indicate d_ip_al is not initialized */ d_addr_al.size = 0; /* To indicate d_addr_al is not initialized */ @@ -493,7 +527,6 @@ int dlfilter__do_filter_event(struct dlfilter *d, ASSIGN(period); ASSIGN(weight); ASSIGN(ins_lat); - ASSIGN(p_stage_cyc); ASSIGN(transaction); ASSIGN(insn_cnt); ASSIGN(cyc_cnt); diff --git a/tools/perf/util/drm_pmu.c b/tools/perf/util/drm_pmu.c new file mode 100644 index 000000000000..b48a375e4584 --- /dev/null +++ b/tools/perf/util/drm_pmu.c @@ -0,0 +1,689 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "drm_pmu.h" +#include "counts.h" +#include "cpumap.h" +#include "debug.h" +#include "evsel.h" +#include "pmu.h" +#include <perf/threadmap.h> +#include <api/fs/fs.h> +#include <api/io.h> +#include <ctype.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> +#include <linux/unistd.h> +#include <linux/kcmp.h> +#include <linux/zalloc.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/sysmacros.h> +#include <sys/types.h> + +enum drm_pmu_unit { + DRM_PMU_UNIT_BYTES, + DRM_PMU_UNIT_CAPACITY, + DRM_PMU_UNIT_CYCLES, + DRM_PMU_UNIT_HZ, + DRM_PMU_UNIT_NS, + + DRM_PMU_UNIT_MAX, +}; + +struct drm_pmu_event { + const char *name; + const char *desc; + enum drm_pmu_unit unit; +}; + +struct drm_pmu { + struct perf_pmu pmu; + struct drm_pmu_event *events; + int num_events; +}; + +static const char * const drm_pmu_unit_strs[DRM_PMU_UNIT_MAX] = { + "bytes", + "capacity", + "cycles", + "hz", + "ns", +}; + +static const char * const drm_pmu_scale_unit_strs[DRM_PMU_UNIT_MAX] = { + "1bytes", + "1capacity", + "1cycles", + "1hz", + "1ns", +}; + +bool perf_pmu__is_drm(const struct perf_pmu *pmu) +{ + return pmu && pmu->type >= PERF_PMU_TYPE_DRM_START && + 
pmu->type <= PERF_PMU_TYPE_DRM_END; +} + +bool evsel__is_drm(const struct evsel *evsel) +{ + return perf_pmu__is_drm(evsel->pmu); +} + +static struct drm_pmu *add_drm_pmu(struct list_head *pmus, char *line, size_t line_len) +{ + struct drm_pmu *drm; + struct perf_pmu *pmu; + const char *name; + __u32 max_drm_pmu_type = 0, type; + int i = 12; + + if (line[line_len - 1] == '\n') + line[line_len - 1] = '\0'; + while (isspace(line[i])) + i++; + + line[--i] = '_'; + line[--i] = 'm'; + line[--i] = 'r'; + line[--i] = 'd'; + name = &line[i]; + + list_for_each_entry(pmu, pmus, list) { + if (!perf_pmu__is_drm(pmu)) + continue; + if (pmu->type > max_drm_pmu_type) + max_drm_pmu_type = pmu->type; + if (!strcmp(pmu->name, name)) { + /* PMU already exists. */ + return NULL; + } + } + + if (max_drm_pmu_type != 0) + type = max_drm_pmu_type + 1; + else + type = PERF_PMU_TYPE_DRM_START; + + if (type > PERF_PMU_TYPE_DRM_END) { + zfree(&drm); + pr_err("Unable to encode DRM PMU type for %s\n", name); + return NULL; + } + + drm = zalloc(sizeof(*drm)); + if (!drm) + return NULL; + + if (perf_pmu__init(&drm->pmu, type, name) != 0) { + perf_pmu__delete(&drm->pmu); + return NULL; + } + + drm->pmu.cpus = perf_cpu_map__new_int(0); + if (!drm->pmu.cpus) { + perf_pmu__delete(&drm->pmu); + return NULL; + } + return drm; +} + + +static bool starts_with(const char *str, const char *prefix) +{ + return !strncmp(prefix, str, strlen(prefix)); +} + +static int add_event(struct drm_pmu_event **events, int *num_events, + const char *line, enum drm_pmu_unit unit, const char *desc) +{ + const char *colon = strchr(line, ':'); + struct drm_pmu_event *tmp; + + if (!colon) + return -EINVAL; + + tmp = reallocarray(*events, *num_events + 1, sizeof(struct drm_pmu_event)); + if (!tmp) + return -ENOMEM; + tmp[*num_events].unit = unit; + tmp[*num_events].desc = desc; + tmp[*num_events].name = strndup(line, colon - line); + if (!tmp[*num_events].name) + return -ENOMEM; + (*num_events)++; + *events = tmp; + return 0; +} + +static int read_drm_pmus_cb(void *args, int fdinfo_dir_fd, const char *fd_name) +{ + struct list_head *pmus = args; + char buf[640]; + struct io io; + char *line = NULL; + size_t line_len; + struct drm_pmu *drm = NULL; + struct drm_pmu_event *events = NULL; + int num_events = 0; + + io__init(&io, openat(fdinfo_dir_fd, fd_name, O_RDONLY), buf, sizeof(buf)); + if (io.fd == -1) { + /* Failed to open file, ignore. */ + return 0; + } + + while (io__getline(&io, &line, &line_len) > 0) { + if (starts_with(line, "drm-driver:")) { + drm = add_drm_pmu(pmus, line, line_len); + if (!drm) + break; + continue; + } + /* + * Note the string matching below is alphabetical, with more + * specific matches appearing before less specific. 
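+ * e.g. "drm-engine-capacity-" is tested before the shorter "drm-engine-"
+ * prefix, and "drm-total-cycles-" before "drm-total-".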
+ */ + if (starts_with(line, "drm-active-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES, + "Total memory active in one or more engines"); + continue; + } + if (starts_with(line, "drm-cycles-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_CYCLES, + "Busy cycles"); + continue; + } + if (starts_with(line, "drm-engine-capacity-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_CAPACITY, + "Engine capacity"); + continue; + } + if (starts_with(line, "drm-engine-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_NS, + "Utilization in ns"); + continue; + } + if (starts_with(line, "drm-maxfreq-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_HZ, + "Maximum frequency"); + continue; + } + if (starts_with(line, "drm-purgeable-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES, + "Size of resident and purgeable memory buffers"); + continue; + } + if (starts_with(line, "drm-resident-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES, + "Size of resident memory buffers"); + continue; + } + if (starts_with(line, "drm-shared-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES, + "Size of shared memory buffers"); + continue; + } + if (starts_with(line, "drm-total-cycles-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES, + "Total busy cycles"); + continue; + } + if (starts_with(line, "drm-total-")) { + add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES, + "Size of shared and private memory"); + continue; + } + if (verbose > 1 && starts_with(line, "drm-") && + !starts_with(line, "drm-client-id:") && + !starts_with(line, "drm-pdev:")) + pr_debug("Unhandled DRM PMU fdinfo line match '%s'\n", line); + } + if (drm) { + drm->events = events; + drm->num_events = num_events; + list_add_tail(&drm->pmu.list, pmus); + } + free(line); + if (io.fd != -1) + close(io.fd); + return 0; +} + +void drm_pmu__exit(struct perf_pmu *pmu) +{ + struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu); + + free(drm->events); +} + +bool drm_pmu__have_event(const struct perf_pmu *pmu, const char *name) +{ + struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu); + + if (!starts_with(name, "drm-")) + return false; + + for (int i = 0; i < drm->num_events; i++) { + if (!strcasecmp(drm->events[i].name, name)) + return true; + } + return false; +} + +int drm_pmu__for_each_event(const struct perf_pmu *pmu, void *state, pmu_event_callback cb) +{ + struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu); + + for (int i = 0; i < drm->num_events; i++) { + char encoding_buf[128]; + struct pmu_event_info info = { + .pmu = pmu, + .name = drm->events[i].name, + .alias = NULL, + .scale_unit = drm_pmu_scale_unit_strs[drm->events[i].unit], + .desc = drm->events[i].desc, + .long_desc = NULL, + .encoding_desc = encoding_buf, + .topic = "drm", + .pmu_name = pmu->name, + .event_type_desc = "DRM event", + }; + int ret; + + snprintf(encoding_buf, sizeof(encoding_buf), "%s/config=0x%x/", pmu->name, i); + + ret = cb(state, &info); + if (ret) + return ret; + } + return 0; +} + +size_t drm_pmu__num_events(const struct perf_pmu *pmu) +{ + const struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu); + + return drm->num_events; +} + +static int drm_pmu__index_for_event(const struct drm_pmu *drm, const char *name) +{ + for (int i = 0; i < drm->num_events; i++) { + if (!strcmp(drm->events[i].name, name)) + return i; + } + return -1; +} + +static int drm_pmu__config_term(const struct drm_pmu *drm, + struct perf_event_attr *attr, + 
struct parse_events_term *term, + struct parse_events_error *err) +{ + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) { + int i = drm_pmu__index_for_event(drm, term->config); + + if (i >= 0) { + attr->config = i; + return 0; + } + } + if (err) { + char *err_str; + + parse_events_error__handle(err, term->err_val, + asprintf(&err_str, + "unexpected drm event term (%s) %s", + parse_events__term_type_str(term->type_term), + term->config) < 0 + ? strdup("unexpected drm event term") + : err_str, + NULL); + } + return -EINVAL; +} + +int drm_pmu__config_terms(const struct perf_pmu *pmu, + struct perf_event_attr *attr, + struct parse_events_terms *terms, + struct parse_events_error *err) +{ + struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu); + struct parse_events_term *term; + + list_for_each_entry(term, &terms->terms, list) { + if (drm_pmu__config_term(drm, attr, term, err)) + return -EINVAL; + } + + return 0; +} + +int drm_pmu__check_alias(const struct perf_pmu *pmu, struct parse_events_terms *terms, + struct perf_pmu_info *info, struct parse_events_error *err) +{ + struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu); + struct parse_events_term *term = + list_first_entry(&terms->terms, struct parse_events_term, list); + + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) { + int i = drm_pmu__index_for_event(drm, term->config); + + if (i >= 0) { + info->unit = drm_pmu_unit_strs[drm->events[i].unit]; + info->scale = 1; + return 0; + } + } + if (err) { + char *err_str; + + parse_events_error__handle(err, term->err_val, + asprintf(&err_str, + "unexpected drm event term (%s) %s", + parse_events__term_type_str(term->type_term), + term->config) < 0 + ? strdup("unexpected drm event term") + : err_str, + NULL); + } + return -EINVAL; +} + +struct minor_info { + unsigned int *minors; + int minors_num, minors_len; +}; + +static int for_each_drm_fdinfo_in_dir(int (*cb)(void *args, int fdinfo_dir_fd, const char *fd_name), + void *args, int proc_dir, const char *pid_name, + struct minor_info *minors) +{ + char buf[256]; + DIR *fd_dir; + struct dirent *fd_entry; + int fd_dir_fd, fdinfo_dir_fd = -1; + + + scnprintf(buf, sizeof(buf), "%s/fd", pid_name); + fd_dir_fd = openat(proc_dir, buf, O_DIRECTORY); + if (fd_dir_fd == -1) + return 0; /* Presumably lost race to open. */ + fd_dir = fdopendir(fd_dir_fd); + if (!fd_dir) { + close(fd_dir_fd); + return -ENOMEM; + } + while ((fd_entry = readdir(fd_dir)) != NULL) { + struct stat stat; + unsigned int minor; + bool is_dup = false; + int ret; + + if (fd_entry->d_type != DT_LNK) + continue; + + if (fstatat(fd_dir_fd, fd_entry->d_name, &stat, 0) != 0) + continue; + + if ((stat.st_mode & S_IFMT) != S_IFCHR || major(stat.st_rdev) != 226) + continue; + + minor = minor(stat.st_rdev); + for (int i = 0; i < minors->minors_num; i++) { + if (minor(stat.st_rdev) == minors->minors[i]) { + is_dup = true; + break; + } + } + if (is_dup) + continue; + + if (minors->minors_num == minors->minors_len) { + unsigned int *tmp = reallocarray(minors->minors, minors->minors_len + 4, + sizeof(unsigned int)); + + if (tmp) { + minors->minors = tmp; + minors->minors_len += 4; + } + } + minors->minors[minors->minors_num++] = minor; + if (fdinfo_dir_fd == -1) { + /* Open fdinfo dir if we have a DRM fd. 
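+ * The directory is opened lazily, only once the first DRM character
+ * device (major 226, checked above) is seen for this pid, which avoids
+ * the openat() for non-DRM processes.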
*/ + scnprintf(buf, sizeof(buf), "%s/fdinfo", pid_name); + fdinfo_dir_fd = openat(proc_dir, buf, O_DIRECTORY); + if (fdinfo_dir_fd == -1) + continue; + } + ret = cb(args, fdinfo_dir_fd, fd_entry->d_name); + if (ret) + goto close_fdinfo; + } + +close_fdinfo: + if (fdinfo_dir_fd != -1) + close(fdinfo_dir_fd); + closedir(fd_dir); + return 0; +} + +static int for_each_drm_fdinfo(bool skip_all_duplicates, + int (*cb)(void *args, int fdinfo_dir_fd, const char *fd_name), + void *args) +{ + DIR *proc_dir; + struct dirent *proc_entry; + int ret; + /* + * minors maintains an array of DRM minor device numbers seen for a pid, + * or for all pids if skip_all_duplicates is true, so that duplicates + * are ignored. + */ + struct minor_info minors = { + .minors = NULL, + .minors_num = 0, + .minors_len = 0, + }; + + proc_dir = opendir(procfs__mountpoint()); + if (!proc_dir) + return 0; + + /* Walk through the /proc directory. */ + while ((proc_entry = readdir(proc_dir)) != NULL) { + if (proc_entry->d_type != DT_DIR || + !isdigit(proc_entry->d_name[0])) + continue; + if (!skip_all_duplicates) { + /* Reset the seen minor numbers for each pid. */ + minors.minors_num = 0; + } + ret = for_each_drm_fdinfo_in_dir(cb, args, + dirfd(proc_dir), proc_entry->d_name, + &minors); + if (ret) + break; + } + free(minors.minors); + closedir(proc_dir); + return ret; +} + +int perf_pmus__read_drm_pmus(struct list_head *pmus) +{ + return for_each_drm_fdinfo(/*skip_all_duplicates=*/true, read_drm_pmus_cb, pmus); +} + +int evsel__drm_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx) +{ + (void)evsel; + (void)threads; + (void)start_cpu_map_idx; + (void)end_cpu_map_idx; + return 0; +} + +static uint64_t read_count_and_apply_unit(const char *count_and_unit, enum drm_pmu_unit unit) +{ + char *unit_ptr = NULL; + uint64_t count = strtoul(count_and_unit, &unit_ptr, 10); + + if (!unit_ptr) + return 0; + + while (isblank(*unit_ptr)) + unit_ptr++; + + switch (unit) { + case DRM_PMU_UNIT_BYTES: + if (*unit_ptr == '\0') + assert(count == 0); /* Generally undocumented, happens for 0. */ + else if (!strcmp(unit_ptr, "KiB")) + count *= 1024; + else if (!strcmp(unit_ptr, "MiB")) + count *= 1024 * 1024; + else + pr_err("Unexpected bytes unit '%s'\n", unit_ptr); + break; + case DRM_PMU_UNIT_CAPACITY: + /* No units expected. */ + break; + case DRM_PMU_UNIT_CYCLES: + /* No units expected. */ + break; + case DRM_PMU_UNIT_HZ: + if (!strcmp(unit_ptr, "Hz")) + count *= 1; + else if (!strcmp(unit_ptr, "KHz")) + count *= 1000; + else if (!strcmp(unit_ptr, "MHz")) + count *= 1000000; + else + pr_err("Unexpected hz unit '%s'\n", unit_ptr); + break; + case DRM_PMU_UNIT_NS: + /* Only unit ns expected. */ + break; + case DRM_PMU_UNIT_MAX: + default: + break; + } + return count; +} + +static uint64_t read_drm_event(int fdinfo_dir_fd, const char *fd_name, + const char *match, enum drm_pmu_unit unit) +{ + char buf[640]; + struct io io; + char *line = NULL; + size_t line_len; + uint64_t count = 0; + + io__init(&io, openat(fdinfo_dir_fd, fd_name, O_RDONLY), buf, sizeof(buf)); + if (io.fd == -1) { + /* Failed to open file, ignore. 
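+ * (most likely a race with the fd being closed, as with the "Presumably
+ * lost race" case in for_each_drm_fdinfo_in_dir(), so not an error)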
*/ + return 0; + } + while (io__getline(&io, &line, &line_len) > 0) { + size_t i = strlen(match); + + if (strncmp(line, match, i)) + continue; + if (line[i] != ':') + continue; + while (isblank(line[++i])) + ; + if (line[line_len - 1] == '\n') + line[line_len - 1] = '\0'; + count = read_count_and_apply_unit(&line[i], unit); + break; + } + free(line); + close(io.fd); + return count; +} + +struct read_drm_event_cb_args { + const char *match; + uint64_t count; + enum drm_pmu_unit unit; +}; + +static int read_drm_event_cb(void *vargs, int fdinfo_dir_fd, const char *fd_name) +{ + struct read_drm_event_cb_args *args = vargs; + + args->count += read_drm_event(fdinfo_dir_fd, fd_name, args->match, args->unit); + return 0; +} + +static uint64_t drm_pmu__read_system_wide(struct drm_pmu *drm, struct evsel *evsel) +{ + struct read_drm_event_cb_args args = { + .count = 0, + .match = drm->events[evsel->core.attr.config].name, + .unit = drm->events[evsel->core.attr.config].unit, + }; + + for_each_drm_fdinfo(/*skip_all_duplicates=*/false, read_drm_event_cb, &args); + return args.count; +} + +static uint64_t drm_pmu__read_for_pid(struct drm_pmu *drm, struct evsel *evsel, int pid) +{ + struct read_drm_event_cb_args args = { + .count = 0, + .match = drm->events[evsel->core.attr.config].name, + .unit = drm->events[evsel->core.attr.config].unit, + }; + struct minor_info minors = { + .minors = NULL, + .minors_num = 0, + .minors_len = 0, + }; + int proc_dir = open(procfs__mountpoint(), O_DIRECTORY); + char pid_name[12]; + int ret; + + if (proc_dir < 0) + return 0; + + snprintf(pid_name, sizeof(pid_name), "%d", pid); + ret = for_each_drm_fdinfo_in_dir(read_drm_event_cb, &args, proc_dir, pid_name, &minors); + free(minors.minors); + close(proc_dir); + return ret == 0 ? args.count : 0; +} + +int evsel__drm_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) +{ + struct drm_pmu *drm = container_of(evsel->pmu, struct drm_pmu, pmu); + struct perf_counts_values *count, *old_count = NULL; + int pid = perf_thread_map__pid(evsel->core.threads, thread); + uint64_t counter; + + if (pid != -1) + counter = drm_pmu__read_for_pid(drm, evsel, pid); + else + counter = drm_pmu__read_system_wide(drm, evsel); + + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + + count = perf_counts(evsel->counts, cpu_map_idx, thread); + if (old_count) { + count->val = old_count->val + counter; + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + } else { + count->val = counter; + count->run++; + count->ena++; + } + return 0; +} diff --git a/tools/perf/util/drm_pmu.h b/tools/perf/util/drm_pmu.h new file mode 100644 index 000000000000..e7f366fca8a4 --- /dev/null +++ b/tools/perf/util/drm_pmu.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __DRM_PMU_H +#define __DRM_PMU_H +/* + * Linux DRM clients expose information through usage stats as documented in + * Documentation/gpu/drm-usage-stats.rst (available online at + * https://docs.kernel.org/gpu/drm-usage-stats.html). This is a tool like PMU + * that exposes DRM information. 
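+ *
+ * A client's fdinfo exposes key/value lines such as (illustrative values):
+ *
+ *   drm-driver: i915
+ *   drm-engine-render: 123456789 ns
+ *
+ * Each "drm-" key is surfaced as an event of a PMU named after the driver
+ * ("drm_i915" here), readable per-pid or system-wide.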
+ */ + +#include "pmu.h" +#include <stdbool.h> + +struct list_head; +struct perf_thread_map; + +void drm_pmu__exit(struct perf_pmu *pmu); +bool drm_pmu__have_event(const struct perf_pmu *pmu, const char *name); +int drm_pmu__for_each_event(const struct perf_pmu *pmu, void *state, pmu_event_callback cb); +size_t drm_pmu__num_events(const struct perf_pmu *pmu); +int drm_pmu__config_terms(const struct perf_pmu *pmu, + struct perf_event_attr *attr, + struct parse_events_terms *terms, + struct parse_events_error *err); +int drm_pmu__check_alias(const struct perf_pmu *pmu, struct parse_events_terms *terms, + struct perf_pmu_info *info, struct parse_events_error *err); + + +bool perf_pmu__is_drm(const struct perf_pmu *pmu); +bool evsel__is_drm(const struct evsel *evsel); + +int perf_pmus__read_drm_pmus(struct list_head *pmus); + +int evsel__drm_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx); +int evsel__drm_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread); + +#endif /* __DRM_PMU_H */ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index bdfead36b83a..344e689567ee 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -31,6 +31,7 @@ #include "debug.h" #include "string2.h" #include "vdso.h" +#include "annotate-data.h" static const char * const debuglink_paths[] = { "%.0s%s", @@ -39,6 +40,12 @@ static const char * const debuglink_paths[] = { "/usr/lib/debug%s/%s" }; +void dso__set_nsinfo(struct dso *dso, struct nsinfo *nsi) +{ + nsinfo__put(RC_CHK_ACCESS(dso)->nsinfo); + RC_CHK_ACCESS(dso)->nsinfo = nsi; +} + char dso__symtab_origin(const struct dso *dso) { static const char origin[] = { @@ -60,16 +67,17 @@ char dso__symtab_origin(const struct dso *dso) [DSO_BINARY_TYPE__GUEST_KMODULE] = 'G', [DSO_BINARY_TYPE__GUEST_KMODULE_COMP] = 'M', [DSO_BINARY_TYPE__GUEST_VMLINUX] = 'V', + [DSO_BINARY_TYPE__GNU_DEBUGDATA] = 'n', }; - if (dso == NULL || dso->symtab_type == DSO_BINARY_TYPE__NOT_FOUND) + if (dso == NULL || dso__symtab_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) return '!'; - return origin[dso->symtab_type]; + return origin[dso__symtab_type(dso)]; } bool dso__is_object_file(const struct dso *dso) { - switch (dso->binary_type) { + switch (dso__binary_type(dso)) { case DSO_BINARY_TYPE__KALLSYMS: case DSO_BINARY_TYPE__GUEST_KALLSYMS: case DSO_BINARY_TYPE__JAVA_JIT: @@ -86,6 +94,7 @@ bool dso__is_object_file(const struct dso *dso) case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: + case DSO_BINARY_TYPE__GNU_DEBUGDATA: case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: case DSO_BINARY_TYPE__GUEST_KMODULE: case DSO_BINARY_TYPE__GUEST_KMODULE_COMP: @@ -116,7 +125,7 @@ int dso__read_binary_type_filename(const struct dso *dso, char symfile[PATH_MAX]; unsigned int i; - len = __symbol__join_symfs(filename, size, dso->long_name); + len = __symbol__join_symfs(filename, size, dso__long_name(dso)); last_slash = filename + len; while (last_slash != filename && *last_slash != '/') last_slash--; @@ -158,12 +167,12 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__FEDORA_DEBUGINFO: len = __symbol__join_symfs(filename, size, "/usr/lib/debug"); - snprintf(filename + len, size - len, "%s.debug", dso->long_name); + snprintf(filename + len, size - len, "%s.debug", dso__long_name(dso)); break; case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: len = __symbol__join_symfs(filename, size, "/usr/lib/debug"); - snprintf(filename + len, size - len, 
"%s", dso->long_name); + snprintf(filename + len, size - len, "%s", dso__long_name(dso)); break; case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: @@ -172,13 +181,13 @@ int dso__read_binary_type_filename(const struct dso *dso, * /usr/lib/debug/lib when it is expected to be in * /usr/lib/debug/usr/lib */ - if (strlen(dso->long_name) < 9 || - strncmp(dso->long_name, "/usr/lib/", 9)) { + if (strlen(dso__long_name(dso)) < 9 || + strncmp(dso__long_name(dso), "/usr/lib/", 9)) { ret = -1; break; } len = __symbol__join_symfs(filename, size, "/usr/lib/debug"); - snprintf(filename + len, size - len, "%s", dso->long_name + 4); + snprintf(filename + len, size - len, "%s", dso__long_name(dso) + 4); break; case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: @@ -186,29 +195,29 @@ int dso__read_binary_type_filename(const struct dso *dso, const char *last_slash; size_t dir_size; - last_slash = dso->long_name + dso->long_name_len; - while (last_slash != dso->long_name && *last_slash != '/') + last_slash = dso__long_name(dso) + dso__long_name_len(dso); + while (last_slash != dso__long_name(dso) && *last_slash != '/') last_slash--; len = __symbol__join_symfs(filename, size, ""); - dir_size = last_slash - dso->long_name + 2; + dir_size = last_slash - dso__long_name(dso) + 2; if (dir_size > (size - len)) { ret = -1; break; } - len += scnprintf(filename + len, dir_size, "%s", dso->long_name); + len += scnprintf(filename + len, dir_size, "%s", dso__long_name(dso)); len += scnprintf(filename + len , size - len, ".debug%s", last_slash); break; } case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: - if (!dso->has_build_id) { + if (!dso__has_build_id(dso)) { ret = -1; break; } - build_id__sprintf(&dso->bid, build_id_hex); + build_id__snprintf(dso__bid(dso), build_id_hex, sizeof(build_id_hex)); len = __symbol__join_symfs(filename, size, "/usr/lib/debug/.build-id/"); snprintf(filename + len, size - len, "%.2s/%s.debug", build_id_hex, build_id_hex + 2); @@ -217,23 +226,24 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__VMLINUX: case DSO_BINARY_TYPE__GUEST_VMLINUX: case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: - __symbol__join_symfs(filename, size, dso->long_name); + case DSO_BINARY_TYPE__GNU_DEBUGDATA: + __symbol__join_symfs(filename, size, dso__long_name(dso)); break; case DSO_BINARY_TYPE__GUEST_KMODULE: case DSO_BINARY_TYPE__GUEST_KMODULE_COMP: path__join3(filename, size, symbol_conf.symfs, - root_dir, dso->long_name); + root_dir, dso__long_name(dso)); break; case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: - __symbol__join_symfs(filename, size, dso->long_name); + __symbol__join_symfs(filename, size, dso__long_name(dso)); break; case DSO_BINARY_TYPE__KCORE: case DSO_BINARY_TYPE__GUEST_KCORE: - snprintf(filename, size, "%s", dso->long_name); + snprintf(filename, size, "%s", dso__long_name(dso)); break; default: @@ -309,8 +319,8 @@ bool is_kernel_module(const char *pathname, int cpumode) bool dso__needs_decompress(struct dso *dso) { - return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP || - dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; + return dso__symtab_type(dso) == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP || + dso__symtab_type(dso) == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; } int filename__decompress(const char *name, char *pathname, @@ -362,11 +372,10 @@ static int decompress_kmodule(struct dso *dso, const char *name, if (!dso__needs_decompress(dso)) return -1; - if (dso->comp == COMP_ID__NONE) + if (dso__comp(dso) == COMP_ID__NONE) return 
-1; - return filename__decompress(name, pathname, len, dso->comp, - &dso->load_errno); + return filename__decompress(name, pathname, len, dso__comp(dso), dso__load_errno(dso)); } int dso__decompress_kmodule_fd(struct dso *dso, const char *name) @@ -467,35 +476,60 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m, struct machine *machine) { if (machine__is_host(machine)) - dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; + dso__set_symtab_type(dso, DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE); else - dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; + dso__set_symtab_type(dso, DSO_BINARY_TYPE__GUEST_KMODULE); /* _KMODULE_COMP should be next to _KMODULE */ if (m->kmod && m->comp) { - dso->symtab_type++; - dso->comp = m->comp; + dso__set_symtab_type(dso, dso__symtab_type(dso) + 1); + dso__set_comp(dso, m->comp); } + dso__set_is_kmod(dso); dso__set_short_name(dso, strdup(m->name), true); } /* * Global list of open DSOs and the counter. */ +struct mutex _dso__data_open_lock; static LIST_HEAD(dso__data_open); -static long dso__data_open_cnt; -static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER; +static long dso__data_open_cnt GUARDED_BY(_dso__data_open_lock); + +static void dso__data_open_lock_init(void) +{ + mutex_init(&_dso__data_open_lock); +} -static void dso__list_add(struct dso *dso) +static struct mutex *dso__data_open_lock(void) LOCK_RETURNED(_dso__data_open_lock) { - list_add_tail(&dso->data.open_entry, &dso__data_open); + static pthread_once_t data_open_lock_once = PTHREAD_ONCE_INIT; + + pthread_once(&data_open_lock_once, dso__data_open_lock_init); + + return &_dso__data_open_lock; +} + +static void dso__list_add(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) +{ + list_add_tail(&dso__data(dso)->open_entry, &dso__data_open); +#ifdef REFCNT_CHECKING + dso__data(dso)->dso = dso__get(dso); +#endif + /* Assume the dso is part of dsos, hence the optional reference count above. 
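+ * With REFCNT_CHECKING the open list additionally takes its own reference
+ * (dso__get() above) so the leak checker can attribute the holder;
+ * otherwise the reference held by the dsos table is relied upon.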
*/ + assert(dso__dsos(dso)); dso__data_open_cnt++; } -static void dso__list_del(struct dso *dso) +static void dso__list_del(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { - list_del_init(&dso->data.open_entry); + list_del_init(&dso__data(dso)->open_entry); +#ifdef REFCNT_CHECKING + mutex_unlock(dso__data_open_lock()); + dso__put(dso__data(dso)->dso); + mutex_lock(dso__data_open_lock()); +#endif WARN_ONCE(dso__data_open_cnt <= 0, "DSO data fd counter out of bounds."); dso__data_open_cnt--; @@ -503,7 +537,7 @@ static void dso__list_del(struct dso *dso) static void close_first_dso(void); -static int do_open(char *name) +static int do_open(char *name) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { int fd; char sbuf[STRERR_BUFSIZE]; @@ -526,10 +560,11 @@ static int do_open(char *name) char *dso__filename_with_chroot(const struct dso *dso, const char *filename) { - return filename_with_chroot(nsinfo__pid(dso->nsinfo), filename); + return filename_with_chroot(nsinfo__pid(dso__nsinfo_const(dso)), filename); } static int __open_dso(struct dso *dso, struct machine *machine) + EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { int fd = -EINVAL; char *root_dir = (char *)""; @@ -539,18 +574,18 @@ static int __open_dso(struct dso *dso, struct machine *machine) if (!name) return -ENOMEM; - mutex_lock(&dso->lock); + mutex_lock(dso__lock(dso)); if (machine) root_dir = machine->root_dir; - if (dso__read_binary_type_filename(dso, dso->binary_type, + if (dso__read_binary_type_filename(dso, dso__binary_type(dso), root_dir, name, PATH_MAX)) goto out; if (!is_regular_file(name)) { char *new_name; - if (errno != ENOENT || dso->nsinfo == NULL) + if (errno != ENOENT || dso__nsinfo(dso) == NULL) goto out; new_name = dso__filename_with_chroot(dso, name); @@ -566,7 +601,7 @@ static int __open_dso(struct dso *dso, struct machine *machine) size_t len = sizeof(newpath); if (dso__decompress_kmodule_path(dso, name, newpath, len) < 0) { - fd = -dso->load_errno; + fd = -(*dso__load_errno(dso)); goto out; } @@ -580,7 +615,7 @@ static int __open_dso(struct dso *dso, struct machine *machine) unlink(name); out: - mutex_unlock(&dso->lock); + mutex_unlock(dso__lock(dso)); free(name); return fd; } @@ -595,17 +630,18 @@ static void check_data_close(void); * list/count of open DSO objects. 
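 * The actual open happens inside the dso's mount namespace (see
 * nsinfo__mountns_enter() below), except for build-id cache files, which
 * are read without switching namespaces.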
*/ static int open_dso(struct dso *dso, struct machine *machine) + EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { int fd; struct nscookie nsc; - if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE) { - mutex_lock(&dso->lock); - nsinfo__mountns_enter(dso->nsinfo, &nsc); - mutex_unlock(&dso->lock); + if (dso__binary_type(dso) != DSO_BINARY_TYPE__BUILD_ID_CACHE) { + mutex_lock(dso__lock(dso)); + nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); + mutex_unlock(dso__lock(dso)); } fd = __open_dso(dso, machine); - if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE) + if (dso__binary_type(dso) != DSO_BINARY_TYPE__BUILD_ID_CACHE) nsinfo__mountns_exit(&nsc); if (fd >= 0) { @@ -620,12 +656,12 @@ static int open_dso(struct dso *dso, struct machine *machine) return fd; } -static void close_data_fd(struct dso *dso) +static void close_data_fd(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { - if (dso->data.fd >= 0) { - close(dso->data.fd); - dso->data.fd = -1; - dso->data.file_size = 0; + if (dso__data(dso)->fd >= 0) { + close(dso__data(dso)->fd); + dso__data(dso)->fd = -1; + dso__data(dso)->file_size = 0; dso__list_del(dso); } } @@ -637,16 +673,22 @@ static void close_data_fd(struct dso *dso) * Close @dso's data file descriptor and updates * list/count of open DSO objects. */ -static void close_dso(struct dso *dso) +static void close_dso(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { close_data_fd(dso); } -static void close_first_dso(void) +static void close_first_dso(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { + struct dso_data *dso_data; struct dso *dso; - dso = list_first_entry(&dso__data_open, struct dso, data.open_entry); + dso_data = list_first_entry(&dso__data_open, struct dso_data, open_entry); +#ifdef REFCNT_CHECKING + dso = dso_data->dso; +#else + dso = container_of(dso_data, struct dso, data); +#endif close_dso(dso); } @@ -681,7 +723,7 @@ void reset_fd_limit(void) fd_limit = 0; } -static bool may_cache_fd(void) +static bool may_cache_fd(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { if (!fd_limit) fd_limit = get_fd_limit(); @@ -697,7 +739,7 @@ static bool may_cache_fd(void) * for opened dso file descriptors. The limit is half * of the RLIMIT_NOFILE files opened. 
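 * e.g. with a RLIMIT_NOFILE of 1024, up to roughly 512 dso file
 * descriptors stay cached before check_data_close() starts evicting via
 * close_first_dso().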
*/ -static void check_data_close(void) +static void check_data_close(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { bool cache_fd = may_cache_fd(); @@ -713,12 +755,13 @@ static void check_data_close(void) */ void dso__data_close(struct dso *dso) { - pthread_mutex_lock(&dso__data_open_lock); + mutex_lock(dso__data_open_lock()); close_dso(dso); - pthread_mutex_unlock(&dso__data_open_lock); + mutex_unlock(dso__data_open_lock()); } static void try_to_open_dso(struct dso *dso, struct machine *machine) + EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { enum dso_binary_type binary_type_data[] = { DSO_BINARY_TYPE__BUILD_ID_CACHE, @@ -726,28 +769,29 @@ static void try_to_open_dso(struct dso *dso, struct machine *machine) DSO_BINARY_TYPE__NOT_FOUND, }; int i = 0; + struct dso_data *dso_data = dso__data(dso); - if (dso->data.fd >= 0) + if (dso_data->fd >= 0) return; - if (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND) { - dso->data.fd = open_dso(dso, machine); + if (dso__binary_type(dso) != DSO_BINARY_TYPE__NOT_FOUND) { + dso_data->fd = open_dso(dso, machine); goto out; } do { - dso->binary_type = binary_type_data[i++]; + dso__set_binary_type(dso, binary_type_data[i++]); - dso->data.fd = open_dso(dso, machine); - if (dso->data.fd >= 0) + dso_data->fd = open_dso(dso, machine); + if (dso_data->fd >= 0) goto out; - } while (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND); + } while (dso__binary_type(dso) != DSO_BINARY_TYPE__NOT_FOUND); out: - if (dso->data.fd >= 0) - dso->data.status = DSO_DATA_STATUS_OK; + if (dso_data->fd >= 0) + dso_data->status = DSO_DATA_STATUS_OK; else - dso->data.status = DSO_DATA_STATUS_ERROR; + dso_data->status = DSO_DATA_STATUS_ERROR; } /** @@ -759,35 +803,37 @@ out: * returns file descriptor. It should be paired with * dso__data_put_fd() if it returns non-negative value. 
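 * (after the conversion to a bool return below, read that as: *fd is only
 * valid, and dso__data_put_fd() only required, when true is returned)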
*/ -int dso__data_get_fd(struct dso *dso, struct machine *machine) +bool dso__data_get_fd(struct dso *dso, struct machine *machine, int *fd) { - if (dso->data.status == DSO_DATA_STATUS_ERROR) - return -1; + *fd = -1; + if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR) + return false; - if (pthread_mutex_lock(&dso__data_open_lock) < 0) - return -1; + mutex_lock(dso__data_open_lock()); try_to_open_dso(dso, machine); - if (dso->data.fd < 0) - pthread_mutex_unlock(&dso__data_open_lock); + *fd = dso__data(dso)->fd; + if (*fd >= 0) + return true; - return dso->data.fd; + mutex_unlock(dso__data_open_lock()); + return false; } void dso__data_put_fd(struct dso *dso __maybe_unused) { - pthread_mutex_unlock(&dso__data_open_lock); + mutex_unlock(dso__data_open_lock()); } bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by) { u32 flag = 1 << by; - if (dso->data.status_seen & flag) + if (dso__data(dso)->status_seen & flag) return true; - dso->data.status_seen |= flag; + dso__data(dso)->status_seen |= flag; return false; } @@ -797,12 +843,13 @@ static ssize_t bpf_read(struct dso *dso, u64 offset, char *data) { struct bpf_prog_info_node *node; ssize_t size = DSO__DATA_CACHE_SIZE; + struct dso_bpf_prog *dso_bpf_prog = dso__bpf_prog(dso); u64 len; u8 *buf; - node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id); + node = perf_env__find_bpf_prog_info(dso_bpf_prog->env, dso_bpf_prog->id); if (!node || !node->info_linear) { - dso->data.status = DSO_DATA_STATUS_ERROR; + dso__data(dso)->status = DSO_DATA_STATUS_ERROR; return -1; } @@ -820,14 +867,15 @@ static ssize_t bpf_read(struct dso *dso, u64 offset, char *data) static int bpf_size(struct dso *dso) { struct bpf_prog_info_node *node; + struct dso_bpf_prog *dso_bpf_prog = dso__bpf_prog(dso); - node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id); + node = perf_env__find_bpf_prog_info(dso_bpf_prog->env, dso_bpf_prog->id); if (!node || !node->info_linear) { - dso->data.status = DSO_DATA_STATUS_ERROR; + dso__data(dso)->status = DSO_DATA_STATUS_ERROR; return -1; } - dso->data.file_size = node->info_linear->info.jited_prog_len; + dso__data(dso)->file_size = node->info_linear->info.jited_prog_len; return 0; } #endif // HAVE_LIBBPF_SUPPORT @@ -835,10 +883,10 @@ static int bpf_size(struct dso *dso) static void dso_cache__free(struct dso *dso) { - struct rb_root *root = &dso->data.cache; + struct rb_root *root = &dso__data(dso)->cache; struct rb_node *next = rb_first(root); - mutex_lock(&dso->lock); + mutex_lock(dso__lock(dso)); while (next) { struct dso_cache *cache; @@ -847,12 +895,12 @@ dso_cache__free(struct dso *dso) rb_erase(&cache->rb_node, root); free(cache); } - mutex_unlock(&dso->lock); + mutex_unlock(dso__lock(dso)); } static struct dso_cache *__dso_cache__find(struct dso *dso, u64 offset) { - const struct rb_root *root = &dso->data.cache; + const struct rb_root *root = &dso__data(dso)->cache; struct rb_node * const *p = &root->rb_node; const struct rb_node *parent = NULL; struct dso_cache *cache; @@ -878,13 +926,13 @@ static struct dso_cache *__dso_cache__find(struct dso *dso, u64 offset) static struct dso_cache * dso_cache__insert(struct dso *dso, struct dso_cache *new) { - struct rb_root *root = &dso->data.cache; + struct rb_root *root = &dso__data(dso)->cache; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct dso_cache *cache; u64 offset = new->offset; - mutex_lock(&dso->lock); + mutex_lock(dso__lock(dso)); while (*p != NULL) { u64 end; @@ -905,7 +953,7 @@ 
dso_cache__insert(struct dso *dso, struct dso_cache *new) cache = NULL; out: - mutex_unlock(&dso->lock); + mutex_unlock(dso__lock(dso)); return cache; } @@ -927,23 +975,23 @@ static ssize_t file_read(struct dso *dso, struct machine *machine, { ssize_t ret; - pthread_mutex_lock(&dso__data_open_lock); + mutex_lock(dso__data_open_lock()); /* - * dso->data.fd might be closed if other thread opened another + * dso__data(dso)->fd might be closed if other thread opened another * file (dso) due to open file limit (RLIMIT_NOFILE). */ try_to_open_dso(dso, machine); - if (dso->data.fd < 0) { - dso->data.status = DSO_DATA_STATUS_ERROR; + if (dso__data(dso)->fd < 0) { + dso__data(dso)->status = DSO_DATA_STATUS_ERROR; ret = -errno; goto out; } - ret = pread(dso->data.fd, data, DSO__DATA_CACHE_SIZE, offset); + ret = pread(dso__data(dso)->fd, data, DSO__DATA_CACHE_SIZE, offset); out: - pthread_mutex_unlock(&dso__data_open_lock); + mutex_unlock(dso__data_open_lock()); return ret; } @@ -961,11 +1009,11 @@ static struct dso_cache *dso_cache__populate(struct dso *dso, return NULL; } #ifdef HAVE_LIBBPF_SUPPORT - if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) + if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) *ret = bpf_read(dso, cache_offset, cache->data); else #endif - if (dso->binary_type == DSO_BINARY_TYPE__OOL) + if (dso__binary_type(dso) == DSO_BINARY_TYPE__OOL) *ret = DSO__DATA_CACHE_SIZE; else *ret = file_read(dso, machine, cache_offset, cache->data); @@ -1051,43 +1099,43 @@ static int file_size(struct dso *dso, struct machine *machine) struct stat st; char sbuf[STRERR_BUFSIZE]; - pthread_mutex_lock(&dso__data_open_lock); + mutex_lock(dso__data_open_lock()); /* - * dso->data.fd might be closed if other thread opened another + * dso__data(dso)->fd might be closed if other thread opened another * file (dso) due to open file limit (RLIMIT_NOFILE). */ try_to_open_dso(dso, machine); - if (dso->data.fd < 0) { + if (dso__data(dso)->fd < 0) { ret = -errno; - dso->data.status = DSO_DATA_STATUS_ERROR; + dso__data(dso)->status = DSO_DATA_STATUS_ERROR; goto out; } - if (fstat(dso->data.fd, &st) < 0) { + if (fstat(dso__data(dso)->fd, &st) < 0) { ret = -errno; pr_err("dso cache fstat failed: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); - dso->data.status = DSO_DATA_STATUS_ERROR; + dso__data(dso)->status = DSO_DATA_STATUS_ERROR; goto out; } - dso->data.file_size = st.st_size; + dso__data(dso)->file_size = st.st_size; out: - pthread_mutex_unlock(&dso__data_open_lock); + mutex_unlock(dso__data_open_lock()); return ret; } int dso__data_file_size(struct dso *dso, struct machine *machine) { - if (dso->data.file_size) + if (dso__data(dso)->file_size) return 0; - if (dso->data.status == DSO_DATA_STATUS_ERROR) + if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR) return -1; #ifdef HAVE_LIBBPF_SUPPORT - if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) + if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) return bpf_size(dso); #endif return file_size(dso, machine); @@ -1106,7 +1154,7 @@ off_t dso__data_size(struct dso *dso, struct machine *machine) return -1; /* For now just estimate dso data size is close to file size */ - return dso->data.file_size; + return dso__data(dso)->file_size; } static ssize_t data_read_write_offset(struct dso *dso, struct machine *machine, @@ -1117,7 +1165,7 @@ static ssize_t data_read_write_offset(struct dso *dso, struct machine *machine, return -1; /* Check the offset sanity. 
*/ - if (offset > dso->data.file_size) + if (offset > dso__data(dso)->file_size) return -1; if (offset + size < offset) @@ -1140,12 +1188,74 @@ static ssize_t data_read_write_offset(struct dso *dso, struct machine *machine, ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, u64 offset, u8 *data, ssize_t size) { - if (dso->data.status == DSO_DATA_STATUS_ERROR) + if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR) return -1; return data_read_write_offset(dso, machine, offset, data, size, true); } +uint16_t dso__e_machine(struct dso *dso, struct machine *machine) +{ + uint16_t e_machine = EM_NONE; + int fd; + + switch (dso__binary_type(dso)) { + case DSO_BINARY_TYPE__KALLSYMS: + case DSO_BINARY_TYPE__GUEST_KALLSYMS: + case DSO_BINARY_TYPE__VMLINUX: + case DSO_BINARY_TYPE__GUEST_VMLINUX: + case DSO_BINARY_TYPE__GUEST_KMODULE: + case DSO_BINARY_TYPE__GUEST_KMODULE_COMP: + case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: + case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: + case DSO_BINARY_TYPE__KCORE: + case DSO_BINARY_TYPE__GUEST_KCORE: + case DSO_BINARY_TYPE__BPF_PROG_INFO: + case DSO_BINARY_TYPE__BPF_IMAGE: + case DSO_BINARY_TYPE__OOL: + case DSO_BINARY_TYPE__JAVA_JIT: + return EM_HOST; + case DSO_BINARY_TYPE__DEBUGLINK: + case DSO_BINARY_TYPE__BUILD_ID_CACHE: + case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: + case DSO_BINARY_TYPE__GNU_DEBUGDATA: + case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: + case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: + case DSO_BINARY_TYPE__FEDORA_DEBUGINFO: + case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: + case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: + case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: + break; + case DSO_BINARY_TYPE__NOT_FOUND: + default: + return EM_NONE; + } + + mutex_lock(dso__data_open_lock()); + + /* + * dso__data(dso)->fd might be closed if other thread opened another + * file (dso) due to open file limit (RLIMIT_NOFILE). 
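The bare offset 18 in the pread a few lines below is the ELF header layout at work: both ELF classes start with the 16-byte e_ident array (whose EI_DATA byte, index 5, records the file's byte order) followed by the 2-byte e_type, so the 16-bit e_machine sits at byte 18 in ELF32 and ELF64 alike and a single pread serves both. The layout can be verified standalone:

#include <elf.h>
#include <stddef.h>

/* e_ident[16] + u16 e_type put u16 e_machine at byte 18 in both classes. */
_Static_assert(offsetof(Elf32_Ehdr, e_machine) == 18, "ELF32 layout");
_Static_assert(offsetof(Elf64_Ehdr, e_machine) == 18, "ELF64 layout");
_Static_assert(EI_DATA == 5, "byte-order byte inside e_ident");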
+ */ + try_to_open_dso(dso, machine); + fd = dso__data(dso)->fd; + if (fd >= 0) { + _Static_assert(offsetof(Elf32_Ehdr, e_machine) == 18, "Unexpected offset"); + _Static_assert(offsetof(Elf64_Ehdr, e_machine) == 18, "Unexpected offset"); + if (dso__needs_swap(dso) == DSO_SWAP__UNSET) { + unsigned char eidata; + + if (pread(fd, &eidata, sizeof(eidata), EI_DATA) == sizeof(eidata)) + dso__swap_init(dso, eidata); + } + if (dso__needs_swap(dso) != DSO_SWAP__UNSET && + pread(fd, &e_machine, sizeof(e_machine), 18) == sizeof(e_machine)) + e_machine = DSO__SWAP(dso, uint16_t, e_machine); + } + mutex_unlock(dso__data_open_lock()); + return e_machine; +} + /** * dso__data_read_addr - Read data from dso address * @dso: dso object @@ -1180,7 +1290,7 @@ ssize_t dso__data_write_cache_offs(struct dso *dso, struct machine *machine, { u8 *data = (u8 *)data_in; /* cast away const to use same fns for r/w */ - if (dso->data.status == DSO_DATA_STATUS_ERROR) + if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR) return -1; return data_read_write_offset(dso, machine, offset, data, size, false); @@ -1233,56 +1343,158 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name, */ if (dso != NULL) { dso__set_short_name(dso, short_name, false); - dso->kernel = dso_type; + dso__set_kernel(dso, dso_type); } return dso; } -static void dso__set_long_name_id(struct dso *dso, const char *name, struct dso_id *id, bool name_allocated) +static void __dso__set_long_name_id(struct dso *dso, const char *name, bool name_allocated) +{ + if (dso__long_name_allocated(dso)) + free((char *)dso__long_name(dso)); + + RC_CHK_ACCESS(dso)->long_name = name; + RC_CHK_ACCESS(dso)->long_name_len = strlen(name); + dso__set_long_name_allocated(dso, name_allocated); +} + +static void dso__set_long_name_id(struct dso *dso, const char *name, bool name_allocated) { - struct rb_root *root = dso->root; + struct dsos *dsos = dso__dsos(dso); if (name == NULL) return; - if (dso->long_name_allocated) - free((char *)dso->long_name); - - if (root) { - rb_erase(&dso->rb_node, root); + if (dsos) { /* - * __dsos__findnew_link_by_longname_id() isn't guaranteed to - * add it back, so a clean removal is required here. + * Need to avoid re-sorting the dsos breaking by non-atomically + * renaming the dso. */ - RB_CLEAR_NODE(&dso->rb_node); - dso->root = NULL; + down_write(&dsos->lock); + __dso__set_long_name_id(dso, name, name_allocated); + dsos->sorted = false; + up_write(&dsos->lock); + } else { + __dso__set_long_name_id(dso, name, name_allocated); } +} + +static int __dso_id__cmp(const struct dso_id *a, const struct dso_id *b) +{ + if (a->mmap2_valid && b->mmap2_valid) { + if (a->maj > b->maj) return -1; + if (a->maj < b->maj) return 1; - dso->long_name = name; - dso->long_name_len = strlen(name); - dso->long_name_allocated = name_allocated; + if (a->min > b->min) return -1; + if (a->min < b->min) return 1; + + if (a->ino > b->ino) return -1; + if (a->ino < b->ino) return 1; + } + if (a->mmap2_ino_generation_valid && b->mmap2_ino_generation_valid) { + if (a->ino_generation > b->ino_generation) return -1; + if (a->ino_generation < b->ino_generation) return 1; + } + if (build_id__is_defined(&a->build_id) && build_id__is_defined(&b->build_id)) { + if (a->build_id.size != b->build_id.size) + return a->build_id.size < b->build_id.size ? 
-1 : 1; + return memcmp(a->build_id.data, b->build_id.data, a->build_id.size); + } + return 0; +} + +const struct dso_id dso_id_empty = { + { + .maj = 0, + .min = 0, + .ino = 0, + .ino_generation = 0, + }, + .mmap2_valid = false, + .mmap2_ino_generation_valid = false, + { + .size = 0, + } +}; + +void __dso__improve_id(struct dso *dso, const struct dso_id *id) +{ + struct dsos *dsos = dso__dsos(dso); + struct dso_id *dso_id = dso__id(dso); + bool changed = false; + + /* dsos write lock held by caller. */ + + if (id->mmap2_valid && !dso_id->mmap2_valid) { + dso_id->maj = id->maj; + dso_id->min = id->min; + dso_id->ino = id->ino; + dso_id->mmap2_valid = true; + changed = true; + } + if (id->mmap2_ino_generation_valid && !dso_id->mmap2_ino_generation_valid) { + dso_id->ino_generation = id->ino_generation; + dso_id->mmap2_ino_generation_valid = true; + changed = true; + } + if (build_id__is_defined(&id->build_id) && !build_id__is_defined(&dso_id->build_id)) { + dso_id->build_id = id->build_id; + changed = true; + } + if (changed && dsos) + dsos->sorted = false; +} + +int dso_id__cmp(const struct dso_id *a, const struct dso_id *b) +{ + if (a == &dso_id_empty || b == &dso_id_empty) { + /* There is no valid data to compare so the comparison always returns identical. */ + return 0; + } - if (root) - __dsos__findnew_link_by_longname_id(root, dso, NULL, id); + return __dso_id__cmp(a, b); +} + +int dso__cmp_id(struct dso *a, struct dso *b) +{ + return __dso_id__cmp(dso__id(a), dso__id(b)); } void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) { - dso__set_long_name_id(dso, name, NULL, name_allocated); + dso__set_long_name_id(dso, name, name_allocated); +} + +static void __dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) +{ + if (dso__short_name_allocated(dso)) + free((char *)dso__short_name(dso)); + + RC_CHK_ACCESS(dso)->short_name = name; + RC_CHK_ACCESS(dso)->short_name_len = strlen(name); + dso__set_short_name_allocated(dso, name_allocated); } void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) { + struct dsos *dsos = dso__dsos(dso); + if (name == NULL) return; - if (dso->short_name_allocated) - free((char *)dso->short_name); - - dso->short_name = name; - dso->short_name_len = strlen(name); - dso->short_name_allocated = name_allocated; + if (dsos) { + /* + * Need to avoid re-sorting the dsos breaking by non-atomically + * renaming the dso. 
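A property of __dso_id__cmp() above worth spelling out: each field group participates only when both sides mark it valid, so a sparse id parsed from /proc/pid/maps still compares equal to the richer id carried by a PERF_RECORD_MMAP2 event, and __dso__improve_id() can later fill in the missing fields without breaking existing matches. An illustrative sketch, assuming perf's internal dso.h:

#include <assert.h>
#include <stdbool.h>
#include "dso.h"	/* perf's internal header, for the sketch */

static void dso_id_partial_match(void)
{
	struct dso_id a = { .maj = 8, .min = 1, .ino = 42, .mmap2_valid = true };
	struct dso_id b = a;

	/* Only b claims a valid generation, so the field is ignored. */
	b.ino_generation = 7;
	b.mmap2_ino_generation_valid = true;
	assert(dso_id__cmp(&a, &b) == 0);
}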
+ */ + down_write(&dsos->lock); + __dso__set_short_name(dso, name, name_allocated); + dsos->sorted = false; + up_write(&dsos->lock); + } else { + __dso__set_short_name(dso, name, name_allocated); + } } int dso__name_len(const struct dso *dso) @@ -1290,42 +1502,48 @@ int dso__name_len(const struct dso *dso) if (!dso) return strlen("[unknown]"); if (verbose > 0) - return dso->long_name_len; + return dso__long_name_len(dso); - return dso->short_name_len; + return dso__short_name_len(dso); } bool dso__loaded(const struct dso *dso) { - return dso->loaded; + return RC_CHK_ACCESS(dso)->loaded; } bool dso__sorted_by_name(const struct dso *dso) { - return dso->sorted_by_name; + return RC_CHK_ACCESS(dso)->sorted_by_name; } void dso__set_sorted_by_name(struct dso *dso) { - dso->sorted_by_name = true; + RC_CHK_ACCESS(dso)->sorted_by_name = true; } -struct dso *dso__new_id(const char *name, struct dso_id *id) +struct dso *dso__new_id(const char *name, const struct dso_id *id) { - struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1); + RC_STRUCT(dso) *dso = zalloc(sizeof(*dso) + strlen(name) + 1); + struct dso *res; + struct dso_data *data; - if (dso != NULL) { + if (!dso) + return NULL; + + if (ADD_RC_CHK(res, dso)) { strcpy(dso->name, name); if (id) dso->id = *id; - dso__set_long_name_id(dso, dso->name, id, false); - dso__set_short_name(dso, dso->name, false); + dso__set_long_name_id(res, dso->name, false); + dso__set_short_name(res, dso->name, false); dso->symbols = RB_ROOT_CACHED; dso->symbol_names = NULL; dso->symbol_names_len = 0; - dso->data.cache = RB_ROOT; dso->inlined_nodes = RB_ROOT_CACHED; dso->srclines = RB_ROOT_CACHED; + dso->data_types = RB_ROOT; + dso->global_vars = RB_ROOT; dso->data.fd = -1; dso->data.status = DSO_DATA_STATUS_UNKNOWN; dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; @@ -1334,21 +1552,24 @@ struct dso *dso__new_id(const char *name, struct dso_id *id) dso->loaded = 0; dso->rel = 0; dso->sorted_by_name = 0; - dso->has_build_id = 0; dso->has_srcline = 1; dso->a2l_fails = 1; dso->kernel = DSO_SPACE__USER; + dso->is_kmod = 0; dso->needs_swap = DSO_SWAP__UNSET; dso->comp = COMP_ID__NONE; - RB_CLEAR_NODE(&dso->rb_node); - dso->root = NULL; - INIT_LIST_HEAD(&dso->node); - INIT_LIST_HEAD(&dso->data.open_entry); mutex_init(&dso->lock); refcount_set(&dso->refcnt, 1); + data = &dso->data; + data->cache = RB_ROOT; + data->fd = -1; + data->status = DSO_DATA_STATUS_UNKNOWN; + INIT_LIST_HEAD(&data->open_entry); +#ifdef REFCNT_CHECKING + data->dso = NULL; /* Set when on the open_entry list. 
*/ +#endif } - - return dso; + return res; } struct dso *dso__new(const char *name) @@ -1358,99 +1579,139 @@ struct dso *dso__new(const char *name) void dso__delete(struct dso *dso) { - if (!RB_EMPTY_NODE(&dso->rb_node)) - pr_err("DSO %s is still in rbtree when being deleted!\n", - dso->long_name); + if (dso__dsos(dso)) + pr_err("DSO %s is still in rbtree when being deleted!\n", dso__long_name(dso)); /* free inlines first, as they reference symbols */ - inlines__tree_delete(&dso->inlined_nodes); - srcline__tree_delete(&dso->srclines); - symbols__delete(&dso->symbols); - dso->symbol_names_len = 0; - zfree(&dso->symbol_names); - if (dso->short_name_allocated) { - zfree((char **)&dso->short_name); - dso->short_name_allocated = false; + inlines__tree_delete(&RC_CHK_ACCESS(dso)->inlined_nodes); + srcline__tree_delete(&RC_CHK_ACCESS(dso)->srclines); + symbols__delete(&RC_CHK_ACCESS(dso)->symbols); + RC_CHK_ACCESS(dso)->symbol_names_len = 0; + zfree(&RC_CHK_ACCESS(dso)->symbol_names); + annotated_data_type__tree_delete(dso__data_types(dso)); + global_var_type__tree_delete(dso__global_vars(dso)); + + if (RC_CHK_ACCESS(dso)->short_name_allocated) { + zfree((char **)&RC_CHK_ACCESS(dso)->short_name); + RC_CHK_ACCESS(dso)->short_name_allocated = false; } - if (dso->long_name_allocated) { - zfree((char **)&dso->long_name); - dso->long_name_allocated = false; + if (RC_CHK_ACCESS(dso)->long_name_allocated) { + zfree((char **)&RC_CHK_ACCESS(dso)->long_name); + RC_CHK_ACCESS(dso)->long_name_allocated = false; } dso__data_close(dso); - auxtrace_cache__free(dso->auxtrace_cache); + auxtrace_cache__free(RC_CHK_ACCESS(dso)->auxtrace_cache); dso_cache__free(dso); dso__free_a2l(dso); - zfree(&dso->symsrc_filename); - nsinfo__zput(dso->nsinfo); - mutex_destroy(&dso->lock); - free(dso); + dso__free_symsrc_filename(dso); + nsinfo__zput(RC_CHK_ACCESS(dso)->nsinfo); + mutex_destroy(dso__lock(dso)); + RC_CHK_FREE(dso); } struct dso *dso__get(struct dso *dso) { - if (dso) - refcount_inc(&dso->refcnt); - return dso; + struct dso *result; + + if (RC_CHK_GET(result, dso)) + refcount_inc(&RC_CHK_ACCESS(dso)->refcnt); + + return result; } void dso__put(struct dso *dso) { - if (dso && refcount_dec_and_test(&dso->refcnt)) +#ifdef REFCNT_CHECKING + if (dso && dso__data(dso) && refcount_read(&RC_CHK_ACCESS(dso)->refcnt) == 2) + dso__data_close(dso); +#endif + if (dso && refcount_dec_and_test(&RC_CHK_ACCESS(dso)->refcnt)) dso__delete(dso); + else + RC_CHK_PUT(dso); } -void dso__set_build_id(struct dso *dso, struct build_id *bid) +int dso__swap_init(struct dso *dso, unsigned char eidata) { - dso->bid = *bid; - dso->has_build_id = 1; + static unsigned int const endian = 1; + + dso__set_needs_swap(dso, DSO_SWAP__NO); + + switch (eidata) { + case ELFDATA2LSB: + /* We are big endian, DSO is little endian. */ + if (*(unsigned char const *)&endian != 1) + dso__set_needs_swap(dso, DSO_SWAP__YES); + break; + + case ELFDATA2MSB: + /* We are little endian, DSO is big endian. 
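The *(unsigned char const *)&endian test used by both branches of this switch is the classic run-time byte-order probe: store 1 in an integer and look at which byte holds it. In isolation:

#include <stdio.h>

int main(void)
{
	static const unsigned int one = 1;

	/* Little-endian hosts store the least significant byte first. */
	printf("host is %s-endian\n",
	       *(const unsigned char *)&one == 1 ? "little" : "big");
	return 0;
}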
*/ + if (*(unsigned char const *)&endian != 0) + dso__set_needs_swap(dso, DSO_SWAP__YES); + break; + + default: + pr_err("unrecognized DSO data encoding %d\n", eidata); + return -EINVAL; + } + + return 0; } -bool dso__build_id_equal(const struct dso *dso, struct build_id *bid) +void dso__set_build_id(struct dso *dso, const struct build_id *bid) { - if (dso->bid.size > bid->size && dso->bid.size == BUILD_ID_SIZE) { + dso__id(dso)->build_id = *bid; +} + +bool dso__build_id_equal(const struct dso *dso, const struct build_id *bid) +{ + const struct build_id *dso_bid = dso__bid(dso); + + if (dso_bid->size > bid->size && dso_bid->size == BUILD_ID_SIZE) { /* * For the backward compatibility, it allows a build-id has * trailing zeros. */ - return !memcmp(dso->bid.data, bid->data, bid->size) && - !memchr_inv(&dso->bid.data[bid->size], 0, - dso->bid.size - bid->size); + return !memcmp(dso_bid->data, bid->data, bid->size) && + !memchr_inv(&dso_bid->data[bid->size], 0, + dso_bid->size - bid->size); } - return dso->bid.size == bid->size && - memcmp(dso->bid.data, bid->data, dso->bid.size) == 0; + return dso_bid->size == bid->size && + memcmp(dso_bid->data, bid->data, dso_bid->size) == 0; } void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine) { char path[PATH_MAX]; + struct build_id bid = { .size = 0, }; if (machine__is_default_guest(machine)) return; sprintf(path, "%s/sys/kernel/notes", machine->root_dir); - if (sysfs__read_build_id(path, &dso->bid) == 0) - dso->has_build_id = true; + sysfs__read_build_id(path, &bid); + dso__set_build_id(dso, &bid); } int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir) { char filename[PATH_MAX]; + struct build_id bid = { .size = 0, }; /* * kernel module short names are of the form "[module]" and * we need just "module" here. */ - const char *name = dso->short_name + 1; + const char *name = dso__short_name(dso) + 1; snprintf(filename, sizeof(filename), "%s/sys/module/%.*s/notes/.note.gnu.build-id", root_dir, (int)strlen(name) - 1, name); - if (sysfs__read_build_id(filename, &dso->bid) == 0) - dso->has_build_id = true; - + sysfs__read_build_id(filename, &bid); + dso__set_build_id(dso, &bid); return 0; } @@ -1458,21 +1719,21 @@ static size_t dso__fprintf_buildid(struct dso *dso, FILE *fp) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(&dso->bid, sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); return fprintf(fp, "%s", sbuild_id); } size_t dso__fprintf(struct dso *dso, FILE *fp) { struct rb_node *nd; - size_t ret = fprintf(fp, "dso: %s (", dso->short_name); + size_t ret = fprintf(fp, "dso: %s (", dso__short_name(dso)); - if (dso->short_name != dso->long_name) - ret += fprintf(fp, "%s, ", dso->long_name); + if (dso__short_name(dso) != dso__long_name(dso)) + ret += fprintf(fp, "%s, ", dso__long_name(dso)); ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? 
"" : "NOT "); ret += dso__fprintf_buildid(dso, fp); ret += fprintf(fp, ")\n"); - for (nd = rb_first_cached(&dso->symbols); nd; nd = rb_next(nd)) { + for (nd = rb_first_cached(dso__symbols(dso)); nd; nd = rb_next(nd)) { struct symbol *pos = rb_entry(nd, struct symbol, rb_node); ret += symbol__fprintf(pos, fp); } @@ -1482,11 +1743,10 @@ size_t dso__fprintf(struct dso *dso, FILE *fp) enum dso_type dso__type(struct dso *dso, struct machine *machine) { - int fd; + int fd = -1; enum dso_type type = DSO__TYPE_UNKNOWN; - fd = dso__data_get_fd(dso, machine); - if (fd >= 0) { + if (dso__data_get_fd(dso, machine, &fd)) { type = dso__type_fd(fd); dso__data_put_fd(dso); } @@ -1496,7 +1756,7 @@ enum dso_type dso__type(struct dso *dso, struct machine *machine) int dso__strerror_load(struct dso *dso, char *buf, size_t buflen) { - int idx, errnum = dso->load_errno; + int idx, errnum = *dso__load_errno(dso); /* * This must have a same ordering as the enum dso_load_errno. */ @@ -1526,3 +1786,127 @@ int dso__strerror_load(struct dso *dso, char *buf, size_t buflen) scnprintf(buf, buflen, "%s", dso_load__error_str[idx]); return 0; } + +bool perf_pid_map_tid(const char *dso_name, int *tid) +{ + return sscanf(dso_name, "/tmp/perf-%d.map", tid) == 1; +} + +bool is_perf_pid_map_name(const char *dso_name) +{ + int tid; + + return perf_pid_map_tid(dso_name, &tid); +} + +struct find_file_offset_data { + u64 ip; + u64 offset; +}; + +/* This will be called for each PHDR in an ELF binary */ +static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg) +{ + struct find_file_offset_data *data = arg; + + if (start <= data->ip && data->ip < start + len) { + data->offset = pgoff + data->ip - start; + return 1; + } + return 0; +} + +static const u8 *__dso__read_symbol(struct dso *dso, const char *symfs_filename, + u64 start, size_t len, + u8 **out_buf, u64 *out_buf_len, bool *is_64bit) +{ + struct nscookie nsc; + int fd; + ssize_t count; + struct find_file_offset_data data = { + .ip = start, + }; + u8 *code_buf = NULL; + int saved_errno; + + nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); + fd = open(symfs_filename, O_RDONLY); + saved_errno = errno; + nsinfo__mountns_exit(&nsc); + if (fd < 0) { + errno = saved_errno; + return NULL; + } + if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data, is_64bit) <= 0) { + close(fd); + errno = ENOENT; + return NULL; + } + code_buf = malloc(len); + if (code_buf == NULL) { + close(fd); + errno = ENOMEM; + return NULL; + } + count = pread(fd, code_buf, len, data.offset); + saved_errno = errno; + close(fd); + if ((u64)count != len) { + free(code_buf); + errno = saved_errno; + return NULL; + } + *out_buf = code_buf; + *out_buf_len = len; + return code_buf; +} + +/* + * Read a symbol into memory for disassembly by a library like capstone of + * libLLVM. If memory is allocated out_buf holds it. + */ +const u8 *dso__read_symbol(struct dso *dso, const char *symfs_filename, + const struct map *map, const struct symbol *sym, + u8 **out_buf, u64 *out_buf_len, bool *is_64bit) +{ + u64 start = map__rip_2objdump(map, sym->start); + u64 end = map__rip_2objdump(map, sym->end); + size_t len = end - start; + + *out_buf = NULL; + *out_buf_len = 0; + *is_64bit = false; + + if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) { + /* + * Note, there is fallback BPF image disassembly in the objdump + * version but it currently does nothing. 
+ */ + errno = EOPNOTSUPP; + return NULL; + } + if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) { +#ifdef HAVE_LIBBPF_SUPPORT + struct bpf_prog_info_node *info_node; + struct perf_bpil *info_linear; + + *is_64bit = sizeof(void *) == sizeof(u64); + info_node = perf_env__find_bpf_prog_info(dso__bpf_prog(dso)->env, + dso__bpf_prog(dso)->id); + if (!info_node) { + errno = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF; + return NULL; + } + info_linear = info_node->info_linear; + assert(len <= info_linear->info.jited_prog_len); + *out_buf_len = len; + return (const u8 *)(uintptr_t)(info_linear->info.jited_prog_insns); +#else + pr_debug("No BPF program disassembly support\n"); + errno = EOPNOTSUPP; + return NULL; +#endif + } + return __dso__read_symbol(dso, symfs_filename, start, len, + out_buf, out_buf_len, is_64bit); +} diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index b41c9782c754..f8ccb9816b89 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -10,7 +10,9 @@ #include <stdio.h> #include <linux/bitops.h> #include "build-id.h" +#include "debuginfo.h" #include "mutex.h" +#include <internal/rc_check.h> struct machine; struct map; @@ -19,30 +21,88 @@ struct perf_env; #define DSO__NAME_KALLSYMS "[kernel.kallsyms]" #define DSO__NAME_KCORE "[kernel.kcore]" +/** + * enum dso_binary_type - The kind of DSO generally associated with a memory + * region (struct map). + */ enum dso_binary_type { + /** @DSO_BINARY_TYPE__KALLSYMS: Symbols from /proc/kallsyms file. */ DSO_BINARY_TYPE__KALLSYMS = 0, + /** @DSO_BINARY_TYPE__GUEST_KALLSYMS: Guest /proc/kallsyms file. */ DSO_BINARY_TYPE__GUEST_KALLSYMS, + /** @DSO_BINARY_TYPE__VMLINUX: Path to kernel /boot/vmlinux file. */ DSO_BINARY_TYPE__VMLINUX, + /** @DSO_BINARY_TYPE__GUEST_VMLINUX: Path to guest kernel /boot/vmlinux file. */ DSO_BINARY_TYPE__GUEST_VMLINUX, + /** @DSO_BINARY_TYPE__JAVA_JIT: Symbols from /tmp/perf.map file. */ DSO_BINARY_TYPE__JAVA_JIT, + /** + * @DSO_BINARY_TYPE__DEBUGLINK: Debug file readable from the file path + * in the .gnu_debuglink ELF section of the dso. + */ DSO_BINARY_TYPE__DEBUGLINK, + /** + * @DSO_BINARY_TYPE__BUILD_ID_CACHE: File named after buildid located in + * the buildid cache with an elf filename. + */ DSO_BINARY_TYPE__BUILD_ID_CACHE, + /** + * @DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: File named after buildid + * located in the buildid cache with a debug filename. + */ DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO, + /** + * @DSO_BINARY_TYPE__FEDORA_DEBUGINFO: Debug file in /usr/lib/debug + * with .debug suffix. + */ DSO_BINARY_TYPE__FEDORA_DEBUGINFO, + /** @DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: Debug file in /usr/lib/debug. */ DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, + /** + * @DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: dso__long_name debuginfo + * file in /usr/lib/debug/lib rather than the expected + * /usr/lib/debug/usr/lib. + */ DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, + /** + * @DSO_BINARY_TYPE__BUILDID_DEBUGINFO: File named after buildid located + * in /usr/lib/debug/.build-id/. + */ DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + /** + * @DSO_BINARY_TYPE__GNU_DEBUGDATA: MiniDebuginfo where a compressed + * ELF file is placed in a .gnu_debugdata section. + */ + DSO_BINARY_TYPE__GNU_DEBUGDATA, + /** @DSO_BINARY_TYPE__SYSTEM_PATH_DSO: A regular executable/shared-object file. */ DSO_BINARY_TYPE__SYSTEM_PATH_DSO, + /** @DSO_BINARY_TYPE__GUEST_KMODULE: Guest kernel module .ko file. */ DSO_BINARY_TYPE__GUEST_KMODULE, + /** @DSO_BINARY_TYPE__GUEST_KMODULE_COMP: Guest kernel module .ko.gz file. 
*/ DSO_BINARY_TYPE__GUEST_KMODULE_COMP, + /** @DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: Kernel module .ko file. */ DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE, + /** @DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: Kernel module .ko.gz file. */ DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP, + /** @DSO_BINARY_TYPE__KCORE: /proc/kcore file. */ DSO_BINARY_TYPE__KCORE, + /** @DSO_BINARY_TYPE__GUEST_KCORE: Guest /proc/kcore file. */ DSO_BINARY_TYPE__GUEST_KCORE, + /** + * @DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: Openembedded/Yocto -dbg + * package debug info. + */ DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, + /** @DSO_BINARY_TYPE__BPF_PROG_INFO: jitted BPF code. */ DSO_BINARY_TYPE__BPF_PROG_INFO, + /** @DSO_BINARY_TYPE__BPF_IMAGE: jitted BPF trampoline or dispatcher code. */ DSO_BINARY_TYPE__BPF_IMAGE, + /** + * @DSO_BINARY_TYPE__OOL: out of line code such as kprobe-replaced + * instructions or optimized kprobes or ftrace trampolines. + */ DSO_BINARY_TYPE__OOL, + /** @DSO_BINARY_TYPE__NOT_FOUND: Unknown DSO kind. */ DSO_BINARY_TYPE__NOT_FOUND, }; @@ -100,39 +160,59 @@ enum dso_load_errno { __DSO_LOAD_ERRNO__END, }; -#define DSO__SWAP(dso, type, val) \ -({ \ - type ____r = val; \ - BUG_ON(dso->needs_swap == DSO_SWAP__UNSET); \ - if (dso->needs_swap == DSO_SWAP__YES) { \ - switch (sizeof(____r)) { \ - case 2: \ - ____r = bswap_16(val); \ - break; \ - case 4: \ - ____r = bswap_32(val); \ - break; \ - case 8: \ - ____r = bswap_64(val); \ - break; \ - default: \ - BUG_ON(1); \ - } \ - } \ - ____r; \ +#define DSO__SWAP(dso, type, val) \ +({ \ + type ____r = val; \ + enum dso_swap_type ___dst = dso__needs_swap(dso); \ + BUG_ON(___dst == DSO_SWAP__UNSET); \ + if (___dst == DSO_SWAP__YES) { \ + switch (sizeof(____r)) { \ + case 2: \ + ____r = bswap_16(val); \ + break; \ + case 4: \ + ____r = bswap_32(val); \ + break; \ + case 8: \ + ____r = bswap_64(val); \ + break; \ + default: \ + BUG_ON(1); \ + } \ + } \ + ____r; \ }) #define DSO__DATA_CACHE_SIZE 4096 #define DSO__DATA_CACHE_MASK ~(DSO__DATA_CACHE_SIZE - 1) -/* - * Data about backing storage DSO, comes from PERF_RECORD_MMAP2 meta events +/** + * struct dso_id + * + * Data about backing storage DSO, comes from PERF_RECORD_MMAP2 meta events, + * reading from /proc/pid/maps or synthesis of build_ids from DSOs. Possibly + * incomplete at any particular use. */ struct dso_id { - u32 maj; - u32 min; - u64 ino; - u64 ino_generation; + /* Data related to the mmap2 event or read from /proc/pid/maps. */ + struct { + u32 maj; + u32 min; + u64 ino; + u64 ino_generation; + }; + /** @mmap2_valid: Are the maj, min and ino fields valid? */ + bool mmap2_valid; + /** + * @mmap2_ino_generation_valid: Is the ino_generation valid? Generally + * false for /proc/pid/maps mmap event. + */ + bool mmap2_ino_generation_valid; + /** + * @build_id: A possibly populated build_id. build_id__is_defined checks + * whether it is populated. 
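The reworked DSO__SWAP above still dispatches on the operand's size at compile time; for the 16-bit e_machine read earlier it reduces to bswap_16(). A runnable illustration with an assumed input value:

#include <byteswap.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* EM_X86_64 (62, 0x003e) read from a file of the opposite byte
	 * order arrives as 0x3e00 and must be swapped before use. */
	uint16_t raw = 0x3e00;

	printf("0x%04x\n", bswap_16(raw));	/* prints 0x003e */
	return 0;
}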
+ */ + struct build_id build_id; }; struct dso_cache { @@ -142,83 +222,99 @@ struct dso_cache { char data[]; }; +struct dso_data { + struct rb_root cache; + struct list_head open_entry; +#ifdef REFCNT_CHECKING + struct dso *dso; +#endif + int fd; + int status; + u32 status_seen; + u64 file_size; +#ifdef HAVE_LIBUNWIND_SUPPORT + u64 elf_base_addr; + u64 debug_frame_offset; + u64 eh_frame_hdr_addr; + u64 eh_frame_hdr_offset; +#endif +}; + +struct dso_bpf_prog { + u32 id; + u32 sub_id; + struct perf_env *env; +}; + struct auxtrace_cache; -struct dso { +DECLARE_RC_STRUCT(dso) { struct mutex lock; - struct list_head node; - struct rb_node rb_node; /* rbtree node sorted by long name */ - struct rb_root *root; /* root of rbtree that rb_node is in */ + struct dsos *dsos; struct rb_root_cached symbols; struct symbol **symbol_names; size_t symbol_names_len; struct rb_root_cached inlined_nodes; struct rb_root_cached srclines; + struct rb_root data_types; + struct rb_root global_vars; + struct { u64 addr; struct symbol *symbol; } last_find_result; + u64 text_offset; + u64 text_end; + const char *short_name; + const char *long_name; void *a2l; char *symsrc_filename; +#if defined(__powerpc__) + void *dwfl; /* DWARF debug info */ +#endif + struct nsinfo *nsinfo; + struct auxtrace_cache *auxtrace_cache; + union { /* Tool specific area */ + void *priv; + u64 db_id; + }; + /* bpf prog information */ + struct dso_bpf_prog bpf_prog; + /* dso data file */ + struct dso_data data; + struct dso_id id; unsigned int a2l_fails; - enum dso_space_type kernel; - enum dso_swap_type needs_swap; - enum dso_binary_type symtab_type; - enum dso_binary_type binary_type; + int comp; + refcount_t refcnt; enum dso_load_errno load_errno; + u16 long_name_len; + u16 short_name_len; + enum dso_binary_type symtab_type:8; + enum dso_binary_type binary_type:8; + enum dso_space_type kernel:2; + enum dso_swap_type needs_swap:2; + bool is_kmod:1; u8 adjust_symbols:1; - u8 has_build_id:1; u8 header_build_id:1; u8 has_srcline:1; u8 hit:1; u8 annotate_warned:1; u8 auxtrace_warned:1; + u8 debuginfo_warned:1; u8 short_name_allocated:1; u8 long_name_allocated:1; u8 is_64_bit:1; bool sorted_by_name; bool loaded; u8 rel; - struct build_id bid; - u64 text_offset; - const char *short_name; - const char *long_name; - u16 long_name_len; - u16 short_name_len; - void *dwfl; /* DWARF debug info */ - struct auxtrace_cache *auxtrace_cache; - int comp; - - /* dso data file */ - struct { - struct rb_root cache; - int fd; - int status; - u32 status_seen; - u64 file_size; - struct list_head open_entry; - u64 elf_base_addr; - u64 debug_frame_offset; - u64 eh_frame_hdr_addr; - u64 eh_frame_hdr_offset; - } data; - /* bpf prog information */ - struct { - u32 id; - u32 sub_id; - struct perf_env *env; - } bpf_prog; - - union { /* Tool specific area */ - void *priv; - u64 db_id; - }; - struct nsinfo *nsinfo; - struct dso_id id; - refcount_t refcnt; char name[]; }; +extern struct mutex _dso__data_open_lock; +extern const struct dso_id dso_id_empty; + +int dso_id__cmp(const struct dso_id *a, const struct dso_id *b); + /* dso__for_each_symbol - iterate over the symbols of given type * * @dso: the 'struct dso *' in which symbols are iterated @@ -226,31 +322,418 @@ struct dso { * @n: the 'struct rb_node *' to use as a temporary storage */ #define dso__for_each_symbol(dso, pos, n) \ - symbols__for_each_entry(&(dso)->symbols, pos, n) + symbols__for_each_entry(dso__symbols(dso), pos, n) + +static inline void *dso__a2l(const struct dso *dso) +{ + return 
RC_CHK_ACCESS(dso)->a2l; +} + +static inline void dso__set_a2l(struct dso *dso, void *val) +{ + RC_CHK_ACCESS(dso)->a2l = val; +} -#define dsos__for_each_with_build_id(pos, head) \ - list_for_each_entry(pos, head, node) \ - if (!pos->has_build_id) \ - continue; \ - else +static inline unsigned int dso__a2l_fails(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->a2l_fails; +} + +static inline void dso__set_a2l_fails(struct dso *dso, unsigned int val) +{ + RC_CHK_ACCESS(dso)->a2l_fails = val; +} + +static inline bool dso__adjust_symbols(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->adjust_symbols; +} + +static inline void dso__set_adjust_symbols(struct dso *dso, bool val) +{ + RC_CHK_ACCESS(dso)->adjust_symbols = val; +} + +static inline bool dso__annotate_warned(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->annotate_warned; +} + +static inline void dso__set_annotate_warned(struct dso *dso) +{ + RC_CHK_ACCESS(dso)->annotate_warned = 1; +} + +static inline bool dso__debuginfo_warned(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->debuginfo_warned; +} + +static inline void dso__set_debuginfo_warned(struct dso *dso) +{ + RC_CHK_ACCESS(dso)->debuginfo_warned = 1; +} + +static inline bool dso__auxtrace_warned(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->auxtrace_warned; +} + +static inline void dso__set_auxtrace_warned(struct dso *dso) +{ + RC_CHK_ACCESS(dso)->auxtrace_warned = 1; +} + +static inline struct auxtrace_cache *dso__auxtrace_cache(struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->auxtrace_cache; +} + +static inline void dso__set_auxtrace_cache(struct dso *dso, struct auxtrace_cache *cache) +{ + RC_CHK_ACCESS(dso)->auxtrace_cache = cache; +} + +static inline struct dso_bpf_prog *dso__bpf_prog(struct dso *dso) +{ + return &RC_CHK_ACCESS(dso)->bpf_prog; +} + +static inline bool dso__has_srcline(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->has_srcline; +} + +static inline void dso__set_has_srcline(struct dso *dso, bool val) +{ + RC_CHK_ACCESS(dso)->has_srcline = val; +} + +static inline int dso__comp(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->comp; +} + +static inline void dso__set_comp(struct dso *dso, int comp) +{ + RC_CHK_ACCESS(dso)->comp = comp; +} + +static inline struct dso_data *dso__data(struct dso *dso) +{ + return &RC_CHK_ACCESS(dso)->data; +} + +static inline u64 dso__db_id(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->db_id; +} + +static inline void dso__set_db_id(struct dso *dso, u64 db_id) +{ + RC_CHK_ACCESS(dso)->db_id = db_id; +} + +static inline struct dsos *dso__dsos(struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->dsos; +} + +static inline void dso__set_dsos(struct dso *dso, struct dsos *dsos) +{ + RC_CHK_ACCESS(dso)->dsos = dsos; +} + +static inline bool dso__header_build_id(struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->header_build_id; +} + +static inline void dso__set_header_build_id(struct dso *dso, bool val) +{ + RC_CHK_ACCESS(dso)->header_build_id = val; +} + +static inline bool dso__hit(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->hit; +} + +static inline void dso__set_hit(struct dso *dso) +{ + RC_CHK_ACCESS(dso)->hit = 1; +} + +static inline struct dso_id *dso__id(struct dso *dso) +{ + return &RC_CHK_ACCESS(dso)->id; +} + +static inline const struct dso_id *dso__id_const(const struct dso *dso) +{ + return &RC_CHK_ACCESS(dso)->id; +} + +static inline const struct build_id *dso__bid(const struct dso *dso) +{ + return &dso__id_const(dso)->build_id; +} + +static inline bool 
dso__has_build_id(const struct dso *dso) +{ + return build_id__is_defined(dso__bid(dso)); +} + +static inline struct rb_root_cached *dso__inlined_nodes(struct dso *dso) +{ + return &RC_CHK_ACCESS(dso)->inlined_nodes; +} + +static inline bool dso__is_64_bit(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->is_64_bit; +} + +static inline void dso__set_is_64_bit(struct dso *dso, bool is) +{ + RC_CHK_ACCESS(dso)->is_64_bit = is; +} + +static inline bool dso__is_kmod(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->is_kmod; +} + +static inline void dso__set_is_kmod(struct dso *dso) +{ + RC_CHK_ACCESS(dso)->is_kmod = 1; +} + +static inline enum dso_space_type dso__kernel(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->kernel; +} + +static inline void dso__set_kernel(struct dso *dso, enum dso_space_type kernel) +{ + RC_CHK_ACCESS(dso)->kernel = kernel; +} + +static inline u64 dso__last_find_result_addr(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->last_find_result.addr; +} + +static inline void dso__set_last_find_result_addr(struct dso *dso, u64 addr) +{ + RC_CHK_ACCESS(dso)->last_find_result.addr = addr; +} + +static inline struct symbol *dso__last_find_result_symbol(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->last_find_result.symbol; +} + +static inline void dso__set_last_find_result_symbol(struct dso *dso, struct symbol *symbol) +{ + RC_CHK_ACCESS(dso)->last_find_result.symbol = symbol; +} + +static inline enum dso_load_errno *dso__load_errno(struct dso *dso) +{ + return &RC_CHK_ACCESS(dso)->load_errno; +} static inline void dso__set_loaded(struct dso *dso) { - dso->loaded = true; + RC_CHK_ACCESS(dso)->loaded = true; +} + +static inline struct mutex *dso__lock(struct dso *dso) +{ + return &RC_CHK_ACCESS(dso)->lock; +} + +static inline const char *dso__long_name(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->long_name; +} + +static inline bool dso__long_name_allocated(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->long_name_allocated; +} + +static inline void dso__set_long_name_allocated(struct dso *dso, bool allocated) +{ + RC_CHK_ACCESS(dso)->long_name_allocated = allocated; +} + +static inline u16 dso__long_name_len(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->long_name_len; +} + +static inline const char *dso__name(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->name; +} + +static inline enum dso_swap_type dso__needs_swap(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->needs_swap; +} + +static inline void dso__set_needs_swap(struct dso *dso, enum dso_swap_type type) +{ + RC_CHK_ACCESS(dso)->needs_swap = type; +} + +static inline struct nsinfo *dso__nsinfo(struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->nsinfo; +} + +static inline const struct nsinfo *dso__nsinfo_const(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->nsinfo; +} + +static inline struct nsinfo **dso__nsinfo_ptr(struct dso *dso) +{ + return &RC_CHK_ACCESS(dso)->nsinfo; +} + +void dso__set_nsinfo(struct dso *dso, struct nsinfo *nsi); + +static inline u8 dso__rel(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->rel; +} + +static inline void dso__set_rel(struct dso *dso, u8 rel) +{ + RC_CHK_ACCESS(dso)->rel = rel; +} + +static inline const char *dso__short_name(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->short_name; } -struct dso *dso__new_id(const char *name, struct dso_id *id); +static inline bool dso__short_name_allocated(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->short_name_allocated; +} + +static inline void 
dso__set_short_name_allocated(struct dso *dso, bool allocated) +{ + RC_CHK_ACCESS(dso)->short_name_allocated = allocated; +} + +static inline u16 dso__short_name_len(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->short_name_len; +} + +static inline struct rb_root_cached *dso__srclines(struct dso *dso) +{ + return &RC_CHK_ACCESS(dso)->srclines; +} + +static inline struct rb_root *dso__data_types(struct dso *dso) +{ + return &RC_CHK_ACCESS(dso)->data_types; +} + +static inline struct rb_root *dso__global_vars(struct dso *dso) +{ + return &RC_CHK_ACCESS(dso)->global_vars; +} + +static inline struct rb_root_cached *dso__symbols(struct dso *dso) +{ + return &RC_CHK_ACCESS(dso)->symbols; +} + +static inline struct symbol **dso__symbol_names(struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->symbol_names; +} + +static inline void dso__set_symbol_names(struct dso *dso, struct symbol **names) +{ + RC_CHK_ACCESS(dso)->symbol_names = names; +} + +static inline size_t dso__symbol_names_len(struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->symbol_names_len; +} + +static inline void dso__set_symbol_names_len(struct dso *dso, size_t len) +{ + RC_CHK_ACCESS(dso)->symbol_names_len = len; +} + +static inline const char *dso__symsrc_filename(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->symsrc_filename; +} + +static inline void dso__set_symsrc_filename(struct dso *dso, char *val) +{ + RC_CHK_ACCESS(dso)->symsrc_filename = val; +} + +static inline void dso__free_symsrc_filename(struct dso *dso) +{ + zfree(&RC_CHK_ACCESS(dso)->symsrc_filename); +} + +static inline enum dso_binary_type dso__symtab_type(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->symtab_type; +} + +static inline void dso__set_symtab_type(struct dso *dso, enum dso_binary_type bt) +{ + RC_CHK_ACCESS(dso)->symtab_type = bt; +} + +static inline u64 dso__text_end(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->text_end; +} + +static inline void dso__set_text_end(struct dso *dso, u64 val) +{ + RC_CHK_ACCESS(dso)->text_end = val; +} + +static inline u64 dso__text_offset(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->text_offset; +} + +static inline void dso__set_text_offset(struct dso *dso, u64 val) +{ + RC_CHK_ACCESS(dso)->text_offset = val; +} + +struct dso *dso__new_id(const char *name, const struct dso_id *id); struct dso *dso__new(const char *name); void dso__delete(struct dso *dso); int dso__cmp_id(struct dso *a, struct dso *b); void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated); void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated); +void __dso__improve_id(struct dso *dso, const struct dso_id *id); int dso__name_len(const struct dso *dso); struct dso *dso__get(struct dso *dso); -void dso__put(struct dso *dso); +void dso__put(struct dso *dso) LOCKS_EXCLUDED(_dso__data_open_lock); static inline void __dso__zput(struct dso **dso) { @@ -264,7 +747,7 @@ bool dso__loaded(const struct dso *dso); static inline bool dso__has_symbols(const struct dso *dso) { - return !RB_EMPTY_ROOT(&dso->symbols.rb_root); + return !RB_EMPTY_ROOT(&RC_CHK_ACCESS(dso)->symbols.rb_root); } char *dso__filename_with_chroot(const struct dso *dso, const char *filename); @@ -273,8 +756,10 @@ bool dso__sorted_by_name(const struct dso *dso); void dso__set_sorted_by_name(struct dso *dso); void dso__sort_by_name(struct dso *dso); -void dso__set_build_id(struct dso *dso, struct build_id *bid); -bool dso__build_id_equal(const struct dso *dso, struct build_id *bid); +int dso__swap_init(struct 
dso *dso, unsigned char eidata); + +void dso__set_build_id(struct dso *dso, const struct build_id *bid); +bool dso__build_id_equal(const struct dso *dso, const struct build_id *bid); void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine); int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir); @@ -330,8 +815,8 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m, * The current usage of the dso__data_* interface is as follows: * * Get DSO's fd: - * int fd = dso__data_get_fd(dso, machine); - * if (fd >= 0) { + * int fd; + * if (dso__data_get_fd(dso, machine, &fd)) { * USE 'fd' SOMEHOW * dso__data_put_fd(dso); * } @@ -353,14 +838,16 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m, * * TODO */ -int dso__data_get_fd(struct dso *dso, struct machine *machine); -void dso__data_put_fd(struct dso *dso); -void dso__data_close(struct dso *dso); +bool dso__data_get_fd(struct dso *dso, struct machine *machine, int *fd) + EXCLUSIVE_TRYLOCK_FUNCTION(true, _dso__data_open_lock); +void dso__data_put_fd(struct dso *dso) UNLOCK_FUNCTION(_dso__data_open_lock); +void dso__data_close(struct dso *dso) LOCKS_EXCLUDED(_dso__data_open_lock); int dso__data_file_size(struct dso *dso, struct machine *machine); off_t dso__data_size(struct dso *dso, struct machine *machine); ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, u64 offset, u8 *data, ssize_t size); +uint16_t dso__e_machine(struct dso *dso, struct machine *machine); ssize_t dso__data_read_addr(struct dso *dso, struct map *map, struct machine *machine, u64 addr, u8 *data, ssize_t size); @@ -380,21 +867,35 @@ void dso__reset_find_symbol_cache(struct dso *dso); size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp); size_t dso__fprintf(struct dso *dso, FILE *fp); +static inline enum dso_binary_type dso__binary_type(const struct dso *dso) +{ + return RC_CHK_ACCESS(dso)->binary_type; +} + +static inline void dso__set_binary_type(struct dso *dso, enum dso_binary_type bt) +{ + RC_CHK_ACCESS(dso)->binary_type = bt; +} + static inline bool dso__is_vmlinux(const struct dso *dso) { - return dso->binary_type == DSO_BINARY_TYPE__VMLINUX || - dso->binary_type == DSO_BINARY_TYPE__GUEST_VMLINUX; + enum dso_binary_type bt = dso__binary_type(dso); + + return bt == DSO_BINARY_TYPE__VMLINUX || bt == DSO_BINARY_TYPE__GUEST_VMLINUX; } static inline bool dso__is_kcore(const struct dso *dso) { - return dso->binary_type == DSO_BINARY_TYPE__KCORE || - dso->binary_type == DSO_BINARY_TYPE__GUEST_KCORE; + enum dso_binary_type bt = dso__binary_type(dso); + + return bt == DSO_BINARY_TYPE__KCORE || bt == DSO_BINARY_TYPE__GUEST_KCORE; } static inline bool dso__is_kallsyms(const struct dso *dso) { - return dso->kernel && dso->long_name[0] != '/'; + enum dso_binary_type bt = dso__binary_type(dso); + + return bt == DSO_BINARY_TYPE__KALLSYMS || bt == DSO_BINARY_TYPE__GUEST_KALLSYMS; } bool dso__is_object_file(const struct dso *dso); @@ -407,4 +908,24 @@ int dso__strerror_load(struct dso *dso, char *buf, size_t buflen); void reset_fd_limit(void); +u64 dso__find_global_type(struct dso *dso, u64 addr); +u64 dso__findnew_global_type(struct dso *dso, u64 addr, u64 offset); + +/* Check if dso name is of format "/tmp/perf-%d.map" */ +bool perf_pid_map_tid(const char *dso_name, int *tid); +bool is_perf_pid_map_name(const char *dso_name); + +/* + * In the future, we may get debuginfo using build-ID (w/o path). + * Add this helper is for the smooth conversion. 
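The LOCKS_EXCLUDED, UNLOCK_FUNCTION and EXCLUSIVE_TRYLOCK_FUNCTION markers on the declarations above feed clang's -Wthread-safety analysis, which can then statically check that a successful dso__data_get_fd() is always paired with dso__data_put_fd(). A sketch of how such macros are conventionally defined (perf's actual definitions live in tools/perf/util/mutex.h and may differ in detail):

#ifdef __clang__
#define LOCKS_EXCLUDED(...)	__attribute__((locks_excluded(__VA_ARGS__)))
#define UNLOCK_FUNCTION(...)	__attribute__((unlock_function(__VA_ARGS__)))
#define EXCLUSIVE_TRYLOCK_FUNCTION(...) \
	__attribute__((exclusive_trylock_function(__VA_ARGS__)))
#else	/* annotations compile away on other compilers */
#define LOCKS_EXCLUDED(...)
#define UNLOCK_FUNCTION(...)
#define EXCLUSIVE_TRYLOCK_FUNCTION(...)
#endif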
+ */ +static inline struct debuginfo *dso__debuginfo(struct dso *dso) +{ + return debuginfo__new(dso__long_name(dso)); +} + +const u8 *dso__read_symbol(struct dso *dso, const char *symfs_filename, + const struct map *map, const struct symbol *sym, + u8 **out_buf, u64 *out_buf_len, bool *is_64bit); + #endif /* __PERF_DSO */ diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index cf80aa42dd07..0a7645c7fae7 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -12,115 +12,141 @@ #include <symbol.h> // filename__read_build_id #include <unistd.h> -static int __dso_id__cmp(struct dso_id *a, struct dso_id *b) +void dsos__init(struct dsos *dsos) { - if (a->maj > b->maj) return -1; - if (a->maj < b->maj) return 1; + init_rwsem(&dsos->lock); - if (a->min > b->min) return -1; - if (a->min < b->min) return 1; + dsos->cnt = 0; + dsos->allocated = 0; + dsos->dsos = NULL; + dsos->sorted = true; +} - if (a->ino > b->ino) return -1; - if (a->ino < b->ino) return 1; +static void dsos__purge(struct dsos *dsos) +{ + down_write(&dsos->lock); - /* - * Synthesized MMAP events have zero ino_generation, avoid comparing - * them with MMAP events with actual ino_generation. - * - * I found it harmful because the mismatch resulted in a new - * dso that did not have a build ID whereas the original dso did have a - * build ID. The build ID was essential because the object was not found - * otherwise. - Adrian - */ - if (a->ino_generation && b->ino_generation) { - if (a->ino_generation > b->ino_generation) return -1; - if (a->ino_generation < b->ino_generation) return 1; - } + for (unsigned int i = 0; i < dsos->cnt; i++) { + struct dso *dso = dsos->dsos[i]; - return 0; -} + dso__set_dsos(dso, NULL); + dso__put(dso); + } -static bool dso_id__empty(struct dso_id *id) -{ - if (!id) - return true; + zfree(&dsos->dsos); + dsos->cnt = 0; + dsos->allocated = 0; + dsos->sorted = true; - return !id->maj && !id->min && !id->ino && !id->ino_generation; + up_write(&dsos->lock); } -static void dso__inject_id(struct dso *dso, struct dso_id *id) +void dsos__exit(struct dsos *dsos) { - dso->id.maj = id->maj; - dso->id.min = id->min; - dso->id.ino = id->ino; - dso->id.ino_generation = id->ino_generation; + dsos__purge(dsos); + exit_rwsem(&dsos->lock); } -static int dso_id__cmp(struct dso_id *a, struct dso_id *b) + +static int __dsos__for_each_dso(struct dsos *dsos, + int (*cb)(struct dso *dso, void *data), + void *data) { - /* - * The second is always dso->id, so zeroes if not set, assume passing - * NULL for a means a zeroed id - */ - if (dso_id__empty(a) || dso_id__empty(b)) - return 0; + for (unsigned int i = 0; i < dsos->cnt; i++) { + struct dso *dso = dsos->dsos[i]; + int err; - return __dso_id__cmp(a, b); + err = cb(dso, data); + if (err) + return err; + } + return 0; } -int dso__cmp_id(struct dso *a, struct dso *b) -{ - return __dso_id__cmp(&a->id, &b->id); -} +struct dsos__read_build_ids_cb_args { + bool with_hits; + bool have_build_id; +}; -bool __dsos__read_build_ids(struct list_head *head, bool with_hits) +static int dsos__read_build_ids_cb(struct dso *dso, void *data) { - bool have_build_id = false; - struct dso *pos; + struct dsos__read_build_ids_cb_args *args = data; struct nscookie nsc; + struct build_id bid = { .size = 0, }; - list_for_each_entry(pos, head, node) { - if (with_hits && !pos->hit && !dso__is_vdso(pos)) - continue; - if (pos->has_build_id) { - have_build_id = true; - continue; - } - nsinfo__mountns_enter(pos->nsinfo, &nsc); - if (filename__read_build_id(pos->long_name, &pos->bid) > 
0) { - have_build_id = true; - pos->has_build_id = true; - } else if (errno == ENOENT && pos->nsinfo) { - char *new_name = dso__filename_with_chroot(pos, pos->long_name); - - if (new_name && filename__read_build_id(new_name, - &pos->bid) > 0) { - have_build_id = true; - pos->has_build_id = true; - } - free(new_name); + if (args->with_hits && !dso__hit(dso) && !dso__is_vdso(dso)) + return 0; + if (dso__has_build_id(dso)) { + args->have_build_id = true; + return 0; + } + nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); + if (filename__read_build_id(dso__long_name(dso), &bid) > 0) { + dso__set_build_id(dso, &bid); + args->have_build_id = true; + } else if (errno == ENOENT && dso__nsinfo(dso)) { + char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso)); + + if (new_name && filename__read_build_id(new_name, &bid) > 0) { + dso__set_build_id(dso, &bid); + args->have_build_id = true; } - nsinfo__mountns_exit(&nsc); + free(new_name); } + nsinfo__mountns_exit(&nsc); + return 0; +} + +bool dsos__read_build_ids(struct dsos *dsos, bool with_hits) +{ + struct dsos__read_build_ids_cb_args args = { + .with_hits = with_hits, + .have_build_id = false, + }; + + dsos__for_each_dso(dsos, dsos__read_build_ids_cb, &args); + return args.have_build_id; +} - return have_build_id; +static int __dso__cmp_long_name(const char *long_name, const struct dso_id *id, + const struct dso *b) +{ + int rc = strcmp(long_name, dso__long_name(b)); + return rc ?: dso_id__cmp(id, dso__id_const(b)); } -static int __dso__cmp_long_name(const char *long_name, struct dso_id *id, struct dso *b) +static int __dso__cmp_short_name(const char *short_name, const struct dso_id *id, + const struct dso *b) { - int rc = strcmp(long_name, b->long_name); - return rc ?: dso_id__cmp(id, &b->id); + int rc = strcmp(short_name, dso__short_name(b)); + return rc ?: dso_id__cmp(id, dso__id_const(b)); } -static int __dso__cmp_short_name(const char *short_name, struct dso_id *id, struct dso *b) +static int dsos__cmp_long_name_id_short_name(const void *va, const void *vb) { - int rc = strcmp(short_name, b->short_name); - return rc ?: dso_id__cmp(id, &b->id); + const struct dso *a = *((const struct dso **)va); + const struct dso *b = *((const struct dso **)vb); + int rc = strcmp(dso__long_name(a), dso__long_name(b)); + + if (!rc) { + rc = dso_id__cmp(dso__id_const(a), dso__id_const(b)); + if (!rc) + rc = strcmp(dso__short_name(a), dso__short_name(b)); + } + return rc; } -static int dso__cmp_short_name(struct dso *a, struct dso *b) +struct dsos__key { + const char *long_name; + const struct dso_id *id; +}; + +static int dsos__cmp_key_long_name_id(const void *vkey, const void *vdso) { - return __dso__cmp_short_name(a->short_name, &a->id, b); + const struct dsos__key *key = vkey; + const struct dso *dso = *((const struct dso **)vdso); + + return __dso__cmp_long_name(key->long_name, key->id, dso); } /* @@ -128,110 +154,142 @@ static int dso__cmp_short_name(struct dso *a, struct dso *b) * Either one of the dso or name parameter must be non-NULL or the * function will not work. 
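The replacement lookup below drops the rbtree in favour of a sorted array: the array is sorted lazily on first use (upgrading the read lock to a write lock when the sort is needed) and then probed with bsearch(). The core pattern, stripped of the locking and the perf types:

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

struct table {
	const char **names;
	size_t cnt;
	bool sorted;
};

static int cmp_name(const void *a, const void *b)
{
	return strcmp(*(const char * const *)a, *(const char * const *)b);
}

static const char **table__find(struct table *t, const char *name)
{
	if (!t->sorted) {		/* sort once, on first lookup */
		qsort(t->names, t->cnt, sizeof(t->names[0]), cmp_name);
		t->sorted = true;
	}
	return bsearch(&name, t->names, t->cnt, sizeof(t->names[0]), cmp_name);
}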
*/ -struct dso *__dsos__findnew_link_by_longname_id(struct rb_root *root, struct dso *dso, - const char *name, struct dso_id *id) +static struct dso *__dsos__find_by_longname_id(struct dsos *dsos, + const char *name, + const struct dso_id *id, + bool write_locked) + SHARED_LOCKS_REQUIRED(dsos->lock) { - struct rb_node **p = &root->rb_node; - struct rb_node *parent = NULL; + struct dsos__key key = { + .long_name = name, + .id = id, + }; + struct dso **res; - if (!name) - name = dso->long_name; - /* - * Find node with the matching name - */ - while (*p) { - struct dso *this = rb_entry(*p, struct dso, rb_node); - int rc = __dso__cmp_long_name(name, id, this); - - parent = *p; - if (rc == 0) { - /* - * In case the new DSO is a duplicate of an existing - * one, print a one-time warning & put the new entry - * at the end of the list of duplicates. - */ - if (!dso || (dso == this)) - return this; /* Find matching dso */ - /* - * The core kernel DSOs may have duplicated long name. - * In this case, the short name should be different. - * Comparing the short names to differentiate the DSOs. - */ - rc = dso__cmp_short_name(dso, this); - if (rc == 0) { - pr_err("Duplicated dso name: %s\n", name); - return NULL; - } + if (dsos->dsos == NULL) + return NULL; + + if (!dsos->sorted) { + if (!write_locked) { + struct dso *dso; + + up_read(&dsos->lock); + down_write(&dsos->lock); + dso = __dsos__find_by_longname_id(dsos, name, id, + /*write_locked=*/true); + up_write(&dsos->lock); + down_read(&dsos->lock); + return dso; } - if (rc < 0) - p = &parent->rb_left; - else - p = &parent->rb_right; - } - if (dso) { - /* Add new node and rebalance tree */ - rb_link_node(&dso->rb_node, parent, p); - rb_insert_color(&dso->rb_node, root); - dso->root = root; + qsort(dsos->dsos, dsos->cnt, sizeof(struct dso *), + dsos__cmp_long_name_id_short_name); + dsos->sorted = true; } - return NULL; + + res = bsearch(&key, dsos->dsos, dsos->cnt, sizeof(struct dso *), + dsos__cmp_key_long_name_id); + if (!res) + return NULL; + + return dso__get(*res); } -void __dsos__add(struct dsos *dsos, struct dso *dso) +int __dsos__add(struct dsos *dsos, struct dso *dso) { - list_add_tail(&dso->node, &dsos->head); - __dsos__findnew_link_by_longname_id(&dsos->root, dso, NULL, &dso->id); - /* - * It is now in the linked list, grab a reference, then garbage collect - * this when needing memory, by looking at LRU dso instances in the - * list with atomic_read(&dso->refcnt) == 1, i.e. no references - * anywhere besides the one for the list, do, under a lock for the - * list: remove it from the list, then a dso__put(), that probably will - * be the last and will then call dso__delete(), end of life. - * - * That, or at the end of the 'struct machine' lifetime, when all - * 'struct dso' instances will be removed from the list, in - * dsos__exit(), if they have no other reference from some other data - * structure. - * - * E.g.: after processing a 'perf.data' file and storing references - * to objects instantiated while processing events, we will have - * references to the 'thread', 'map', 'dso' structs all from 'struct - * hist_entry' instances, but we may not need anything not referenced, - * so we might as well call machines__exit()/machines__delete() and - * garbage collect it. 
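When the array is already sorted, the new __dsos__add() below keeps it sorted by locating the insertion point with a lower-bound binary search and memmove()ing the tail up one slot. The search on its own, over plain ints for clarity:

/* First index whose element is >= key, i.e. where to insert; cnt if none. */
static int lower_bound(const int *arr, int cnt, int key)
{
	int low = 0, high = cnt - 1, insert = cnt;

	while (low <= high) {
		int mid = low + (high - low) / 2;

		if (arr[mid] < key) {
			low = mid + 1;
		} else {
			high = mid - 1;
			insert = mid;	/* best candidate so far */
		}
	}
	return insert;
}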
- */ - dso__get(dso); + if (dsos->cnt == dsos->allocated) { + unsigned int to_allocate = 2; + struct dso **temp; + + if (dsos->allocated > 0) + to_allocate = dsos->allocated * 2; + temp = realloc(dsos->dsos, sizeof(struct dso *) * to_allocate); + if (!temp) + return -ENOMEM; + dsos->dsos = temp; + dsos->allocated = to_allocate; + } + if (!dsos->sorted) { + dsos->dsos[dsos->cnt++] = dso__get(dso); + } else { + int low = 0, high = dsos->cnt - 1; + int insert = dsos->cnt; /* Default to inserting at the end. */ + + while (low <= high) { + int mid = low + (high - low) / 2; + int cmp = dsos__cmp_long_name_id_short_name(&dsos->dsos[mid], &dso); + + if (cmp < 0) { + low = mid + 1; + } else { + high = mid - 1; + insert = mid; + } + } + memmove(&dsos->dsos[insert + 1], &dsos->dsos[insert], + (dsos->cnt - insert) * sizeof(struct dso *)); + dsos->cnt++; + dsos->dsos[insert] = dso__get(dso); + } + dso__set_dsos(dso, dsos); + return 0; } -void dsos__add(struct dsos *dsos, struct dso *dso) +int dsos__add(struct dsos *dsos, struct dso *dso) { + int ret; + down_write(&dsos->lock); - __dsos__add(dsos, dso); + ret = __dsos__add(dsos, dso); up_write(&dsos->lock); + return ret; } -static struct dso *__dsos__findnew_by_longname_id(struct rb_root *root, const char *name, struct dso_id *id) +struct dsos__find_id_cb_args { + const char *name; + const struct dso_id *id; + struct dso *res; +}; + +static int dsos__find_id_cb(struct dso *dso, void *data) { - return __dsos__findnew_link_by_longname_id(root, NULL, name, id); + struct dsos__find_id_cb_args *args = data; + + if (__dso__cmp_short_name(args->name, args->id, dso) == 0) { + args->res = dso__get(dso); + return 1; + } + return 0; + } -static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, struct dso_id *id, bool cmp_short) +static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, const struct dso_id *id, + bool cmp_short, bool write_locked) + SHARED_LOCKS_REQUIRED(dsos->lock) { - struct dso *pos; + struct dso *res; if (cmp_short) { - list_for_each_entry(pos, &dsos->head, node) - if (__dso__cmp_short_name(name, id, pos) == 0) - return pos; - return NULL; + struct dsos__find_id_cb_args args = { + .name = name, + .id = id, + .res = NULL, + }; + + __dsos__for_each_dso(dsos, dsos__find_id_cb, &args); + return args.res; } - return __dsos__findnew_by_longname_id(&dsos->root, name, id); + res = __dsos__find_by_longname_id(dsos, name, id, write_locked); + return res; } -struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) +struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short) { - return __dsos__find_id(dsos, name, NULL, cmp_short); + struct dso *res; + + down_read(&dsos->lock); + res = __dsos__find_id(dsos, name, &dso_id_empty, cmp_short, /*write_locked=*/false); + up_read(&dsos->lock); + return res; } static void dso__set_basename(struct dso *dso) @@ -239,7 +297,7 @@ static void dso__set_basename(struct dso *dso) char *base, *lname; int tid; - if (sscanf(dso->long_name, "/tmp/perf-%d.map", &tid) == 1) { + if (perf_pid_map_tid(dso__long_name(dso), &tid)) { if (asprintf(&base, "[JIT] tid %d", tid) < 0) return; } else { @@ -247,7 +305,7 @@ static void dso__set_basename(struct dso *dso) * basename() may modify path buffer, so we must pass * a copy. 
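__dsos__add() above grows the backing array by doubling its capacity and, when the array is already sorted, keeps it sorted with a binary search for the insertion point followed by a memmove(). The same mechanics on a plain int array, as a compact sketch:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Insert val into the sorted array *arr (length *cnt, capacity *alloc). */
static int sorted_insert(int **arr, unsigned int *cnt, unsigned int *alloc, int val)
{
	unsigned int low = 0, high = *cnt, insert;

	if (*cnt == *alloc) {
		unsigned int to_alloc = *alloc ? *alloc * 2 : 2; /* double, as above */
		int *tmp = realloc(*arr, to_alloc * sizeof(int));

		if (!tmp)
			return -1;
		*arr = tmp;
		*alloc = to_alloc;
	}
	/* Binary search for the first element >= val. */
	while (low < high) {
		unsigned int mid = low + (high - low) / 2;

		if ((*arr)[mid] < val)
			low = mid + 1;
		else
			high = mid;
	}
	insert = low;
	memmove(*arr + insert + 1, *arr + insert, (*cnt - insert) * sizeof(int));
	(*arr)[insert] = val;
	(*cnt)++;
	return 0;
}

int main(void)
{
	int *arr = NULL;
	unsigned int cnt = 0, alloc = 0;

	for (int v = 0; v < 5; v++)
		sorted_insert(&arr, &cnt, &alloc, (v * 7) % 5);
	for (unsigned int i = 0; i < cnt; i++)
		printf("%d ", arr[i]);	/* 0 1 2 3 4 */
	printf("\n");
	free(arr);
	return 0;
}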
*/ - lname = strdup(dso->long_name); + lname = strdup(dso__long_name(dso)); if (!lname) return; @@ -266,68 +324,183 @@ static void dso__set_basename(struct dso *dso) dso__set_short_name(dso, base, true); } -static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, struct dso_id *id) +static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, const struct dso_id *id) { struct dso *dso = dso__new_id(name, id); if (dso != NULL) { - __dsos__add(dsos, dso); + /* + * The dsos lock is held on entry, so rename the dso before + * adding it to avoid needing to take the dsos lock again to say + * the array isn't sorted. + */ dso__set_basename(dso); - /* Put dso here because __dsos_add already got it */ - dso__put(dso); + __dsos__add(dsos, dso); } return dso; } -struct dso *__dsos__addnew(struct dsos *dsos, const char *name) -{ - return __dsos__addnew_id(dsos, name, NULL); -} - -static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id) +static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, const struct dso_id *id) + SHARED_LOCKS_REQUIRED(dsos->lock) { - struct dso *dso = __dsos__find_id(dsos, name, id, false); + struct dso *dso = __dsos__find_id(dsos, name, id, false, /*write_locked=*/true); - if (dso && dso_id__empty(&dso->id) && !dso_id__empty(id)) - dso__inject_id(dso, id); + if (dso) + __dso__improve_id(dso, id); return dso ? dso : __dsos__addnew_id(dsos, name, id); } -struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id) +struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, const struct dso_id *id) { struct dso *dso; down_write(&dsos->lock); - dso = dso__get(__dsos__findnew_id(dsos, name, id)); + dso = __dsos__findnew_id(dsos, name, id); up_write(&dsos->lock); return dso; } -size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, - bool (skip)(struct dso *dso, int parm), int parm) +struct dsos__fprintf_buildid_cb_args { + FILE *fp; + bool (*skip)(struct dso *dso, int parm); + int parm; + size_t ret; +}; + +static int dsos__fprintf_buildid_cb(struct dso *dso, void *data) { - struct dso *pos; - size_t ret = 0; + struct dsos__fprintf_buildid_cb_args *args = data; + char sbuild_id[SBUILD_ID_SIZE]; - list_for_each_entry(pos, head, node) { - char sbuild_id[SBUILD_ID_SIZE]; + if (args->skip && args->skip(dso, args->parm)) + return 0; + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); + args->ret += fprintf(args->fp, "%-40s %s\n", sbuild_id, dso__long_name(dso)); + return 0; +} - if (skip && skip(pos, parm)) - continue; - build_id__sprintf(&pos->bid, sbuild_id); - ret += fprintf(fp, "%-40s %s\n", sbuild_id, pos->long_name); - } - return ret; +size_t dsos__fprintf_buildid(struct dsos *dsos, FILE *fp, + bool (*skip)(struct dso *dso, int parm), int parm) +{ + struct dsos__fprintf_buildid_cb_args args = { + .fp = fp, + .skip = skip, + .parm = parm, + .ret = 0, + }; + + dsos__for_each_dso(dsos, dsos__fprintf_buildid_cb, &args); + return args.ret; +} + +struct dsos__fprintf_cb_args { + FILE *fp; + size_t ret; +}; + +static int dsos__fprintf_cb(struct dso *dso, void *data) +{ + struct dsos__fprintf_cb_args *args = data; + + args->ret += dso__fprintf(dso, args->fp); + return 0; +} + +size_t dsos__fprintf(struct dsos *dsos, FILE *fp) +{ + struct dsos__fprintf_cb_args args = { + .fp = fp, + .ret = 0, + }; + + dsos__for_each_dso(dsos, dsos__fprintf_cb, &args); + return args.ret; +} + +static int dsos__hit_all_cb(struct dso *dso, void *data __maybe_unused) +{ + 
dso__set_hit(dso); + return 0; } -size_t __dsos__fprintf(struct list_head *head, FILE *fp) +int dsos__hit_all(struct dsos *dsos) { - struct dso *pos; - size_t ret = 0; + return dsos__for_each_dso(dsos, dsos__hit_all_cb, NULL); +} - list_for_each_entry(pos, head, node) { - ret += dso__fprintf(pos, fp); +struct dso *dsos__findnew_module_dso(struct dsos *dsos, + struct machine *machine, + struct kmod_path *m, + const char *filename) +{ + struct dso *dso; + + down_write(&dsos->lock); + + dso = __dsos__find_id(dsos, m->name, &dso_id_empty, /*cmp_short=*/true, + /*write_locked=*/true); + if (dso) { + up_write(&dsos->lock); + return dso; + } + /* + * Failed to find the dso, so create it. Change the name before adding it + * to the array, to avoid unnecessary sorts and potential locking + * issues. + */ + dso = dso__new_id(m->name, /*id=*/NULL); + if (!dso) { + up_write(&dsos->lock); + return NULL; } + dso__set_basename(dso); + dso__set_module_info(dso, m, machine); + dso__set_long_name(dso, strdup(filename), true); + dso__set_kernel(dso, DSO_SPACE__KERNEL); + __dsos__add(dsos, dso); - return ret; + up_write(&dsos->lock); + return dso; +} + +static int dsos__find_kernel_dso_cb(struct dso *dso, void *data) +{ + struct dso **res = data; + /* + * The cpumode passed to is_kernel_module is not the cpumode of *this* + * event. If we insist on passing the correct cpumode to is_kernel_module, + * we should record the cpumode when adding this dso to the linked + * list. + * + * However, we don't really need to pass the correct cpumode. We know the + * correct cpumode must be kernel mode (if not, we should not link it + * onto the kernel_dsos list). + * + * Therefore, we pass PERF_RECORD_MISC_CPUMODE_UNKNOWN. + * is_kernel_module() treats it as a kernel cpumode. + */ + if (!dso__kernel(dso) || + is_kernel_module(dso__long_name(dso), PERF_RECORD_MISC_CPUMODE_UNKNOWN)) + return 0; + + *res = dso__get(dso); + return 1; +} + +struct dso *dsos__find_kernel_dso(struct dsos *dsos) +{ + struct dso *res = NULL; + + dsos__for_each_dso(dsos, dsos__find_kernel_dso_cb, &res); + return res; +} + +int dsos__for_each_dso(struct dsos *dsos, int (*cb)(struct dso *dso, void *data), void *data) +{ + int err; + + down_read(&dsos->lock); + err = __dsos__for_each_dso(dsos, cb, data); + up_read(&dsos->lock); + return err; } diff --git a/tools/perf/util/dsos.h b/tools/perf/util/dsos.h index 5dbec2bc6966..a26774950866 100644 --- a/tools/perf/util/dsos.h +++ b/tools/perf/util/dsos.h @@ -10,31 +10,43 @@ struct dso; struct dso_id; +struct kmod_path; +struct machine; /* - * DSOs are put into both a list for fast iteration and rbtree for fast - * long name lookup. + * Collection of DSOs as an array for iteration speed, but sorted for O(log n) + * lookup.
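dsos__findnew_module_dso() and dsos__for_each_dso() above follow this file's locking convention: a public function takes dsos->lock and delegates to a double-underscore variant that assumes the lock is already held. A minimal pthread-based model of that layering (illustrative names; perf's own rw_semaphore wrapper is not used here):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static int counter;	/* state guarded by 'lock' */

/* Caller must hold 'lock' for writing. */
static int __counter_add(int n)
{
	counter += n;
	return 0;
}

/* Public variant: takes the lock, delegates to the __ helper. */
static int counter_add(int n)
{
	int ret;

	pthread_rwlock_wrlock(&lock);
	ret = __counter_add(n);
	pthread_rwlock_unlock(&lock);
	return ret;
}

int main(void)
{
	counter_add(2);
	pthread_rwlock_rdlock(&lock);	/* readers take the shared side */
	printf("%d\n", counter);
	pthread_rwlock_unlock(&lock);
	return 0;
}

The split lets compound operations (find, then add under one critical section, as in dsos__findnew_module_dso() above) reuse the unlocked helpers without recursive locking.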
*/ struct dsos { - struct list_head head; - struct rb_root root; /* rbtree root sorted by long name */ struct rw_semaphore lock; + struct dso **dsos; + unsigned int cnt; + unsigned int allocated; + bool sorted; }; -void __dsos__add(struct dsos *dsos, struct dso *dso); -void dsos__add(struct dsos *dsos, struct dso *dso); -struct dso *__dsos__addnew(struct dsos *dsos, const char *name); -struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short); +void dsos__init(struct dsos *dsos); +void dsos__exit(struct dsos *dsos); -struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id); - -struct dso *__dsos__findnew_link_by_longname_id(struct rb_root *root, struct dso *dso, - const char *name, struct dso_id *id); +int __dsos__add(struct dsos *dsos, struct dso *dso); +int dsos__add(struct dsos *dsos, struct dso *dso); +struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short); -bool __dsos__read_build_ids(struct list_head *head, bool with_hits); +struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, const struct dso_id *id); + +bool dsos__read_build_ids(struct dsos *dsos, bool with_hits); -size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, +size_t dsos__fprintf_buildid(struct dsos *dsos, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm); -size_t __dsos__fprintf(struct list_head *head, FILE *fp); +size_t dsos__fprintf(struct dsos *dsos, FILE *fp); + +int dsos__hit_all(struct dsos *dsos); + +struct dso *dsos__findnew_module_dso(struct dsos *dsos, struct machine *machine, + struct kmod_path *m, const char *filename); + +struct dso *dsos__find_kernel_dso(struct dsos *dsos); + +int dsos__for_each_dso(struct dsos *dsos, int (*cb)(struct dso *dso, void *data), void *data); #endif /* __PERF_DSOS */ diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c index 2bd8585db93c..c1cc0ade48d0 100644 --- a/tools/perf/util/dump-insn.c +++ b/tools/perf/util/dump-insn.c @@ -15,7 +15,7 @@ const char *dump_insn(struct perf_insn *x __maybe_unused, } __weak -int arch_is_branch(const unsigned char *buf __maybe_unused, +int arch_is_uncond_branch(const unsigned char *buf __maybe_unused, size_t len __maybe_unused, int x86_64 __maybe_unused) { diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h index 650125061530..20d4d7bb5275 100644 --- a/tools/perf/util/dump-insn.h +++ b/tools/perf/util/dump-insn.h @@ -11,6 +11,7 @@ struct thread; struct perf_insn { /* Initialized by callers: */ struct thread *thread; + struct machine *machine; u8 cpumode; bool is64bit; int cpu; @@ -20,6 +21,6 @@ struct perf_insn { const char *dump_insn(struct perf_insn *x, u64 ip, u8 *inbuf, int inlen, int *lenp); -int arch_is_branch(const unsigned char *buf, size_t len, int x86_64); +int arch_is_uncond_branch(const unsigned char *buf, size_t len, int x86_64); #endif diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 2941d88f2199..9267af204c7d 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -9,6 +9,7 @@ #include <stdlib.h> #include "debug.h" #include "dwarf-aux.h" +#include "dwarf-regs.h" #include "strbuf.h" #include "string2.h" @@ -266,7 +267,7 @@ Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) } /* Get a type die, but skip qualifiers */ -static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) +Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) { int tag; @@ -696,6 +697,49 @@ Dwarf_Die *die_find_inlinefunc(Dwarf_Die 
*sp_die, Dwarf_Addr addr, return die_mem; } +static int __die_find_func_rettype_cb(Dwarf_Die *die_mem, void *data) +{ + const char *func_name; + + if (dwarf_tag(die_mem) != DW_TAG_subprogram) + return DIE_FIND_CB_SIBLING; + + func_name = dwarf_diename(die_mem); + if (func_name && !strcmp(func_name, data)) + return DIE_FIND_CB_END; + + return DIE_FIND_CB_SIBLING; +} + +/** + * die_find_func_rettype - Search a return type of function + * @cu_die: a CU DIE + * @name: target function name + * @die_mem: a buffer for result DIE + * + * Search a non-inlined function which matches to @name and stores the + * return type of the function to @die_mem and returns it if found. + * Returns NULL if failed. Note that it doesn't needs to find a + * definition of the function, so it doesn't match with address. + * Most likely, it can find a declaration at the top level. Thus the + * callback function continues to sibling entries only. + */ +Dwarf_Die *die_find_func_rettype(Dwarf_Die *cu_die, const char *name, + Dwarf_Die *die_mem) +{ + Dwarf_Die tmp_die; + + cu_die = die_find_child(cu_die, __die_find_func_rettype_cb, + (void *)name, &tmp_die); + if (!cu_die) + return NULL; + + if (die_get_real_type(&tmp_die, die_mem) == NULL) + return NULL; + + return die_mem; +} + struct __instance_walk_param { void *addr; int (*callback)(Dwarf_Die *, void *); @@ -1051,32 +1095,30 @@ Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, } /** - * die_get_typename - Get the name of given variable DIE - * @vr_die: a variable DIE + * die_get_typename_from_type - Get the name of given type DIE + * @type_die: a type DIE * @buf: a strbuf for result type name * - * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded. + * Get the name of @type_die and stores it to @buf. Return 0 if succeeded. * and Return -ENOENT if failed to find type name. * Note that the result will stores typedef name if possible, and stores * "*(function_type)" if the type is a function pointer. */ -int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) +int die_get_typename_from_type(Dwarf_Die *type_die, struct strbuf *buf) { - Dwarf_Die type; int tag, ret; const char *tmp = ""; - if (__die_get_real_type(vr_die, &type) == NULL) - return -ENOENT; - - tag = dwarf_tag(&type); - if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type) + tag = dwarf_tag(type_die); + if (tag == DW_TAG_pointer_type) tmp = "*"; + else if (tag == DW_TAG_array_type) + tmp = "[]"; else if (tag == DW_TAG_subroutine_type) { /* Function pointer */ return strbuf_add(buf, "(function_type)", 15); } else { - const char *name = dwarf_diename(&type); + const char *name = dwarf_diename(type_die); if (tag == DW_TAG_union_type) tmp = "union "; @@ -1089,8 +1131,35 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) /* Write a base name */ return strbuf_addf(buf, "%s%s", tmp, name ?: ""); } - ret = die_get_typename(&type, buf); - return ret ? ret : strbuf_addstr(buf, tmp); + ret = die_get_typename(type_die, buf); + if (ret < 0) { + /* void pointer has no type attribute */ + if (tag == DW_TAG_pointer_type && ret == -ENOENT) + return strbuf_addf(buf, "void*"); + + return ret; + } + return strbuf_addstr(buf, tmp); +} + +/** + * die_get_typename - Get the name of given variable DIE + * @vr_die: a variable DIE + * @buf: a strbuf for result type name + * + * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded. + * and Return -ENOENT if failed to find type name. 
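die_find_func_rettype() above drives die_find_child(), whose callback steers the traversal by returning a SIBLING/CHILD/END code. A libdw-free model of that protocol over a toy tree, to make the control flow concrete (the enum and types only mimic the dwarf-aux ones):

#include <stdio.h>
#include <string.h>

enum { FIND_CB_END, FIND_CB_CHILD, FIND_CB_SIBLING };

struct node {
	const char *name;
	struct node *child, *sibling;
};

/* Depth-first walk; the callback decides whether to descend or skip. */
static struct node *find_child(struct node *parent,
			       int (*cb)(struct node *, void *), void *data)
{
	for (struct node *n = parent ? parent->child : NULL; n; n = n->sibling) {
		switch (cb(n, data)) {
		case FIND_CB_END:
			return n;
		case FIND_CB_CHILD: {
			struct node *res = find_child(n, cb, data);

			if (res)
				return res;
			break;
		}
		default:	/* FIND_CB_SIBLING: keep scanning this level */
			break;
		}
	}
	return NULL;
}

static int match_name_cb(struct node *n, void *data)
{
	return strcmp(n->name, data) ? FIND_CB_SIBLING : FIND_CB_END;
}

int main(void)
{
	struct node leaf = { "target", NULL, NULL };
	struct node first = { "other", NULL, &leaf };
	struct node root = { "cu", &first, NULL };
	struct node *res = find_child(&root, match_name_cb, "target");

	printf("%s\n", res ? res->name : "not found");
	return 0;
}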
+ * Note that the result will stores typedef name if possible, and stores + * "*(function_type)" if the type is a function pointer. + */ +int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) +{ + Dwarf_Die type; + + if (__die_get_real_type(vr_die, &type) == NULL) + return -ENOENT; + + return die_get_typename_from_type(&type, buf); } /** @@ -1113,7 +1182,69 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); } -#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT +static int reg_from_dwarf_op(Dwarf_Op *op) +{ + switch (op->atom) { + case DW_OP_reg0 ... DW_OP_reg31: + return op->atom - DW_OP_reg0; + case DW_OP_breg0 ... DW_OP_breg31: + return op->atom - DW_OP_breg0; + case DW_OP_regx: + case DW_OP_bregx: + return op->number; + case DW_OP_fbreg: + return DWARF_REG_FB; + default: + break; + } + return -1; +} + +static int offset_from_dwarf_op(Dwarf_Op *op) +{ + switch (op->atom) { + case DW_OP_reg0 ... DW_OP_reg31: + case DW_OP_regx: + return 0; + case DW_OP_breg0 ... DW_OP_breg31: + case DW_OP_fbreg: + return op->number; + case DW_OP_bregx: + return op->number2; + default: + break; + } + return -1; +} + +static bool check_allowed_ops(Dwarf_Op *ops, size_t nops) +{ + /* The first op is checked separately */ + ops++; + nops--; + + /* + * It needs to make sure if the location expression matches to the given + * register and offset exactly. Thus it rejects any complex expressions + * and only allows a few of selected operators that doesn't change the + * location. + */ + while (nops) { + switch (ops->atom) { + case DW_OP_stack_value: + case DW_OP_deref_size: + case DW_OP_deref: + case DW_OP_piece: + break; + default: + return false; + } + ops++; + nops--; + } + return true; +} + /** * die_get_var_innermost_scope - Get innermost scope range of given variable DIE * @sp_die: a subprogram DIE @@ -1238,14 +1369,402 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf) out: return ret; } -#else -int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, - Dwarf_Die *vr_die __maybe_unused, - struct strbuf *buf __maybe_unused) + +/* Interval parameters for __die_find_var_reg_cb() */ +struct find_var_data { + /* Target instruction address */ + Dwarf_Addr pc; + /* Target memory address (for global data) */ + Dwarf_Addr addr; + /* Target register */ + unsigned reg; + /* Access offset, set for global data */ + int offset; + /* True if the current register is the frame base */ + bool is_fbreg; +}; + +/* Max number of registers DW_OP_regN supports */ +#define DWARF_OP_DIRECT_REGS 32 + +static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data, + s64 addr_offset, s64 addr_type, bool is_pointer) +{ + Dwarf_Die type_die; + Dwarf_Word size; + s64 offset = addr_offset - addr_type; + + if (offset == 0) { + /* Update offset relative to the start of the variable */ + data->offset = 0; + return true; + } + + if (offset < 0) + return false; + + if (die_get_real_type(die_mem, &type_die) == NULL) + return false; + + if (is_pointer && dwarf_tag(&type_die) == DW_TAG_pointer_type) { + /* Get the target type of the pointer */ + if (die_get_real_type(&type_die, &type_die) == NULL) + return false; + } + + if (dwarf_aggregate_size(&type_die, &size) < 0) + return false; + + if ((u64)offset >= size) + return false; + + /* Update offset relative to the start of the variable */ + data->offset = offset; + return true; +} + +/** + * is_breg_access_indirect - Check if breg based access implies type + * dereference + * 
@ops: DWARF operations array + * @nops: Number of operations in @ops + * + * Returns true if the DWARF expression evaluates to the variable's + * value, so the memory access on that register needs type dereference. + * Returns false if the expression evaluates to the variable's address. + * This is called after check_allowed_ops. + */ +static bool is_breg_access_indirect(Dwarf_Op *ops, size_t nops) { - return -ENOTSUP; + /* only the base register */ + if (nops == 1) + return false; + + if (nops == 2 && ops[1].atom == DW_OP_stack_value) + return true; + + if (nops == 3 && (ops[1].atom == DW_OP_deref || + ops[1].atom == DW_OP_deref_size) && + ops[2].atom == DW_OP_stack_value) + return false; + /* unreachable, OP not supported */ + return false; +} + +/* Only checks direct child DIEs in the given scope. */ +static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg) +{ + struct find_var_data *data = arg; + int tag = dwarf_tag(die_mem); + ptrdiff_t off = 0; + Dwarf_Attribute attr; + Dwarf_Addr base, start, end; + Dwarf_Op *ops; + size_t nops; + + if (tag != DW_TAG_variable && tag != DW_TAG_formal_parameter) + return DIE_FIND_CB_SIBLING; + + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) + return DIE_FIND_CB_SIBLING; + + while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) { + /* Assuming the location list is sorted by address */ + if (end <= data->pc) + continue; + if (start > data->pc) + break; + + /* Local variables accessed using frame base register */ + if (data->is_fbreg && ops->atom == DW_OP_fbreg && + check_allowed_ops(ops, nops) && + match_var_offset(die_mem, data, data->offset, ops->number, + is_breg_access_indirect(ops, nops))) + return DIE_FIND_CB_END; + + /* Only match with a simple case */ + if (data->reg < DWARF_OP_DIRECT_REGS) { + /* pointer variables saved in a register 0 to 31 */ + if (ops->atom == (DW_OP_reg0 + data->reg) && + check_allowed_ops(ops, nops) && + match_var_offset(die_mem, data, data->offset, 0, + /*is_pointer=*/true)) + return DIE_FIND_CB_END; + + /* variables accessed by a register + offset */ + if (ops->atom == (DW_OP_breg0 + data->reg) && + check_allowed_ops(ops, nops) && + match_var_offset(die_mem, data, data->offset, ops->number, + is_breg_access_indirect(ops, nops))) + return DIE_FIND_CB_END; + } else { + /* pointer variables saved in a register 32 or above */ + if (ops->atom == DW_OP_regx && ops->number == data->reg && + check_allowed_ops(ops, nops) && + match_var_offset(die_mem, data, data->offset, 0, + /*is_pointer=*/true)) + return DIE_FIND_CB_END; + + /* variables accessed by a register + offset */ + if (ops->atom == DW_OP_bregx && data->reg == ops->number && + check_allowed_ops(ops, nops) && + match_var_offset(die_mem, data, data->offset, ops->number2, + is_breg_access_indirect(ops, nops))) + return DIE_FIND_CB_END; + } + } + return DIE_FIND_CB_SIBLING; +} + +/** + * die_find_variable_by_reg - Find a variable saved in a register + * @sc_die: a scope DIE + * @pc: the program address to find + * @reg: the register number to find + * @poffset: pointer to offset, will be updated for fbreg case + * @is_fbreg: boolean value if the current register is the frame base + * @die_mem: a buffer to save the resulting DIE + * + * Find the variable DIE accessed by the given register. It'll update the @offset + * when the variable is in the stack. 
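__die_find_var_reg_cb() above matches only a handful of DWARF location opcodes. The sketch below decodes a register and offset from a single operation the same way reg_from_dwarf_op() and offset_from_dwarf_op() in this patch do; the opcode values are taken from the DWARF specification, and struct op is a stand-in for libdw's Dwarf_Op:

#include <stdint.h>
#include <stdio.h>

/* Opcode values per the DWARF specification. */
#define DW_OP_reg0	0x50
#define DW_OP_reg31	0x6f
#define DW_OP_breg0	0x70
#define DW_OP_breg31	0x8f
#define DW_OP_regx	0x90
#define DW_OP_fbreg	0x91
#define DW_OP_bregx	0x92

struct op { uint8_t atom; int64_t number; int64_t number2; };

static int reg_from_op(const struct op *op)
{
	if (op->atom >= DW_OP_reg0 && op->atom <= DW_OP_reg31)
		return op->atom - DW_OP_reg0;
	if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31)
		return op->atom - DW_OP_breg0;
	if (op->atom == DW_OP_regx || op->atom == DW_OP_bregx)
		return op->number;
	return -1;	/* DW_OP_fbreg maps to a frame-base pseudo register above */
}

static int64_t offset_from_op(const struct op *op)
{
	if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31)
		return op->number;	/* breg: the operand is the offset */
	if (op->atom == DW_OP_bregx)
		return op->number2;	/* bregx: reg in number, offset in number2 */
	return 0;			/* plain register: value, no memory offset */
}

int main(void)
{
	struct op o = { .atom = DW_OP_breg0 + 6, .number = -16 }; /* e.g. rbp-16 on x86-64 */

	printf("reg=%d offset=%lld\n", reg_from_op(&o), (long long)offset_from_op(&o));
	return 0;
}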
+ */ +Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg, + int *poffset, bool is_fbreg, + Dwarf_Die *die_mem) +{ + struct find_var_data data = { + .pc = pc, + .reg = reg, + .offset = *poffset, + .is_fbreg = is_fbreg, + }; + Dwarf_Die *result; + + result = die_find_child(sc_die, __die_find_var_reg_cb, &data, die_mem); + if (result) + *poffset = data.offset; + return result; +} + +/* Only checks direct child DIEs in the given scope */ +static int __die_find_var_addr_cb(Dwarf_Die *die_mem, void *arg) +{ + struct find_var_data *data = arg; + int tag = dwarf_tag(die_mem); + ptrdiff_t off = 0; + Dwarf_Attribute attr; + Dwarf_Addr base, start, end; + Dwarf_Op *ops; + size_t nops; + + if (tag != DW_TAG_variable) + return DIE_FIND_CB_SIBLING; + + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) + return DIE_FIND_CB_SIBLING; + + while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) { + if (ops->atom != DW_OP_addr) + continue; + + if (check_allowed_ops(ops, nops) && + match_var_offset(die_mem, data, data->addr, ops->number, + /*is_pointer=*/false)) + return DIE_FIND_CB_END; + } + return DIE_FIND_CB_SIBLING; +} + +/** + * die_find_variable_by_addr - Find variable located at given address + * @sc_die: a scope DIE + * @addr: the data address to find + * @die_mem: a buffer to save the resulting DIE + * @offset: the offset in the resulting type + * + * Find the variable DIE located at the given address (in PC-relative mode). + * This is usually for global variables. + */ +Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr addr, + Dwarf_Die *die_mem, int *offset) +{ + struct find_var_data data = { + .addr = addr, + }; + Dwarf_Die *result; + + result = die_find_child(sc_die, __die_find_var_addr_cb, &data, die_mem); + if (result) + *offset = data.offset; + return result; +} + +static int __die_collect_vars_cb(Dwarf_Die *die_mem, void *arg) +{ + struct die_var_type **var_types = arg; + Dwarf_Die type_die; + int tag = dwarf_tag(die_mem); + Dwarf_Attribute attr; + Dwarf_Addr base, start, end; + Dwarf_Op *ops; + size_t nops; + struct die_var_type *vt; + + if (tag != DW_TAG_variable && tag != DW_TAG_formal_parameter) + return DIE_FIND_CB_SIBLING; + + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) + return DIE_FIND_CB_SIBLING; + + /* + * Only collect the first location as it can reconstruct the + * remaining state by following the instructions. + * start = 0 means it covers the whole range. + */ + if (dwarf_getlocations(&attr, 0, &base, &start, &end, &ops, &nops) <= 0) + return DIE_FIND_CB_SIBLING; + + if (!check_allowed_ops(ops, nops)) + return DIE_FIND_CB_SIBLING; + + if (__die_get_real_type(die_mem, &type_die) == NULL) + return DIE_FIND_CB_SIBLING; + + vt = malloc(sizeof(*vt)); + if (vt == NULL) + return DIE_FIND_CB_END; + + /* Usually a register holds the value of a variable */ + vt->is_reg_var_addr = false; + + if (((ops->atom >= DW_OP_breg0 && ops->atom <= DW_OP_breg31) || + ops->atom == DW_OP_bregx || ops->atom == DW_OP_fbreg) && + !is_breg_access_indirect(ops, nops)) + /* The register contains an address of the variable. 
*/ + vt->is_reg_var_addr = true; + + vt->die_off = dwarf_dieoffset(&type_die); + vt->addr = start; + vt->reg = reg_from_dwarf_op(ops); + vt->offset = offset_from_dwarf_op(ops); + vt->next = *var_types; + *var_types = vt; + + return DIE_FIND_CB_SIBLING; +} + +/** + * die_collect_vars - Save all variables and parameters + * @sc_die: a scope DIE + * @var_types: a pointer to save the resulting list + * + * Save all variables and parameters in the @sc_die and save them to @var_types. + * The @var_types is a singly-linked list containing type and location info. + * Actual type can be retrieved using dwarf_offdie() with 'die_off' later. + * + * Callers should free @var_types. + */ +void die_collect_vars(Dwarf_Die *sc_die, struct die_var_type **var_types) +{ + Dwarf_Die die_mem; + + die_find_child(sc_die, __die_collect_vars_cb, (void *)var_types, &die_mem); +} + +static int __die_collect_global_vars_cb(Dwarf_Die *die_mem, void *arg) +{ + struct die_var_type **var_types = arg; + Dwarf_Die type_die; + int tag = dwarf_tag(die_mem); + Dwarf_Attribute attr; + Dwarf_Addr base, start, end; + Dwarf_Op *ops; + size_t nops; + struct die_var_type *vt; + + if (tag != DW_TAG_variable) + return DIE_FIND_CB_SIBLING; + + if (dwarf_attr(die_mem, DW_AT_location, &attr) == NULL) + return DIE_FIND_CB_SIBLING; + + /* Only collect the location with an absolute address. */ + if (dwarf_getlocations(&attr, 0, &base, &start, &end, &ops, &nops) <= 0) + return DIE_FIND_CB_SIBLING; + + if (ops->atom != DW_OP_addr) + return DIE_FIND_CB_SIBLING; + + if (!check_allowed_ops(ops, nops)) + return DIE_FIND_CB_SIBLING; + + if (die_get_real_type(die_mem, &type_die) == NULL) + return DIE_FIND_CB_SIBLING; + + vt = malloc(sizeof(*vt)); + if (vt == NULL) + return DIE_FIND_CB_END; + + vt->die_off = dwarf_dieoffset(&type_die); + vt->addr = ops->number; + vt->reg = -1; + vt->offset = 0; + vt->next = *var_types; + *var_types = vt; + + return DIE_FIND_CB_SIBLING; +} + +/** + * die_collect_global_vars - Save all global variables + * @cu_die: a CU DIE + * @var_types: a pointer to save the resulting list + * + * Save all global variables in the @cu_die and save them to @var_types. + * The @var_types is a singly-linked list containing type and location info. + * Actual type can be retrieved using dwarf_offdie() with 'die_off' later. + * + * Callers should free @var_types. + */ +void die_collect_global_vars(Dwarf_Die *cu_die, struct die_var_type **var_types) +{ + Dwarf_Die die_mem; + + die_find_child(cu_die, __die_collect_global_vars_cb, (void *)var_types, &die_mem); +} + +/** + * die_get_cfa - Get frame base information + * @dwarf: a Dwarf info + * @pc: program address + * @preg: pointer for saved register + * @poffset: pointer for saved offset + * + * This function gets register and offset for CFA (Canonical Frame Address) + * by searching the CIE/FDE info. The CFA usually points to the start address + * of the current stack frame and local variables can be located using an offset + * from the CFA. The @preg and @poffset will be updated if it returns 0. 
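die_collect_vars() and die_collect_global_vars() above hand back a singly linked list built by prepending, with freeing left to the caller. A minimal sketch of that ownership contract, with a node layout loosely mirroring struct die_var_type:

#include <stdio.h>
#include <stdlib.h>

struct var_type {
	struct var_type *next;
	unsigned long long die_off;
	int reg;
};

/* Prepend a node, as __die_collect_vars_cb() does with *var_types. */
static int collect(struct var_type **list, unsigned long long die_off, int reg)
{
	struct var_type *vt = malloc(sizeof(*vt));

	if (!vt)
		return -1;
	vt->die_off = die_off;
	vt->reg = reg;
	vt->next = *list;
	*list = vt;
	return 0;
}

/* The caller owns the list and must free every node. */
static void free_list(struct var_type *list)
{
	while (list) {
		struct var_type *next = list->next;

		free(list);
		list = next;
	}
}

int main(void)
{
	struct var_type *list = NULL;

	collect(&list, 0x100, 6);
	collect(&list, 0x200, 7);
	for (struct var_type *vt = list; vt; vt = vt->next)
		printf("die_off=%#llx reg=%d\n", vt->die_off, vt->reg);
	free_list(list);
	return 0;
}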
+ */ +int die_get_cfa(Dwarf *dwarf, u64 pc, int *preg, int *poffset) +{ + Dwarf_CFI *cfi; + Dwarf_Frame *frame = NULL; + Dwarf_Op *ops = NULL; + size_t nops; + + cfi = dwarf_getcfi(dwarf); + if (cfi == NULL) + return -1; + + if (!dwarf_cfi_addrframe(cfi, pc, &frame) && + !dwarf_frame_cfa(frame, &ops, &nops) && + check_allowed_ops(ops, nops)) { + *preg = reg_from_dwarf_op(ops); + *poffset = offset_from_dwarf_op(ops); + return 0; + } + return -1; } -#endif /* * die_has_loclist - Check if DW_AT_location of @vr_die is a location list @@ -1425,3 +1944,185 @@ void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, *entrypc = postprologue_addr; } + +/* Internal parameters for __die_find_scope_cb() */ +struct find_scope_data { + /* Target instruction address */ + Dwarf_Addr pc; + /* Number of scopes found [output] */ + int nr; + /* Array of scopes found, 0 for the outermost one. [output] */ + Dwarf_Die *scopes; +}; + +static int __die_find_scope_cb(Dwarf_Die *die_mem, void *arg) +{ + struct find_scope_data *data = arg; + int tag = dwarf_tag(die_mem); + + if (dwarf_haspc(die_mem, data->pc)) { + Dwarf_Die *tmp; + + tmp = realloc(data->scopes, (data->nr + 1) * sizeof(*tmp)); + if (tmp == NULL) + return DIE_FIND_CB_END; + + memcpy(tmp + data->nr, die_mem, sizeof(*die_mem)); + data->scopes = tmp; + data->nr++; + return DIE_FIND_CB_CHILD; + } + + /* + * If the DIE doesn't have the PC, we still need to check its children + * and siblings if it's a container like a namespace. + */ + if (tag == DW_TAG_namespace) + return DIE_FIND_CB_CONTINUE; + + return DIE_FIND_CB_SIBLING; +} + +/** + * die_get_scopes - Return a list of scopes including the address + * @cu_die: a compile unit DIE + * @pc: the address to find + * @scopes: the array of DIEs for scopes (result) + * + * This function does the same as the dwarf_getscopes() but doesn't follow + * the origins of inlined functions. It returns the number of scopes saved + * in the @scopes argument. The outer scope will be saved first (index 0) and + * the last one is the innermost scope at the @pc. + */ +int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes) +{ + struct find_scope_data data = { + .pc = pc, + }; + Dwarf_Die die_mem; + + die_find_child(cu_die, __die_find_scope_cb, &data, &die_mem); + + *scopes = data.scopes; + return data.nr; +} + +static int __die_find_member_offset_cb(Dwarf_Die *die_mem, void *arg) +{ + Dwarf_Die type_die; + Dwarf_Word size, loc; + Dwarf_Word offset = (long)arg; + int tag = dwarf_tag(die_mem); + + if (tag != DW_TAG_member) + return DIE_FIND_CB_SIBLING; + + /* Unions might not have location */ + if (die_get_data_member_location(die_mem, &loc) < 0) { + Dwarf_Attribute attr; + + if (dwarf_attr_integrate(die_mem, DW_AT_data_bit_offset, &attr) && + dwarf_formudata(&attr, &loc) == 0) + loc /= 8; + else + loc = 0; + } + + if (offset == loc) + return DIE_FIND_CB_END; + + if (die_get_real_type(die_mem, &type_die) == NULL) { + // TODO: add a pr_debug_dtp() later for this unlikely failure + return DIE_FIND_CB_SIBLING; + } + + if (dwarf_aggregate_size(&type_die, &size) < 0) + size = 0; + + if (loc < offset && offset < (loc + size)) + return DIE_FIND_CB_END; + + return DIE_FIND_CB_SIBLING; +} + +/** + * die_get_member_type - Return type info of struct member + * @type_die: a type DIE + * @offset: offset in the type + * @die_mem: a buffer to save the resulting DIE + * + * This function returns a type of a member in @type_die where it's located at + * @offset if it's a struct. 
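die_get_member_type() above maps a byte offset to a member by testing the offset against [loc, loc + size) and, for nested records, rebasing the offset before descending. The same walk over a hand-written member table (purely illustrative; the real code reads member locations and sizes from DWARF):

#include <stdio.h>

struct member { const char *name; unsigned int loc, size; };

/* Members of a toy struct { u64 a; struct { u32 x; u32 y; } b; } */
static const struct member outer[] = {
	{ "a", 0, 8 },
	{ "b", 8, 8 },
};

static const struct member *find_member(const struct member *members,
					int n, unsigned int offset)
{
	for (int i = 0; i < n; i++) {
		if (offset >= members[i].loc &&
		    offset < members[i].loc + members[i].size)
			return &members[i];
	}
	return NULL;
}

int main(void)
{
	unsigned int offset = 12;
	const struct member *m = find_member(outer, 2, offset);

	if (m) {
		/* Rebase before descending into a nested struct/union. */
		printf("%s (+%u within the member)\n", m->name, offset - m->loc);
	}
	return 0;
}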
For now, it just returns the first matching + * member in a union. For other types, it'd return the given type directly + * if it's within the size of the type or NULL otherwise. + */ +Dwarf_Die *die_get_member_type(Dwarf_Die *type_die, int offset, + Dwarf_Die *die_mem) +{ + Dwarf_Die *member; + Dwarf_Die mb_type; + int tag; + + tag = dwarf_tag(type_die); + /* If it's not a compound type, return the type directly */ + if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) { + Dwarf_Word size; + + if (dwarf_aggregate_size(type_die, &size) < 0) + size = 0; + + if ((unsigned)offset >= size) + return NULL; + + *die_mem = *type_die; + return die_mem; + } + + mb_type = *type_die; + /* TODO: Handle union types better? */ + while (tag == DW_TAG_structure_type || tag == DW_TAG_union_type) { + member = die_find_child(&mb_type, __die_find_member_offset_cb, + (void *)(long)offset, die_mem); + if (member == NULL) + return NULL; + + if (die_get_real_type(member, &mb_type) == NULL) + return NULL; + + tag = dwarf_tag(&mb_type); + + if (tag == DW_TAG_structure_type || tag == DW_TAG_union_type) { + Dwarf_Word loc; + + /* Update offset for the start of the member struct */ + if (die_get_data_member_location(member, &loc) == 0) + offset -= loc; + } + } + *die_mem = mb_type; + return die_mem; +} + +/** + * die_deref_ptr_type - Return type info for pointer access + * @ptr_die: a pointer type DIE + * @offset: access offset for the pointer + * @die_mem: a buffer to save the resulting DIE + * + * This function follows the pointer in @ptr_die with given @offset + * and saves the resulting type in @die_mem. If the pointer points + * a struct type, actual member at the offset would be returned. + */ +Dwarf_Die *die_deref_ptr_type(Dwarf_Die *ptr_die, int offset, + Dwarf_Die *die_mem) +{ + Dwarf_Die type_die; + + if (dwarf_tag(ptr_die) != DW_TAG_pointer_type) + return NULL; + + if (die_get_real_type(ptr_die, &type_die) == NULL) + return NULL; + + return die_get_member_type(&type_die, offset, die_mem); +} diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 7ec8bc1083bb..cd481ec9c5a1 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -56,6 +56,8 @@ const char *die_get_decl_file(Dwarf_Die *dw_die); /* Get type die */ Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); +/* Get a type die, but skip qualifiers */ +Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); /* Get a type die, but skip qualifiers and typedef */ Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); @@ -94,6 +96,10 @@ Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, Dwarf_Die *die_mem); +/* Search a non-inlined function by name and returns its return type */ +Dwarf_Die *die_find_func_rettype(Dwarf_Die *sp_die, const char *name, + Dwarf_Die *die_mem); + /* Walk on the instances of given DIE */ int die_walk_instances(Dwarf_Die *in_die, int (*callback)(Dwarf_Die *, void *), void *data); @@ -116,12 +122,14 @@ Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name, Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, Dwarf_Die *die_mem); +/* Get the name of given type DIE */ +int die_get_typename_from_type(Dwarf_Die *type_die, struct strbuf *buf); + /* Get the name of given variable DIE */ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf); /* Get the name and type of given variable DIE, stored as "type\tname" */ int 
die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf); -int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); /* Check if target program is compiled with optimization */ bool die_is_optimized_target(Dwarf_Die *cu_die); @@ -130,4 +138,45 @@ bool die_is_optimized_target(Dwarf_Die *cu_die); void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die, Dwarf_Addr *entrypc); -#endif +/* Get the list of scopes including the address */ +int die_get_scopes(Dwarf_Die *cu_die, Dwarf_Addr pc, Dwarf_Die **scopes); + +/* Variable type information */ +struct die_var_type { + struct die_var_type *next; + u64 die_off; + u64 addr; + int reg; + int offset; + /* Whether the register holds an address of the type */ + bool is_reg_var_addr; +}; + +/* Return type info of a member at offset */ +Dwarf_Die *die_get_member_type(Dwarf_Die *type_die, int offset, Dwarf_Die *die_mem); + +/* Return type info where the pointer and offset point to */ +Dwarf_Die *die_deref_ptr_type(Dwarf_Die *ptr_die, int offset, Dwarf_Die *die_mem); + +/* Get byte offset range of given variable DIE */ +int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); + +/* Find a variable saved in the 'reg' at the given address */ +Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die, Dwarf_Addr pc, int reg, + int *poffset, bool is_fbreg, + Dwarf_Die *die_mem); + +/* Find a (global) variable located at 'addr' */ +Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die, Dwarf_Addr addr, + Dwarf_Die *die_mem, int *offset); + +/* Save all variables and parameters in this scope */ +void die_collect_vars(Dwarf_Die *sc_die, struct die_var_type **var_types); + +/* Save all global variables in this CU */ +void die_collect_global_vars(Dwarf_Die *cu_die, struct die_var_type **var_types); + +/* Get the frame base information from CFA */ +int die_get_cfa(Dwarf *dwarf, u64 pc, int *preg, int *poffset); + +#endif /* _DWARF_AUX_H */ diff --git a/tools/perf/util/dwarf-regs-csky.c b/tools/perf/util/dwarf-regs-csky.c new file mode 100644 index 000000000000..d38ef1f07f3e --- /dev/null +++ b/tools/perf/util/dwarf-regs-csky.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. +// Mapping of DWARF debug register numbers into register names.
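The C-SKY tables that follow are sparse: unused DWARF numbers hold NULL and every access is bounds-checked, the same pattern as __get_dwarf_regstr() later in these hunks. A tiny standalone version of the lookup:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* Sparse table: index = DWARF register number, NULL = no such register. */
static const char *regs[] = {
	"%a0", "%a1", NULL, "%sp",
};

static const char *get_regstr(unsigned int n)
{
	return n < ARRAY_SIZE(regs) ? regs[n] : NULL;
}

int main(void)
{
	for (unsigned int n = 0; n < 6; n++) {
		const char *s = get_regstr(n);

		printf("reg %u -> %s\n", n, s ? s : "(unmapped)");
	}
	return 0;
}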
+ +#include <stddef.h> +#include <dwarf-regs.h> + +#define CSKY_ABIV2_MAX_REGS 73 +const char *csky_dwarf_regs_table_abiv2[CSKY_ABIV2_MAX_REGS] = { + /* r0 ~ r8 */ + "%a0", "%a1", "%a2", "%a3", "%regs0", "%regs1", "%regs2", "%regs3", + /* r9 ~ r15 */ + "%regs4", "%regs5", "%regs6", "%regs7", "%regs8", "%regs9", "%sp", + "%lr", + /* r16 ~ r23 */ + "%exregs0", "%exregs1", "%exregs2", "%exregs3", "%exregs4", + "%exregs5", "%exregs6", "%exregs7", + /* r24 ~ r31 */ + "%exregs8", "%exregs9", "%exregs10", "%exregs11", "%exregs12", + "%exregs13", "%exregs14", "%tls", + "%pc", NULL, NULL, NULL, "%hi", "%lo", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "%epc", +}; + +#define CSKY_ABIV1_MAX_REGS 57 +const char *csky_dwarf_regs_table_abiv1[CSKY_ABIV1_MAX_REGS] = { + /* r0 ~ r8 */ + "%sp", "%regs9", "%a0", "%a1", "%a2", "%a3", "%regs0", "%regs1", + /* r9 ~ r15 */ + "%regs2", "%regs3", "%regs4", "%regs5", "%regs6", "%regs7", "%regs8", + "%lr", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "%epc", +}; + +const char *get_csky_regstr(unsigned int n, unsigned int flags) +{ + if (flags & EF_CSKY_ABIV2) + return (n < CSKY_ABIV2_MAX_REGS) ? csky_dwarf_regs_table_abiv2[n] : NULL; + + return (n < CSKY_ABIV1_MAX_REGS) ? csky_dwarf_regs_table_abiv1[n] : NULL; +} diff --git a/tools/perf/util/dwarf-regs-powerpc.c b/tools/perf/util/dwarf-regs-powerpc.c new file mode 100644 index 000000000000..caf77a234c78 --- /dev/null +++ b/tools/perf/util/dwarf-regs-powerpc.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Mapping of DWARF debug register numbers into register names. + * + * Copyright (C) 2010 Ian Munsie, IBM Corporation. + */ + +#include <dwarf-regs.h> + +#define PPC_OP(op) (((op) >> 26) & 0x3F) +#define PPC_RA(a) (((a) >> 16) & 0x1f) +#define PPC_RT(t) (((t) >> 21) & 0x1f) +#define PPC_RB(b) (((b) >> 11) & 0x1f) +#define PPC_D(D) ((D) & 0xfffe) +#define PPC_DS(DS) ((DS) & 0xfffc) +#define OP_LD 58 +#define OP_STD 62 + +static int get_source_reg(u32 raw_insn) +{ + return PPC_RA(raw_insn); +} + +static int get_target_reg(u32 raw_insn) +{ + return PPC_RT(raw_insn); +} + +static int get_offset_opcode(u32 raw_insn) +{ + int opcode = PPC_OP(raw_insn); + + /* DS- form */ + if ((opcode == OP_LD) || (opcode == OP_STD)) + return PPC_DS(raw_insn); + else + return PPC_D(raw_insn); +} + +/* + * Fills the required fields for op_loc depending on if it + * is a source or target. 
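The PPC_OP/PPC_RT/PPC_RA macros above slice fixed bit fields out of a 32-bit PowerPC instruction word. A self-contained demonstration of that shift-and-mask decoding on a synthetic word (the OP/RT/RA layout matches the macros in this file; the displacement handling is simplified here to plain 16-bit sign extension):

#include <stdint.h>
#include <stdio.h>

#define OP(insn) (((insn) >> 26) & 0x3f)	/* bits 31..26: primary opcode */
#define RT(insn) (((insn) >> 21) & 0x1f)	/* bits 25..21: target register */
#define RA(insn) (((insn) >> 16) & 0x1f)	/* bits 20..16: base register */
#define D(insn)  ((int16_t)((insn) & 0xffff))	/* low 16 bits: signed displacement */

int main(void)
{
	/* Pack "opcode 32 (lwz), RT=3, RA=1, D=-8" into one word. */
	uint32_t insn = (32u << 26) | (3u << 21) | (1u << 16) | (uint16_t)-8;

	printf("op=%u rt=%u ra=%u d=%d\n",
	       (unsigned)OP(insn), (unsigned)RT(insn),
	       (unsigned)RA(insn), (int)D(insn));
	return 0;
}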
+ * D form: ins RT,D(RA) -> src_reg1 = RA, offset = D, dst_reg1 = RT + * DS form: ins RT,DS(RA) -> src_reg1 = RA, offset = DS, dst_reg1 = RT + * X form: ins RT,RA,RB -> src_reg1 = RA, src_reg2 = RB, dst_reg1 = RT + */ +void get_powerpc_regs(u32 raw_insn, int is_source, + struct annotated_op_loc *op_loc) +{ + if (is_source) + op_loc->reg1 = get_source_reg(raw_insn); + else + op_loc->reg1 = get_target_reg(raw_insn); + + if (op_loc->multi_regs) + op_loc->reg2 = PPC_RB(raw_insn); + + /* TODO: Implement offset handling for X Form */ + if ((op_loc->mem_ref) && (PPC_OP(raw_insn) != 31)) + op_loc->offset = get_offset_opcode(raw_insn); +} diff --git a/tools/perf/util/dwarf-regs-x86.c b/tools/perf/util/dwarf-regs-x86.c new file mode 100644 index 000000000000..7a55c65e8da6 --- /dev/null +++ b/tools/perf/util/dwarf-regs-x86.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. + * Extracted from probe-finder.c + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + */ + +#include <errno.h> /* for EINVAL */ +#include <string.h> /* for strcmp */ +#include <linux/kernel.h> /* for ARRAY_SIZE */ +#include <dwarf-regs.h> + +struct dwarf_regs_idx { + const char *name; + int idx; +}; + +static const struct dwarf_regs_idx x86_regidx_table[] = { + { "rax", 0 }, { "eax", 0 }, { "ax", 0 }, { "al", 0 }, + { "rdx", 1 }, { "edx", 1 }, { "dx", 1 }, { "dl", 1 }, + { "rcx", 2 }, { "ecx", 2 }, { "cx", 2 }, { "cl", 2 }, + { "rbx", 3 }, { "ebx", 3 }, { "bx", 3 }, { "bl", 3 }, + { "rsi", 4 }, { "esi", 4 }, { "si", 4 }, { "sil", 4 }, + { "rdi", 5 }, { "edi", 5 }, { "di", 5 }, { "dil", 5 }, + { "rbp", 6 }, { "ebp", 6 }, { "bp", 6 }, { "bpl", 6 }, + { "rsp", 7 }, { "esp", 7 }, { "sp", 7 }, { "spl", 7 }, + { "r8", 8 }, { "r8d", 8 }, { "r8w", 8 }, { "r8b", 8 }, + { "r9", 9 }, { "r9d", 9 }, { "r9w", 9 }, { "r9b", 9 }, + { "r10", 10 }, { "r10d", 10 }, { "r10w", 10 }, { "r10b", 10 }, + { "r11", 11 }, { "r11d", 11 }, { "r11w", 11 }, { "r11b", 11 }, + { "r12", 12 }, { "r12d", 12 }, { "r12w", 12 }, { "r12b", 12 }, + { "r13", 13 }, { "r13d", 13 }, { "r13w", 13 }, { "r13b", 13 }, + { "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 }, + { "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 }, + { "rip", DWARF_REG_PC }, +}; + +int get_x86_regnum(const char *name) +{ + unsigned int i; + + if (*name != '%') + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(x86_regidx_table); i++) + if (!strcmp(x86_regidx_table[i].name, name + 1)) + return x86_regidx_table[i].idx; + return -ENOENT; +} diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 69cfaa5953bf..28a1cfdf26d4 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -5,19 +5,14 @@ * Written by: Masami Hiramatsu <mhiramat@kernel.org> */ +#include <stdlib.h> +#include <string.h> #include <debug.h> #include <dwarf-regs.h> #include <elf.h> +#include <errno.h> #include <linux/kernel.h> -#ifndef EM_AARCH64 -#define EM_AARCH64 183 /* ARM 64 bit */ -#endif - -#ifndef EM_LOONGARCH -#define EM_LOONGARCH 258 /* LoongArch */ -#endif - /* Define const char * {arch}_register_tbl[] */ #define DEFINE_DWARF_REGSTR_TABLE #include "../arch/x86/include/dwarf-regs-table.h" @@ -25,6 +20,7 @@ #include "../arch/arm64/include/dwarf-regs-table.h" #include "../arch/sh/include/dwarf-regs-table.h" #include "../arch/powerpc/include/dwarf-regs-table.h" +#include "../arch/riscv/include/dwarf-regs-table.h" #include "../arch/s390/include/dwarf-regs-table.h"
#include "../arch/sparc/include/dwarf-regs-table.h" #include "../arch/xtensa/include/dwarf-regs-table.h" @@ -34,11 +30,13 @@ #define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL) /* Return architecture dependent register string (for kprobe-tracer) */ -const char *get_dwarf_regstr(unsigned int n, unsigned int machine) +const char *get_dwarf_regstr(unsigned int n, unsigned int machine, unsigned int flags) { + if (machine == EM_NONE) { + /* Generic arch - use host arch */ + machine = EM_HOST; + } switch (machine) { - case EM_NONE: /* Generic arch - use host arch */ - return get_arch_regstr(n); case EM_386: return __get_dwarf_regstr(x86_32_regstr_tbl, n); case EM_X86_64: @@ -47,6 +45,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) return __get_dwarf_regstr(arm_regstr_tbl, n); case EM_AARCH64: return __get_dwarf_regstr(aarch64_regstr_tbl, n); + case EM_CSKY: + return get_csky_regstr(n, flags); case EM_SH: return __get_dwarf_regstr(sh_regstr_tbl, n); case EM_S390: @@ -54,6 +54,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) case EM_PPC: case EM_PPC64: return __get_dwarf_regstr(powerpc_regstr_tbl, n); + case EM_RISCV: + return __get_dwarf_regstr(riscv_regstr_tbl, n); case EM_SPARC: case EM_SPARCV9: return __get_dwarf_regstr(sparc_regstr_tbl, n); @@ -68,3 +70,47 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) } return NULL; } + +#if EM_HOST != EM_X86_64 && EM_HOST != EM_386 +__weak int get_arch_regnum(const char *name __maybe_unused) +{ + return -ENOTSUP; +} +#endif + +/* Return DWARF register number from architecture register name */ +int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags __maybe_unused) +{ + char *regname = strdup(name); + int reg = -1; + char *p; + + if (regname == NULL) + return -EINVAL; + + /* For convenience, remove trailing characters */ + p = strpbrk(regname, " ,)"); + if (p) + *p = '\0'; + + if (machine == EM_NONE) { + /* Generic arch - use host arch */ + machine = EM_HOST; + } + switch (machine) { +#if EM_HOST != EM_X86_64 && EM_HOST != EM_386 + case EM_HOST: + reg = get_arch_regnum(regname); + break; +#endif + case EM_X86_64: + fallthrough; + case EM_386: + reg = get_x86_regnum(regname); + break; + default: + pr_err("ELF MACHINE %x is not supported.\n", machine); + } + free(regname); + return reg; +} diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 9eabf3ec56e9..f1626d2032cd 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -3,32 +3,46 @@ #include "debug.h" #include "env.h" #include "util/header.h" +#include "util/rwsem.h" +#include <linux/compiler.h> #include <linux/ctype.h> +#include <linux/rbtree.h> +#include <linux/string.h> #include <linux/zalloc.h> #include "cgroup.h" #include <errno.h> #include <sys/utsname.h> #include <stdlib.h> #include <string.h> +#include "pmu.h" #include "pmus.h" #include "strbuf.h" - -struct perf_env perf_env; +#include "trace/beauty/beauty.h" #ifdef HAVE_LIBBPF_SUPPORT #include "bpf-event.h" #include "bpf-utils.h" #include <bpf/libbpf.h> -void perf_env__insert_bpf_prog_info(struct perf_env *env, +bool perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) { + bool ret; + + down_write(&env->bpf_progs.lock); + ret = __perf_env__insert_bpf_prog_info(env, info_node); + up_write(&env->bpf_progs.lock); + + return ret; +} + +bool __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) +{ __u32 prog_id = 
info_node->info_linear->info.id; struct bpf_prog_info_node *node; struct rb_node *parent = NULL; struct rb_node **p; - down_write(&env->bpf_progs.lock); p = &env->bpf_progs.infos.rb_node; while (*p != NULL) { @@ -40,15 +54,14 @@ void perf_env__insert_bpf_prog_info(struct perf_env *env, p = &(*p)->rb_right; } else { pr_debug("duplicated bpf prog info %u\n", prog_id); - goto out; + return false; } } rb_link_node(&info_node->rb_node, parent, p); rb_insert_color(&info_node->rb_node, &env->bpf_progs.infos); env->bpf_progs.infos_cnt++; -out: - up_write(&env->bpf_progs.lock); + return true; } struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, @@ -76,15 +89,37 @@ out: return node; } +void perf_env__iterate_bpf_prog_info(struct perf_env *env, + void (*cb)(struct bpf_prog_info_node *node, + void *data), + void *data) +{ + struct rb_node *first; + + down_read(&env->bpf_progs.lock); + first = rb_first(&env->bpf_progs.infos); + for (struct rb_node *node = first; node != NULL; node = rb_next(node)) + (*cb)(rb_entry(node, struct bpf_prog_info_node, rb_node), data); + up_read(&env->bpf_progs.lock); +} + bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) { + bool ret; + + down_write(&env->bpf_progs.lock); + ret = __perf_env__insert_btf(env, btf_node); + up_write(&env->bpf_progs.lock); + return ret; +} + +bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) +{ struct rb_node *parent = NULL; __u32 btf_id = btf_node->id; struct btf_node *node; struct rb_node **p; - bool ret = true; - down_write(&env->bpf_progs.lock); p = &env->bpf_progs.btfs.rb_node; while (*p != NULL) { @@ -96,25 +131,31 @@ bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) p = &(*p)->rb_right; } else { pr_debug("duplicated btf %u\n", btf_id); - ret = false; - goto out; + return false; } } rb_link_node(&btf_node->rb_node, parent, p); rb_insert_color(&btf_node->rb_node, &env->bpf_progs.btfs); env->bpf_progs.btfs_cnt++; -out: - up_write(&env->bpf_progs.lock); - return ret; + return true; } struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) { + struct btf_node *res; + + down_read(&env->bpf_progs.lock); + res = __perf_env__find_btf(env, btf_id); + up_read(&env->bpf_progs.lock); + return res; +} + +struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id) +{ struct btf_node *node = NULL; struct rb_node *n; - down_read(&env->bpf_progs.lock); n = env->bpf_progs.btfs.rb_node; while (n) { @@ -124,13 +165,9 @@ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) else if (btf_id > node->id) n = n->rb_right; else - goto out; + return node; } - node = NULL; - -out: - up_read(&env->bpf_progs.lock); - return node; + return NULL; } /* purge data in bpf_progs.infos tree */ @@ -151,6 +188,7 @@ static void perf_env__purge_bpf(struct perf_env *env) next = rb_next(&node->rb_node); rb_erase(&node->rb_node, root); zfree(&node->info_linear); + bpf_metadata_free(node->metadata); free(node); } @@ -231,6 +269,7 @@ void perf_env__exit(struct perf_env *env) void perf_env__init(struct perf_env *env) { + memset(env, 0, sizeof(*env)); #ifdef HAVE_LIBBPF_SUPPORT env->bpf_progs.infos = RB_ROOT; env->bpf_progs.btfs = RB_ROOT; @@ -308,10 +347,13 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) for (idx = 0; idx < nr_cpus; ++idx) { struct perf_cpu cpu = { .cpu = idx }; + int core_id = cpu__get_core_id(cpu); + int socket_id = cpu__get_socket_id(cpu); + int die_id = cpu__get_die_id(cpu); - env->cpu[idx].core_id = 
cpu__get_core_id(cpu); - env->cpu[idx].socket_id = cpu__get_socket_id(cpu); - env->cpu[idx].die_id = cpu__get_die_id(cpu); + env->cpu[idx].core_id = core_id >= 0 ? core_id : -1; + env->cpu[idx].socket_id = socket_id >= 0 ? socket_id : -1; + env->cpu[idx].die_id = die_id >= 0 ? die_id : -1; } env->nr_cpus_avail = nr_cpus; @@ -324,11 +366,9 @@ int perf_env__read_pmu_mappings(struct perf_env *env) u32 pmu_num = 0; struct strbuf sb; - while ((pmu = perf_pmus__scan(pmu))) { - if (!pmu->name) - continue; + while ((pmu = perf_pmus__scan(pmu))) pmu_num++; - } + if (!pmu_num) { pr_debug("pmu mappings not available\n"); return -ENOENT; @@ -339,8 +379,6 @@ int perf_env__read_pmu_mappings(struct perf_env *env) return -ENOMEM; while ((pmu = perf_pmus__scan(pmu))) { - if (!pmu->name) - continue; if (strbuf_addf(&sb, "%u:%s", pmu->type, pmu->name) < 0) goto error; /* include a NULL character at the end */ @@ -360,7 +398,8 @@ error: int perf_env__read_cpuid(struct perf_env *env) { char cpuid[128]; - int err = get_cpuid(cpuid, sizeof(cpuid)); + struct perf_cpu cpu = {-1}; + int err = get_cpuid(cpuid, sizeof(cpuid), cpu); if (err) return err; @@ -393,6 +432,116 @@ static int perf_env__read_nr_cpus_avail(struct perf_env *env) return env->nr_cpus_avail ? 0 : -ENOENT; } +static int __perf_env__read_core_pmu_caps(const struct perf_pmu *pmu, + int *nr_caps, char ***caps, + unsigned int *max_branches, + unsigned int *br_cntr_nr, + unsigned int *br_cntr_width) +{ + struct perf_pmu_caps *pcaps = NULL; + char *ptr, **tmp; + int ret = 0; + + *nr_caps = 0; + *caps = NULL; + + if (!pmu->nr_caps) + return 0; + + *caps = calloc(pmu->nr_caps, sizeof(char *)); + if (!*caps) + return -ENOMEM; + + tmp = *caps; + list_for_each_entry(pcaps, &pmu->caps, list) { + if (asprintf(&ptr, "%s=%s", pcaps->name, pcaps->value) < 0) { + ret = -ENOMEM; + goto error; + } + + *tmp++ = ptr; + + if (!strcmp(pcaps->name, "branches")) + *max_branches = atoi(pcaps->value); + else if (!strcmp(pcaps->name, "branch_counter_nr")) + *br_cntr_nr = atoi(pcaps->value); + else if (!strcmp(pcaps->name, "branch_counter_width")) + *br_cntr_width = atoi(pcaps->value); + } + *nr_caps = pmu->nr_caps; + return 0; +error: + while (tmp-- != *caps) + zfree(tmp); + zfree(caps); + *nr_caps = 0; + return ret; +} + +int perf_env__read_core_pmu_caps(struct perf_env *env) +{ + struct pmu_caps *pmu_caps; + struct perf_pmu *pmu = NULL; + int nr_pmu, i = 0, j; + int ret; + + nr_pmu = perf_pmus__num_core_pmus(); + + if (!nr_pmu) + return -ENODEV; + + if (nr_pmu == 1) { + pmu = perf_pmus__find_core_pmu(); + if (!pmu) + return -ENODEV; + ret = perf_pmu__caps_parse(pmu); + if (ret < 0) + return ret; + return __perf_env__read_core_pmu_caps(pmu, &env->nr_cpu_pmu_caps, + &env->cpu_pmu_caps, + &env->max_branches, + &env->br_cntr_nr, + &env->br_cntr_width); + } + + pmu_caps = calloc(nr_pmu, sizeof(*pmu_caps)); + if (!pmu_caps) + return -ENOMEM; + + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + if (perf_pmu__caps_parse(pmu) <= 0) + continue; + ret = __perf_env__read_core_pmu_caps(pmu, &pmu_caps[i].nr_caps, + &pmu_caps[i].caps, + &pmu_caps[i].max_branches, + &pmu_caps[i].br_cntr_nr, + &pmu_caps[i].br_cntr_width); + if (ret) + goto error; + + pmu_caps[i].pmu_name = strdup(pmu->name); + if (!pmu_caps[i].pmu_name) { + ret = -ENOMEM; + goto error; + } + i++; + } + + env->nr_pmus_with_caps = nr_pmu; + env->pmu_caps = pmu_caps; + + return 0; +error: + for (i = 0; i < nr_pmu; i++) { + for (j = 0; j < pmu_caps[i].nr_caps; j++) + zfree(&pmu_caps[i].caps[j]); + zfree(&pmu_caps[i].caps); 
+ zfree(&pmu_caps[i].pmu_name); + } + zfree(&pmu_caps); + return ret; +} + const char *perf_env__raw_arch(struct perf_env *env) { return env && !perf_env__read_arch(env) ? env->arch : "unknown"; @@ -457,11 +606,27 @@ const char *perf_env__arch(struct perf_env *env) return normalize_arch(arch_name); } +#if defined(HAVE_LIBTRACEEVENT) +#include "trace/beauty/arch_errno_names.c" +#endif + +const char *perf_env__arch_strerrno(struct perf_env *env __maybe_unused, int err __maybe_unused) +{ +#if defined(HAVE_LIBTRACEEVENT) + if (env->arch_strerrno == NULL) + env->arch_strerrno = arch_syscalls__strerrno_function(perf_env__arch(env)); + + return env->arch_strerrno ? env->arch_strerrno(err) : "no arch specific strerrno function"; +#else + return "!HAVE_LIBTRACEEVENT"; +#endif +} + const char *perf_env__cpuid(struct perf_env *env) { int status; - if (!env || !env->cpuid) { /* Assume local operation */ + if (!env->cpuid) { /* Assume local operation */ status = perf_env__read_cpuid(env); if (status) return NULL; @@ -474,7 +639,7 @@ int perf_env__nr_pmu_mappings(struct perf_env *env) { int status; - if (!env || !env->nr_pmu_mappings) { /* Assume local operation */ + if (!env->nr_pmu_mappings) { /* Assume local operation */ status = perf_env__read_pmu_mappings(env); if (status) return 0; @@ -487,7 +652,7 @@ const char *perf_env__pmu_mappings(struct perf_env *env) { int status; - if (!env || !env->pmu_mappings) { /* Assume local operation */ + if (!env->pmu_mappings) { /* Assume local operation */ status = perf_env__read_pmu_mappings(env); if (status) return NULL; @@ -504,7 +669,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu) for (i = 0; i < env->nr_numa_nodes; i++) { nn = &env->numa_nodes[i]; - nr = max(nr, perf_cpu_map__max(nn->map).cpu); + nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu); } nr++; @@ -535,6 +700,24 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu) return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1; } +bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name) +{ + char *pmu_mapping = env->pmu_mappings, *colon; + + for (int i = 0; i < env->nr_pmu_mappings; ++i) { + if (strtoul(pmu_mapping, &colon, 0) == ULONG_MAX || *colon != ':') + goto out_error; + + pmu_mapping = colon + 1; + if (strcmp(pmu_mapping, pmu_name) == 0) + return true; + + pmu_mapping += strlen(pmu_mapping) + 1; + } +out_error: + return false; +} + char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, const char *cap) { @@ -582,3 +765,62 @@ out: free(cap_eq); return NULL; } + +void perf_env__find_br_cntr_info(struct perf_env *env, + unsigned int *nr, + unsigned int *width) +{ + if (nr) { + *nr = env->cpu_pmu_caps ? env->br_cntr_nr : + env->pmu_caps->br_cntr_nr; + } + + if (width) { + *width = env->cpu_pmu_caps ? env->br_cntr_width : + env->pmu_caps->br_cntr_width; + } +} + +bool perf_env__is_x86_amd_cpu(struct perf_env *env) +{ + static int is_amd; /* 0: Uninitialized, 1: Yes, -1: No */ + + if (is_amd == 0) + is_amd = env->cpuid && strstarts(env->cpuid, "AuthenticAMD") ? 1 : -1; + + return is_amd >= 1 ? 
true : false; +} + +bool x86__is_amd_cpu(void) +{ + struct perf_env env = { .total_mem = 0, }; + bool is_amd; + + perf_env__cpuid(&env); + is_amd = perf_env__is_x86_amd_cpu(&env); + perf_env__exit(&env); + + return is_amd; +} + +bool perf_env__is_x86_intel_cpu(struct perf_env *env) +{ + static int is_intel; /* 0: Uninitialized, 1: Yes, -1: No */ + + if (is_intel == 0) + is_intel = env->cpuid && strstarts(env->cpuid, "GenuineIntel") ? 1 : -1; + + return is_intel >= 1 ? true : false; +} + +bool x86__is_intel_cpu(void) +{ + struct perf_env env = { .total_mem = 0, }; + bool is_intel; + + perf_env__cpuid(&env); + is_intel = perf_env__is_x86_intel_cpu(&env); + perf_env__exit(&env); + + return is_intel; +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 4566c51f2fd9..9977b85523a8 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -12,6 +12,7 @@ struct perf_cpu_map; struct cpu_topology_map { int socket_id; int die_id; + int cluster_id; int core_id; }; @@ -46,10 +47,15 @@ struct hybrid_node { struct pmu_caps { int nr_caps; unsigned int max_branches; + unsigned int br_cntr_nr; + unsigned int br_cntr_width; + char **caps; char *pmu_name; }; +typedef const char *(arch_syscalls__strerrno_t)(int err); + struct perf_env { char *hostname; char *os_release; @@ -62,6 +68,8 @@ struct perf_env { unsigned long long total_mem; unsigned int msr_pmu_type; unsigned int max_branches; + unsigned int br_cntr_nr; + unsigned int br_cntr_width; int kernel_is_64_bit; int nr_cmdline; @@ -130,6 +138,7 @@ struct perf_env { */ bool enabled; } clock; + arch_syscalls__strerrno_t *arch_strerrno; }; enum perf_compress_type { @@ -141,8 +150,7 @@ enum perf_compress_type { struct bpf_prog_info_node; struct btf_node; -extern struct perf_env perf_env; - +int perf_env__read_core_pmu_caps(struct perf_env *env); void perf_env__exit(struct perf_env *env); int perf_env__kernel_is_64_bit(struct perf_env *env); @@ -159,19 +167,41 @@ int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); const char *perf_env__arch(struct perf_env *env); +const char *perf_env__arch_strerrno(struct perf_env *env, int err); const char *perf_env__cpuid(struct perf_env *env); const char *perf_env__raw_arch(struct perf_env *env); int perf_env__nr_cpus_avail(struct perf_env *env); void perf_env__init(struct perf_env *env); -void perf_env__insert_bpf_prog_info(struct perf_env *env, +#ifdef HAVE_LIBBPF_SUPPORT +bool __perf_env__insert_bpf_prog_info(struct perf_env *env, + struct bpf_prog_info_node *info_node); +bool perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node); struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, __u32 prog_id); +void perf_env__iterate_bpf_prog_info(struct perf_env *env, + void (*cb)(struct bpf_prog_info_node *node, + void *data), + void *data); bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); +bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); +struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id); +#endif // HAVE_LIBBPF_SUPPORT int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu); char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, const char *cap); + +bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name); +void perf_env__find_br_cntr_info(struct perf_env *env, + unsigned int *nr, + 
unsigned int *width); + +bool x86__is_amd_cpu(void); +bool perf_env__is_x86_amd_cpu(struct perf_env *env); +bool x86__is_intel_cpu(void); +bool perf_env__is_x86_intel_cpu(struct perf_env *env); + #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 4cbb092e0684..4c92cc1a952c 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,9 +1,12 @@ #include <errno.h> #include <fcntl.h> #include <inttypes.h> +#include <linux/compiler.h> #include <linux/kernel.h> #include <linux/types.h> #include <perf/cpumap.h> +#include <perf/event.h> +#include <stdio.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> @@ -58,6 +61,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_CGROUP] = "CGROUP", [PERF_RECORD_TEXT_POKE] = "TEXT_POKE", [PERF_RECORD_AUX_OUTPUT_HW_ID] = "AUX_OUTPUT_HW_ID", + [PERF_RECORD_CALLCHAIN_DEFERRED] = "CALLCHAIN_DEFERRED", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -77,6 +81,8 @@ static const char *perf_event__names[] = { [PERF_RECORD_HEADER_FEATURE] = "FEATURE", [PERF_RECORD_COMPRESSED] = "COMPRESSED", [PERF_RECORD_FINISHED_INIT] = "FINISHED_INIT", + [PERF_RECORD_COMPRESSED2] = "COMPRESSED2", + [PERF_RECORD_BPF_METADATA] = "BPF_METADATA", }; const char *perf_event__name(unsigned int id) @@ -93,8 +99,8 @@ struct process_symbol_args { u64 start; }; -static int find_symbol_cb(void *arg, const char *name, char type, - u64 start) +static int find_func_symbol_cb(void *arg, const char *name, char type, + u64 start) { struct process_symbol_args *args = arg; @@ -110,12 +116,36 @@ static int find_symbol_cb(void *arg, const char *name, char type, return 1; } +static int find_any_symbol_cb(void *arg, const char *name, + char type __maybe_unused, u64 start) +{ + struct process_symbol_args *args = arg; + + if (strcmp(name, args->name)) + return 0; + + args->start = start; + return 1; +} + int kallsyms__get_function_start(const char *kallsyms_filename, const char *symbol_name, u64 *addr) { struct process_symbol_args args = { .name = symbol_name, }; - if (kallsyms__parse(kallsyms_filename, &args, find_symbol_cb) <= 0) + if (kallsyms__parse(kallsyms_filename, &args, find_func_symbol_cb) <= 0) + return -1; + + *addr = args.start; + return 0; +} + +int kallsyms__get_symbol_start(const char *kallsyms_filename, + const char *symbol_name, u64 *addr) +{ + struct process_symbol_args args = { .name = symbol_name, }; + + if (kallsyms__parse(kallsyms_filename, &args, find_any_symbol_cb) <= 0) return -1; *addr = args.start; @@ -192,7 +222,7 @@ size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp) event->cgroup.id, event->cgroup.path); } -int perf_event__process_comm(struct perf_tool *tool __maybe_unused, +int perf_event__process_comm(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -200,7 +230,7 @@ int perf_event__process_comm(struct perf_tool *tool __maybe_unused, return machine__process_comm_event(machine, event, sample); } -int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused, +int perf_event__process_namespaces(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -208,7 +238,7 @@ int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused, return machine__process_namespaces_event(machine, event, sample); } -int 
perf_event__process_cgroup(struct perf_tool *tool __maybe_unused, +int perf_event__process_cgroup(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -216,7 +246,7 @@ int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused, return machine__process_cgroup_event(machine, event, sample); } -int perf_event__process_lost(struct perf_tool *tool __maybe_unused, +int perf_event__process_lost(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -224,7 +254,7 @@ int perf_event__process_lost(struct perf_tool *tool __maybe_unused, return machine__process_lost_event(machine, event, sample); } -int perf_event__process_aux(struct perf_tool *tool __maybe_unused, +int perf_event__process_aux(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine) @@ -232,7 +262,7 @@ int perf_event__process_aux(struct perf_tool *tool __maybe_unused, return machine__process_aux_event(machine, event); } -int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused, +int perf_event__process_itrace_start(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine) @@ -240,7 +270,7 @@ int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused, return machine__process_itrace_start_event(machine, event); } -int perf_event__process_aux_output_hw_id(struct perf_tool *tool __maybe_unused, +int perf_event__process_aux_output_hw_id(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine) @@ -248,7 +278,7 @@ int perf_event__process_aux_output_hw_id(struct perf_tool *tool __maybe_unused, return machine__process_aux_output_hw_id_event(machine, event); } -int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused, +int perf_event__process_lost_samples(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -256,7 +286,7 @@ int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused, return machine__process_lost_samples_event(machine, event, sample); } -int perf_event__process_switch(struct perf_tool *tool __maybe_unused, +int perf_event__process_switch(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine) @@ -264,7 +294,7 @@ int perf_event__process_switch(struct perf_tool *tool __maybe_unused, return machine__process_switch_event(machine, event); } -int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused, +int perf_event__process_ksymbol(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine) @@ -272,7 +302,7 @@ int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused, return machine__process_ksymbol(machine, event, sample); } -int perf_event__process_bpf(struct perf_tool *tool __maybe_unused, +int perf_event__process_bpf(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -280,7 +310,7 @@ int perf_event__process_bpf(struct perf_tool *tool __maybe_unused, return machine__process_bpf(machine, event, sample); } -int perf_event__process_text_poke(struct 
perf_tool *tool __maybe_unused, +int perf_event__process_text_poke(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -305,7 +335,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp) build_id__init(&bid, event->mmap2.build_id, event->mmap2.build_id_size); - build_id__sprintf(&bid, sbuild_id); + build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id)); return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 " <%s>]: %c%c%c%c %s\n", @@ -363,7 +393,7 @@ size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp) return ret; } -int perf_event__process_mmap(struct perf_tool *tool __maybe_unused, +int perf_event__process_mmap(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -371,7 +401,7 @@ int perf_event__process_mmap(struct perf_tool *tool __maybe_unused, return machine__process_mmap_event(machine, event, sample); } -int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused, +int perf_event__process_mmap2(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -386,7 +416,7 @@ size_t perf_event__fprintf_task(union perf_event *event, FILE *fp) event->fork.ppid, event->fork.ptid); } -int perf_event__process_fork(struct perf_tool *tool __maybe_unused, +int perf_event__process_fork(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -394,7 +424,7 @@ int perf_event__process_fork(struct perf_tool *tool __maybe_unused, return machine__process_fork_event(machine, event, sample); } -int perf_event__process_exit(struct perf_tool *tool __maybe_unused, +int perf_event__process_exit(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -402,14 +432,35 @@ int perf_event__process_exit(struct perf_tool *tool __maybe_unused, return machine__process_exit_event(machine, event, sample); } +int perf_event__exit_del_thread(const struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + struct thread *thread = machine__findnew_thread(machine, + event->fork.pid, + event->fork.tid); + + dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, + event->fork.ppid, event->fork.ptid); + + if (thread) { + machine__remove_thread(machine, thread); + thread__put(thread); + } + + return 0; +} + size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp) { - return fprintf(fp, " offset: %#"PRI_lx64" size: %#"PRI_lx64" flags: %#"PRI_lx64" [%s%s%s]\n", + return fprintf(fp, " offset: %#"PRI_lx64" size: %#"PRI_lx64" flags: %#"PRI_lx64" [%s%s%s%s]\n", event->aux.aux_offset, event->aux.aux_size, event->aux.flags, event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "", event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "", - event->aux.flags & PERF_AUX_FLAG_PARTIAL ? "P" : ""); + event->aux.flags & PERF_AUX_FLAG_PARTIAL ? "P" : "", + event->aux.flags & PERF_AUX_FLAG_COLLISION ? 
"C" : ""); } size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) @@ -459,6 +510,20 @@ size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp) event->bpf.type, event->bpf.flags, event->bpf.id); } +size_t perf_event__fprintf_bpf_metadata(union perf_event *event, FILE *fp) +{ + struct perf_record_bpf_metadata *metadata = &event->bpf_metadata; + size_t ret; + + ret = fprintf(fp, " prog %s\n", metadata->prog_name); + for (__u32 i = 0; i < metadata->nr_entries; i++) { + ret += fprintf(fp, " entry %d: %20s = %s\n", i, + metadata->entries[i].key, + metadata->entries[i].value); + } + return ret; +} + static int text_poke_printer(enum binary_printer_ops op, unsigned int val, void *extra, FILE *fp) { @@ -487,7 +552,7 @@ size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *ma struct addr_location al; addr_location__init(&al); - al.map = map__get(maps__find(machine__kernel_maps(machine), tp->addr)); + al.map = maps__find(machine__kernel_maps(machine), tp->addr); if (al.map && map__load(al.map) >= 0) { al.addr = map__map_ip(al.map, tp->addr); al.sym = map__find_symbol(al.map, al.addr); @@ -556,6 +621,9 @@ size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FIL case PERF_RECORD_AUX_OUTPUT_HW_ID: ret += perf_event__fprintf_aux_output_hw_id(event, fp); break; + case PERF_RECORD_BPF_METADATA: + ret += perf_event__fprintf_bpf_metadata(event, fp); + break; default: ret += fprintf(fp, "\n"); } @@ -563,7 +631,7 @@ size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FIL return ret; } -int perf_event__process(struct perf_tool *tool __maybe_unused, +int perf_event__process(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -593,13 +661,13 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { al->level = 'k'; maps = machine__kernel_maps(machine); - load_map = true; + load_map = !symbol_conf.lazy_load_kernel_maps; } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) { al->level = '.'; } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) { al->level = 'g'; maps = machine__kernel_maps(machine); - load_map = true; + load_map = !symbol_conf.lazy_load_kernel_maps; } else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) { al->level = 'u'; } else { @@ -617,7 +685,7 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, return NULL; } al->maps = maps__get(maps); - al->map = map__get(maps__find(maps, al->addr)); + al->map = maps__find(maps, al->addr); if (al->map != NULL) { /* * Kernel maps might be changed when loading symbols so loading @@ -702,7 +770,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al, dso = al->map ? map__dso(al->map) : NULL; dump_printf(" ...... dso: %s\n", dso - ? dso->long_name + ? dso__long_name(dso) : (al->level == 'H' ? "[hypervisor]" : "<not found>")); if (thread__is_filtered(thread)) @@ -723,13 +791,24 @@ int machine__resolve(struct machine *machine, struct addr_location *al, al->socket = env->cpu[al->cpu].socket_id; } + /* Account for possible out-of-order switch events. */ + al->parallelism = max(1, min(machine->parallelism, machine__nr_cpus_avail(machine))); + if (test_bit(al->parallelism, symbol_conf.parallelism_filter)) + al->filtered |= (1 << HIST_FILTER__PARALLELISM); + /* + * Multiply it by some const to avoid precision loss or dealing + * with floats. 
The multiplier does not matter otherwise since + * we only print it as percents. + */ + al->latency = sample->period * 1000 / al->parallelism; + if (al->map) { if (symbol_conf.dso_list && (!dso || !(strlist__has_entry(symbol_conf.dso_list, - dso->short_name) || - (dso->short_name != dso->long_name && + dso__short_name(dso)) || + (dso__short_name(dso) != dso__long_name(dso) && strlist__has_entry(symbol_conf.dso_list, - dso->long_name))))) { + dso__long_name(dso)))))) { al->filtered |= (1 << HIST_FILTER__DSO); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index de20e01c9d72..64c63b59d617 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -66,9 +66,16 @@ enum { PERF_IP_FLAG_VMEXIT = 1ULL << 12, PERF_IP_FLAG_INTR_DISABLE = 1ULL << 13, PERF_IP_FLAG_INTR_TOGGLE = 1ULL << 14, + PERF_IP_FLAG_BRANCH_MISS = 1ULL << 15, + PERF_IP_FLAG_NOT_TAKEN = 1ULL << 16, }; -#define PERF_IP_FLAG_CHARS "bcrosyiABExghDt" +#define PERF_IP_FLAG_CHARS "bcrosyiABExghDtmn" + +#define PERF_ADDITIONAL_STATE_MASK \ + (PERF_IP_FLAG_IN_TX | \ + PERF_IP_FLAG_INTR_DISABLE | \ + PERF_IP_FLAG_INTR_TOGGLE) #define PERF_BRANCH_MASK (\ PERF_IP_FLAG_BRANCH |\ @@ -84,6 +91,10 @@ enum { PERF_IP_FLAG_VMENTRY |\ PERF_IP_FLAG_VMEXIT) +#define PERF_IP_FLAG_BRANCH_EVENT_MASK \ + (PERF_IP_FLAG_BRANCH_MISS | \ + PERF_IP_FLAG_NOT_TAKEN) + #define PERF_MEM_DATA_SRC_NONE \ (PERF_MEM_S(OP, NA) |\ PERF_MEM_S(LVL, NA) |\ @@ -106,6 +117,7 @@ enum perf_synth_id { PERF_SYNTH_INTEL_PSB, PERF_SYNTH_INTEL_EVT, PERF_SYNTH_INTEL_IFLAG_CHG, + PERF_SYNTH_POWERPC_VPA_DTL, }; /* @@ -243,6 +255,25 @@ struct perf_synth_intel_iflag_chg { u64 branch_ip; /* If via_branch */ }; +/* + * The powerpc VPA DTL entries are of below format + */ +struct powerpc_vpadtl_entry { + u8 dispatch_reason; + u8 preempt_reason; + u16 processor_id; + u32 enqueue_to_dispatch_time; + u32 ready_to_enqueue_time; + u32 waiting_to_ready_time; + u64 timebase; + u64 fault_addr; + u64 srr0; + u64 srr1; +}; + +extern const char *dispatch_reasons[11]; +extern const char *preempt_reasons[10]; + static inline void *perf_synth__raw_data(void *p) { return p + 4; @@ -267,71 +298,75 @@ struct perf_tool; void perf_event__read_stat_config(struct perf_stat_config *config, struct perf_record_stat_config *event); -int perf_event__process_comm(struct perf_tool *tool, +int perf_event__process_comm(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_lost(struct perf_tool *tool, +int perf_event__process_lost(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_lost_samples(struct perf_tool *tool, +int perf_event__process_lost_samples(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_aux(struct perf_tool *tool, +int perf_event__process_aux(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_itrace_start(struct perf_tool *tool, +int perf_event__process_itrace_start(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_aux_output_hw_id(struct perf_tool *tool, +int perf_event__process_aux_output_hw_id(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_switch(struct perf_tool *tool, +int 
perf_event__process_switch(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_namespaces(struct perf_tool *tool, +int perf_event__process_namespaces(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_cgroup(struct perf_tool *tool, +int perf_event__process_cgroup(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_mmap(struct perf_tool *tool, +int perf_event__process_mmap(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_mmap2(struct perf_tool *tool, +int perf_event__process_mmap2(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_fork(struct perf_tool *tool, +int perf_event__process_fork(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_exit(struct perf_tool *tool, +int perf_event__process_exit(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_ksymbol(struct perf_tool *tool, +int perf_event__exit_del_thread(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_bpf(struct perf_tool *tool, +int perf_event__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); +int perf_event__process_bpf(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_text_poke(struct perf_tool *tool, +int perf_event__process_text_poke(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process(struct perf_tool *tool, +int perf_event__process(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); @@ -355,11 +390,14 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp); size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp); size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_bpf_metadata(union perf_event *event, FILE *fp); size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine,FILE *fp); size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp); int kallsyms__get_function_start(const char *kallsyms_filename, const char *symbol_name, u64 *addr); +int kallsyms__get_symbol_start(const char *kallsyms_filename, + const char *symbol_name, u64 *addr); void event_attr_init(struct perf_event_attr *attr); @@ -373,11 +411,6 @@ extern unsigned int proc_map_timeout; #define PAGE_SIZE_NAME_LEN 32 char *get_page_size_name(u64 size, char *str); -void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type); -void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type); -const char *arch_perf_header_entry(const char *se_header); -int arch_support_sort_key(const char *sort_key); - static inline bool perf_event_header__cpumode_is_guest(u8 
cpumode) { return cpumode == PERF_RECORD_MISC_GUEST_KERNEL || diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h index 8fecc9fbaecc..dcff697ed252 100644 --- a/tools/perf/util/events_stats.h +++ b/tools/perf/util/events_stats.h @@ -18,7 +18,18 @@ * PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored * in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells * exactly how many samples the kernel in fact dropped, i.e. it is the sum of - * all struct perf_record_lost_samples.lost fields reported. + * all struct perf_record_lost_samples.lost fields reported without setting the + * misc field in the header. + * + * The BPF program can discard samples according to the filter expressions given + * by the user. This number is kept in a BPF map and dumped at the end of perf + * record in a PERF_RECORD_LOST_SAMPLES event. To differentiate it from other + * lost samples, perf tools sets PERF_RECORD_MISC_LOST_SAMPLES_BPF flag in the + * header.misc field. The number of dropped-samples events is stored in + * .nr_events[PERF_RECORD_LOST_SAMPLES] while total_dropped_samples tells + * exactly how many samples the BPF program in fact dropped, i.e. it is the sum + * of all struct perf_record_lost_samples.lost fields reported with the misc + * field set in the header. * * The total_period is needed because by default auto-freq is used, so * multiplying nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get @@ -28,6 +39,7 @@ struct events_stats { u64 total_lost; u64 total_lost_samples; + u64 total_dropped_samples; u64 total_aux_lost; u64 total_aux_partial; u64 total_aux_collision; @@ -45,14 +57,16 @@ struct events_stats { struct hists_stats { u64 total_period; u64 total_non_filtered_period; + u64 total_latency; + u64 total_non_filtered_latency; u32 nr_samples; u32 nr_non_filtered_samples; u32 nr_lost_samples; + u32 nr_dropped_samples; }; void events_stats__inc(struct events_stats *stats, u32 type); -size_t events_stats__fprintf(struct events_stats *stats, FILE *fp, - bool skip_empty); +size_t events_stats__fprintf(struct events_stats *stats, FILE *fp); #endif /* __PERF_EVENTS_STATS_ */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 7ef43f72098e..03674d2cbd01 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -33,6 +33,10 @@ #include "util/bpf-filter.h" #include "util/stat.h" #include "util/util.h" +#include "util/env.h" +#include "util/intel-tpebs.h" +#include "util/metricgroup.h" +#include "util/strbuf.h" #include <signal.h> #include <unistd.h> #include <sched.h> @@ -46,6 +50,7 @@ #include <sys/mman.h> #include <sys/prctl.h> #include <sys/timerfd.h> +#include <sys/wait.h> #include <linux/bitops.h> #include <linux/hash.h> @@ -78,6 +83,9 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, evlist->ctl_fd.fd = -1; evlist->ctl_fd.ack = -1; evlist->ctl_fd.pos = -1; + evlist->nr_br_cntr = -1; + metricgroup__rblist_init(&evlist->metric_events); + INIT_LIST_HEAD(&evlist->deferred_samples); } struct evlist *evlist__new(void) @@ -94,16 +102,31 @@ struct evlist *evlist__new_default(void) { struct evlist *evlist = evlist__new(); bool can_profile_kernel; - int err; + struct perf_pmu *pmu = NULL; if (!evlist) return NULL; can_profile_kernel = perf_event_paranoid_check(1); - err = parse_event(evlist, can_profile_kernel ? 
"cycles:P" : "cycles:Pu"); - if (err) { - evlist__delete(evlist); - evlist = NULL; + + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + char buf[256]; + int err; + + snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name, + can_profile_kernel ? "P" : "Pu"); + err = parse_event(evlist, buf); + if (err) { + evlist__delete(evlist); + return NULL; + } + } + + if (evlist->core.nr_entries > 1) { + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) + evsel__set_sample_id(evsel, /*can_sample_identifier=*/false); } return evlist; @@ -161,6 +184,7 @@ static void evlist__purge(struct evlist *evlist) void evlist__exit(struct evlist *evlist) { + metricgroup__rblist_exit(&evlist->metric_events); event_enable_timer__exit(&evlist->eet); zfree(&evlist->mmap); zfree(&evlist->overwrite_mmap); @@ -251,6 +275,9 @@ static struct evsel *evlist__dummy_event(struct evlist *evlist) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_DUMMY, .size = sizeof(attr), /* to capture ABI version */ + /* Avoid frequency mode for dummy events to avoid associated timers. */ + .freq = 0, + .sample_period = 1, }; return evsel__new_idx(&attr, evlist->core.nr_entries); @@ -277,8 +304,6 @@ struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide) evsel->core.attr.exclude_kernel = 1; evsel->core.attr.exclude_guest = 1; evsel->core.attr.exclude_hv = 1; - evsel->core.attr.freq = 0; - evsel->core.attr.sample_period = 1; evsel->core.system_wide = system_wide; evsel->no_aux_samples = true; evsel->name = strdup("dummy:u"); @@ -290,7 +315,8 @@ struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide) #ifdef HAVE_LIBTRACEEVENT struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide) { - struct evsel *evsel = evsel__newtp_idx("sched", "sched_switch", 0); + struct evsel *evsel = evsel__newtp_idx("sched", "sched_switch", 0, + /*format=*/true); if (IS_ERR(evsel)) return evsel; @@ -306,62 +332,6 @@ struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide) } #endif -int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) -{ - struct evsel *evsel, *n; - LIST_HEAD(head); - size_t i; - - for (i = 0; i < nr_attrs; i++) { - evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i); - if (evsel == NULL) - goto out_delete_partial_list; - list_add_tail(&evsel->core.node, &head); - } - - evlist__splice_list_tail(evlist, &head); - - return 0; - -out_delete_partial_list: - __evlist__for_each_entry_safe(&head, n, evsel) - evsel__delete(evsel); - return -1; -} - -int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) -{ - size_t i; - - for (i = 0; i < nr_attrs; i++) - event_attr_init(attrs + i); - - return evlist__add_attrs(evlist, attrs, nr_attrs); -} - -__weak int arch_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs) -{ - if (!nr_attrs) - return 0; - - return __evlist__add_default_attrs(evlist, attrs, nr_attrs); -} - -struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id) -{ - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) { - if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT && - (int)evsel->core.attr.config == id) - return evsel; - } - - return NULL; -} - struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name) { struct evsel *evsel; @@ -1048,14 +1018,13 @@ int evlist__create_maps(struct evlist *evlist, struct target *target) * per-thread data. 
thread_map__new_str will call * thread_map__new_all_cpus to enumerate all threads. */ - threads = thread_map__new_str(target->pid, target->tid, target->uid, - all_threads); + threads = thread_map__new_str(target->pid, target->tid, all_threads); if (!threads) return -1; - if (target__uses_dummy_map(target)) - cpus = perf_cpu_map__dummy_new(); + if (target__uses_dummy_map(target) && !evlist__has_bpf_output(evlist)) + cpus = perf_cpu_map__new_any_cpu(); else cpus = perf_cpu_map__new(target->cpu_list); @@ -1077,7 +1046,8 @@ out_delete_threads: return -1; } -int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel) +int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel, + struct target *target) { struct evsel *evsel; int err = 0; @@ -1099,7 +1069,7 @@ int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel) * non-tracepoint events can have BPF filters. */ if (!list_empty(&evsel->bpf_filters)) { - err = perf_bpf_filter__prepare(evsel); + err = perf_bpf_filter__prepare(evsel, target); if (err) { *err_evsel = evsel; break; } } @@ -1185,11 +1155,6 @@ int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) return ret; } -int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid) -{ - return evlist__set_tp_filter_pids(evlist, 1, &pid); -} - int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) { char *filter = asprintf__tp_filter_pids(npids, pids); @@ -1252,6 +1217,72 @@ u64 evlist__combined_branch_type(struct evlist *evlist) return branch_type; } +static struct evsel * +evlist__find_dup_event_from_prev(struct evlist *evlist, struct evsel *event) +{ + struct evsel *pos; + + evlist__for_each_entry(evlist, pos) { + if (event == pos) + break; + if ((pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) && + !strcmp(pos->name, event->name)) + return pos; + } + return NULL; +} + +#define MAX_NR_ABBR_NAME (26 * 11) + +/* + * The abbr name is from A to Z9. If the number of events + * which require the branch counter exceeds MAX_NR_ABBR_NAME, + * return NA.
+ */ +static void evlist__new_abbr_name(char *name) +{ + static int idx; + int i = idx / 26; + + if (idx >= MAX_NR_ABBR_NAME) { + name[0] = 'N'; + name[1] = 'A'; + name[2] = '\0'; + return; + } + + name[0] = 'A' + (idx % 26); + + if (!i) + name[1] = '\0'; + else { + name[1] = '0' + i - 1; + name[2] = '\0'; + } + + idx++; +} + +void evlist__update_br_cntr(struct evlist *evlist) +{ + struct evsel *evsel, *dup; + int i = 0; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + evsel->br_cntr_idx = i++; + evsel__leader(evsel)->br_cntr_nr++; + + dup = evlist__find_dup_event_from_prev(evlist, evsel); + if (dup) + memcpy(evsel->abbr_name, dup->abbr_name, 3 * sizeof(char)); + else + evlist__new_abbr_name(evsel->abbr_name); + } + } + evlist->nr_br_cntr = i; +} + bool evlist__valid_read_format(struct evlist *evlist) { struct evsel *first = evlist__first(evlist), *pos = first; @@ -1351,21 +1382,20 @@ static int evlist__create_syswide_maps(struct evlist *evlist) * error, and we may not want to do that fallback to a * default cpu identity map :-\ */ - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) - goto out; + return -ENOMEM; threads = perf_thread_map__new_dummy(); - if (!threads) - goto out_put; + if (!threads) { + perf_cpu_map__put(cpus); + return -ENOMEM; + } perf_evlist__set_maps(&evlist->core, cpus, threads); - perf_thread_map__put(threads); -out_put: perf_cpu_map__put(cpus); -out: - return -ENOMEM; + return 0; } int evlist__open(struct evlist *evlist) @@ -1404,6 +1434,8 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const int child_ready_pipe[2], go_pipe[2]; char bf; + evlist->workload.cork_fd = -1; + if (pipe(child_ready_pipe) < 0) { perror("failed to create 'ready' pipe"); return -1; @@ -1456,7 +1488,7 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const * For cancelling the workload without actually running it, * the parent will just close workload.cork_fd, without writing * anything, i.e. read will return zero and we just exit() - * here. + * here (See evlist__cancel_workload()). 
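The abbreviation scheme above runs A..Z, then A0..Z0 up through A9..Z9, and falls back to "NA" once all 26 * 11 names are used. A pure-function restatement (the kernel helper keeps idx in a static variable) that compiles standalone:

#include <stdio.h>

#define MAX_NR_ABBR_NAME (26 * 11)

static void abbr_name(int idx, char name[3])
{
	int i = idx / 26;

	if (idx >= MAX_NR_ABBR_NAME) {
		name[0] = 'N';
		name[1] = 'A';
		name[2] = '\0';
		return;
	}
	name[0] = 'A' + (idx % 26);
	if (!i)
		name[1] = '\0';
	else {
		name[1] = '0' + i - 1;
		name[2] = '\0';
	}
}

int main(void)
{
	int samples[] = { 0, 25, 26, 51, 52, 285, 286 };
	char name[3];

	for (unsigned int j = 0; j < sizeof(samples) / sizeof(samples[0]); j++) {
		abbr_name(samples[j], name);
		printf("%3d -> %s\n", samples[j], name);	/* A Z A0 Z0 A1 Z9 NA */
	}
	return 0;
}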
*/ if (ret != 1) { if (ret == -1) @@ -1520,7 +1552,7 @@ out_close_ready_pipe: int evlist__start_workload(struct evlist *evlist) { - if (evlist->workload.cork_fd > 0) { + if (evlist->workload.cork_fd >= 0) { char bf = 0; int ret; /* @@ -1531,12 +1563,24 @@ int evlist__start_workload(struct evlist *evlist) perror("unable to write to pipe"); close(evlist->workload.cork_fd); + evlist->workload.cork_fd = -1; return ret; } return 0; } +void evlist__cancel_workload(struct evlist *evlist) +{ + int status; + + if (evlist->workload.cork_fd >= 0) { + close(evlist->workload.cork_fd); + evlist->workload.cork_fd = -1; + waitpid(evlist->workload.pid, &status, WNOHANG); + } +} + int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) { struct evsel *evsel = evlist__event2evsel(evlist, event); @@ -1694,6 +1738,24 @@ void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_ev tracking_evsel->tracking = true; } +struct evsel *evlist__findnew_tracking_event(struct evlist *evlist, bool system_wide) +{ + struct evsel *evsel; + + evsel = evlist__get_tracking_event(evlist); + if (!evsel__is_dummy_event(evsel)) { + evsel = evlist__add_aux_dummy(evlist, system_wide); + if (!evsel) + return NULL; + + evlist__set_tracking_event(evlist, evsel); + } else if (system_wide) { + perf_evlist__go_system_wide(&evlist->core, &evsel->core); + } + + return evsel; +} + struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str) { struct evsel *evsel; @@ -2417,23 +2479,36 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx) return NULL; } -int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf) +void evlist__format_evsels(struct evlist *evlist, struct strbuf *sb, size_t max_length) { - struct evsel *evsel; - int printed = 0; + struct evsel *evsel, *leader = NULL; + bool first = true; evlist__for_each_entry(evlist, evsel) { + struct evsel *new_leader = evsel__leader(evsel); + if (evsel__is_dummy_event(evsel)) continue; - if (size > (strlen(evsel__name(evsel)) + (printed ? 2 : 1))) { - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "," : "", evsel__name(evsel)); - } else { - printed += scnprintf(bf + printed, size - printed, "%s...", printed ? "," : ""); - break; + + if (leader != new_leader && leader && leader->core.nr_members > 1) + strbuf_addch(sb, '}'); + + if (!first) + strbuf_addch(sb, ','); + + if (sb->len > max_length) { + strbuf_addstr(sb, "..."); + return; } - } + if (leader != new_leader && new_leader->core.nr_members > 1) + strbuf_addch(sb, '{'); - return printed; + strbuf_addstr(sb, evsel__name(evsel)); + first = false; + leader = new_leader; + } + if (leader && leader->core.nr_members > 1) + strbuf_addch(sb, '}'); } void evlist__check_mem_load_aux(struct evlist *evlist) @@ -2483,19 +2558,86 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis return; evlist__for_each_entry(evlist, pos) { - struct perf_cpu_map *intersect, *to_test; - const struct perf_pmu *pmu = evsel__find_pmu(pos); + evsel__warn_user_requested_cpus(pos, user_requested_cpus); + } + perf_cpu_map__put(user_requested_cpus); +} - to_test = pmu && pmu->is_core ? pmu->cpus : cpu_map__online(); - intersect = perf_cpu_map__intersect(to_test, user_requested_cpus); - if (!perf_cpu_map__equal(intersect, user_requested_cpus)) { - char buf[128]; +/* Should uniquify be disabled for the evlist? 
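As an aside on the corking used by evlist__prepare_workload() and evlist__start_workload() above: the forked child blocks in read() on the go pipe until the parent either writes a single byte (start) or closes its end without writing (cancel). A standalone toy of that idiom, not the perf code itself:

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

int main(void)
{
	int go[2];
	char bf = 0;
	pid_t pid;

	if (pipe(go))
		return 1;
	pid = fork();
	if (pid < 0)
		return 1;
	if (pid == 0) {			/* the "workload" child */
		close(go[1]);
		if (read(go[0], &bf, 1) != 1)
			_exit(1);	/* parent closed the pipe: cancelled */
		printf("workload starts\n");
		_exit(0);
	}
	close(go[0]);
	if (write(go[1], &bf, 1) != 1)	/* the one-byte "go", as in evlist__start_workload() */
		perror("write");
	close(go[1]);
	waitpid(pid, NULL, 0);
	return 0;
}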
*/ +static bool evlist__disable_uniquify(const struct evlist *evlist) +{ + struct evsel *counter; + struct perf_pmu *last_pmu = NULL; + bool first = true; - cpu_map__snprint(to_test, buf, sizeof(buf)); - pr_warning("WARNING: A requested CPU in '%s' is not supported by PMU '%s' (CPUs %s) for event '%s'\n", - cpu_list, pmu ? pmu->name : "cpu", buf, evsel__name(pos)); + evlist__for_each_entry(evlist, counter) { + /* If PMUs vary then uniquify can be useful. */ + if (!first && counter->pmu != last_pmu) + return false; + first = false; + if (counter->pmu) { + /* Allow uniquify for uncore PMUs. */ + if (!counter->pmu->is_core) + return false; + /* Keep hybrid event names uniquified for clarity. */ + if (perf_pmus__num_core_pmus() > 1) + return false; } - perf_cpu_map__put(intersect); + last_pmu = counter->pmu; } - perf_cpu_map__put(user_requested_cpus); + return true; +} + +static bool evlist__set_needs_uniquify(struct evlist *evlist, const struct perf_stat_config *config) +{ + struct evsel *counter; + bool needs_uniquify = false; + + if (evlist__disable_uniquify(evlist)) { + evlist__for_each_entry(evlist, counter) + counter->uniquified_name = true; + return false; + } + + evlist__for_each_entry(evlist, counter) { + if (evsel__set_needs_uniquify(counter, config)) + needs_uniquify = true; + } + return needs_uniquify; +} + +void evlist__uniquify_evsel_names(struct evlist *evlist, const struct perf_stat_config *config) +{ + if (evlist__set_needs_uniquify(evlist, config)) { + struct evsel *pos; + + evlist__for_each_entry(evlist, pos) + evsel__uniquify_counter(pos); + } +} + +bool evlist__has_bpf_output(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel__is_bpf_output(evsel)) + return true; + } + + return false; +} + +bool evlist__needs_bpf_sb_event(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel__is_dummy_event(evsel)) + continue; + if (!evsel->core.attr.exclude_kernel) + return true; + } + + return false; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 664c6bf7b3e0..911834ae7c2a 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -12,6 +12,7 @@ #include <perf/evlist.h> #include "events_stats.h" #include "evsel.h" +#include "rblist.h" #include <pthread.h> #include <signal.h> #include <unistd.h> @@ -19,7 +20,10 @@ struct pollfd; struct thread_map; struct perf_cpu_map; +struct perf_stat_config; struct record_opts; +struct strbuf; +struct target; /* * State machine of bkw_mmap_state: @@ -56,6 +60,7 @@ struct evlist { bool enabled; int id_pos; int is_pos; + int nr_br_cntr; u64 combined_sample_type; enum bkw_mmap_state bkw_mmap_state; struct { @@ -66,7 +71,7 @@ struct evlist { struct mmap *overwrite_mmap; struct evsel *selected; struct events_stats stats; - struct perf_env *env; + struct perf_session *session; void (*trace_event_sample_raw)(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); @@ -82,6 +87,13 @@ struct evlist { int pos; /* index at evlist core object to check signals */ } ctl_fd; struct event_enable_timer *eet; + /** + * @metric_events: A list of struct metric_event which each have a list + * of struct metric_expr. + */ + struct rblist metric_events; + /* samples with deferred_callchain would wait here. 
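For reference, a sketch of the grouped rendering that evlist__format_evsels() above produces: consecutive members sharing a leader whose core.nr_members is greater than 1 get wrapped in braces. A toy stand-in with invented event names and grouping:

#include <stdio.h>
#include <string.h>

struct ev { const char *name; int leader; int nr_members; };

int main(void)
{
	/* Invented: a two-member group led by evs[0], then a lone event. */
	struct ev evs[] = {
		{ "cycles",       0, 2 },
		{ "instructions", 0, 2 },
		{ "cache-misses", 2, 1 },
	};
	int last = -1;
	char out[128] = "";

	for (unsigned int i = 0; i < sizeof(evs) / sizeof(evs[0]); i++) {
		int l = evs[i].leader;

		if (last != l && last >= 0 && evs[last].nr_members > 1)
			strcat(out, "}");	/* close the previous group */
		if (i)
			strcat(out, ",");
		if (last != l && evs[l].nr_members > 1)
			strcat(out, "{");	/* open a new group */
		strcat(out, evs[i].name);
		last = l;
	}
	if (last >= 0 && evs[last].nr_members > 1)
		strcat(out, "}");
	printf("%s\n", out);	/* {cycles,instructions},cache-misses */
	return 0;
}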
*/ + struct list_head deferred_samples; }; struct evsel_str_handler { @@ -100,19 +112,8 @@ void evlist__delete(struct evlist *evlist); void evlist__add(struct evlist *evlist, struct evsel *entry); void evlist__remove(struct evlist *evlist, struct evsel *evsel); -int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); - -int __evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, size_t nr_attrs); - -int arch_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs); - -#define evlist__add_default_attrs(evlist, array) \ - arch_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) - int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs); +int arch_evlist__add_required_events(struct list_head *list); int evlist__add_dummy(struct evlist *evlist); struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide); @@ -142,7 +143,6 @@ int __evlist__set_tracepoints_handlers(struct evlist *evlist, __evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) int evlist__set_tp_filter(struct evlist *evlist, const char *filter); -int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid); int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); int evlist__append_tp_filter(struct evlist *evlist, const char *filter); @@ -150,7 +150,6 @@ int evlist__append_tp_filter(struct evlist *evlist, const char *filter); int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid); int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); -struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id); struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name); int evlist__add_pollfd(struct evlist *evlist, int fd); @@ -184,6 +183,7 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const char *argv[], bool pipe_output, void (*exec_error)(int signo, siginfo_t *info, void *ucontext)); int evlist__start_workload(struct evlist *evlist); +void evlist__cancel_workload(struct evlist *evlist); struct option; @@ -212,11 +212,13 @@ void evlist__enable_non_dummy(struct evlist *evlist); void evlist__set_selected(struct evlist *evlist, struct evsel *evsel); int evlist__create_maps(struct evlist *evlist, struct target *target); -int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); +int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel, + struct target *target); u64 __evlist__combined_sample_type(struct evlist *evlist); u64 evlist__combined_sample_type(struct evlist *evlist); u64 evlist__combined_branch_type(struct evlist *evlist); +void evlist__update_br_cntr(struct evlist *evlist); bool evlist__sample_id_all(struct evlist *evlist); u16 evlist__id_hdr_size(struct evlist *evlist); @@ -387,6 +389,7 @@ bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr); struct evsel *evlist__get_tracking_event(struct evlist *evlist); void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel); +struct evsel *evlist__findnew_tracking_event(struct evlist *evlist, bool system_wide); struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str); @@ -438,8 +441,11 @@ int event_enable_timer__process(struct event_enable_timer *eet); struct evsel *evlist__find_evsel(struct evlist *evlist, int idx); -int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char 
*bf); +void evlist__format_evsels(struct evlist *evlist, struct strbuf *sb, size_t max_length); void evlist__check_mem_load_aux(struct evlist *evlist); void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list); +void evlist__uniquify_evsel_names(struct evlist *evlist, const struct perf_stat_config *config); +bool evlist__has_bpf_output(struct evlist *evlist); +bool evlist__needs_bpf_sb_event(struct evlist *evlist); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 762e2b2634a5..9cd706f62793 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -5,6 +5,11 @@ * Parts came from builtin-{top,stat,record}.c, see those files for further * copyright notes. */ +/* + * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select + * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu. + */ +#define __SANE_USERSPACE_TYPES__ #include <byteswap.h> #include <errno.h> @@ -19,6 +24,7 @@ #include <linux/zalloc.h> #include <sys/ioctl.h> #include <sys/resource.h> +#include <sys/syscall.h> #include <sys/types.h> #include <dirent.h> #include <stdlib.h> @@ -30,6 +36,7 @@ #include "counts.h" #include "event.h" #include "evsel.h" +#include "time-utils.h" #include "util/env.h" #include "util/evsel_config.h" #include "util/evsel_fprintf.h" @@ -41,6 +48,7 @@ #include "record.h" #include "debug.h" #include "trace-event.h" +#include "session.h" #include "stat.h" #include "string2.h" #include "memswap.h" @@ -49,56 +57,152 @@ #include "off_cpu.h" #include "pmu.h" #include "pmus.h" +#include "drm_pmu.h" +#include "hwmon_pmu.h" +#include "tool_pmu.h" +#include "tp_pmu.h" +#include "rlimit.h" #include "../perf-sys.h" #include "util/parse-branch-options.h" #include "util/bpf-filter.h" +#include "util/hist.h" #include <internal/xyarray.h> #include <internal/lib.h> #include <internal/threadmap.h> +#include "util/intel-tpebs.h" #include <linux/ctype.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif struct perf_missing_features perf_missing_features; static clockid_t clockid; -static const char *const perf_tool_event__tool_names[PERF_TOOL_MAX] = { - NULL, - "duration_time", - "user_time", - "system_time", -}; - -const char *perf_tool_event__to_str(enum perf_tool_event ev) +static int evsel__no_extra_init(struct evsel *evsel __maybe_unused) { - if (ev > PERF_TOOL_NONE && ev < PERF_TOOL_MAX) - return perf_tool_event__tool_names[ev]; - - return NULL; + return 0; } -enum perf_tool_event perf_tool_event__from_str(const char *str) +static bool test_attr__enabled(void) { - int i; + static bool test_attr__enabled; + static bool test_attr__enabled_tested; - perf_tool_event__for_each_event(i) { - if (!strcmp(str, perf_tool_event__tool_names[i])) - return i; + if (!test_attr__enabled_tested) { + char *dir = getenv("PERF_TEST_ATTR"); + + test_attr__enabled = (dir != NULL); + test_attr__enabled_tested = true; } - return PERF_TOOL_NONE; + return test_attr__enabled; } +#define __WRITE_ASS(str, fmt, data) \ +do { \ + if (fprintf(file, #str "=%"fmt "\n", data) < 0) { \ + perror("test attr - failed to write event file"); \ + fclose(file); \ + return -1; \ + } \ +} while (0) -static int evsel__no_extra_init(struct evsel *evsel __maybe_unused) +#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field) + +static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, + int fd, int group_fd, unsigned long flags) { + FILE *file; + char path[PATH_MAX]; + 
char *dir = getenv("PERF_TEST_ATTR"); + + snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir, + attr->type, attr->config, fd); + + file = fopen(path, "w+"); + if (!file) { + perror("test attr - failed to open event file"); + return -1; + } + + if (fprintf(file, "[event-%d-%llu-%d]\n", + attr->type, attr->config, fd) < 0) { + perror("test attr - failed to write event file"); + fclose(file); + return -1; + } + + /* syscall arguments */ + __WRITE_ASS(fd, "d", fd); + __WRITE_ASS(group_fd, "d", group_fd); + __WRITE_ASS(cpu, "d", cpu.cpu); + __WRITE_ASS(pid, "d", pid); + __WRITE_ASS(flags, "lu", flags); + + /* struct perf_event_attr */ + WRITE_ASS(type, PRIu32); + WRITE_ASS(size, PRIu32); + WRITE_ASS(config, "llu"); + WRITE_ASS(sample_period, "llu"); + WRITE_ASS(sample_type, "llu"); + WRITE_ASS(read_format, "llu"); + WRITE_ASS(disabled, "d"); + WRITE_ASS(inherit, "d"); + WRITE_ASS(pinned, "d"); + WRITE_ASS(exclusive, "d"); + WRITE_ASS(exclude_user, "d"); + WRITE_ASS(exclude_kernel, "d"); + WRITE_ASS(exclude_hv, "d"); + WRITE_ASS(exclude_idle, "d"); + WRITE_ASS(mmap, "d"); + WRITE_ASS(comm, "d"); + WRITE_ASS(freq, "d"); + WRITE_ASS(inherit_stat, "d"); + WRITE_ASS(enable_on_exec, "d"); + WRITE_ASS(task, "d"); + WRITE_ASS(watermark, "d"); + WRITE_ASS(precise_ip, "d"); + WRITE_ASS(mmap_data, "d"); + WRITE_ASS(sample_id_all, "d"); + WRITE_ASS(exclude_host, "d"); + WRITE_ASS(exclude_guest, "d"); + WRITE_ASS(exclude_callchain_kernel, "d"); + WRITE_ASS(exclude_callchain_user, "d"); + WRITE_ASS(mmap2, "d"); + WRITE_ASS(comm_exec, "d"); + WRITE_ASS(context_switch, "d"); + WRITE_ASS(write_backward, "d"); + WRITE_ASS(namespaces, "d"); + WRITE_ASS(use_clockid, "d"); + WRITE_ASS(wakeup_events, PRIu32); + WRITE_ASS(bp_type, PRIu32); + WRITE_ASS(config1, "llu"); + WRITE_ASS(config2, "llu"); + WRITE_ASS(branch_sample_type, "llu"); + WRITE_ASS(sample_regs_user, "llu"); + WRITE_ASS(sample_stack_user, PRIu32); + + fclose(file); return 0; } -void __weak test_attr__ready(void) { } +#undef __WRITE_ASS +#undef WRITE_ASS + +static void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, + int fd, int group_fd, unsigned long flags) +{ + int errno_saved = errno; + + if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) { + pr_err("test attr FAILED"); + exit(128); + } + + errno = errno_saved; +} static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused) { @@ -136,6 +240,16 @@ set_methods: return 0; } +const char *evsel__pmu_name(const struct evsel *evsel) +{ + struct perf_pmu *pmu = evsel__find_pmu(evsel); + + if (pmu) + return pmu->name; + + return event_type(evsel->core.attr.type); +} + #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) int __evsel__sample_size(u64 sample_type) @@ -288,12 +402,13 @@ void evsel__init(struct evsel *evsel, evsel->sample_size = __evsel__sample_size(attr->sample_type); evsel__calc_id_pos(evsel); evsel->cmdline_group_boundary = false; - evsel->metric_events = NULL; evsel->per_pkg_mask = NULL; evsel->collect_stat = false; - evsel->pmu_name = NULL; evsel->group_pmu_name = NULL; evsel->skippable = false; + evsel->supported = true; + evsel->alternate_hw_config = PERF_COUNT_HW_MAX; + evsel->script_output_type = -1; // FIXME: OUTPUT_TYPE_UNSET, see builtin-script.c } struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) @@ -353,7 +468,7 @@ static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src) * The assumption is that @orig is not configured nor opened yet. 
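For what it is worth, the event file that store_event() writes under $PERF_TEST_ATTR is a simple INI-like listing. A sketch of its shape follows; every value is invented for illustration (e.g. flags=8 assuming PERF_FLAG_FD_CLOEXEC), and the remaining attr fields continue in the same key=value form:

[event-0-0-4]
fd=4
group_fd=-1
cpu=-1
pid=7821
flags=8
type=0
size=136
config=0
sample_period=4000
sample_type=455
read_format=4
disabled=1
inherit=1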
* So we only care about the attributes that can be set while it's parsed. */ -struct evsel *evsel__clone(struct evsel *orig) +struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig) { struct evsel *evsel; @@ -366,12 +481,16 @@ struct evsel *evsel__clone(struct evsel *orig) if (orig->bpf_obj) return NULL; - evsel = evsel__new(&orig->core.attr); + if (dest) + evsel = dest; + else + evsel = evsel__new(&orig->core.attr); + if (evsel == NULL) return NULL; evsel->core.cpus = perf_cpu_map__get(orig->core.cpus); - evsel->core.own_cpus = perf_cpu_map__get(orig->core.own_cpus); + evsel->core.pmu_cpus = perf_cpu_map__get(orig->core.pmu_cpus); evsel->core.threads = perf_thread_map__get(orig->core.threads); evsel->core.nr_members = orig->core.nr_members; evsel->core.system_wide = orig->core.system_wide; @@ -388,11 +507,6 @@ struct evsel *evsel__clone(struct evsel *orig) if (evsel->group_name == NULL) goto out_err; } - if (orig->pmu_name) { - evsel->pmu_name = strdup(orig->pmu_name); - if (evsel->pmu_name == NULL) - goto out_err; - } if (orig->group_pmu_name) { evsel->group_pmu_name = strdup(orig->group_pmu_name); if (evsel->group_pmu_name == NULL) @@ -410,18 +524,29 @@ struct evsel *evsel__clone(struct evsel *orig) } evsel->cgrp = cgroup__get(orig->cgrp); #ifdef HAVE_LIBTRACEEVENT + if (orig->tp_sys) { + evsel->tp_sys = strdup(orig->tp_sys); + if (evsel->tp_sys == NULL) + goto out_err; + } + if (orig->tp_name) { + evsel->tp_name = strdup(orig->tp_name); + if (evsel->tp_name == NULL) + goto out_err; + } evsel->tp_format = orig->tp_format; #endif evsel->handler = orig->handler; evsel->core.leader = orig->core.leader; + evsel->metric_leader = orig->metric_leader; evsel->max_events = orig->max_events; - evsel->tool_event = orig->tool_event; - free((char *)evsel->unit); - evsel->unit = strdup(orig->unit); - if (evsel->unit == NULL) - goto out_err; - + zfree(&evsel->unit); + if (orig->unit) { + evsel->unit = strdup(orig->unit); + if (evsel->unit == NULL) + goto out_err; + } evsel->scale = orig->scale; evsel->snapshot = orig->snapshot; evsel->per_pkg = orig->per_pkg; @@ -431,15 +556,17 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->exclude_GH = orig->exclude_GH; evsel->sample_read = orig->sample_read; - evsel->auto_merge_stats = orig->auto_merge_stats; evsel->collect_stat = orig->collect_stat; evsel->weak_group = orig->weak_group; evsel->use_config_name = orig->use_config_name; evsel->pmu = orig->pmu; + evsel->first_wildcard_match = orig->first_wildcard_match; if (evsel__copy_config_terms(evsel, orig) < 0) goto out_err; + evsel->alternate_hw_config = orig->alternate_hw_config; + return evsel; out_err: @@ -450,44 +577,84 @@ out_err: /* * Returns pointer with encoded error via <linux/err.h> interface. 
*/ -#ifdef HAVE_LIBTRACEEVENT -struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx) +struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format) { + struct perf_event_attr attr = { + .type = PERF_TYPE_TRACEPOINT, + .sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | + PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), + }; struct evsel *evsel = zalloc(perf_evsel__object.size); - int err = -ENOMEM; + int err = -ENOMEM, id = -1; - if (evsel == NULL) { + if (evsel == NULL) goto out_err; - } else { - struct perf_event_attr attr = { - .type = PERF_TYPE_TRACEPOINT, - .sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | - PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), - }; - if (asprintf(&evsel->name, "%s:%s", sys, name) < 0) - goto out_free; - evsel->tp_format = trace_event__tp_format(sys, name); - if (IS_ERR(evsel->tp_format)) { - err = PTR_ERR(evsel->tp_format); + if (asprintf(&evsel->name, "%s:%s", sys, name) < 0) + goto out_free; + +#ifdef HAVE_LIBTRACEEVENT + evsel->tp_sys = strdup(sys); + if (!evsel->tp_sys) + goto out_free; + + evsel->tp_name = strdup(name); + if (!evsel->tp_name) + goto out_free; +#endif + + event_attr_init(&attr); + + if (format) { + id = tp_pmu__id(sys, name); + if (id < 0) { + err = id; goto out_free; } - - event_attr_init(&attr); - attr.config = evsel->tp_format->id; - attr.sample_period = 1; - evsel__init(evsel, &attr, idx); } - + attr.config = (__u64)id; + attr.sample_period = 1; + evsel__init(evsel, &attr, idx); return evsel; out_free: zfree(&evsel->name); +#ifdef HAVE_LIBTRACEEVENT + zfree(&evsel->tp_sys); + zfree(&evsel->tp_name); +#endif free(evsel); out_err: return ERR_PTR(err); } + +#ifdef HAVE_LIBTRACEEVENT +struct tep_event *evsel__tp_format(struct evsel *evsel) +{ + struct tep_event *tp_format = evsel->tp_format; + + if (tp_format) + return tp_format; + + if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) + return NULL; + + if (!evsel->tp_sys) + tp_format = trace_event__tp_format_id(evsel->core.attr.config); + else + tp_format = trace_event__tp_format(evsel->tp_sys, evsel->tp_name); + + if (IS_ERR(tp_format)) { + int err = -PTR_ERR(tp_format); + + pr_err("Error getting tracepoint format '%s' '%s'(%d)\n", + evsel__name(evsel), strerror(err), err); + return NULL; + } + evsel->tp_format = tp_format; + return evsel->tp_format; +} #endif const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = { @@ -537,7 +704,6 @@ static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) { int colon = 0, r = 0; struct perf_event_attr *attr = &evsel->core.attr; - bool exclude_guest_default = false; #define MOD_PRINT(context, mod) do { \ if (!attr->exclude_##context) { \ @@ -549,17 +715,15 @@ static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) MOD_PRINT(kernel, 'k'); MOD_PRINT(user, 'u'); MOD_PRINT(hv, 'h'); - exclude_guest_default = true; } if (attr->precise_ip) { if (!colon) colon = ++r; r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp"); - exclude_guest_default = true; } - if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) { + if (attr->exclude_host || attr->exclude_guest) { MOD_PRINT(host, 'H'); MOD_PRINT(guest, 'G'); } @@ -606,11 +770,6 @@ static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size) return r + evsel__add_modifiers(evsel, bf + r, size - r); } -static int evsel__tool_name(enum perf_tool_event ev, char *bf, size_t size) -{ - return scnprintf(bf, size, "%s", perf_tool_event__to_str(ev)); -} - static int __evsel__bp_name(char *bf, size_t 
size, u64 addr, u64 type) { int r; @@ -761,10 +920,7 @@ const char *evsel__name(struct evsel *evsel) break; case PERF_TYPE_SOFTWARE: - if (evsel__is_tool(evsel)) - evsel__tool_name(evsel->tool_event, bf, sizeof(bf)); - else - evsel__sw_name(evsel, bf, sizeof(bf)); + evsel__sw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_TRACEPOINT: @@ -775,6 +931,10 @@ const char *evsel__name(struct evsel *evsel) evsel__bp_name(evsel, bf, sizeof(bf)); break; + case PERF_PMU_TYPE_TOOL: + scnprintf(bf, sizeof(bf), "%s", evsel__tool_pmu_event_name(evsel)); + break; + default: scnprintf(bf, sizeof(bf), "unknown attr type: %d", evsel->core.attr.type); @@ -800,7 +960,7 @@ const char *evsel__metric_id(const struct evsel *evsel) return evsel->metric_id; if (evsel__is_tool(evsel)) - return perf_tool_event__to_str(evsel->tool_event); + return evsel__tool_pmu_event_name(evsel); return "unknown"; } @@ -823,16 +983,22 @@ const char *evsel__group_name(struct evsel *evsel) int evsel__group_desc(struct evsel *evsel, char *buf, size_t size) { int ret = 0; + bool first = true; struct evsel *pos; const char *group_name = evsel__group_name(evsel); if (!evsel->forced_leader) ret = scnprintf(buf, size, "%s { ", group_name); - ret += scnprintf(buf + ret, size - ret, "%s", evsel__name(evsel)); + for_each_group_evsel(pos, evsel) { + if (symbol_conf.skip_empty && + evsel__hists(pos)->stats.nr_samples == 0) + continue; - for_each_group_member(pos, evsel) - ret += scnprintf(buf + ret, size - ret, ", %s", evsel__name(pos)); + ret += scnprintf(buf + ret, size - ret, "%s%s", + first ? "" : ", ", evsel__name(pos)); + first = false; + } if (!evsel->forced_leader) ret += scnprintf(buf + ret, size - ret, " }"); @@ -875,10 +1041,13 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o if (param->record_mode == CALLCHAIN_DWARF) { if (!function) { + const char *arch = perf_env__arch(evsel__env(evsel)); + evsel__set_sample_bit(evsel, REGS_USER); evsel__set_sample_bit(evsel, STACK_USER); - if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) { - attr->sample_regs_user |= DWARF_MINIMAL_REGS; + if (opts->sample_user_regs && + DWARF_MINIMAL_REGS(arch) != arch__user_reg_mask()) { + attr->sample_regs_user |= DWARF_MINIMAL_REGS(arch); pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, " "specifying a subset with --user-regs may render DWARF unwinding unreliable, " "so the minimal registers set (IP, SP) is explicitly forced.\n"); @@ -897,6 +1066,9 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o pr_info("Disabling user space callchains for function trace event.\n"); attr->exclude_callchain_user = 1; } + + if (param->defer && !attr->exclude_callchain_user) + attr->defer_callchain = 1; } void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts, @@ -923,6 +1095,71 @@ static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param * } } +static void evsel__apply_ratio_to_prev(struct evsel *evsel, + struct perf_event_attr *attr, + struct record_opts *opts, + const char *buf) +{ + struct perf_event_attr *prev_attr = NULL; + struct evsel *evsel_prev = NULL; + u64 type = evsel->core.attr.sample_type; + u64 prev_type = 0; + double rtp; + + rtp = strtod(buf, NULL); + if (rtp <= 0) { + pr_err("Invalid ratio-to-prev value %lf\n", rtp); + return; + } + if (evsel == evsel__leader(evsel)) { + pr_err("Invalid use of ratio-to-prev term without preceding element in group\n"); + return; + } + if (!evsel->pmu->is_core) 
{ + pr_err("Event using ratio-to-prev term must have a core PMU\n"); + return; + } + + evsel_prev = evsel__prev(evsel); + if (!evsel_prev) { + pr_err("Previous event does not exist.\n"); + return; + } + + if (evsel_prev->pmu->type != evsel->pmu->type) { + pr_err("Compared events (\"%s\", \"%s\") must have same PMU\n", + evsel->name, evsel_prev->name); + return; + } + + prev_attr = &evsel_prev->core.attr; + prev_type = evsel_prev->core.attr.sample_type; + + if (!(prev_type & PERF_SAMPLE_PERIOD)) { + attr->sample_period = prev_attr->sample_period * rtp; + attr->freq = 0; + evsel__reset_sample_bit(evsel, PERIOD); + } else if (!(type & PERF_SAMPLE_PERIOD)) { + prev_attr->sample_period = attr->sample_period / rtp; + prev_attr->freq = 0; + evsel__reset_sample_bit(evsel_prev, PERIOD); + } else { + if (opts->user_interval != ULLONG_MAX) { + prev_attr->sample_period = opts->user_interval; + attr->sample_period = prev_attr->sample_period * rtp; + prev_attr->freq = 0; + attr->freq = 0; + evsel__reset_sample_bit(evsel_prev, PERIOD); + evsel__reset_sample_bit(evsel, PERIOD); + } else { + pr_err("Event period term or count (-c) must be set when using ratio-to-prev term.\n"); + return; + } + } + + arch_evsel__apply_ratio_to_prev(evsel, attr); +} + static void evsel__apply_config_terms(struct evsel *evsel, struct record_opts *opts, bool track) { @@ -936,6 +1173,7 @@ static void evsel__apply_config_terms(struct evsel *evsel, u32 dump_size = 0; int max_stack = 0; const char *callgraph_buf = NULL; + const char *rtp_buf = NULL; list_for_each_entry(term, config_terms, list) { switch (term->type) { @@ -998,11 +1236,17 @@ static void evsel__apply_config_terms(struct evsel *evsel, case EVSEL__CONFIG_TERM_AUX_OUTPUT: attr->aux_output = term->val.aux_output ? 1 : 0; break; + case EVSEL__CONFIG_TERM_AUX_ACTION: + /* Already applied by auxtrace */ + break; case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE: /* Already applied by auxtrace */ break; case EVSEL__CONFIG_TERM_CFG_CHG: break; + case EVSEL__CONFIG_TERM_RATIO_TO_PREV: + rtp_buf = term->val.str; + break; default: break; } @@ -1054,6 +1298,8 @@ static void evsel__apply_config_terms(struct evsel *evsel, evsel__config_callchain(evsel, opts, ¶m); } } + if (rtp_buf) + evsel__apply_ratio_to_prev(evsel, attr, opts, rtp_buf); } struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type) @@ -1078,6 +1324,11 @@ void __weak arch__post_evsel_config(struct evsel *evsel __maybe_unused, { } +void __weak arch_evsel__apply_ratio_to_prev(struct evsel *evsel __maybe_unused, + struct perf_event_attr *attr __maybe_unused) +{ +} + static void evsel__set_default_freq_period(struct record_opts *opts, struct perf_event_attr *attr) { @@ -1089,9 +1340,10 @@ static void evsel__set_default_freq_period(struct record_opts *opts, } } -static bool evsel__is_offcpu_event(struct evsel *evsel) +bool evsel__is_offcpu_event(struct evsel *evsel) { - return evsel__is_bpf_output(evsel) && evsel__name_is(evsel, OFFCPU_EVENT); + return evsel__is_bpf_output(evsel) && evsel__name_is(evsel, OFFCPU_EVENT) && + evsel->core.attr.sample_type & PERF_SAMPLE_RAW; } /* @@ -1131,7 +1383,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread; attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1; - attr->inherit = !opts->no_inherit; + attr->inherit = target__has_cpu(&opts->target) ? 0 : !opts->no_inherit; attr->write_backward = opts->overwrite ? 
1 : 0; attr->read_format = PERF_FORMAT_LOST; @@ -1153,7 +1405,15 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, */ if (leader->core.nr_members > 1) { attr->read_format |= PERF_FORMAT_GROUP; - attr->inherit = 0; + } + + /* + * Inherit + SAMPLE_READ requires SAMPLE_TID in the sample_type and PERF_FORMAT_ID in the read_format. + */ + if (attr->inherit) { + evsel__set_sample_bit(evsel, TID); + evsel->core.attr.read_format |= + PERF_FORMAT_ID; + } } @@ -1231,7 +1491,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, evsel__set_sample_bit(evsel, CPU); } - if (opts->sample_address) + if (opts->sample_data_src) evsel__set_sample_bit(evsel, DATA_SRC); if (opts->sample_phys_addr) @@ -1246,14 +1506,16 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, attr->branch_sample_type = opts->branch_stack; } - if (opts->sample_weight) + if (opts->sample_weight || evsel->retire_lat) { arch_evsel__set_sample_weight(evsel); - + evsel->retire_lat = false; + } attr->task = track; attr->mmap = track; attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; attr->build_id = track && opts->build_id; + attr->defer_output = track && callchain && callchain->defer; /* * ksymbol is tracked separately with text poke because it needs to be @@ -1330,7 +1592,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, attr->exclude_user = 1; } - if (evsel->core.own_cpus || evsel->unit) + if (evsel->core.pmu_cpus || evsel->unit) evsel->core.attr.read_format |= PERF_FORMAT_ID; /* @@ -1360,8 +1622,10 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, if (evsel__is_dummy_event(evsel)) evsel__reset_sample_bit(evsel, BRANCH_STACK); - if (evsel__is_offcpu_event(evsel)) + if (evsel__is_offcpu_event(evsel)) { evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES; + attr->inherit = 0; + } arch__post_evsel_config(evsel, attr); } @@ -1458,10 +1722,21 @@ static void evsel__free_config_terms(struct evsel *evsel) free_config_terms(&evsel->config_terms); } +static void (*evsel__priv_destructor)(void *priv); + +void evsel__set_priv_destructor(void (*destructor)(void *priv)) +{ + assert(evsel__priv_destructor == NULL); + + evsel__priv_destructor = destructor; +} + void evsel__exit(struct evsel *evsel) { assert(list_empty(&evsel->core.node)); assert(evsel->evlist == NULL); + if (evsel__is_retire_lat(evsel)) + evsel__tpebs_close(evsel); bpf_counter__destroy(evsel); perf_bpf_filter__destroy(evsel); evsel__free_counts(evsel); @@ -1469,20 +1744,26 @@ void evsel__exit(struct evsel *evsel) perf_evsel__free_id(&evsel->core); evsel__free_config_terms(evsel); cgroup__put(evsel->cgrp); - perf_cpu_map__put(evsel->core.cpus); - perf_cpu_map__put(evsel->core.own_cpus); - perf_thread_map__put(evsel->core.threads); + perf_evsel__exit(&evsel->core); zfree(&evsel->group_name); zfree(&evsel->name); - zfree(&evsel->pmu_name); +#ifdef HAVE_LIBTRACEEVENT + zfree(&evsel->tp_sys); + zfree(&evsel->tp_name); +#endif + zfree(&evsel->filter); zfree(&evsel->group_pmu_name); zfree(&evsel->unit); zfree(&evsel->metric_id); evsel__zero_per_pkg(evsel); hashmap__free(evsel->per_pkg_mask); evsel->per_pkg_mask = NULL; - zfree(&evsel->metric_events); + if (evsel__priv_destructor) + evsel__priv_destructor(evsel->priv); perf_evsel__object.fini(evsel); + if (evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME || + evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) + xyarray__delete(evsel->start_times); } void evsel__delete(struct evsel *evsel) @@ -1524,6 +1805,12 @@ static void evsel__set_count(struct evsel
*counter, int cpu_map_idx, int thread, count = perf_counts(counter->counts, cpu_map_idx, thread); + if (evsel__is_retire_lat(counter)) { + evsel__tpebs_read(counter, cpu_map_idx, thread); + perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true); + return; + } + count->val = val; count->ena = ena; count->run = run; @@ -1532,6 +1819,60 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread, perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true); } +static bool evsel__group_has_tpebs(struct evsel *leader) +{ + struct evsel *evsel; + + for_each_group_evsel(evsel, leader) { + if (evsel__is_retire_lat(evsel)) + return true; + } + return false; +} + +static u64 evsel__group_read_nr_members(struct evsel *leader) +{ + u64 nr = leader->core.nr_members; + struct evsel *evsel; + + for_each_group_evsel(evsel, leader) { + if (evsel__is_retire_lat(evsel)) + nr--; + } + return nr; +} + +static u64 evsel__group_read_size(struct evsel *leader) +{ + u64 read_format = leader->core.attr.read_format; + int entry = sizeof(u64); /* value */ + int size = 0; + int nr = 1; + + if (!evsel__group_has_tpebs(leader)) + return perf_evsel__read_size(&leader->core); + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + size += sizeof(u64); + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + size += sizeof(u64); + + if (read_format & PERF_FORMAT_ID) + entry += sizeof(u64); + + if (read_format & PERF_FORMAT_LOST) + entry += sizeof(u64); + + if (read_format & PERF_FORMAT_GROUP) { + nr = evsel__group_read_nr_members(leader); + size += sizeof(u64); + } + + size += entry * nr; + return size; +} + static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data) { u64 read_format = leader->core.attr.read_format; @@ -1540,7 +1881,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int nr = *data++; - if (nr != (u64) leader->core.nr_members) + if (nr != evsel__group_read_nr_members(leader)) return -EINVAL; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -1570,7 +1911,7 @@ static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread) { struct perf_stat_evsel *ps = leader->stats; u64 read_format = leader->core.attr.read_format; - int size = perf_evsel__read_size(&leader->core); + int size = evsel__group_read_size(leader); u64 *data = ps->group_data; if (!(read_format & PERF_FORMAT_ID)) @@ -1596,11 +1937,42 @@ static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread) return evsel__process_group_data(leader, cpu_map_idx, thread, data); } +bool __evsel__match(const struct evsel *evsel, u32 type, u64 config) +{ + + u32 e_type = evsel->core.attr.type; + u64 e_config = evsel->core.attr.config; + + if (e_type == type && e_config == config) + return true; + if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE) + return false; + if ((e_type == PERF_TYPE_HARDWARE || e_type == PERF_TYPE_HW_CACHE) && + perf_pmus__supports_extended_type()) + e_config &= PERF_HW_EVENT_MASK; + if (e_type == type && e_config == config) + return true; + if (type == PERF_TYPE_HARDWARE && evsel->pmu && evsel->pmu->is_core && + evsel->alternate_hw_config == config) + return true; + return false; +} + int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread) { - u64 read_format = evsel->core.attr.read_format; + if (evsel__is_tool(evsel)) + return evsel__tool_pmu_read(evsel, cpu_map_idx, thread); + + if (evsel__is_hwmon(evsel)) + return evsel__hwmon_pmu_read(evsel, cpu_map_idx, 
thread); - if (read_format & PERF_FORMAT_GROUP) + if (evsel__is_drm(evsel)) + return evsel__drm_pmu_read(evsel, cpu_map_idx, thread); + + if (evsel__is_retire_lat(evsel)) + return evsel__tpebs_read(evsel, cpu_map_idx, thread); + + if (evsel->core.attr.read_format & PERF_FORMAT_GROUP) return evsel__read_group(evsel, cpu_map_idx, thread); return evsel__read_one(evsel, cpu_map_idx, thread); @@ -1652,7 +2024,7 @@ static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread) struct evsel *leader = evsel__leader(evsel); int fd; - if (evsel__is_group_leader(evsel)) + if (!evsel->supported || evsel__is_group_leader(evsel)) return -1; /* @@ -1666,7 +2038,7 @@ static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread) return -1; fd = FD(leader, cpu_map_idx, thread); - BUG_ON(fd == -1 && !leader->skippable); + BUG_ON(fd == -1 && leader->supported); /* * When the leader has been skipped, return -2 to distinguish from no @@ -1789,6 +2161,7 @@ static struct perf_thread_map *empty_thread_map; static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { + int ret = 0; int nthreads = perf_thread_map__nr(threads); if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) || @@ -1797,7 +2170,7 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, if (cpus == NULL) { if (empty_cpu_map == NULL) { - empty_cpu_map = perf_cpu_map__dummy_new(); + empty_cpu_map = perf_cpu_map__new_any_cpu(); if (empty_cpu_map == NULL) return -ENOMEM; } @@ -1819,15 +2192,27 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0) return -ENOMEM; + if (evsel__is_tool(evsel)) + ret = evsel__tool_pmu_prepare_open(evsel, cpus, nthreads); + evsel->open_flags = PERF_FLAG_FD_CLOEXEC; if (evsel->cgrp) evsel->open_flags |= PERF_FLAG_PID_CGROUP; - return 0; + return ret; } static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_callchain) + evsel->core.attr.defer_callchain = 0; + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_output) + evsel->core.attr.defer_output = 0; + if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit && + (evsel->core.attr.sample_type & PERF_SAMPLE_READ)) + evsel->core.attr.inherit = 0; + if (perf_missing_features.branch_counters) + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS; if (perf_missing_features.read_lost) evsel->core.attr.read_format &= ~PERF_FORMAT_LOST; if (perf_missing_features.weight_struct) { @@ -1875,140 +2260,404 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, return err; } -bool evsel__detect_missing_features(struct evsel *evsel) +static bool __has_attr_feature(struct perf_event_attr *attr, + struct perf_cpu cpu, unsigned long flags) { + int fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu, + /*group_fd=*/-1, flags); + close(fd); + + if (fd < 0) { + attr->exclude_kernel = 1; + + fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu, + /*group_fd=*/-1, flags); + close(fd); + } + + if (fd < 0) { + attr->exclude_hv = 1; + + fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu, + /*group_fd=*/-1, flags); + close(fd); + } + + if (fd < 0) { + attr->exclude_guest = 1; + + fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu, + /*group_fd=*/-1, flags); + close(fd); + } + + 
attr->exclude_kernel = 0; + attr->exclude_guest = 0; + attr->exclude_hv = 0; + + return fd >= 0; +} + +static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags) +{ + struct perf_cpu cpu = {.cpu = -1}; + + return __has_attr_feature(attr, cpu, flags); +} + +static void evsel__detect_missing_pmu_features(struct evsel *evsel) +{ + struct perf_event_attr attr = { + .type = evsel->core.attr.type, + .config = evsel->core.attr.config, + .disabled = 1, + }; + struct perf_pmu *pmu = evsel->pmu; + int old_errno; + + old_errno = errno; + + if (pmu == NULL) + pmu = evsel->pmu = evsel__find_pmu(evsel); + + if (pmu == NULL || pmu->missing_features.checked) + goto out; + /* * Must probe features in the order they were added to the - * perf_event_attr interface. + * perf_event_attr interface. These are PMU specific limitations, so + * we can detect them with the given hardware event and stop at the + * first one that succeeds. */ - if (!perf_missing_features.read_lost && - (evsel->core.attr.read_format & PERF_FORMAT_LOST)) { - perf_missing_features.read_lost = true; - pr_debug2("switching off PERF_FORMAT_LOST support\n"); + + /* Please add new feature detection here. */ + + attr.exclude_guest = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + pmu->missing_features.exclude_guest = true; + pr_debug2("switching off exclude_guest for PMU %s\n", pmu->name); + +found: + pmu->missing_features.checked = true; +out: + errno = old_errno; +} + +static void evsel__detect_missing_brstack_features(struct evsel *evsel) +{ + static bool detection_done = false; + struct perf_event_attr attr = { + .type = evsel->core.attr.type, + .config = evsel->core.attr.config, + .disabled = 1, + .sample_type = PERF_SAMPLE_BRANCH_STACK, + .sample_period = 1000, + }; + int old_errno; + + if (detection_done) + return; + + old_errno = errno; + + /* + * Must probe features in the order they were added to the + * perf_event_attr interface. These are PMU specific limitations, + * so we can detect them with the given hardware event and stop at + * the first one that succeeds. + */ + + /* Please add new feature detection here.
*/ + + attr.branch_sample_type = PERF_SAMPLE_BRANCH_COUNTERS; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.branch_counters = true; + pr_debug2("switching off branch counters support\n"); + + attr.branch_sample_type = PERF_SAMPLE_BRANCH_HW_INDEX; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.branch_hw_idx = true; + pr_debug2("switching off branch HW index support\n"); + + attr.branch_sample_type = PERF_SAMPLE_BRANCH_NO_CYCLES | PERF_SAMPLE_BRANCH_NO_FLAGS; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.lbr_flags = true; + pr_debug2_peo("switching off branch sample type no (cycles/flags)\n"); + +found: + detection_done = true; + errno = old_errno; +} + +static bool evsel__probe_aux_action(struct evsel *evsel, struct perf_cpu cpu) +{ + struct perf_event_attr attr = evsel->core.attr; + int old_errno = errno; + + attr.disabled = 1; + attr.aux_start_paused = 1; + + if (__has_attr_feature(&attr, cpu, /*flags=*/0)) { + errno = old_errno; return true; - } else if (!perf_missing_features.weight_struct && - (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) { - perf_missing_features.weight_struct = true; - pr_debug2("switching off weight struct support\n"); + } + + /* + * EOPNOTSUPP means the kernel supports the feature but the PMU does + * not, so keep that distinction if possible. + */ + if (errno != EOPNOTSUPP) + errno = old_errno; + + return false; +} + +static void evsel__detect_missing_aux_action_feature(struct evsel *evsel, struct perf_cpu cpu) +{ + static bool detection_done; + struct evsel *leader; + + /* + * Don't bother probing aux_action if it is not being used or has been + * probed before. + */ + if (!evsel->core.attr.aux_action || detection_done) + return; + + detection_done = true; + + /* + * The leader is an AUX area event. If it has failed, assume the feature + * is not supported. + */ + leader = evsel__leader(evsel); + if (evsel == leader) { + perf_missing_features.aux_action = true; + return; + } + + /* + * AUX area event with aux_action must have been opened successfully + * already, so feature is supported. + */ + if (leader->core.attr.aux_action) + return; + + if (!evsel__probe_aux_action(leader, cpu)) + perf_missing_features.aux_action = true; +} + +static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu cpu) +{ + static bool detection_done = false; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_TASK_CLOCK, + .disabled = 1, + }; + int old_errno; + + evsel__detect_missing_aux_action_feature(evsel, cpu); + + evsel__detect_missing_pmu_features(evsel); + + if (evsel__has_br_stack(evsel)) + evsel__detect_missing_brstack_features(evsel); + + if (detection_done) + goto check; + + old_errno = errno; + + /* + * Must probe features in the order they were added to the + * perf_event_attr interface. These are kernel core limitations, + * not PMU-specific, so we can detect them with a software event + * and stop at the first one that succeeds. + */ + + /* Please add new feature detection here.
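Every probe here follows the same stanza: set the candidate attribute bit, try has_attr_feature(), and on failure record the fallback before clearing the bit again. A sketch for a hypothetical new bit (some_new_bit and its perf_missing_features member are placeholder names, not real fields):

	attr.some_new_bit = 1;
	if (has_attr_feature(&attr, /*flags=*/0))
		goto found;
	perf_missing_features.some_new_bit = true;
	pr_debug2("switching off some_new_bit support\n");
	attr.some_new_bit = 0;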
*/ + + attr.defer_callchain = true; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.defer_callchain = true; + pr_debug2("switching off deferred callchain support\n"); + attr.defer_callchain = false; + + attr.inherit = true; + attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.inherit_sample_read = true; + pr_debug2("Using PERF_SAMPLE_READ / :S modifier is not compatible with inherit, falling back to no-inherit.\n"); + attr.inherit = false; + attr.sample_type = 0; + + attr.read_format = PERF_FORMAT_LOST; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.read_lost = true; + pr_debug2("switching off PERF_FORMAT_LOST support\n"); + attr.read_format = 0; + + attr.sample_type = PERF_SAMPLE_WEIGHT_STRUCT; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.weight_struct = true; + pr_debug2("switching off weight struct support\n"); + attr.sample_type = 0; + + attr.sample_type = PERF_SAMPLE_CODE_PAGE_SIZE; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.code_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support\n"); + attr.sample_type = 0; + + attr.sample_type = PERF_SAMPLE_DATA_PAGE_SIZE; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.data_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support\n"); + attr.sample_type = 0; + + attr.cgroup = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.cgroup = true; + pr_debug2_peo("Kernel has no cgroup sampling support\n"); + attr.cgroup = 0; + + attr.aux_output = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.aux_output = true; + pr_debug2_peo("Kernel has no attr.aux_output support\n"); + attr.aux_output = 0; + + attr.bpf_event = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.bpf = true; + pr_debug2_peo("switching off bpf_event\n"); + attr.bpf_event = 0; + + attr.ksymbol = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.ksymbol = true; + pr_debug2_peo("switching off ksymbol\n"); + attr.ksymbol = 0; + + attr.write_backward = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.write_backward = true; + pr_debug2_peo("switching off write_backward\n"); + attr.write_backward = 0; + + attr.use_clockid = 1; + attr.clockid = CLOCK_MONOTONIC; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.clockid = true; + pr_debug2_peo("switching off clockid\n"); + attr.use_clockid = 0; + attr.clockid = 0; + + if (has_attr_feature(&attr, /*flags=*/PERF_FLAG_FD_CLOEXEC)) + goto found; + perf_missing_features.cloexec = true; + pr_debug2_peo("switching off cloexec flag\n"); + + attr.mmap2 = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.mmap2 = true; + pr_debug2_peo("switching off mmap2\n"); + attr.mmap2 = 0; + + /* set this unconditionally? 
*/ + perf_missing_features.sample_id_all = true; + pr_debug2_peo("switching off sample_id_all\n"); + + attr.inherit = 1; + attr.read_format = PERF_FORMAT_GROUP; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.group_read = true; + pr_debug2_peo("switching off group read\n"); + attr.inherit = 0; + attr.read_format = 0; + +found: + detection_done = true; + errno = old_errno; + +check: + if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) && + perf_missing_features.defer_callchain) return true; - } else if (!perf_missing_features.code_page_size && - (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) { - perf_missing_features.code_page_size = true; - pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n"); - return false; - } else if (!perf_missing_features.data_page_size && - (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) { - perf_missing_features.data_page_size = true; - pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n"); - return false; - } else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { - perf_missing_features.cgroup = true; - pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n"); - return false; - } else if (!perf_missing_features.branch_hw_idx && - (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { - perf_missing_features.branch_hw_idx = true; - pr_debug2("switching off branch HW index support\n"); + + if (evsel->core.attr.inherit && + (evsel->core.attr.sample_type & PERF_SAMPLE_READ) && + perf_missing_features.inherit_sample_read) return true; - } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { - perf_missing_features.aux_output = true; - pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n"); - return false; - } else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) { - perf_missing_features.bpf = true; - pr_debug2_peo("switching off bpf_event\n"); + + if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) && + perf_missing_features.branch_counters) return true; - } else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) { - perf_missing_features.ksymbol = true; - pr_debug2_peo("switching off ksymbol\n"); + + if ((evsel->core.attr.read_format & PERF_FORMAT_LOST) && + perf_missing_features.read_lost) return true; - } else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) { - perf_missing_features.write_backward = true; - pr_debug2_peo("switching off write_backward\n"); - return false; - } else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) { - perf_missing_features.clockid_wrong = true; - pr_debug2_peo("switching off clockid\n"); + + if ((evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && + perf_missing_features.weight_struct) return true; - } else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) { - perf_missing_features.clockid = true; - pr_debug2_peo("switching off use_clockid\n"); + + if (evsel->core.attr.use_clockid && evsel->core.attr.clockid != CLOCK_MONOTONIC && + !perf_missing_features.clockid) { + perf_missing_features.clockid_wrong = true; return true; - } else if (!perf_missing_features.cloexec && (evsel->open_flags & PERF_FLAG_FD_CLOEXEC)) { - perf_missing_features.cloexec = true; - pr_debug2_peo("switching off cloexec flag\n"); + } + + if (evsel->core.attr.use_clockid && perf_missing_features.clockid) return true; - } 
else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) { - perf_missing_features.mmap2 = true; - pr_debug2_peo("switching off mmap2\n"); + + if ((evsel->open_flags & PERF_FLAG_FD_CLOEXEC) && + perf_missing_features.cloexec) return true; - } else if (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) { - if (evsel->pmu == NULL) - evsel->pmu = evsel__find_pmu(evsel); - - if (evsel->pmu) - evsel->pmu->missing_features.exclude_guest = true; - else { - /* we cannot find PMU, disable attrs now */ - evsel->core.attr.exclude_host = false; - evsel->core.attr.exclude_guest = false; - } - if (evsel->exclude_GH) { - pr_debug2_peo("PMU has no exclude_host/guest support, bailing out\n"); - return false; - } - if (!perf_missing_features.exclude_guest) { - perf_missing_features.exclude_guest = true; - pr_debug2_peo("switching off exclude_guest, exclude_host\n"); - } + if (evsel->core.attr.mmap2 && perf_missing_features.mmap2) return true; - } else if (!perf_missing_features.sample_id_all) { - perf_missing_features.sample_id_all = true; - pr_debug2_peo("switching off sample_id_all\n"); + + if ((evsel->core.attr.branch_sample_type & (PERF_SAMPLE_BRANCH_NO_FLAGS | + PERF_SAMPLE_BRANCH_NO_CYCLES)) && + perf_missing_features.lbr_flags) return true; - } else if (!perf_missing_features.lbr_flags && - (evsel->core.attr.branch_sample_type & - (PERF_SAMPLE_BRANCH_NO_CYCLES | - PERF_SAMPLE_BRANCH_NO_FLAGS))) { - perf_missing_features.lbr_flags = true; - pr_debug2_peo("switching off branch sample type no (cycles/flags)\n"); + + if (evsel->core.attr.inherit && (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && + perf_missing_features.group_read) return true; - } else if (!perf_missing_features.group_read && - evsel->core.attr.inherit && - (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && - evsel__is_group_leader(evsel)) { - perf_missing_features.group_read = true; - pr_debug2_peo("switching off group read\n"); + + if (evsel->core.attr.ksymbol && perf_missing_features.ksymbol) return true; - } else { - return false; - } -} -bool evsel__increase_rlimit(enum rlimit_action *set_rlimit) -{ - int old_errno; - struct rlimit l; + if (evsel->core.attr.bpf_event && perf_missing_features.bpf) + return true; - if (*set_rlimit < INCREASED_MAX) { - old_errno = errno; + if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) && + perf_missing_features.branch_hw_idx) + return true; - if (getrlimit(RLIMIT_NOFILE, &l) == 0) { - if (*set_rlimit == NO_CHANGE) { - l.rlim_cur = l.rlim_max; - } else { - l.rlim_cur = l.rlim_max + 1000; - l.rlim_max = l.rlim_cur; - } - if (setrlimit(RLIMIT_NOFILE, &l) == 0) { - (*set_rlimit) += 1; - errno = old_errno; - return true; - } - } - errno = old_errno; - } + if (evsel->core.attr.sample_id_all && perf_missing_features.sample_id_all) + return true; return false; } @@ -2020,10 +2669,16 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int idx, thread, nthreads; int pid = -1, err, old_errno; enum rlimit_action set_rlimit = NO_CHANGE; + struct perf_cpu cpu; + + if (evsel__is_retire_lat(evsel)) { + err = evsel__tpebs_open(evsel); + goto out; + } err = __evsel__prepare_open(evsel, cpus, threads); if (err) - return err; + goto out; if (cpus == NULL) cpus = empty_cpu_map; @@ -2042,7 +2697,27 @@ fallback_missing_features: pr_debug3("Opening: %s\n", evsel__name(evsel)); display_attr(&evsel->core.attr); + if (evsel__is_tool(evsel)) { + err = evsel__tool_pmu_open(evsel, threads, + start_cpu_map_idx, + end_cpu_map_idx); + goto out; + } + if 
(evsel__is_hwmon(evsel)) { + err = evsel__hwmon_pmu_open(evsel, threads, + start_cpu_map_idx, + end_cpu_map_idx); + goto out; + } + if (evsel__is_drm(evsel)) { + err = evsel__drm_pmu_open(evsel, threads, + start_cpu_map_idx, + end_cpu_map_idx); + goto out; + } + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { + cpu = perf_cpu_map__cpu(cpus, idx); for (thread = 0; thread < nthreads; thread++) { int fd, group_fd; @@ -2061,14 +2736,11 @@ retry_open: goto out_close; } - test_attr__ready(); - /* Debug message used by test scripts */ pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", - pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags); + pid, cpu.cpu, group_fd, evsel->open_flags); - fd = sys_perf_event_open(&evsel->core.attr, pid, - perf_cpu_map__cpu(cpus, idx).cpu, + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, group_fd, evsel->open_flags); FD(evsel, idx, thread) = fd; @@ -2083,9 +2755,8 @@ retry_open: bpf_counter__install_pe(evsel, idx, fd); - if (unlikely(test_attr__enabled)) { - test_attr__open(&evsel->core.attr, pid, - perf_cpu_map__cpu(cpus, idx), + if (unlikely(test_attr__enabled())) { + test_attr__open(&evsel->core.attr, pid, cpu, fd, group_fd, evsel->open_flags); } @@ -2121,12 +2792,10 @@ retry_open: } } - return 0; + err = 0; + goto out; try_fallback: - if (evsel__precise_ip_fallback(evsel)) - goto retry_open; - if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus), idx, threads, thread, err)) { /* We just removed 1 thread, so lower the upper nthreads limit. */ @@ -2140,14 +2809,15 @@ try_fallback: * perf stat needs between 5 and 22 fds per CPU. When we run out * of them try to increase the limits. */ - if (err == -EMFILE && evsel__increase_rlimit(&set_rlimit)) + if (err == -EMFILE && rlimit__increase_nofile(&set_rlimit)) goto retry_open; - if (err != -EINVAL || idx > 0 || thread > 0) - goto out_close; - - if (evsel__detect_missing_features(evsel)) + if (err == -EINVAL && evsel__detect_missing_features(evsel, cpu)) goto fallback_missing_features; + + if (evsel__precise_ip_fallback(evsel)) + goto retry_open; + out_close: if (err) threads->err_thread = thread; @@ -2162,6 +2832,9 @@ out_close: thread = nthreads; } while (--idx >= 0); errno = old_errno; +out: + if (err) + evsel->supported = false; return err; } @@ -2173,21 +2846,38 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, void evsel__close(struct evsel *evsel) { + if (evsel__is_retire_lat(evsel)) + evsel__tpebs_close(evsel); perf_evsel__close(&evsel->core); perf_evsel__free_id(&evsel->core); } -int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx) +int evsel__open_per_cpu_and_thread(struct evsel *evsel, + struct perf_cpu_map *cpus, int cpu_map_idx, + struct perf_thread_map *threads) { if (cpu_map_idx == -1) - return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus)); + return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus)); - return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1); + return evsel__open_cpu(evsel, cpus, threads, cpu_map_idx, cpu_map_idx + 1); +} + +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx) +{ + struct perf_thread_map *threads = thread_map__new_by_tid(-1); + int ret = evsel__open_per_cpu_and_thread(evsel, cpus, cpu_map_idx, threads); + + perf_thread_map__put(threads); + return ret; } int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads) { - return evsel__open(evsel, NULL, 
threads); + struct perf_cpu_map *cpus = perf_cpu_map__new_any_cpu(); + int ret = evsel__open_per_cpu_and_thread(evsel, cpus, -1, threads); + + perf_cpu_map__put(cpus); + return ret; } static int perf_evsel__parse_id_sample(const struct evsel *evsel, @@ -2280,11 +2970,18 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size) return 0; } -void __weak arch_perf_parse_sample_weight(struct perf_sample *data, - const __u64 *array, - u64 type __maybe_unused) +static void perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type) { - data->weight = *array; + union perf_sample_weight weight; + + weight.full = *array; + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { + data->weight = weight.var1_dw; + data->ins_lat = weight.var2_w; + data->weight3 = weight.var3_w; + } else { + data->weight = weight.full; + } } u64 evsel__bitfield_swap_branch_flags(u64 value) @@ -2341,6 +3038,52 @@ u64 evsel__bitfield_swap_branch_flags(u64 value) return new_val; } +static inline bool evsel__has_branch_counters(const struct evsel *evsel) +{ + struct evsel *leader = evsel__leader(evsel); + + /* The branch counters feature only supports group */ + if (!leader || !evsel->evlist) + return false; + + if (evsel->evlist->nr_br_cntr < 0) + evlist__update_br_cntr(evsel->evlist); + + if (leader->br_cntr_nr > 0) + return true; + + return false; +} + +static int __set_offcpu_sample(struct perf_sample *data) +{ + u64 *array = data->raw_data; + u32 max_size = data->raw_size, *p32; + const void *endp = (void *)array + max_size; + + if (array == NULL) + return -EFAULT; + + OVERFLOW_CHECK_u64(array); + p32 = (void *)array++; + data->pid = p32[0]; + data->tid = p32[1]; + + OVERFLOW_CHECK_u64(array); + data->period = *array++; + + OVERFLOW_CHECK_u64(array); + data->callchain = (struct ip_callchain *)array++; + OVERFLOW_CHECK(array, data->callchain->nr * sizeof(u64), max_size); + data->ip = data->callchain->ips[1]; + array += data->callchain->nr; + + OVERFLOW_CHECK_u64(array); + data->cgroup = *array; + + return 0; +} + int evsel__parse_sample(struct evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -2363,10 +3106,23 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, data->period = evsel->core.attr.sample_period; data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; data->misc = event->header.misc; - data->id = -1ULL; data->data_src = PERF_MEM_DATA_SRC_NONE; data->vcpu = -1; + if (event->header.type == PERF_RECORD_CALLCHAIN_DEFERRED) { + const u64 max_callchain_nr = UINT64_MAX / sizeof(u64); + + data->callchain = (struct ip_callchain *)&event->callchain_deferred.nr; + if (data->callchain->nr > max_callchain_nr) + return -EFAULT; + + data->deferred_cookie = event->callchain_deferred.cookie; + + if (evsel->core.attr.sample_id_all) + perf_evsel__parse_id_sample(evsel, event, data); + return 0; + } + if (event->header.type != PERF_RECORD_SAMPLE) { if (!evsel->core.attr.sample_id_all) return 0; @@ -2491,12 +3247,25 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_CALLCHAIN) { const u64 max_callchain_nr = UINT64_MAX / sizeof(u64); + u64 callchain_nr; OVERFLOW_CHECK_u64(array); data->callchain = (struct ip_callchain *)array++; - if (data->callchain->nr > max_callchain_nr) + callchain_nr = data->callchain->nr; + if (callchain_nr > max_callchain_nr) return -EFAULT; - sz = data->callchain->nr * sizeof(u64); + sz = callchain_nr * sizeof(u64); + /* + * Save the cookie for the deferred user callchain. 
The last 2 + * entries in the callchain should be the context marker and the + * cookie. The cookie will be used to match PERF_RECORD_ + * CALLCHAIN_DEFERRED later. + */ + if (evsel->core.attr.defer_callchain && callchain_nr >= 2 && + data->callchain->ips[callchain_nr - 2] == PERF_CONTEXT_USER_DEFERRED) { + data->deferred_cookie = data->callchain->ips[callchain_nr - 1]; + data->deferred_callchain = true; + } OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; } @@ -2574,20 +3343,30 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; + + if (evsel__has_branch_counters(evsel)) { + data->branch_stack_cntr = (u64 *)array; + sz = data->branch_stack->nr * sizeof(u64); + + OVERFLOW_CHECK(array, sz, max_size); + array = (void *)array + sz; + } } if (type & PERF_SAMPLE_REGS_USER) { + struct regs_dump *regs = perf_sample__user_regs(data); + OVERFLOW_CHECK_u64(array); - data->user_regs.abi = *array; + regs->abi = *array; array++; - if (data->user_regs.abi) { + if (regs->abi) { u64 mask = evsel->core.attr.sample_regs_user; sz = hweight64(mask) * sizeof(u64); OVERFLOW_CHECK(array, sz, max_size); - data->user_regs.mask = mask; - data->user_regs.regs = (u64 *)array; + regs->mask = mask; + regs->regs = (u64 *)array; array = (void *)array + sz; } } @@ -2615,7 +3394,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_WEIGHT_TYPE) { OVERFLOW_CHECK_u64(array); - arch_perf_parse_sample_weight(data, array, type); + perf_parse_sample_weight(data, array, type); array++; } @@ -2631,19 +3410,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, array++; } - data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE; if (type & PERF_SAMPLE_REGS_INTR) { + struct regs_dump *regs = perf_sample__intr_regs(data); + OVERFLOW_CHECK_u64(array); - data->intr_regs.abi = *array; + regs->abi = *array; array++; - if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) { + if (regs->abi != PERF_SAMPLE_REGS_ABI_NONE) { u64 mask = evsel->core.attr.sample_regs_intr; sz = hweight64(mask) * sizeof(u64); OVERFLOW_CHECK(array, sz, max_size); - data->intr_regs.mask = mask; - data->intr_regs.regs = (u64 *)array; + regs->mask = mask; + regs->regs = (u64 *)array; array = (void *)array + sz; } } @@ -2685,6 +3465,9 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, array = (void *)array + sz; } + if (evsel__is_offcpu_event(evsel)) + return __set_offcpu_sample(data); + return 0; } @@ -2731,7 +3514,7 @@ int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, return 0; } -u16 evsel__id_hdr_size(struct evsel *evsel) +u16 evsel__id_hdr_size(const struct evsel *evsel) { u64 sample_type = evsel->core.attr.sample_type; u16 size = 0; @@ -2760,7 +3543,16 @@ u16 evsel__id_hdr_size(struct evsel *evsel) #ifdef HAVE_LIBTRACEEVENT struct tep_format_field *evsel__field(struct evsel *evsel, const char *name) { - return tep_find_field(evsel->tp_format, name); + struct tep_event *tp_format = evsel__tp_format(evsel); + + return tp_format ? tep_find_field(tp_format, name) : NULL; +} + +struct tep_format_field *evsel__common_field(struct evsel *evsel, const char *name) +{ + struct tep_event *tp_format = evsel__tp_format(evsel); + + return tp_format ? 
tep_find_common_field(tp_format, name) : NULL; } void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name) @@ -2826,14 +3618,53 @@ u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *n { struct tep_format_field *field = evsel__field(evsel, name); - if (!field) - return 0; + return field ? format_field__intval(field, sample, evsel->needs_swap) : 0; +} + +u64 evsel__intval_common(struct evsel *evsel, struct perf_sample *sample, const char *name) +{ + struct tep_format_field *field = evsel__common_field(evsel, name); return field ? format_field__intval(field, sample, evsel->needs_swap) : 0; } + +char evsel__taskstate(struct evsel *evsel, struct perf_sample *sample, const char *name) +{ + static struct tep_format_field *prev_state_field; + static const char *states; + struct tep_format_field *field; + unsigned long long val; + unsigned int bit; + char state = '?'; /* '?' denotes unknown task state */ + + field = evsel__field(evsel, name); + + if (!field) + return state; + + if (!states || field != prev_state_field) { + states = parse_task_states(field); + if (!states) + return state; + prev_state_field = field; + } + + /* + * Note since the kernel exposes TASK_REPORT_MAX to userspace + * to denote the 'preempted' state, we might as well report + * 'R' for this case, which makes sense to users as well. + * + * We can change this if we have a good reason in the future. + */ + val = evsel__intval(evsel, sample, name); + bit = val ? ffs(val) : 0; + state = (!bit || bit > strlen(states)) ? 'R' : states[bit-1]; + return state; +} #endif -bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) +bool evsel__fallback(struct evsel *evsel, struct target *target, int err, + char *msg, size_t msgsize) { int paranoid; @@ -2841,18 +3672,19 @@ bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) evsel->core.attr.type == PERF_TYPE_HARDWARE && evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) { /* - * If it's cycles then fall back to hrtimer based - * cpu-clock-tick sw counter, which is always available even if - * no PMU support. + * If it's cycles then fall back to hrtimer based cpu-clock sw + * counter, which is always available even if no PMU support. * * PPC returns ENXIO until 2.6.37 (behavior changed with commit * b0a873e). */ - scnprintf(msg, msgsize, "%s", -"The cycles event is not supported, trying to fall back to cpu-clock-ticks"); - evsel->core.attr.type = PERF_TYPE_SOFTWARE; - evsel->core.attr.config = PERF_COUNT_SW_CPU_CLOCK; + evsel->core.attr.type = PERF_TYPE_SOFTWARE; + evsel->core.attr.config = target__has_cpu(target) + ? PERF_COUNT_SW_CPU_CLOCK + : PERF_COUNT_SW_TASK_CLOCK; + scnprintf(msg, msgsize, + "The cycles event is not supported, trying to fall back to %s", + target__has_cpu(target) ? "cpu-clock" : "task-clock"); zfree(&evsel->name); return true; @@ -2864,7 +3696,7 @@ bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) /* If event has exclude user then don't exclude kernel. */ if (evsel->core.attr.exclude_user) - return false; + goto no_fallback; /* Is there already the separator in the name.
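* For example, plain "cycles" gains a ":" separator and becomes "cycles:u", while "pmu/event/" already carries one and simply becomes "pmu/event/u".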
*/ if (strchr(name, '/') || @@ -2872,7 +3704,7 @@ bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) sep = ""; if (asprintf(&new_name, "%s%su", name, sep) < 0) - return false; + goto no_fallback; free(evsel->name); evsel->name = new_name; @@ -2883,8 +3715,31 @@ bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) evsel->core.attr.exclude_hv = 1; return true; - } + } else if (err == EOPNOTSUPP && !evsel->core.attr.exclude_guest && + !evsel->exclude_GH) { + const char *name = evsel__name(evsel); + char *new_name; + const char *sep = ":"; + + /* Is there already the separator in the name. */ + if (strchr(name, '/') || + (strchr(name, ':') && !evsel->is_libpfm_event)) + sep = ""; + + if (asprintf(&new_name, "%s%sH", name, sep) < 0) + goto no_fallback; + + free(evsel->name); + evsel->name = new_name; + /* Apple M1 requires exclude_guest */ + scnprintf(msg, msgsize, "Trying to fall back to excluding guest samples"); + evsel->core.attr.exclude_guest = 1; + return true; + } +no_fallback: + scnprintf(msg, msgsize, "No fallback found for '%s' for error %d", + evsel__name(evsel), err); return false; } @@ -2924,7 +3779,80 @@ static bool find_process(const char *name) return ret ? false : true; } +static int dump_perf_event_processes(char *msg, size_t size) +{ + DIR *proc_dir; + struct dirent *proc_entry; + int printed = 0; + + proc_dir = opendir(procfs__mountpoint()); + if (!proc_dir) + return 0; + + /* Walk through the /proc directory. */ + while ((proc_entry = readdir(proc_dir)) != NULL) { + char buf[256]; + DIR *fd_dir; + struct dirent *fd_entry; + int fd_dir_fd; + + if (proc_entry->d_type != DT_DIR || + !isdigit(proc_entry->d_name[0]) || + strlen(proc_entry->d_name) > sizeof(buf) - 4) + continue; + + scnprintf(buf, sizeof(buf), "%s/fd", proc_entry->d_name); + fd_dir_fd = openat(dirfd(proc_dir), buf, O_DIRECTORY); + if (fd_dir_fd == -1) + continue; + fd_dir = fdopendir(fd_dir_fd); + if (!fd_dir) { + close(fd_dir_fd); + continue; + } + while ((fd_entry = readdir(fd_dir)) != NULL) { + ssize_t link_size; + + if (fd_entry->d_type != DT_LNK) + continue; + link_size = readlinkat(fd_dir_fd, fd_entry->d_name, buf, sizeof(buf)); + if (link_size < 0) + continue; + /* Take care as readlink doesn't null terminate the string. 
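* That is why the comparison below is bounded by link_size rather than done with strcmp().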
*/ + if (!strncmp(buf, "anon_inode:[perf_event]", link_size)) { + int cmdline_fd; + ssize_t cmdline_size; + + scnprintf(buf, sizeof(buf), "%s/cmdline", proc_entry->d_name); + cmdline_fd = openat(dirfd(proc_dir), buf, O_RDONLY); + if (cmdline_fd == -1) + continue; + cmdline_size = read(cmdline_fd, buf, sizeof(buf) - 1); + close(cmdline_fd); + if (cmdline_size < 0) + continue; + buf[cmdline_size] = '\0'; + for (ssize_t i = 0; i < cmdline_size; i++) { + if (buf[i] == '\0') + buf[i] = ' '; + } + + if (printed == 0) + printed += scnprintf(msg, size, "Possible processes:\n"); + + printed += scnprintf(msg + printed, size - printed, + "%s %s\n", proc_entry->d_name, buf); + break; + } + } + closedir(fd_dir); + } + closedir(proc_dir); + return printed; +} + int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused, + int err __maybe_unused, char *msg __maybe_unused, size_t size __maybe_unused) { @@ -2934,6 +3862,7 @@ int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused, int evsel__open_strerror(struct evsel *evsel, struct target *target, int err, char *msg, size_t size) { + struct perf_pmu *pmu; char sbuf[STRERR_BUFSIZE]; int printed = 0, enforced = 0; int ret; @@ -2957,7 +3886,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, printed += scnprintf(msg, size, "No permission to enable %s event.\n\n", evsel__name(evsel)); - return scnprintf(msg + printed, size - printed, + return printed + scnprintf(msg + printed, size - printed, "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n" "access to performance monitoring and observability operations for processes\n" "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n" @@ -3002,6 +3931,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "%s: PMU Hardware doesn't support 'aux_output' feature", evsel__name(evsel)); + if (evsel->core.attr.aux_action) + return scnprintf(msg, size, + "%s: PMU Hardware doesn't support 'aux_action' feature", + evsel__name(evsel)); if (evsel->core.attr.sample_period != 0) return scnprintf(msg, size, "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'", @@ -3014,12 +3947,21 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "%s", "No hardware sampling interrupt available.\n"); #endif + if (!target__has_cpu(target)) + return scnprintf(msg, size, + "Unsupported event (%s) in per-thread mode, enable system wide with '-a'.", + evsel__name(evsel)); break; case EBUSY: if (find_process("oprofiled")) return scnprintf(msg, size, "The PMU counters are busy/taken by another profiler.\n" "We found oprofile daemon running, please stop it and try again."); + printed += scnprintf( + msg, size, + "The PMU %s counters are busy and in use by another process.\n", + evsel->pmu ? 
evsel->pmu->name : ""); + return printed + dump_perf_event_processes(msg + printed, size - printed); break; case EINVAL: if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size) @@ -3032,9 +3974,12 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "clockid feature not supported."); if (perf_missing_features.clockid_wrong) return scnprintf(msg, size, "wrong clockid (%d).", clockid); + if (perf_missing_features.aux_action) + return scnprintf(msg, size, "The 'aux_action' feature is not supported, update the kernel."); if (perf_missing_features.aux_output) return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel."); - if (!target__has_cpu(target)) + pmu = evsel__find_pmu(evsel); + if (!pmu->is_core && !target__has_cpu(target)) return scnprintf(msg, size, "Invalid event (%s) in per-thread mode, enable system wide with '-a'.", evsel__name(evsel)); @@ -3047,27 +3992,38 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, break; } - ret = arch_evsel__open_strerror(evsel, msg, size); + ret = arch_evsel__open_strerror(evsel, err, msg, size); if (ret) return ret; return scnprintf(msg, size, "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n" - "/bin/dmesg | grep -i perf may provide additional information.\n", + "\"dmesg | grep -i perf\" may provide additional information.\n", err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel)); } +struct perf_session *evsel__session(struct evsel *evsel) +{ + return evsel && evsel->evlist ? evsel->evlist->session : NULL; +} + struct perf_env *evsel__env(struct evsel *evsel) { - if (evsel && evsel->evlist && evsel->evlist->env) - return evsel->evlist->env; - return &perf_env; + struct perf_session *session = evsel__session(evsel); + + return session ? perf_session__env(session) : NULL; } static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) { int cpu_map_idx, thread; + if (evsel__is_retire_lat(evsel)) + return 0; + + if (perf_pmu__kind(evsel->pmu) != PERF_PMU_KIND_PE) + return 0; + for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) { for (thread = 0; thread < xyarray__max_y(evsel->core.fd); thread++) { @@ -3113,14 +4069,16 @@ void evsel__zero_per_pkg(struct evsel *evsel) */ bool evsel__is_hybrid(const struct evsel *evsel) { - if (perf_pmus__num_core_pmus() == 1) + if (!evsel->core.is_pmu_core) return false; - return evsel->core.is_pmu_core; + return perf_pmus__num_core_pmus() > 1; } struct evsel *evsel__leader(const struct evsel *evsel) { + if (evsel->core.leader == NULL) + return NULL; return container_of(evsel->core.leader, struct evsel, core); } @@ -3169,3 +4127,152 @@ void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader) leader->core.nr_members--; } } + +bool evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config) +{ + struct evsel *evsel; + + if (counter->needs_uniquify) { + /* Already set. */ + return true; + } + + if (counter->use_config_name || counter->is_libpfm_event) { + /* Original name will be used. */ + return false; + } + + if (!config->hybrid_merge && evsel__is_hybrid(counter)) { + /* Unique hybrid counters necessary. */ + counter->needs_uniquify = true; + return true; + } + + if (counter->core.attr.type < PERF_TYPE_MAX && counter->core.attr.type != PERF_TYPE_RAW) { + /* Legacy event, don't uniquify. 
*/ + return false; + } + + if (counter->pmu && counter->pmu->is_core && + counter->alternate_hw_config != PERF_COUNT_HW_MAX) { + /* A sysfs or json event replacing a legacy event, don't uniquify. */ + return false; + } + + if (config->aggr_mode == AGGR_NONE) { + /* Always unique with no aggregation. */ + counter->needs_uniquify = true; + return true; + } + + if (counter->first_wildcard_match != NULL) { + /* + * If stats are merged then only the first_wildcard_match is + * displayed, there is no need to uniquify this evsel as the + * name won't be shown. + */ + return false; + } + + /* + * Do other non-merged events in the evlist have the same name? If so + * uniquify is necessary. + */ + evlist__for_each_entry(counter->evlist, evsel) { + if (evsel == counter || evsel->first_wildcard_match || evsel->pmu == counter->pmu) + continue; + + if (evsel__name_is(counter, evsel__name(evsel))) { + counter->needs_uniquify = true; + return true; + } + } + return false; +} + +void evsel__uniquify_counter(struct evsel *counter) +{ + const char *name, *pmu_name, *config; + char *new_name; + int len, ret; + + /* No uniquification necessary. */ + if (!counter->needs_uniquify) + return; + + /* The evsel was already uniquified. */ + if (counter->uniquified_name) + return; + + /* Avoid checking to uniquify twice. */ + counter->uniquified_name = true; + + name = evsel__name(counter); + config = strchr(name, '/'); + pmu_name = counter->pmu->name; + + /* Already prefixed by the PMU name? */ + len = pmu_name_len_no_suffix(pmu_name); + + if (!strncmp(name, pmu_name, len)) { + /* + * If the PMU name is there, then there is no sense in not + * having a slash. Do this for robustness. + */ + if (config == NULL) + config = name - 1; + + ret = asprintf(&new_name, "%s/%s", pmu_name, config + 1); + } else if (config) { + len = config - name; + if (config[1] == '/') { + /* case: event// */ + ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 2); + } else { + /* case: event/.../ */ + ret = asprintf(&new_name, "%s/%.*s,%s", pmu_name, len, name, config + 1); + } + } else { + config = strchr(name, ':'); + if (config) { + /* case: event:.. */ + len = config - name; + + ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 1); + } else { + /* case: event */ + ret = asprintf(&new_name, "%s/%s/", pmu_name, name); + } + } + if (ret > 0) { + free(counter->name); + counter->name = new_name; + } else { + /* ENOMEM from asprintf. */ + counter->uniquified_name = false; + } +} + +void evsel__warn_user_requested_cpus(struct evsel *evsel, struct perf_cpu_map *user_requested_cpus) +{ + struct perf_cpu_map *intersect, *online = NULL; + const struct perf_pmu *pmu = evsel__find_pmu(evsel); + + if (pmu && pmu->is_core) { + intersect = perf_cpu_map__intersect(pmu->cpus, user_requested_cpus); + } else { + online = cpu_map__online(); + intersect = perf_cpu_map__intersect(online, user_requested_cpus); + } + if (!perf_cpu_map__equal(intersect, user_requested_cpus)) { + char buf1[128]; + char buf2[128]; + + cpu_map__snprint(user_requested_cpus, buf1, sizeof(buf1)); + cpu_map__snprint(online ?: pmu->cpus, buf2, sizeof(buf2)); + pr_warning("WARNING: A requested CPU in '%s' is not supported by PMU '%s' (CPUs %s) for event '%s'\n", + buf1, pmu ? 
pmu->name : "cpu", buf2, evsel__name(evsel)); + } + perf_cpu_map__put(intersect); + perf_cpu_map__put(online); +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 848534ec74fa..a08130ff2e47 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -11,10 +11,12 @@ #include <perf/evsel.h> #include "symbol_conf.h" #include "pmus.h" +#include "pmu.h" struct bpf_object; struct cgroup; struct perf_counts; +struct perf_stat_config; struct perf_stat_evsel; union perf_event; struct bpf_counter_ops; @@ -22,25 +24,9 @@ struct target; struct hashmap; struct bperf_leader_bpf; struct bperf_follower_bpf; -struct perf_pmu; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); -enum perf_tool_event { - PERF_TOOL_NONE = 0, - PERF_TOOL_DURATION_TIME = 1, - PERF_TOOL_USER_TIME = 2, - PERF_TOOL_SYSTEM_TIME = 3, - - PERF_TOOL_MAX, -}; - -const char *perf_tool_event__to_str(enum perf_tool_event ev); -enum perf_tool_event perf_tool_event__from_str(const char *str); - -#define perf_tool_event__for_each_event(ev) \ - for ((ev) = PERF_TOOL_DURATION_TIME; (ev) < PERF_TOOL_MAX; ev++) - /** struct evsel - event selector * * @evlist - evlist this evsel is in, if it is in one. @@ -72,9 +58,10 @@ struct evsel { struct { char *name; char *group_name; - const char *pmu_name; const char *group_pmu_name; #ifdef HAVE_LIBTRACEEVENT + char *tp_sys; + char *tp_name; struct tep_event *tp_format; #endif char *filter; @@ -83,7 +70,11 @@ struct evsel { const char *unit; struct cgroup *cgrp; const char *metric_id; - enum perf_tool_event tool_event; + /* + * This point to the first evsel with the same name, intended to store the + * aggregated counts in aggregation mode. + */ + struct evsel *first_wildcard_match; /* parse modifier helper */ int exclude_GH; int sample_read; @@ -92,22 +83,23 @@ struct evsel { bool percore; bool precise_max; bool is_libpfm_event; - bool auto_merge_stats; bool collect_stat; bool weak_group; bool bpf_counter; bool use_config_name; bool skippable; + bool retire_lat; + bool dont_regroup; int bpf_fd; struct bpf_object *bpf_obj; struct list_head config_terms; + u64 alternate_hw_config; }; /* * metric fields are similar, but needs more care as they can have * references to other metric (evsel). */ - struct evsel **metric_events; struct evsel *metric_leader; void *handler; @@ -127,13 +119,14 @@ struct evsel { bool ignore_missing_thread; bool forced_leader; bool cmdline_group_boundary; - bool merged_stat; bool reset_group; - bool errored; bool needs_auxtrace_mmap; bool default_metricgroup; /* A member of the Default metricgroup */ + bool default_show_events; /* If a default group member, show the event */ + bool needs_uniquify; struct hashmap *per_pkg_mask; int err; + int script_output_type; struct { evsel__sb_cb_t *cb; void *data; @@ -148,6 +141,20 @@ struct evsel { __u64 synth_sample_type; /* + * Store the branch counter related information. + * br_cntr_idx: The idx of the branch counter event in the evlist + * br_cntr_nr: The number of the branch counter event in the group + * (Only available for the leader event) + * abbr_name: The abbreviation name assigned to an event which is + * logged by the branch counter. + * The abbr name is from A to Z9. NA is applied if out + * of the range. + */ + int br_cntr_idx; + int br_cntr_nr; + char abbr_name[3]; + + /* * bpf_counter_ops serves two use cases: * 1. perf-stat -b counting events used byBPF programs * 2. 
perf-stat --use-bpf use BPF programs to aggregate counts @@ -168,8 +175,28 @@ struct evsel { unsigned long open_flags; int precise_ip_original; - /* for missing_features */ + /* The PMU the event is from. Used for missing_features, PMU name, etc. */ struct perf_pmu *pmu; + + /* For tool events */ + /* Beginning time subtracted when the counter is read. */ + union { + /* Defaults for retirement latency events. */ + struct _retirement_latency { + double mean; + double min; + double max; + } retirement_latency; + /* duration_time is a single global time. */ + __u64 start_time; + /* + * user_time and system_time read an initial value potentially + * per-CPU or per-pid. + */ + struct xyarray *start_times; + }; + /* Is the tool's fd for /proc/pid/stat or /proc/stat. */ + bool pid_stat; }; struct perf_missing_features { @@ -191,6 +218,10 @@ struct perf_missing_features { bool code_page_size; bool weight_struct; bool read_lost; + bool branch_counters; + bool aux_action; + bool inherit_sample_read; + bool defer_callchain; }; extern struct perf_missing_features perf_missing_features; @@ -217,6 +248,7 @@ int evsel__object_config(size_t object_size, void (*fini)(struct evsel *evsel)); struct perf_pmu *evsel__find_pmu(const struct evsel *evsel); +const char *evsel__pmu_name(const struct evsel *evsel); bool evsel__is_aux_event(const struct evsel *evsel); struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx); @@ -226,32 +258,31 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr) return evsel__new_idx(attr, 0); } -struct evsel *evsel__clone(struct evsel *orig); +struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig); int copy_config_terms(struct list_head *dst, struct list_head *src); void free_config_terms(struct list_head *config_terms); -#ifdef HAVE_LIBTRACEEVENT -struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx); - /* * Returns pointer with encoded error via <linux/err.h> interface. 
*/ +struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format); static inline struct evsel *evsel__newtp(const char *sys, const char *name) { - return evsel__newtp_idx(sys, name, 0); + return evsel__newtp_idx(sys, name, 0, true); } -#endif #ifdef HAVE_LIBTRACEEVENT -struct tep_event *event_format__new(const char *sys, const char *name); +struct tep_event *evsel__tp_format(struct evsel *evsel); #endif void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx); void evsel__exit(struct evsel *evsel); void evsel__delete(struct evsel *evsel); +void evsel__set_priv_destructor(void (*destructor)(void *priv)); + struct callchain_param; void evsel__config(struct evsel *evsel, struct record_opts *opts, @@ -290,9 +321,9 @@ const char *evsel__name(struct evsel *evsel); bool evsel__name_is(struct evsel *evsel, const char *name); const char *evsel__metric_id(const struct evsel *evsel); -static inline bool evsel__is_tool(const struct evsel *evsel) +static inline bool evsel__is_retire_lat(const struct evsel *evsel) { - return evsel->tool_event != PERF_TOOL_NONE; + return evsel->retire_lat; } const char *evsel__group_name(struct evsel *evsel); @@ -311,7 +342,8 @@ void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier); void arch_evsel__set_sample_weight(struct evsel *evsel); void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr); -int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size); +int arch_evsel__open_strerror(struct evsel *evsel, int err, char *msg, size_t size); +void arch_evsel__apply_ratio_to_prev(struct evsel *evsel, struct perf_event_attr *attr); int evsel__set_filter(struct evsel *evsel, const char *filter); int evsel__append_tp_filter(struct evsel *evsel, const char *filter); @@ -321,6 +353,9 @@ int evsel__enable(struct evsel *evsel); int evsel__disable(struct evsel *evsel); int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx); +int evsel__open_per_cpu_and_thread(struct evsel *evsel, + struct perf_cpu_map *cpus, int cpu_map_idx, + struct perf_thread_map *threads); int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx); int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads); int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, @@ -328,10 +363,6 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, void evsel__close(struct evsel *evsel); int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); -bool evsel__detect_missing_features(struct evsel *evsel); - -enum rlimit_action { NO_CHANGE, SET_TO_MAX, INCREASED_MAX }; -bool evsel__increase_rlimit(enum rlimit_action *set_rlimit); bool evsel__precise_ip_fallback(struct evsel *evsel); @@ -340,6 +371,8 @@ struct perf_sample; #ifdef HAVE_LIBTRACEEVENT void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name); u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name); +u64 evsel__intval_common(struct evsel *evsel, struct perf_sample *sample, const char *name); +char evsel__taskstate(struct evsel *evsel, struct perf_sample *sample, const char *name); static inline char *evsel__strval(struct evsel *evsel, struct perf_sample *sample, const char *name) { @@ -352,27 +385,12 @@ struct tep_format_field; u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample, bool needs_swap); struct tep_format_field *evsel__field(struct evsel 
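Replacing direct tp_format access with the evsel__tp_format() accessor above is a lazy-initialization pattern: parse on first use, cache, and let callers tolerate failure. A generic sketch, with a stub parse_format() standing in for the libtraceevent lookup:

#include <stdio.h>
#include <stdlib.h>

struct format { int dummy; };

struct event {
	const char *sys, *name;
	struct format *fmt;	/* NULL until first use */
};

/* Stub for the real tracefs/libtraceevent parse. */
static struct format *parse_format(const char *sys, const char *name)
{
	(void)sys; (void)name;
	return calloc(1, sizeof(struct format));
}

/* Accessor: parse lazily on first call and cache; may return NULL. */
static struct format *event__format(struct event *ev)
{
	if (!ev->fmt)
		ev->fmt = parse_format(ev->sys, ev->name);
	return ev->fmt;
}

int main(void)
{
	struct event ev = { "sched", "sched_switch", NULL };

	printf("parsed: %p\n", (void *)event__format(&ev));
	free(ev.fmt);
	return 0;
}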
*evsel, const char *name); +struct tep_format_field *evsel__common_field(struct evsel *evsel, const char *name); -static inline bool __evsel__match(const struct evsel *evsel, u32 type, u64 config) -{ - if (evsel->core.attr.type != type) - return false; - - if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) && - perf_pmus__supports_extended_type()) - return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config; - - return evsel->core.attr.config == config; -} +bool __evsel__match(const struct evsel *evsel, u32 type, u64 config); #define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t, PERF_COUNT_##c) -static inline bool evsel__match2(struct evsel *e1, struct evsel *e2) -{ - return (e1->core.attr.type == e2->core.attr.type) && - (e1->core.attr.config == e2->core.attr.config); -} - int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread); int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale); @@ -407,7 +425,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, u64 *timestamp); -u16 evsel__id_hdr_size(struct evsel *evsel); +u16 evsel__id_hdr_size(const struct evsel *evsel); static inline struct evsel *evsel__next(struct evsel *evsel) { @@ -460,7 +478,8 @@ static inline bool evsel__is_clock(const struct evsel *evsel) evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK); } -bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize); +bool evsel__fallback(struct evsel *evsel, struct target *target, int err, + char *msg, size_t msgsize); int evsel__open_strerror(struct evsel *evsel, struct target *target, int err, char *msg, size_t size); @@ -525,6 +544,7 @@ static inline bool evsel__is_dummy_event(struct evsel *evsel) (evsel->core.attr.config == PERF_COUNT_SW_DUMMY); } +struct perf_session *evsel__session(struct evsel *evsel); struct perf_env *evsel__env(struct evsel *evsel); int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); @@ -540,6 +560,9 @@ void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader); bool arch_evsel__must_be_in_group(const struct evsel *evsel); +bool evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config); +void evsel__uniquify_counter(struct evsel *counter); + /* * Macro to swap the bit-field postition and size. 
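The removed inline __evsel__match() above is a reminder of how legacy hardware events encode a PMU type in the upper bits of attr.config on hybrid systems. A small sketch of that split, using the PERF_HW_EVENT_MASK value and 32-bit type shift from the perf_event UAPI:

#include <stdio.h>
#include <stdint.h>

#define PERF_HW_EVENT_MASK  0xffffffffULL /* low bits: event id (from linux/perf_event.h) */
#define PERF_PMU_TYPE_SHIFT 32            /* high bits: PMU type */

int main(void)
{
	/* e.g. "cycles" (config 0) on a hypothetical PMU of type 8 */
	uint64_t config = (8ULL << PERF_PMU_TYPE_SHIFT) | 0;

	printf("event id: %llu, pmu type: %llu\n",
	       (unsigned long long)(config & PERF_HW_EVENT_MASK),
	       (unsigned long long)(config >> PERF_PMU_TYPE_SHIFT));
	return 0;
}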
* Used when, @@ -555,4 +578,8 @@ u64 evsel__bitfield_swap_branch_flags(u64 value); void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel, const char *config_name, u64 val); +bool evsel__is_offcpu_event(struct evsel *evsel); + +void evsel__warn_user_requested_cpus(struct evsel *evsel, struct perf_cpu_map *user_requested_cpus); + #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index aee6f808b512..bcd3a978f0c4 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -25,8 +25,10 @@ enum evsel_term_type { EVSEL__CONFIG_TERM_BRANCH, EVSEL__CONFIG_TERM_PERCORE, EVSEL__CONFIG_TERM_AUX_OUTPUT, + EVSEL__CONFIG_TERM_AUX_ACTION, EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE, EVSEL__CONFIG_TERM_CFG_CHG, + EVSEL__CONFIG_TERM_RATIO_TO_PREV, }; struct evsel_config_term { @@ -47,6 +49,7 @@ struct evsel_config_term { u32 aux_sample_size; u64 cfg_chg; char *str; + int cpu; } val; bool weak; }; diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 8719b3cb5646..10f1a03c2860 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -14,7 +14,7 @@ #include "dso.h" #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) @@ -81,13 +81,15 @@ int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE #ifdef HAVE_LIBTRACEEVENT if (details->trace_fields) { struct tep_format_field *field; + const struct tep_event *tp_format; if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) { printed += comma_fprintf(fp, &first, " (not a tracepoint)"); goto out; } - field = evsel->tp_format->format.fields; + tp_format = evsel__tp_format(evsel); + field = tp_format ? 
tp_format->format.fields : NULL; if (field == NULL) { printed += comma_fprintf(fp, &first, " (no trace field)"); goto out; @@ -107,7 +109,6 @@ out: return ++printed; } -#ifndef PYTHON_PERF int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, unsigned int print_opts, struct callchain_cursor *cursor, struct strlist *bt_stop_list, FILE *fp) @@ -167,7 +168,10 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, node_al.addr = addr; node_al.map = map__get(map); - if (print_symoffset) { + if (sample->deferred_callchain && + sample->deferred_cookie == node->ip) { + printed += fprintf(fp, "(cookie)"); + } else if (print_symoffset) { printed += __symbol__fprintf_symname_offs(sym, &node_al, print_unknown_as_addr, true, fp); @@ -248,4 +252,3 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, return printed; } -#endif /* PYTHON_PERF */ diff --git a/tools/perf/util/evswitch.c b/tools/perf/util/evswitch.c index 40cb56a9347d..d4c06a3f825a 100644 --- a/tools/perf/util/evswitch.c +++ b/tools/perf/util/evswitch.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only // Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> +#include <errno.h> #include "evswitch.h" #include "evlist.h" diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 4814262e3805..465fe2e9bbbe 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -5,27 +5,22 @@ #include <stdlib.h> #include <string.h> #include "metricgroup.h" -#include "cpumap.h" -#include "cputopo.h" #include "debug.h" #include "evlist.h" #include "expr.h" -#include "expr-bison.h" -#include "expr-flex.h" -#include "util/hashmap.h" #include "smt.h" -#include "tsc.h" -#include <api/fs/fs.h> +#include "tool_pmu.h" +#include <util/expr-bison.h> +#include <util/expr-flex.h> +#include "util/hashmap.h" +#include "util/header.h" +#include "util/pmu.h" +#include <perf/cpumap.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/zalloc.h> #include <ctype.h> #include <math.h> -#include "pmu.h" - -#ifdef PARSER_DEBUG -extern int expr_debug; -#endif struct expr_id_data { union { @@ -171,8 +166,12 @@ int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id, data_ptr->kind = EXPR_ID_DATA__VALUE; ret = hashmap__set(ctx->ids, id, data_ptr, &old_key, &old_data); - if (ret) + if (ret) { free(data_ptr); + } else if (old_data) { + data_ptr->val.val += old_data->val.val; + data_ptr->val.source_count += old_data->val.source_count; + } free(old_key); free(old_data); return ret; @@ -220,6 +219,8 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) int expr__get_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **data) { + if (!ctx || !id) + return -1; return hashmap__find(ctx->ids, id, data) ? 
0 : -1; } @@ -290,7 +291,7 @@ struct expr_parse_ctx *expr__ctx_new(void) { struct expr_parse_ctx *ctx; - ctx = malloc(sizeof(struct expr_parse_ctx)); + ctx = calloc(1, sizeof(struct expr_parse_ctx)); if (!ctx) return NULL; @@ -299,9 +300,6 @@ struct expr_parse_ctx *expr__ctx_new(void) free(ctx); return NULL; } - ctx->sctx.user_requested_cpu_list = NULL; - ctx->sctx.runtime = 0; - ctx->sctx.system_wide = false; return ctx; } @@ -395,83 +393,24 @@ double expr_id_data__source_count(const struct expr_id_data *data) return data->val.source_count; } -#if !defined(__i386__) && !defined(__x86_64__) -double arch_get_tsc_freq(void) -{ - return 0.0; -} -#endif - -static double has_pmem(void) -{ - static bool has_pmem, cached; - const char *sysfs = sysfs__mountpoint(); - char path[PATH_MAX]; - - if (!cached) { - snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs); - has_pmem = access(path, F_OK) == 0; - cached = true; - } - return has_pmem ? 1.0 : 0.0; -} - double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx) { - const struct cpu_topology *topology; double result = NAN; - - if (!strcmp("#num_cpus", literal)) { - result = cpu__max_present_cpu().cpu; - goto out; - } - - if (!strcasecmp("#system_tsc_freq", literal)) { - result = arch_get_tsc_freq(); - goto out; - } - - /* - * Assume that topology strings are consistent, such as CPUs "0-1" - * wouldn't be listed as "0,1", and so after deduplication the number of - * these strings gives an indication of the number of packages, dies, - * etc. - */ - if (!strcasecmp("#smt_on", literal)) { - result = smt_on() ? 1.0 : 0.0; - goto out; - } - if (!strcmp("#core_wide", literal)) { - result = core_wide(ctx->system_wide, ctx->user_requested_cpu_list) - ? 1.0 : 0.0; - goto out; - } - if (!strcmp("#num_packages", literal)) { - topology = online_topology(); - result = topology->package_cpus_lists; - goto out; - } - if (!strcmp("#num_dies", literal)) { - topology = online_topology(); - result = topology->die_cpus_lists; - goto out; - } - if (!strcmp("#num_cores", literal)) { - topology = online_topology(); - result = topology->core_cpus_lists; - goto out; - } - if (!strcmp("#slots", literal)) { - result = perf_pmu__cpu_slots_per_cycle(); - goto out; - } - if (!strcmp("#has_pmem", literal)) { - result = has_pmem(); - goto out; + enum tool_pmu_event ev = tool_pmu__str_to_event(literal + 1); + + if (ev != TOOL_PMU__EVENT_NONE) { + u64 count; + + if (tool_pmu__read_event(ev, /*evsel=*/NULL, + ctx->system_wide, ctx->user_requested_cpu_list, + &count)) + result = count; + else + pr_err("Failure to read '%s'", literal); + } else { + pr_err("Unrecognized literal '%s'", literal); } - pr_err("Unrecognized literal '%s'", literal); -out: pr_debug2("literal: %s = %f\n", literal, result); return result; } @@ -491,7 +430,41 @@ double expr__has_event(const struct expr_parse_ctx *ctx, bool compute_ids, const tmp = evlist__new(); if (!tmp) return NAN; - ret = parse_event(tmp, id) ? 0 : 1; + + if (strchr(id, '@')) { + char *tmp_id, *p; + + tmp_id = strdup(id); + if (!tmp_id) { + ret = NAN; + goto out; + } + p = strchr(tmp_id, '@'); + *p = '/'; + p = strrchr(tmp_id, '@'); + *p = '/'; + ret = parse_event(tmp, tmp_id) ? 0 : 1; + free(tmp_id); + } else { + ret = parse_event(tmp, id) ? 
0 : 1; + } +out: evlist__delete(tmp); return ret; } + +double expr__strcmp_cpuid_str(const struct expr_parse_ctx *ctx __maybe_unused, + bool compute_ids __maybe_unused, const char *test_id) +{ + double ret; + struct perf_cpu cpu = {-1}; + char *cpuid = get_cpuid_allow_env_override(cpu); + + if (!cpuid) + return NAN; + + ret = !strcmp_cpuid_str(test_id, cpuid); + + free(cpuid); + return ret; +} diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 3c1e49b3e35d..c0cec29ddc29 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -55,5 +55,6 @@ double expr_id_data__value(const struct expr_id_data *data); double expr_id_data__source_count(const struct expr_id_data *data); double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx); double expr__has_event(const struct expr_parse_ctx *ctx, bool compute_ids, const char *id); +double expr__strcmp_cpuid_str(const struct expr_parse_ctx *ctx, bool compute_ids, const char *id); #endif diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index dbb117414710..a2fc43159ee9 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -94,6 +94,14 @@ static int literal(yyscan_t scanner, const struct expr_scanner_ctx *sctx) } return LITERAL; } + +static int nan_value(yyscan_t scanner) +{ + YYSTYPE *yylval = expr_get_lval(scanner); + + yylval->num = NAN; + return NUMBER; +} %} number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)(e-?[0-9]+)? @@ -114,6 +122,8 @@ if { return IF; } else { return ELSE; } source_count { return SOURCE_COUNT; } has_event { return HAS_EVENT; } +strcmp_cpuid_str { return STRCMP_CPUID_STR; } +NaN { return nan_value(yyscanner); } {literal} { return literal(yyscanner, sctx); } {number} { return value(yyscanner); } {symbol} { return str(yyscanner, ID, sctx->runtime); } diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index dd504afd8f36..e364790babb5 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -1,12 +1,16 @@ /* Simple expression parser */ %{ +#ifndef NDEBUG #define YYDEBUG 1 +#endif #include <assert.h> #include <math.h> #include <stdlib.h> #include "util/debug.h" #define IN_EXPR_Y 1 #include "expr.h" +#include "expr-bison.h" +int expr_lex(YYSTYPE * yylval_param , void *yyscanner); %} %define api.pure full @@ -37,7 +41,7 @@ } ids; } -%token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT HAS_EVENT EXPR_ERROR +%token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT HAS_EVENT STRCMP_CPUID_STR EXPR_ERROR %left MIN MAX IF %left '|' %left '^' @@ -56,7 +60,7 @@ static void expr_error(double *final_val __maybe_unused, struct expr_parse_ctx *ctx __maybe_unused, bool compute_ids __maybe_unused, - void *scanner, + void *scanner __maybe_unused, const char *s) { pr_debug("%s\n", s); @@ -205,6 +209,12 @@ expr: NUMBER $$.ids = NULL; free($3); } +| STRCMP_CPUID_STR '(' ID ')' +{ + $$.val = expr__strcmp_cpuid_str(ctx, compute_ids, $3); + $$.ids = NULL; + free($3); +} | expr '|' expr { if (is_const($1.val) && is_const($3.val)) { diff --git a/tools/perf/util/fncache.c b/tools/perf/util/fncache.c index 6225cbc52310..bf9559c55c63 100644 --- a/tools/perf/util/fncache.c +++ b/tools/perf/util/fncache.c @@ -1,53 +1,58 @@ // SPDX-License-Identifier: GPL-2.0-only /* Manage a cache of file names' existence */ +#include <pthread.h> #include <stdlib.h> -#include <unistd.h> #include <string.h> -#include <linux/list.h> +#include <unistd.h> +#include <linux/compiler.h> #include "fncache.h" +#include "hashmap.h" -struct fncache { - struct hlist_node nd; - bool res; - char 
name[]; -}; +static struct hashmap *fncache; -#define FNHSIZE 61 +static size_t fncache__hash(long key, void *ctx __maybe_unused) +{ + return str_hash((const char *)key); +} -static struct hlist_head fncache_hash[FNHSIZE]; +static bool fncache__equal(long key1, long key2, void *ctx __maybe_unused) +{ + return strcmp((const char *)key1, (const char *)key2) == 0; +} -unsigned shash(const unsigned char *s) +static void fncache__init(void) { - unsigned h = 0; - while (*s) - h = 65599 * h + *s++; - return h ^ (h >> 16); + fncache = hashmap__new(fncache__hash, fncache__equal, /*ctx=*/NULL); +} + +static struct hashmap *fncache__get(void) +{ + static pthread_once_t fncache_once = PTHREAD_ONCE_INIT; + + pthread_once(&fncache_once, fncache__init); + + return fncache; } static bool lookup_fncache(const char *name, bool *res) { - int h = shash((const unsigned char *)name) % FNHSIZE; - struct fncache *n; - - hlist_for_each_entry(n, &fncache_hash[h], nd) { - if (!strcmp(n->name, name)) { - *res = n->res; - return true; - } - } - return false; + long val; + + if (!hashmap__find(fncache__get(), name, &val)) + return false; + + *res = (val != 0); + return true; } static void update_fncache(const char *name, bool res) { - struct fncache *n = malloc(sizeof(struct fncache) + strlen(name) + 1); - int h = shash((const unsigned char *)name) % FNHSIZE; - - if (!n) - return; - strcpy(n->name, name); - n->res = res; - hlist_add_head(&n->nd, &fncache_hash[h]); + char *old_key = NULL, *key = strdup(name); + + if (key) { + hashmap__set(fncache__get(), key, res, &old_key, /*old_value*/NULL); + free(old_key); + } } /* No LRU, only use when bounded in some other way. */ diff --git a/tools/perf/util/fncache.h b/tools/perf/util/fncache.h index fe020beaefb1..b6a0f209493e 100644 --- a/tools/perf/util/fncache.h +++ b/tools/perf/util/fncache.h @@ -1,7 +1,6 @@ #ifndef _FCACHE_H #define _FCACHE_H 1 -unsigned shash(const unsigned char *s); bool file_available(const char *name); #endif diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h index 558efcb98d25..950f2efafad2 100644 --- a/tools/perf/util/ftrace.h +++ b/tools/perf/util/ftrace.h @@ -6,6 +6,8 @@ #include "target.h" struct evlist; +struct hashamp; +struct stats; struct perf_ftrace { struct evlist *evlist; @@ -15,16 +17,28 @@ struct perf_ftrace { struct list_head notrace; struct list_head graph_funcs; struct list_head nograph_funcs; + struct list_head event_pair; + struct hashmap *profile_hash; unsigned long percpu_buffer_size; bool inherit; bool use_nsec; + unsigned int bucket_range; + unsigned int min_latency; + unsigned int max_latency; + unsigned int bucket_num; + bool hide_empty; int graph_depth; int func_stack_trace; int func_irq_info; + int graph_args; + int graph_retval; + int graph_retval_hex; + int graph_retaddr; int graph_nosleep_time; int graph_noirqs; int graph_verbose; int graph_thresh; + int graph_tail; }; struct filter_entry { @@ -40,7 +54,7 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace); int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace); int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace); int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace, - int buckets[]); + int buckets[], struct stats *stats); int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace); #else /* !HAVE_BPF_SKEL */ @@ -65,7 +79,8 @@ perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused) static inline int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, - int buckets[] 
__maybe_unused) + int buckets[] __maybe_unused, + struct stats *stats __maybe_unused) { return -1; } diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index fefc72066c4e..a1cd5196f4ec 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -12,15 +12,14 @@ #include <libelf.h> #include <string.h> #include <stdlib.h> -#include <unistd.h> #include <inttypes.h> -#include <fcntl.h> #include <err.h> -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include <dwarf.h> #endif #include "genelf.h" +#include "sha1.h" #include "../util/jitdump.h" #include <linux/compiler.h> @@ -28,25 +27,6 @@ #define NT_GNU_BUILD_ID 3 #endif -#define BUILD_ID_URANDOM /* different uuid for each run */ - -#ifdef HAVE_LIBCRYPTO_SUPPORT - -#define BUILD_ID_MD5 -#undef BUILD_ID_SHA /* does not seem to work well when linked with Java */ -#undef BUILD_ID_URANDOM /* different uuid for each run */ - -#ifdef BUILD_ID_SHA -#include <openssl/sha.h> -#endif - -#ifdef BUILD_ID_MD5 -#include <openssl/evp.h> -#include <openssl/md5.h> -#endif -#endif - - typedef struct { unsigned int namesz; /* Size of entry's owner string */ unsigned int descsz; /* Size of the note descriptor */ @@ -54,11 +34,6 @@ typedef struct { char name[0]; /* Start of the name+desc data */ } Elf_Note; -struct options { - char *output; - int fd; -}; - static char shd_string_table[] = { 0, '.', 't', 'e', 'x', 't', 0, /* 1 */ @@ -76,7 +51,7 @@ static char shd_string_table[] = { static struct buildid_note { Elf_Note desc; /* descsz: size of build-id, must be multiple of 4 */ char name[4]; /* GNU\0 */ - char build_id[20]; + u8 build_id[SHA1_DIGEST_SIZE]; } bnote; static Elf_Sym symtab[]={ @@ -97,65 +72,6 @@ static Elf_Sym symtab[]={ } }; -#ifdef BUILD_ID_URANDOM -static void -gen_build_id(struct buildid_note *note, - unsigned long load_addr __maybe_unused, - const void *code __maybe_unused, - size_t csize __maybe_unused) -{ - int fd; - size_t sz = sizeof(note->build_id); - ssize_t sret; - - fd = open("/dev/urandom", O_RDONLY); - if (fd == -1) - err(1, "cannot access /dev/urandom for buildid"); - - sret = read(fd, note->build_id, sz); - - close(fd); - - if (sret != (ssize_t)sz) - memset(note->build_id, 0, sz); -} -#endif - -#ifdef BUILD_ID_SHA -static void -gen_build_id(struct buildid_note *note, - unsigned long load_addr __maybe_unused, - const void *code, - size_t csize) -{ - if (sizeof(note->build_id) < SHA_DIGEST_LENGTH) - errx(1, "build_id too small for SHA1"); - - SHA1(code, csize, (unsigned char *)note->build_id); -} -#endif - -#ifdef BUILD_ID_MD5 -static void -gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize) -{ - EVP_MD_CTX *mdctx; - - if (sizeof(note->build_id) < 16) - errx(1, "build_id too small for MD5"); - - mdctx = EVP_MD_CTX_new(); - if (!mdctx) - errx(2, "failed to create EVP_MD_CTX"); - - EVP_DigestInit_ex(mdctx, EVP_md5(), NULL); - EVP_DigestUpdate(mdctx, &load_addr, sizeof(load_addr)); - EVP_DigestUpdate(mdctx, code, csize); - EVP_DigestFinal_ex(mdctx, (unsigned char *)note->build_id, NULL); - EVP_MD_CTX_free(mdctx); -} -#endif - static int jit_add_eh_frame_info(Elf *e, void* unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size, uint64_t base_offset) @@ -244,7 +160,7 @@ jit_add_eh_frame_info(Elf *e, void* unwinding, uint64_t unwinding_header_size, * csize: the code size in bytes */ int -jit_write_elf(int fd, uint64_t load_addr, const char *sym, +jit_write_elf(int fd, uint64_t load_addr __maybe_unused, const char *sym, const void *code, int csize, void 
*debug __maybe_unused, int nr_debug_entries __maybe_unused, void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size) @@ -257,6 +173,8 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, Elf_Shdr *shdr; uint64_t eh_frame_base_offset; char *strsym = NULL; + void *build_id_data = NULL, *tmp; + int build_id_data_len; int symlen; int retval = -1; @@ -293,9 +211,9 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, */ phdr = elf_newphdr(e, 1); phdr[0].p_type = PT_LOAD; - phdr[0].p_offset = 0; - phdr[0].p_vaddr = 0; - phdr[0].p_paddr = 0; + phdr[0].p_offset = GEN_ELF_TEXT_OFFSET; + phdr[0].p_vaddr = GEN_ELF_TEXT_OFFSET; + phdr[0].p_paddr = GEN_ELF_TEXT_OFFSET; phdr[0].p_filesz = csize; phdr[0].p_memsz = csize; phdr[0].p_flags = PF_X | PF_R; @@ -335,6 +253,14 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; shdr->sh_entsize = 0; + build_id_data = malloc(csize); + if (build_id_data == NULL) { + warnx("cannot allocate build-id data"); + goto error; + } + memcpy(build_id_data, code, csize); + build_id_data_len = csize; + /* * Setup .eh_frame_hdr and .eh_frame */ @@ -418,6 +344,15 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_entsize = sizeof(Elf_Sym); shdr->sh_link = unwinding ? 6 : 4; /* index of .strtab section */ + tmp = realloc(build_id_data, build_id_data_len + sizeof(symtab)); + if (tmp == NULL) { + warnx("cannot allocate build-id data"); + goto error; + } + memcpy(tmp + build_id_data_len, symtab, sizeof(symtab)); + build_id_data = tmp; + build_id_data_len += sizeof(symtab); + /* * setup symbols string table * 2 = 1 for 0 in 1st entry, 1 for the 0 at end of symbol for 2nd entry @@ -460,6 +395,15 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_flags = 0; shdr->sh_entsize = 0; + tmp = realloc(build_id_data, build_id_data_len + symlen); + if (tmp == NULL) { + warnx("cannot allocate build-id data"); + goto error; + } + memcpy(tmp + build_id_data_len, strsym, symlen); + build_id_data = tmp; + build_id_data_len += symlen; + /* * setup build-id section */ @@ -478,7 +422,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, /* * build-id generation */ - gen_build_id(&bnote, load_addr, code, csize); + sha1(build_id_data, build_id_data_len, bnote.build_id); bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */ bnote.desc.descsz = sizeof(bnote.build_id); bnote.desc.type = NT_GNU_BUILD_ID; @@ -504,7 +448,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_size = sizeof(bnote); shdr->sh_entsize = 0; -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT if (debug && nr_debug_entries) { retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries); if (retval) @@ -523,7 +467,7 @@ error: (void)elf_end(e); free(strsym); - + free(build_id_data); return retval; } diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index 5f18d20ea903..9f0b875d6548 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -8,7 +8,7 @@ int jit_write_elf(int fd, uint64_t code_addr, const char *sym, const void *code, int csize, void *debug, int nr_debug_entries, void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size); -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT /* genelf_debug.c */ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries); #endif @@ -43,6 +43,9 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #elif 
defined(__riscv) && __riscv_xlen == 64 #define GEN_ELF_ARCH EM_RISCV #define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__riscv) && __riscv_xlen == 32 +#define GEN_ELF_ARCH EM_RISCV +#define GEN_ELF_CLASS ELFCLASS32 #elif defined(__loongarch__) #define GEN_ELF_ARCH EM_LOONGARCH #define GEN_ELF_CLASS ELFCLASS64 diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index 1b5140e5ce99..6a73c903d690 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -38,7 +38,7 @@ do done echo "#endif /* HAVE_LIBELF_SUPPORT */" -echo "#if defined(HAVE_LIBTRACEEVENT) && (defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT))" +echo "#if defined(HAVE_LIBTRACEEVENT)" sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt | sort | while read cmd @@ -51,7 +51,7 @@ do p }' "Documentation/perf-$cmd.txt" done -echo "#endif /* HAVE_LIBTRACEEVENT && (HAVE_LIBAUDIT_SUPPORT || HAVE_SYSCALL_TABLE_SUPPORT) */" +echo "#endif /* HAVE_LIBTRACEEVENT */" echo "#ifdef HAVE_LIBTRACEEVENT" sed -n -e 's/^perf-\([^ ]*\)[ ].* traceevent.*/\1/p' command-list.txt | diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c deleted file mode 100644 index e68935e9ac8c..000000000000 --- a/tools/perf/util/get_current_dir_name.c +++ /dev/null @@ -1,18 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -// Copyright (C) 2018, 2019 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> -// -#ifndef HAVE_GET_CURRENT_DIR_NAME -#include "get_current_dir_name.h" -#include <limits.h> -#include <string.h> -#include <unistd.h> - -/* Android's 'bionic' library, for one, doesn't have this */ - -char *get_current_dir_name(void) -{ - char pwd[PATH_MAX]; - - return getcwd(pwd, sizeof(pwd)) == NULL ? 
NULL : strdup(pwd); -} -#endif // HAVE_GET_CURRENT_DIR_NAME diff --git a/tools/perf/util/get_current_dir_name.h b/tools/perf/util/get_current_dir_name.h deleted file mode 100644 index 69f7d5537d32..000000000000 --- a/tools/perf/util/get_current_dir_name.h +++ /dev/null @@ -1,8 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -// Copyright (C) 2018, 2019 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> -// -#ifndef __PERF_GET_CURRENT_DIR_NAME_H -#ifndef HAVE_GET_CURRENT_DIR_NAME -char *get_current_dir_name(void); -#endif // HAVE_GET_CURRENT_DIR_NAME -#endif // __PERF_GET_CURRENT_DIR_NAME_H diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index 0a5bf1937a7c..0c4f155e8eb7 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -80,16 +80,6 @@ struct hashmap { size_t sz; }; -#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \ - .hash_fn = (hash_fn), \ - .equal_fn = (equal_fn), \ - .ctx = (ctx), \ - .buckets = NULL, \ - .cap = 0, \ - .cap_bits = 0, \ - .sz = 0, \ -} - void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn, hashmap_equal_fn equal_fn, void *ctx); struct hashmap *hashmap__new(hashmap_hash_fn hash_fn, @@ -176,8 +166,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry(map, cur, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; cur; cur = cur->next) + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; cur; cur = cur->next) /* * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe @@ -188,8 +178,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; \ + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; \ cur && ({tmp = cur->next; true; }); \ cur = tmp) @@ -200,19 +190,19 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur; \ cur = cur->next) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? 
(map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #endif /* __LIBBPF_HASHMAP_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 52fbf526fe74..f5cad377c99e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -44,6 +44,7 @@ #include "build-id.h" #include "data.h" #include <api/fs/fs.h> +#include <api/io_dir.h> #include "asm/bug.h" #include "tool.h" #include "time-utils.h" @@ -58,7 +59,7 @@ #include <internal/lib.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif /* @@ -334,7 +335,6 @@ static int write_build_id(struct feat_fd *ff, pr_debug("failed to write buildid table\n"); return err; } - perf_session__cache_build_ids(session); return 0; } @@ -456,6 +456,8 @@ static int write_cpudesc(struct feat_fd *ff, #define CPUINFO_PROC { "Processor", } #elif defined(__xtensa__) #define CPUINFO_PROC { "core ID", } +#elif defined(__loongarch__) +#define CPUINFO_PROC { "Model Name", } #else #define CPUINFO_PROC { "model name", } #endif @@ -554,6 +556,7 @@ static int write_event_desc(struct feat_fd *ff, static int write_cmdline(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { + struct perf_env *env = &ff->ph->env; char pbuf[MAXPATHLEN], *buf; int i, ret, n; @@ -561,7 +564,7 @@ static int write_cmdline(struct feat_fd *ff, buf = perf_exe(pbuf, MAXPATHLEN); /* account for binary path */ - n = perf_env.nr_cmdline + 1; + n = env->nr_cmdline + 1; ret = do_write(ff, &n, sizeof(n)); if (ret < 0) @@ -571,8 +574,8 @@ static int write_cmdline(struct feat_fd *ff, if (ret < 0) return ret; - for (i = 0 ; i < perf_env.nr_cmdline; i++) { - ret = do_write_string(ff, perf_env.cmdline_argv[i]); + for (i = 0 ; i < env->nr_cmdline; i++) { + ret = do_write_string(ff, env->cmdline_argv[i]); if (ret < 0) return ret; } @@ -583,6 +586,7 @@ static int write_cmdline(struct feat_fd *ff, static int write_cpu_topology(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct cpu_topology *tp; u32 i; int ret, j; @@ -610,17 +614,17 @@ static int write_cpu_topology(struct feat_fd *ff, break; } - ret = perf_env__read_cpu_topology_map(&perf_env); + ret = perf_env__read_cpu_topology_map(env); if (ret < 0) goto done; - for (j = 0; j < perf_env.nr_cpus_avail; j++) { - ret = do_write(ff, &perf_env.cpu[j].core_id, - sizeof(perf_env.cpu[j].core_id)); + for (j = 0; j < env->nr_cpus_avail; j++) { + ret = do_write(ff, &env->cpu[j].core_id, + sizeof(env->cpu[j].core_id)); if (ret < 0) return ret; - ret = do_write(ff, &perf_env.cpu[j].socket_id, - sizeof(perf_env.cpu[j].socket_id)); + ret = do_write(ff, &env->cpu[j].socket_id, + sizeof(env->cpu[j].socket_id)); if (ret < 0) return ret; } @@ -638,9 +642,9 @@ static int write_cpu_topology(struct feat_fd *ff, goto done; } - for (j = 0; j < perf_env.nr_cpus_avail; j++) { - ret = do_write(ff, &perf_env.cpu[j].die_id, - sizeof(perf_env.cpu[j].die_id)); + for (j = 0; j < env->nr_cpus_avail; j++) { + ret = do_write(ff, &env->cpu[j].die_id, + sizeof(env->cpu[j].die_id)); if (ret < 0) return ret; } @@ -746,20 +750,14 @@ static int write_pmu_mappings(struct feat_fd *ff, * Do a first pass to count number of pmu to avoid lseek so this * works in pipe mode as well. 
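The hashmap.h hunks above wrap every expansion of the map macro argument in parentheses. Without that, an argument that is itself an expression binds wrongly after expansion; a minimal illustration of the hazard:

#include <stdio.h>

#define BAD_TWICE(x)  (x * 2)    /* unparenthesized argument */
#define GOOD_TWICE(x) ((x) * 2)

int main(void)
{
	printf("%d\n", BAD_TWICE(1 + 2));  /* expands to 1 + 2*2 = 5, not 6 */
	printf("%d\n", GOOD_TWICE(1 + 2)); /* (1+2)*2 = 6 */
	return 0;
}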
*/ - while ((pmu = perf_pmus__scan(pmu))) { - if (!pmu->name) - continue; + while ((pmu = perf_pmus__scan(pmu))) pmu_num++; - } ret = do_write(ff, &pmu_num, sizeof(pmu_num)); if (ret < 0) return ret; while ((pmu = perf_pmus__scan(pmu))) { - if (!pmu->name) - continue; - ret = do_write(ff, &pmu->type, sizeof(pmu->type)); if (ret < 0) return ret; @@ -823,11 +821,31 @@ static int write_group_desc(struct feat_fd *ff, * Each architecture should provide a more precise id string that * can be use to match the architecture's "mapfile". */ -char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +char * __weak get_cpuid_str(struct perf_cpu cpu __maybe_unused) { return NULL; } +char *get_cpuid_allow_env_override(struct perf_cpu cpu) +{ + char *cpuid; + static bool printed; + + cpuid = getenv("PERF_CPUID"); + if (cpuid) + cpuid = strdup(cpuid); + if (!cpuid) + cpuid = get_cpuid_str(cpu); + if (!cpuid) + return NULL; + + if (!printed) { + pr_debug("Using CPUID %s\n", cpuid); + printed = true; + } + return cpuid; +} + /* Return zero when the cpuid from the mapfile.csv matches the * cpuid string generated on this platform. * Otherwise return non-zero. @@ -860,18 +878,19 @@ int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) * default get_cpuid(): nothing gets recorded * actual implementation must be in arch/$(SRCARCH)/util/header.c */ -int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused) +int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused, + struct perf_cpu cpu __maybe_unused) { return ENOSYS; /* Not implemented */ } -static int write_cpuid(struct feat_fd *ff, - struct evlist *evlist __maybe_unused) +static int write_cpuid(struct feat_fd *ff, struct evlist *evlist) { + struct perf_cpu cpu = perf_cpu_map__min(evlist->core.all_cpus); char buffer[64]; int ret; - ret = get_cpuid(buffer, sizeof(buffer)); + ret = get_cpuid(buffer, sizeof(buffer), cpu); if (ret) return -1; @@ -991,57 +1010,6 @@ static int write_dir_format(struct feat_fd *ff, return do_write(ff, &data->dir.version, sizeof(data->dir.version)); } -/* - * Check whether a CPU is online - * - * Returns: - * 1 -> if CPU is online - * 0 -> if CPU is offline - * -1 -> error case - */ -int is_cpu_online(unsigned int cpu) -{ - char *str; - size_t strlen; - char buf[256]; - int status = -1; - struct stat statbuf; - - snprintf(buf, sizeof(buf), - "/sys/devices/system/cpu/cpu%d", cpu); - if (stat(buf, &statbuf) != 0) - return 0; - - /* - * Check if /sys/devices/system/cpu/cpux/online file - * exists. Some cases cpu0 won't have online file since - * it is not expected to be turned off generally. - * In kernels without CONFIG_HOTPLUG_CPU, this - * file won't exist - */ - snprintf(buf, sizeof(buf), - "/sys/devices/system/cpu/cpu%d/online", cpu); - if (stat(buf, &statbuf) != 0) - return 1; - - /* - * Read online file using sysfs__read_str. - * If read or open fails, return -1. 
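get_cpuid_allow_env_override() above lets a PERF_CPUID environment variable take precedence over the detected value, which is handy for testing mapfile matching. The precedence rule, sketched with an example cpuid string:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Env var wins over the detected value; caller owns the returned string. */
static char *cpuid_with_override(const char *detected)
{
	const char *env = getenv("PERF_CPUID");

	return strdup(env ? env : detected);
}

int main(void)
{
	char *cpuid = cpuid_with_override("GenuineIntel-6-8E");

	printf("%s\n", cpuid); /* PERF_CPUID=... overrides the detected id */
	free(cpuid);
	return 0;
}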
- * If read succeeds, return value from file - * which gets stored in "str" - */ - snprintf(buf, sizeof(buf), - "devices/system/cpu/cpu%d/online", cpu); - - if (sysfs__read_str(buf, &str, &strlen) < 0) - return status; - - status = atoi(str); - - free(str); - return status; -} - #ifdef HAVE_LIBBPF_SUPPORT static int write_bpf_prog_info(struct feat_fd *ff, struct evlist *evlist __maybe_unused) @@ -1049,13 +1017,13 @@ static int write_bpf_prog_info(struct feat_fd *ff, struct perf_env *env = &ff->ph->env; struct rb_root *root; struct rb_node *next; - int ret; + int ret = 0; down_read(&env->bpf_progs.lock); ret = do_write(ff, &env->bpf_progs.infos_cnt, sizeof(env->bpf_progs.infos_cnt)); - if (ret < 0) + if (ret < 0 || env->bpf_progs.infos_cnt == 0) goto out; root = &env->bpf_progs.infos; @@ -1091,14 +1059,14 @@ static int write_bpf_btf(struct feat_fd *ff, struct perf_env *env = &ff->ph->env; struct rb_root *root; struct rb_node *next; - int ret; + int ret = 0; down_read(&env->bpf_progs.lock); ret = do_write(ff, &env->bpf_progs.btfs_cnt, sizeof(env->bpf_progs.btfs_cnt)); - if (ret < 0) + if (ret < 0 || env->bpf_progs.btfs_cnt == 0) goto out; root = &env->bpf_progs.btfs; @@ -1345,11 +1313,11 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) { unsigned int phys, size = 0; char path[PATH_MAX]; - struct dirent *ent; - DIR *dir; + struct io_dirent64 *ent; + struct io_dir dir; #define for_each_memory(mem, dir) \ - while ((ent = readdir(dir))) \ + while ((ent = io_dir__readdir(&dir)) != NULL) \ if (strcmp(ent->d_name, ".") && \ strcmp(ent->d_name, "..") && \ sscanf(ent->d_name, "memory%u", &mem) == 1) @@ -1358,9 +1326,9 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) "%s/devices/system/node/node%lu", sysfs__mountpoint(), idx); - dir = opendir(path); - if (!dir) { - pr_warning("failed: can't open memory sysfs data\n"); + io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (dir.dirfd < 0) { + pr_warning("failed: can't open memory sysfs data '%s'\n", path); return -1; } @@ -1372,20 +1340,20 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) n->set = bitmap_zalloc(size); if (!n->set) { - closedir(dir); + close(dir.dirfd); return -ENOMEM; } n->node = idx; n->size = size; - rewinddir(dir); + io_dir__rewinddir(&dir); for_each_memory(phys, dir) { __set_bit(phys, n->set); } - closedir(dir); + close(dir.dirfd); return 0; } @@ -1408,8 +1376,8 @@ static int memory_node__sort(const void *a, const void *b) static int build_mem_topology(struct memory_node **nodesp, u64 *cntp) { char path[PATH_MAX]; - struct dirent *ent; - DIR *dir; + struct io_dirent64 *ent; + struct io_dir dir; int ret = 0; size_t cnt = 0, size = 0; struct memory_node *nodes = NULL; @@ -1417,14 +1385,14 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp) scnprintf(path, PATH_MAX, "%s/devices/system/node/", sysfs__mountpoint()); - dir = opendir(path); - if (!dir) { + io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (dir.dirfd < 0) { pr_debug2("%s: couldn't read %s, does this arch have topology information?\n", __func__, path); return -1; } - while (!ret && (ent = readdir(dir))) { + while (!ret && (ent = io_dir__readdir(&dir))) { unsigned int idx; int r; @@ -1448,10 +1416,12 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp) nodes = new_nodes; size += 4; } - ret = memory_node__read(&nodes[cnt++], idx); + ret = memory_node__read(&nodes[cnt], idx); + if (!ret) + cnt += 1; } out: - 
closedir(dir); + close(dir.dirfd); if (!ret) { *cntp = cnt; *nodesp = nodes; @@ -1584,7 +1554,7 @@ static int __write_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu, static int write_cpu_pmu_caps(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { - struct perf_pmu *cpu_pmu = perf_pmus__find("cpu"); + struct perf_pmu *cpu_pmu = perf_pmus__find_core_pmu(); int ret; if (!cpu_pmu) @@ -1605,8 +1575,15 @@ static int write_pmu_caps(struct feat_fd *ff, int ret; while ((pmu = perf_pmus__scan(pmu))) { - if (!pmu->name || !strcmp(pmu->name, "cpu") || - perf_pmu__caps_parse(pmu) <= 0) + if (!strcmp(pmu->name, "cpu")) { + /* + * The "cpu" PMU is special and covered by + * HEADER_CPU_PMU_CAPS. Note, core PMUs are + * counted/written here for ARM, s390 and Intel hybrid. + */ + continue; + } + if (perf_pmu__caps_parse(pmu) <= 0) continue; nr_pmu++; } @@ -1619,23 +1596,17 @@ static int write_pmu_caps(struct feat_fd *ff, return 0; /* - * Write hybrid pmu caps first to maintain compatibility with - * older perf tool. + * Note older perf tools assume core PMUs come first, this is a property + * of perf_pmus__scan. */ - if (perf_pmus__num_core_pmus() > 1) { - pmu = NULL; - while ((pmu = perf_pmus__scan_core(pmu))) { - ret = __write_pmu_caps(ff, pmu, true); - if (ret < 0) - return ret; - } - } - pmu = NULL; while ((pmu = perf_pmus__scan(pmu))) { - if (pmu->is_core || !pmu->nr_caps) + if (!strcmp(pmu->name, "cpu")) { + /* Skip as above. */ + continue; + } + if (perf_pmu__caps_parse(pmu) <= 0) continue; - ret = __write_pmu_caps(ff, pmu, true); if (ret < 0) return ret; @@ -1844,14 +1815,17 @@ static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp) root = &env->bpf_progs.infos; next = rb_first(root); + if (!next) + printf("# bpf_prog_info empty\n"); + while (next) { struct bpf_prog_info_node *node; node = rb_entry(next, struct bpf_prog_info_node, rb_node); next = rb_next(&node->rb_node); - bpf_event__print_bpf_prog_info(&node->info_linear->info, - env, fp); + __bpf_event__print_bpf_prog_info(&node->info_linear->info, + env, fp); } up_read(&env->bpf_progs.lock); @@ -1868,6 +1842,9 @@ static void print_bpf_btf(struct feat_fd *ff, FILE *fp) root = &env->bpf_progs.btfs; next = rb_first(root); + if (!next) + printf("# btf info empty\n"); + while (next) { struct btf_node *node; @@ -2141,29 +2118,39 @@ static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp) static void print_pmu_caps(struct feat_fd *ff, FILE *fp) { + struct perf_env *env = &ff->ph->env; struct pmu_caps *pmu_caps; - for (int i = 0; i < ff->ph->env.nr_pmus_with_caps; i++) { - pmu_caps = &ff->ph->env.pmu_caps[i]; + for (int i = 0; i < env->nr_pmus_with_caps; i++) { + pmu_caps = &env->pmu_caps[i]; __print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps, pmu_caps->pmu_name); } + + if (strcmp(perf_env__arch(env), "x86") == 0 && + perf_env__has_pmu_mapping(env, "ibs_op")) { + char *max_precise = perf_env__find_pmu_cap(env, "cpu", "max_precise"); + + if (max_precise != NULL && atoi(max_precise) == 0) + fprintf(fp, "# AMD systems uses ibs_op// PMU for some precise events, e.g.: cycles:p, see the 'perf list' man page for further details.\n"); + } } static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) { + struct perf_env *env = &ff->ph->env; const char *delimiter = "# pmu mappings: "; char *str, *tmp; u32 pmu_num; u32 type; - pmu_num = ff->ph->env.nr_pmu_mappings; + pmu_num = env->nr_pmu_mappings; if (!pmu_num) { fprintf(fp, "# pmu mappings: not available\n"); return; } - str = ff->ph->env.pmu_mappings; + str = env->pmu_mappings; while 
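header.c now walks sysfs with the io_dir helpers, which iterate via getdents64() on a plain fd and avoid libc's DIR* heap allocation. A standalone sketch, assuming it is built inside the perf tree where api/io_dir.h (tools/lib/api) is on the include path:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <api/io_dir.h>	/* perf's in-tree helper, not a system header */

int main(void)
{
	struct io_dir dir;
	struct io_dirent64 *ent;

	io_dir__init(&dir, open("/sys/devices/system/node",
				O_CLOEXEC | O_DIRECTORY | O_RDONLY));
	if (dir.dirfd < 0)
		return 1;

	while ((ent = io_dir__readdir(&dir)) != NULL)
		puts(ent->d_name);

	/* pair open() with close(); there is no DIR* to closedir() */
	close(dir.dirfd);
	return 0;
}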
(pmu_num) { type = strtoul(str, &tmp, 0); @@ -2245,17 +2232,18 @@ static void memory_node__fprintf(struct memory_node *n, static void print_mem_topology(struct feat_fd *ff, FILE *fp) { + struct perf_env *env = &ff->ph->env; struct memory_node *nodes; int i, nr; - nodes = ff->ph->env.memory_nodes; - nr = ff->ph->env.nr_memory_nodes; + nodes = env->memory_nodes; + nr = env->nr_memory_nodes; fprintf(fp, "# memory nodes (nr %d, block size 0x%llx):\n", - nr, ff->ph->env.memory_bsize); + nr, env->memory_bsize); for (i = 0; i < nr; i++) { - memory_node__fprintf(&nodes[i], ff->ph->env.memory_bsize, fp); + memory_node__fprintf(&nodes[i], env->memory_bsize, fp); } } @@ -2301,7 +2289,7 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, build_id__init(&bid, bev->data, size); dso__set_build_id(dso, &bid); - dso->header_build_id = 1; + dso__set_header_build_id(dso, true); if (dso_space != DSO_SPACE__USER) { struct kmod_path m = { .name = NULL, }; @@ -2309,13 +2297,13 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, if (!kmod_path__parse_name(&m, filename) && m.kmod) dso__set_module_info(dso, &m, machine); - dso->kernel = dso_space; + dso__set_kernel(dso, dso_space); free(m.name); } - build_id__sprintf(&dso->bid, sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); pr_debug("build id event received for %s: %s [%zu]\n", - dso->long_name, sbuild_id, size); + dso__long_name(dso), sbuild_id, size); dso__put(dso); } @@ -2453,6 +2441,7 @@ static int process_build_id(struct feat_fd *ff, void *data __maybe_unused) static int process_nrcpus(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; int ret; u32 nr_cpus_avail, nr_cpus_online; @@ -2463,20 +2452,21 @@ static int process_nrcpus(struct feat_fd *ff, void *data __maybe_unused) ret = do_read_u32(ff, &nr_cpus_online); if (ret) return ret; - ff->ph->env.nr_cpus_avail = (int)nr_cpus_avail; - ff->ph->env.nr_cpus_online = (int)nr_cpus_online; + env->nr_cpus_avail = (int)nr_cpus_avail; + env->nr_cpus_online = (int)nr_cpus_online; return 0; } static int process_total_mem(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; u64 total_mem; int ret; ret = do_read_u64(ff, &total_mem); if (ret) return -1; - ff->ph->env.total_mem = (unsigned long long)total_mem; + env->total_mem = (unsigned long long)total_mem; return 0; } @@ -2537,13 +2527,14 @@ process_event_desc(struct feat_fd *ff, void *data __maybe_unused) static int process_cmdline(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; char *str, *cmdline = NULL, **argv = NULL; u32 nr, i, len = 0; if (do_read_u32(ff, &nr)) return -1; - ff->ph->env.nr_cmdline = nr; + env->nr_cmdline = nr; cmdline = zalloc(ff->size + nr + 1); if (!cmdline) @@ -2563,8 +2554,8 @@ static int process_cmdline(struct feat_fd *ff, void *data __maybe_unused) len += strlen(str) + 1; free(str); } - ff->ph->env.cmdline = cmdline; - ff->ph->env.cmdline_argv = (const char **) argv; + env->cmdline = cmdline; + env->cmdline_argv = (const char **) argv; return 0; error: @@ -2576,21 +2567,20 @@ error: static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) { u32 nr, i; - char *str; + char *str = NULL; struct strbuf sb; - int cpu_nr = ff->ph->env.nr_cpus_avail; + struct perf_env *env = &ff->ph->env; + int cpu_nr = env->nr_cpus_avail; u64 size = 0; - struct perf_header *ph = ff->ph; - bool do_core_id_test = true; - ph->env.cpu = calloc(cpu_nr, 
sizeof(*ph->env.cpu)); - if (!ph->env.cpu) + env->cpu = calloc(cpu_nr, sizeof(*env->cpu)); + if (!env->cpu) return -1; if (do_read_u32(ff, &nr)) goto free_cpu; - ph->env.nr_sibling_cores = nr; + env->nr_sibling_cores = nr; size += sizeof(u32); if (strbuf_init(&sb, 128) < 0) goto free_cpu; @@ -2604,14 +2594,14 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) if (strbuf_add(&sb, str, strlen(str) + 1) < 0) goto error; size += string_size(str); - free(str); + zfree(&str); } - ph->env.sibling_cores = strbuf_detach(&sb, NULL); + env->sibling_cores = strbuf_detach(&sb, NULL); if (do_read_u32(ff, &nr)) return -1; - ph->env.nr_sibling_threads = nr; + env->nr_sibling_threads = nr; size += sizeof(u32); for (i = 0; i < nr; i++) { @@ -2623,45 +2613,30 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) if (strbuf_add(&sb, str, strlen(str) + 1) < 0) goto error; size += string_size(str); - free(str); + zfree(&str); } - ph->env.sibling_threads = strbuf_detach(&sb, NULL); + env->sibling_threads = strbuf_detach(&sb, NULL); /* * The header may be from old perf, * which doesn't include core id and socket id information. */ if (ff->size <= size) { - zfree(&ph->env.cpu); + zfree(&env->cpu); return 0; } - /* On s390 the socket_id number is not related to the numbers of cpus. - * The socket_id number might be higher than the numbers of cpus. - * This depends on the configuration. - * AArch64 is the same. - */ - if (ph->env.arch && (!strncmp(ph->env.arch, "s390", 4) - || !strncmp(ph->env.arch, "aarch64", 7))) - do_core_id_test = false; - for (i = 0; i < (u32)cpu_nr; i++) { if (do_read_u32(ff, &nr)) goto free_cpu; - ph->env.cpu[i].core_id = nr; + env->cpu[i].core_id = nr; size += sizeof(u32); if (do_read_u32(ff, &nr)) goto free_cpu; - if (do_core_id_test && nr != (u32)-1 && nr > (u32)cpu_nr) { - pr_debug("socket_id number is too big." 
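The free(str) to zfree(&str) conversions in the topology readers matter because the error path also frees str; zfree() NULLs the pointer so the second free is harmless. The semantics:

#include <stdlib.h>

/* Same idea as tools/include/linux/zalloc.h's zfree(). */
#define zfree(pp) do { free(*(pp)); *(pp) = NULL; } while (0)

int main(void)
{
	char *str = malloc(8);

	zfree(&str); /* frees and sets str = NULL */
	zfree(&str); /* now a harmless free(NULL) */
	return 0;
}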
- "You may need to upgrade the perf tool.\n"); - goto free_cpu; - } - - ph->env.cpu[i].socket_id = nr; + env->cpu[i].socket_id = nr; size += sizeof(u32); } @@ -2675,7 +2650,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) if (do_read_u32(ff, &nr)) return -1; - ph->env.nr_sibling_dies = nr; + env->nr_sibling_dies = nr; size += sizeof(u32); for (i = 0; i < nr; i++) { @@ -2687,28 +2662,30 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) if (strbuf_add(&sb, str, strlen(str) + 1) < 0) goto error; size += string_size(str); - free(str); + zfree(&str); } - ph->env.sibling_dies = strbuf_detach(&sb, NULL); + env->sibling_dies = strbuf_detach(&sb, NULL); for (i = 0; i < (u32)cpu_nr; i++) { if (do_read_u32(ff, &nr)) goto free_cpu; - ph->env.cpu[i].die_id = nr; + env->cpu[i].die_id = nr; } return 0; error: strbuf_release(&sb); + zfree(&str); free_cpu: - zfree(&ph->env.cpu); + zfree(&env->cpu); return -1; } static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct numa_node *nodes, *n; u32 nr, i; char *str; @@ -2739,13 +2716,12 @@ static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused) goto error; n->map = perf_cpu_map__new(str); + free(str); if (!n->map) goto error; - - free(str); } - ff->ph->env.nr_numa_nodes = nr; - ff->ph->env.numa_nodes = nodes; + env->nr_numa_nodes = nr; + env->numa_nodes = nodes; return 0; error: @@ -2755,6 +2731,7 @@ error: static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; char *name; u32 pmu_num; u32 type; @@ -2768,7 +2745,7 @@ static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused) return 0; } - ff->ph->env.nr_pmu_mappings = pmu_num; + env->nr_pmu_mappings = pmu_num; if (strbuf_init(&sb, 128) < 0) return -1; @@ -2787,12 +2764,14 @@ static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused) goto error; if (!strcmp(name, "msr")) - ff->ph->env.msr_pmu_type = type; + env->msr_pmu_type = type; free(name); pmu_num--; } - ff->ph->env.pmu_mappings = strbuf_detach(&sb, NULL); + /* AMD may set it by evlist__has_amd_ibs() from perf_session__new() */ + free(env->pmu_mappings); + env->pmu_mappings = strbuf_detach(&sb, NULL); return 0; error: @@ -2802,6 +2781,7 @@ error: static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; size_t ret = -1; u32 i, nr, nr_groups; struct perf_session *session; @@ -2815,7 +2795,7 @@ static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused) if (do_read_u32(ff, &nr_groups)) return -1; - ff->ph->env.nr_groups = nr_groups; + env->nr_groups = nr_groups; if (!nr_groups) { pr_debug("group desc not available\n"); return 0; @@ -2899,6 +2879,7 @@ static int process_auxtrace(struct feat_fd *ff, void *data __maybe_unused) static int process_cache(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct cpu_cache_level *caches; u32 cnt, i, version; @@ -2916,10 +2897,10 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused) return -1; for (i = 0; i < cnt; i++) { - struct cpu_cache_level c; + struct cpu_cache_level *c = &caches[i]; #define _R(v) \ - if (do_read_u32(ff, &c.v))\ + if (do_read_u32(ff, &c->v)) \ goto out_free_caches; \ _R(level) @@ -2929,22 +2910,25 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused) #undef _R #define _R(v) \ - c.v 
= do_read_string(ff); \ - if (!c.v) \ - goto out_free_caches; + c->v = do_read_string(ff); \ + if (!c->v) \ + goto out_free_caches; \ _R(type) _R(size) _R(map) #undef _R - - caches[i] = c; } - ff->ph->env.caches = caches; - ff->ph->env.caches_cnt = cnt; + env->caches = caches; + env->caches_cnt = cnt; return 0; out_free_caches: + for (i = 0; i < cnt; i++) { + free(caches[i].type); + free(caches[i].size); + free(caches[i].map); + } free(caches); return -1; } @@ -2973,6 +2957,7 @@ static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused) static int process_mem_topology(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct memory_node *nodes; u64 version, i, nr, bsize; int ret = -1; @@ -3011,9 +2996,9 @@ static int process_mem_topology(struct feat_fd *ff, nodes[i] = n; } - ff->ph->env.memory_bsize = bsize; - ff->ph->env.memory_nodes = nodes; - ff->ph->env.nr_memory_nodes = nr; + env->memory_bsize = bsize; + env->memory_nodes = nodes; + env->nr_memory_nodes = nr; ret = 0; out: @@ -3025,7 +3010,9 @@ out: static int process_clockid(struct feat_fd *ff, void *data __maybe_unused) { - if (do_read_u64(ff, &ff->ph->env.clock.clockid_res_ns)) + struct perf_env *env = &ff->ph->env; + + if (do_read_u64(ff, &env->clock.clockid_res_ns)) return -1; return 0; @@ -3034,6 +3021,7 @@ static int process_clockid(struct feat_fd *ff, static int process_clock_data(struct feat_fd *ff, void *_data __maybe_unused) { + struct perf_env *env = &ff->ph->env; u32 data32; u64 data64; @@ -3048,26 +3036,27 @@ static int process_clock_data(struct feat_fd *ff, if (do_read_u32(ff, &data32)) return -1; - ff->ph->env.clock.clockid = data32; + env->clock.clockid = data32; /* TOD ref time */ if (do_read_u64(ff, &data64)) return -1; - ff->ph->env.clock.tod_ns = data64; + env->clock.tod_ns = data64; /* clockid ref time */ if (do_read_u64(ff, &data64)) return -1; - ff->ph->env.clock.clockid_ns = data64; - ff->ph->env.clock.enabled = true; + env->clock.clockid_ns = data64; + env->clock.enabled = true; return 0; } static int process_hybrid_topology(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct hybrid_node *nodes, *n; u32 nr, i; @@ -3091,8 +3080,8 @@ static int process_hybrid_topology(struct feat_fd *ff, goto error; } - ff->ph->env.nr_hybrid_nodes = nr; - ff->ph->env.hybrid_nodes = nodes; + env->nr_hybrid_nodes = nr; + env->hybrid_nodes = nodes; return 0; error: @@ -3178,7 +3167,11 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) /* after reading from file, translate offset to address */ bpil_offs_to_addr(info_linear); info_node->info_linear = info_linear; - perf_env__insert_bpf_prog_info(env, info_node); + info_node->metadata = NULL; + if (!__perf_env__insert_bpf_prog_info(env, info_node)) { + free(info_linear); + free(info_node); + } } up_write(&env->bpf_progs.lock); @@ -3225,7 +3218,8 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) if (__do_read(ff, node->data, data_size)) goto out; - perf_env__insert_btf(env, node); + if (!__perf_env__insert_btf(env, node)) + free(node); node = NULL; } @@ -3240,26 +3234,30 @@ out: static int process_compressed(struct feat_fd *ff, void *data __maybe_unused) { - if (do_read_u32(ff, &(ff->ph->env.comp_ver))) + struct perf_env *env = &ff->ph->env; + + if (do_read_u32(ff, &(env->comp_ver))) return -1; - if (do_read_u32(ff, &(ff->ph->env.comp_type))) + if (do_read_u32(ff, &(env->comp_type))) return -1; - if (do_read_u32(ff, 
&(ff->ph->env.comp_level))) + if (do_read_u32(ff, &(env->comp_level))) return -1; - if (do_read_u32(ff, &(ff->ph->env.comp_ratio))) + if (do_read_u32(ff, &(env->comp_ratio))) return -1; - if (do_read_u32(ff, &(ff->ph->env.comp_mmap_len))) + if (do_read_u32(ff, &(env->comp_mmap_len))) return -1; return 0; } static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps, - char ***caps, unsigned int *max_branches) + char ***caps, unsigned int *max_branches, + unsigned int *br_cntr_nr, + unsigned int *br_cntr_width) { char *name, *value, *ptr; u32 nr_pmu_caps, i; @@ -3294,6 +3292,12 @@ static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps, if (!strcmp(name, "branches")) *max_branches = atoi(value); + if (!strcmp(name, "branch_counter_nr")) + *br_cntr_nr = atoi(value); + + if (!strcmp(name, "branch_counter_width")) + *br_cntr_width = atoi(value); + free(value); free(name); } @@ -3316,17 +3320,21 @@ error: static int process_cpu_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) { - int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps, - &ff->ph->env.cpu_pmu_caps, - &ff->ph->env.max_branches); + struct perf_env *env = &ff->ph->env; + int ret = __process_pmu_caps(ff, &env->nr_cpu_pmu_caps, + &env->cpu_pmu_caps, + &env->max_branches, + &env->br_cntr_nr, + &env->br_cntr_width); - if (!ret && !ff->ph->env.cpu_pmu_caps) + if (!ret && !env->cpu_pmu_caps) pr_debug("cpu pmu capabilities not available\n"); return ret; } static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct pmu_caps *pmu_caps; u32 nr_pmu, i; int ret; @@ -3347,7 +3355,9 @@ static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) for (i = 0; i < nr_pmu; i++) { ret = __process_pmu_caps(ff, &pmu_caps[i].nr_caps, &pmu_caps[i].caps, - &pmu_caps[i].max_branches); + &pmu_caps[i].max_branches, + &pmu_caps[i].br_cntr_nr, + &pmu_caps[i].br_cntr_width); if (ret) goto err; @@ -3362,8 +3372,8 @@ static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) } } - ff->ph->env.nr_pmus_with_caps = nr_pmu; - ff->ph->env.pmu_caps = pmu_caps; + env->nr_pmus_with_caps = nr_pmu; + env->pmu_caps = pmu_caps; return 0; err: @@ -3588,18 +3598,16 @@ static int perf_header__adds_write(struct perf_header *header, struct feat_copier *fc) { int nr_sections; - struct feat_fd ff; + struct feat_fd ff = { + .fd = fd, + .ph = header, + }; struct perf_file_section *feat_sec, *p; int sec_size; u64 sec_start; int feat; int err; - ff = (struct feat_fd){ - .fd = fd, - .ph = header, - }; - nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS); if (!nr_sections) return 0; @@ -3626,6 +3634,7 @@ static int perf_header__adds_write(struct perf_header *header, err = do_write(&ff, feat_sec, sec_size); if (err < 0) pr_debug("failed to write feature section\n"); + free(ff.buf); /* TODO: added to silence clang-tidy. 
*/ free(feat_sec); return err; } @@ -3633,11 +3642,11 @@ static int perf_header__adds_write(struct perf_header *header, int perf_header__write_pipe(int fd) { struct perf_pipe_file_header f_header; - struct feat_fd ff; + struct feat_fd ff = { + .fd = fd, + }; int err; - ff = (struct feat_fd){ .fd = fd }; - f_header = (struct perf_pipe_file_header){ .magic = PERF_MAGIC, .size = sizeof(f_header), @@ -3648,37 +3657,58 @@ int perf_header__write_pipe(int fd) pr_debug("failed to write perf pipe header\n"); return err; } - + free(ff.buf); return 0; } static int perf_session__do_write_header(struct perf_session *session, struct evlist *evlist, int fd, bool at_exit, - struct feat_copier *fc) + struct feat_copier *fc, + bool write_attrs_after_data) { struct perf_file_header f_header; - struct perf_file_attr f_attr; struct perf_header *header = &session->header; struct evsel *evsel; - struct feat_fd ff; - u64 attr_offset; + struct feat_fd ff = { + .ph = header, + .fd = fd, + }; + u64 attr_offset = sizeof(f_header), attr_size = 0; int err; - ff = (struct feat_fd){ .fd = fd}; - lseek(fd, sizeof(f_header), SEEK_SET); + if (write_attrs_after_data && at_exit) { + /* + * Write features at the end of the file first so that + * attributes may come after them. + */ + if (!header->data_offset && header->data_size) { + pr_err("File contains data but offset unknown\n"); + err = -1; + goto err_out; + } + header->feat_offset = header->data_offset + header->data_size; + err = perf_header__adds_write(header, evlist, fd, fc); + if (err < 0) + goto err_out; + attr_offset = lseek(fd, 0, SEEK_CUR); + } else { + lseek(fd, attr_offset, SEEK_SET); + } evlist__for_each_entry(session->evlist, evsel) { - evsel->id_offset = lseek(fd, 0, SEEK_CUR); - err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64)); - if (err < 0) { - pr_debug("failed to write perf header\n"); - return err; + evsel->id_offset = attr_offset; + /* Avoid writing at the end of the file until the session is exiting. */ + if (!write_attrs_after_data || at_exit) { + err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64)); + if (err < 0) { + pr_debug("failed to write perf header\n"); + goto err_out; + } } + attr_offset += evsel->core.ids * sizeof(u64); } - attr_offset = lseek(ff.fd, 0, SEEK_CUR); - evlist__for_each_entry(evlist, evsel) { if (evsel->core.attr.size < sizeof(evsel->core.attr)) { /* @@ -3688,37 +3718,46 @@ static int perf_session__do_write_header(struct perf_session *session, */ evsel->core.attr.size = sizeof(evsel->core.attr); } - f_attr = (struct perf_file_attr){ - .attr = evsel->core.attr, - .ids = { - .offset = evsel->id_offset, - .size = evsel->core.ids * sizeof(u64), + /* Avoid writing at the end of the file until the session is exiting. 
*/ + if (!write_attrs_after_data || at_exit) { + struct perf_file_attr f_attr = { + .attr = evsel->core.attr, + .ids = { + .offset = evsel->id_offset, + .size = evsel->core.ids * sizeof(u64), + } + }; + err = do_write(&ff, &f_attr, sizeof(f_attr)); + if (err < 0) { + pr_debug("failed to write perf header attribute\n"); + goto err_out; } - }; - err = do_write(&ff, &f_attr, sizeof(f_attr)); - if (err < 0) { - pr_debug("failed to write perf header attribute\n"); - return err; } + attr_size += sizeof(struct perf_file_attr); } - if (!header->data_offset) - header->data_offset = lseek(fd, 0, SEEK_CUR); + if (!header->data_offset) { + if (write_attrs_after_data) + header->data_offset = sizeof(f_header); + else + header->data_offset = attr_offset + attr_size; + } header->feat_offset = header->data_offset + header->data_size; - if (at_exit) { + if (!write_attrs_after_data && at_exit) { + /* Write features now feat_offset is known. */ err = perf_header__adds_write(header, evlist, fd, fc); if (err < 0) - return err; + goto err_out; } f_header = (struct perf_file_header){ .magic = PERF_MAGIC, .size = sizeof(f_header), - .attr_size = sizeof(f_attr), + .attr_size = sizeof(struct perf_file_attr), .attrs = { .offset = attr_offset, - .size = evlist->core.nr_entries * sizeof(f_attr), + .size = attr_size, }, .data = { .offset = header->data_offset, @@ -3733,18 +3772,22 @@ static int perf_session__do_write_header(struct perf_session *session, err = do_write(&ff, &f_header, sizeof(f_header)); if (err < 0) { pr_debug("failed to write perf header\n"); - return err; + goto err_out; + } else { + lseek(fd, 0, SEEK_END); + err = 0; } - lseek(fd, header->data_offset + header->data_size, SEEK_SET); - - return 0; +err_out: + free(ff.buf); + return err; } int perf_session__write_header(struct perf_session *session, struct evlist *evlist, int fd, bool at_exit) { - return perf_session__do_write_header(session, evlist, fd, at_exit, NULL); + return perf_session__do_write_header(session, evlist, fd, at_exit, /*fc=*/NULL, + /*write_attrs_after_data=*/false); } size_t perf_session__data_offset(const struct evlist *evlist) @@ -3764,9 +3807,11 @@ size_t perf_session__data_offset(const struct evlist *evlist) int perf_session__inject_header(struct perf_session *session, struct evlist *evlist, int fd, - struct feat_copier *fc) + struct feat_copier *fc, + bool write_attrs_after_data) { - return perf_session__do_write_header(session, evlist, fd, true, fc); + return perf_session__do_write_header(session, evlist, fd, true, fc, + write_attrs_after_data); } static int perf_header__getbuffer64(struct perf_header *header, @@ -3959,6 +4004,24 @@ int perf_file_header__read(struct perf_file_header *header, adds_features)); } + if (header->size > header->attrs.offset) { + pr_err("Perf file header corrupt: header overlaps attrs\n"); + return -1; + } + + if (header->size > header->data.offset) { + pr_err("Perf file header corrupt: header overlaps data\n"); + return -1; + } + + if ((header->attrs.offset <= header->data.offset && + header->attrs.offset + header->attrs.size > header->data.offset) || + (header->attrs.offset > header->data.offset && + header->data.offset + header->data.size > header->attrs.offset)) { + pr_err("Perf file header corrupt: Attributes and data overlap\n"); + return -1; + } + if (header->size != sizeof(*header)) { /* Support the previous format */ if (header->size == offsetof(typeof(*header), adds_features)) @@ -4039,13 +4102,8 @@ static int perf_file_section__process(struct perf_file_section *section, static int 
perf_file_header__read_pipe(struct perf_pipe_file_header *header, struct perf_header *ph, - struct perf_data* data, - bool repipe, int repipe_fd) + struct perf_data *data) { - struct feat_fd ff = { - .fd = repipe_fd, - .ph = ph, - }; ssize_t ret; ret = perf_data__read(data, header, sizeof(*header)); @@ -4060,19 +4118,15 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, if (ph->needs_swap) header->size = bswap_64(header->size); - if (repipe && do_write(&ff, header, sizeof(*header)) < 0) - return -1; - return 0; } -static int perf_header__read_pipe(struct perf_session *session, int repipe_fd) +static int perf_header__read_pipe(struct perf_session *session) { struct perf_header *header = &session->header; struct perf_pipe_file_header f_header; - if (perf_file_header__read_pipe(&f_header, header, session->data, - session->repipe, repipe_fd) < 0) { + if (perf_file_header__read_pipe(&f_header, header, session->data) < 0) { pr_debug("incompatible file format\n"); return -EINVAL; } @@ -4172,7 +4226,7 @@ static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_h } #endif -int perf_session__read_header(struct perf_session *session, int repipe_fd) +int perf_session__read_header(struct perf_session *session) { struct perf_data *data = session->data; struct perf_header *header = &session->header; @@ -4186,14 +4240,14 @@ int perf_session__read_header(struct perf_session *session, int repipe_fd) if (session->evlist == NULL) return -ENOMEM; - session->evlist->env = &header->env; + session->evlist->session = session; session->machines.host.env = &header->env; /* * We can read 'pipe' data event from regular file, * check for the pipe header regardless of source. */ - err = perf_header__read_pipe(session, repipe_fd); + err = perf_header__read_pipe(session); if (!err || perf_data__is_pipe(data)) { data->is_pipe = true; return err; @@ -4299,12 +4353,12 @@ out_delete_evlist: int perf_event__process_feature(struct perf_session *session, union perf_event *event) { - struct perf_tool *tool = session->tool; struct feat_fd ff = { .fd = 0 }; struct perf_record_header_feature *fe = (struct perf_record_header_feature *)event; int type = fe->header.type; u64 feat = fe->feat_id; int ret = 0; + bool print = dump_trace; if (type < 0 || type >= PERF_RECORD_HEADER_MAX) { pr_warning("invalid record type %d in pipe-mode\n", type); @@ -4315,28 +4369,35 @@ int perf_event__process_feature(struct perf_session *session, return -1; } - if (!feat_ops[feat].process) - return 0; - ff.buf = (void *)fe->data; ff.size = event->header.size - sizeof(*fe); ff.ph = &session->header; - if (feat_ops[feat].process(&ff, NULL)) { + if (feat_ops[feat].process && feat_ops[feat].process(&ff, NULL)) { ret = -1; goto out; } - if (!feat_ops[feat].print || !tool->show_feat_hdr) - goto out; + if (session->tool->show_feat_hdr) { + if (!feat_ops[feat].full_only || + session->tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) { + print = true; + } else { + fprintf(stdout, "# %s info available, use -I to display\n", + feat_ops[feat].name); + } + } - if (!feat_ops[feat].full_only || - tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) { - feat_ops[feat].print(&ff, stdout); - } else { - fprintf(stdout, "# %s info available, use -I to display\n", - feat_ops[feat].name); + if (dump_trace) + printf(", "); + + if (print) { + if (feat_ops[feat].print) + feat_ops[feat].print(&ff, stdout); + else + printf("# %s", feat_ops[feat].name); } + out: free_event_desc(ff.events); return ret; @@ -4364,9 +4425,10 @@ size_t 
perf_event__fprintf_event_update(union perf_event *event, FILE *fp) ret += fprintf(fp, "... "); map = cpu_map__new_data(&ev->cpus.cpus); - if (map) + if (map) { ret += cpu_map__fprintf(map, fp); - else + perf_cpu_map__put(map); + } else ret += fprintf(fp, "failed to get cpus\n"); break; default: @@ -4377,14 +4439,23 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) return ret; } -int perf_event__process_attr(struct perf_tool *tool __maybe_unused, +size_t perf_event__fprintf_attr(union perf_event *event, FILE *fp) +{ + return perf_event_attr__fprintf(fp, &event->attr.attr, __desc_attr__fprintf, NULL); +} + +int perf_event__process_attr(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct evlist **pevlist) { - u32 i, ids, n_ids; + u32 i, n_ids; + u64 *ids; struct evsel *evsel; struct evlist *evlist = *pevlist; + if (dump_trace) + perf_event__fprintf_attr(event, stdout); + if (evlist == NULL) { *pevlist = evlist = evlist__new(); if (evlist == NULL) @@ -4397,9 +4468,8 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused, evlist__add(evlist, evsel); - ids = event->header.size; - ids -= (void *)&event->attr.id - (void *)event; - n_ids = ids / sizeof(u64); + n_ids = event->header.size - sizeof(event->header) - event->attr.attr.size; + n_ids = n_ids / sizeof(u64); /* * We don't have the cpu and thread maps on the header, so * for allocating the perf_sample_id table we fake 1 cpu and @@ -4408,14 +4478,15 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused, if (perf_evsel__alloc_id(&evsel->core, 1, n_ids)) return -ENOMEM; + ids = perf_record_header_attr_id(event); for (i = 0; i < n_ids; i++) { - perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, event->attr.id[i]); + perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, ids[i]); } return 0; } -int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, +int perf_event__process_event_update(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct evlist **pevlist) { @@ -4451,8 +4522,8 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, case PERF_EVENT_UPDATE__CPUS: map = cpu_map__new_data(&ev->cpus.cpus); if (map) { - perf_cpu_map__put(evsel->core.own_cpus); - evsel->core.own_cpus = map; + perf_cpu_map__put(evsel->core.pmu_cpus); + evsel->core.pmu_cpus = map; } else pr_err("failed to get event_update cpus\n"); default: @@ -4463,7 +4534,8 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, } #ifdef HAVE_LIBTRACEEVENT -int perf_event__process_tracing_data(struct perf_session *session, +int perf_event__process_tracing_data(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { ssize_t size_read, padding, size = event->tracing_data.size; @@ -4485,15 +4557,14 @@ int perf_event__process_tracing_data(struct perf_session *session, SEEK_SET); } - size_read = trace_report(fd, &session->tevent, - session->repipe); + size_read = trace_report(fd, &session->tevent, session->trace_event_repipe); padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read; if (readn(fd, buf, padding) < 0) { pr_err("%s: reading input file", __func__); return -1; } - if (session->repipe) { + if (session->trace_event_repipe) { int retw = write(STDOUT_FILENO, buf, padding); if (retw <= 0 || retw != padding) { pr_err("%s: repiping tracing data padding", __func__); @@ -4512,7 +4583,8 @@ int perf_event__process_tracing_data(struct perf_session *session, } #endif 
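[Editor's note] The perf_event__process_attr() hunk above replaces the old id count, which was computed from the reading tool's compile-time offset of event->attr.id, with one derived from the sizes carried in the record itself, and then fetches the id array through perf_record_header_attr_id(). A minimal standalone sketch of that arithmetic follows; the sketch_* types are hypothetical stand-ins (the real perf structs carry many more fields), so treat this as an illustration of the technique rather than the tool's code:

	#include <stddef.h>
	#include <stdint.h>

	struct sketch_event_header { uint32_t type; uint16_t misc; uint16_t size; };
	struct sketch_event_attr   { uint32_t type; uint32_t size; /* ... more fields ... */ };

	/*
	 * A PERF_RECORD_HEADER_ATTR record is laid out as
	 * { header, attr (attr.size bytes on the wire), u64 id[] }, so the id
	 * array starts attr.size bytes after the header, not at a fixed offset.
	 */
	static size_t sketch_n_ids(const struct sketch_event_header *hdr,
				   const struct sketch_event_attr *attr)
	{
		if (hdr->size < sizeof(*hdr) + attr->size)
			return 0; /* malformed: record cannot hold the attr it claims */
		return (hdr->size - sizeof(*hdr) - attr->size) / sizeof(uint64_t);
	}

Using the stored attr.size keeps the parser correct when the file was written by a perf whose struct perf_event_attr is larger or smaller than the reading tool's.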
-int perf_event__process_build_id(struct perf_session *session, +int perf_event__process_build_id(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { __event_process_build_id(&event->build_id, diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 7c16a250e738..c058021c3150 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -10,7 +10,13 @@ #include <linux/bitmap.h> #include <linux/types.h> #include "env.h" -#include "pmu.h" +#include <perf/cpumap.h> + +struct evlist; +union perf_event; +struct perf_header; +struct perf_session; +struct perf_tool; enum { HEADER_RESERVED = 0, /* always cleared */ @@ -61,14 +67,28 @@ struct perf_file_section { u64 size; }; +/** + * struct perf_file_header: Header representation on disk. + */ struct perf_file_header { + /** @magic: Holds "PERFILE2". */ u64 magic; + /** @size: Size of this header - sizeof(struct perf_file_header). */ u64 size; + /** + * @attr_size: Size of attrs entries - sizeof(struct perf_event_attr) + + * sizeof(struct perf_file_section). + */ u64 attr_size; + /** @attrs: Offset and size of file section holding attributes. */ struct perf_file_section attrs; + /** @data: Offset and size of file section holding regular event data. */ struct perf_file_section data; - /* event_types is ignored */ + /** @event_types: Ignored. */ struct perf_file_section event_types; + /** + * @adds_features: Bitmap of features. The features are immediately after the data section. + */ DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); }; @@ -77,8 +97,6 @@ struct perf_pipe_file_header { u64 size; }; -struct perf_header; - int perf_file_header__read(struct perf_file_header *header, struct perf_header *ph, int fd); @@ -110,14 +128,9 @@ struct perf_header_feature_ops { bool synthesize; }; -struct evlist; -struct perf_session; -struct perf_tool; -union perf_event; - extern const char perf_version_string[]; -int perf_session__read_header(struct perf_session *session, int repipe_fd); +int perf_session__read_header(struct perf_session *session); int perf_session__write_header(struct perf_session *session, struct evlist *evlist, int fd, bool at_exit); @@ -136,7 +149,8 @@ struct feat_copier { int perf_session__inject_header(struct perf_session *session, struct evlist *evlist, int fd, - struct feat_copier *fc); + struct feat_copier *fc, + bool write_attrs_after_data); size_t perf_session__data_offset(const struct evlist *evlist); @@ -156,17 +170,20 @@ int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full); int perf_event__process_feature(struct perf_session *session, union perf_event *event); -int perf_event__process_attr(struct perf_tool *tool, union perf_event *event, +int perf_event__process_attr(const struct perf_tool *tool, union perf_event *event, struct evlist **pevlist); -int perf_event__process_event_update(struct perf_tool *tool, +int perf_event__process_event_update(const struct perf_tool *tool, union perf_event *event, struct evlist **pevlist); +size_t perf_event__fprintf_attr(union perf_event *event, FILE *fp); size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp); #ifdef HAVE_LIBTRACEEVENT -int perf_event__process_tracing_data(struct perf_session *session, +int perf_event__process_tracing_data(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); #endif -int perf_event__process_build_id(struct perf_session *session, +int perf_event__process_build_id(const struct perf_tool *tool, + struct perf_session 
*session, union perf_event *event); bool is_perf_magic(u64 magic); @@ -181,14 +198,16 @@ int write_padded(struct feat_fd *fd, const void *bf, #define MAX_CACHE_LVL 4 -int is_cpu_online(unsigned int cpu); int build_caches_for_cpu(u32 cpu, struct cpu_cache_level caches[], u32 *cntp); /* * arch specific callback */ -int get_cpuid(char *buffer, size_t sz); +int get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu); + +char *get_cpuid_str(struct perf_cpu cpu); + +char *get_cpuid_allow_env_override(struct perf_cpu cpu); -char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused); int strcmp_cpuid_str(const char *s1, const char *s2); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c index eab99ea6ac01..a0a46e34f8d1 100644 --- a/tools/perf/util/help-unknown-cmd.c +++ b/tools/perf/util/help-unknown-cmd.c @@ -52,46 +52,48 @@ static int add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) return 0; } -const char *help_unknown_cmd(const char *cmd) +const char *help_unknown_cmd(const char *cmd, struct cmdnames *main_cmds) { unsigned int i, n = 0, best_similarity = 0; - struct cmdnames main_cmds, other_cmds; + struct cmdnames other_cmds; - memset(&main_cmds, 0, sizeof(main_cmds)); - memset(&other_cmds, 0, sizeof(main_cmds)); + memset(&other_cmds, 0, sizeof(other_cmds)); perf_config(perf_unknown_cmd_config, NULL); - load_command_list("perf-", &main_cmds, &other_cmds); + load_command_list("perf-", main_cmds, &other_cmds); - if (add_cmd_list(&main_cmds, &other_cmds) < 0) { + if (add_cmd_list(main_cmds, &other_cmds) < 0) { fprintf(stderr, "ERROR: Failed to allocate command list for unknown command.\n"); goto end; } - qsort(main_cmds.names, main_cmds.cnt, - sizeof(main_cmds.names), cmdname_compare); - uniq(&main_cmds); + qsort(main_cmds->names, main_cmds->cnt, + sizeof(main_cmds->names), cmdname_compare); + uniq(main_cmds); - if (main_cmds.cnt) { + if (main_cmds->cnt) { /* This reuses cmdname->len for similarity index */ - for (i = 0; i < main_cmds.cnt; ++i) - main_cmds.names[i]->len = - levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4); - - qsort(main_cmds.names, main_cmds.cnt, - sizeof(*main_cmds.names), levenshtein_compare); + for (i = 0; i < main_cmds->cnt; ++i) { + main_cmds->names[i]->len = + levenshtein(cmd, main_cmds->names[i]->name, + /*swap_penalty=*/0, + /*substitution_penalty=*/2, + /*insertion_penalty=*/1, + /*deletion_penalty=*/1); + } + qsort(main_cmds->names, main_cmds->cnt, + sizeof(*main_cmds->names), levenshtein_compare); - best_similarity = main_cmds.names[0]->len; + best_similarity = main_cmds->names[0]->len; n = 1; - while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len) + while (n < main_cmds->cnt && best_similarity == main_cmds->names[n]->len) ++n; } if (autocorrect && n == 1) { - const char *assumed = main_cmds.names[0]->name; + const char *assumed = main_cmds->names[0]->name; - main_cmds.names[0] = NULL; - clean_cmdnames(&main_cmds); + main_cmds->names[0] = NULL; clean_cmdnames(&other_cmds); fprintf(stderr, "WARNING: You called a perf program named '%s', " "which does not exist.\n" @@ -107,15 +109,14 @@ const char *help_unknown_cmd(const char *cmd) fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd); - if (main_cmds.cnt && best_similarity < 6) { + if (main_cmds->cnt && best_similarity < 6) { fprintf(stderr, "\nDid you mean %s?\n", n < 2 ?
"this": "one of these"); for (i = 0; i < n; i++) - fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); + fprintf(stderr, "\t%s\n", main_cmds->names[i]->name); } end: - clean_cmdnames(&main_cmds); clean_cmdnames(&other_cmds); - exit(1); + return NULL; } diff --git a/tools/perf/util/hisi-ptt-decoder/Build b/tools/perf/util/hisi-ptt-decoder/Build index db3db8b75033..2ee0eb731656 100644 --- a/tools/perf/util/hisi-ptt-decoder/Build +++ b/tools/perf/util/hisi-ptt-decoder/Build @@ -1 +1 @@ -perf-$(CONFIG_AUXTRACE) += hisi-ptt-pkt-decoder.o +perf-util-y += hisi-ptt-pkt-decoder.o diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c index 45b614bb73bf..e4cc4785f744 100644 --- a/tools/perf/util/hisi-ptt.c +++ b/tools/perf/util/hisi-ptt.c @@ -35,11 +35,6 @@ struct hisi_ptt { u32 pmu_type; }; -struct hisi_ptt_queue { - struct hisi_ptt *ptt; - struct auxtrace_buffer *buffer; -}; - static enum hisi_ptt_pkt_type hisi_ptt_check_packet_type(unsigned char *buf) { uint32_t head = *(uint32_t *)buf; @@ -84,14 +79,14 @@ static void hisi_ptt_dump_event(struct hisi_ptt *ptt, unsigned char *buf, static int hisi_ptt_process_event(struct perf_session *session __maybe_unused, union perf_event *event __maybe_unused, struct perf_sample *sample __maybe_unused, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { return 0; } static int hisi_ptt_process_auxtrace_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt, auxtrace); @@ -108,8 +103,10 @@ static int hisi_ptt_process_auxtrace_event(struct perf_session *session, data_offset = 0; } else { data_offset = lseek(fd, 0, SEEK_CUR); - if (data_offset == -1) + if (data_offset == -1) { + free(data); return -errno; + } } err = readn(fd, data, size); @@ -121,11 +118,12 @@ static int hisi_ptt_process_auxtrace_event(struct perf_session *session, if (dump_trace) hisi_ptt_dump_event(ptt, data, size); + free(data); return 0; } static int hisi_ptt_flush(struct perf_session *session __maybe_unused, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { return 0; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 3dc8a4968beb..ef4b569f7df4 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -9,6 +9,7 @@ #include "map_symbol.h" #include "branch.h" #include "mem-events.h" +#include "mem-info.h" #include "session.h" #include "namespaces.h" #include "cgroup.h" @@ -31,6 +32,9 @@ #include <linux/time64.h> #include <linux/zalloc.h> +static int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); +static int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); + static bool hists__filter_entry_by_dso(struct hists *hists, struct hist_entry *he); static bool hists__filter_entry_by_thread(struct hists *hists, @@ -39,6 +43,8 @@ static bool hists__filter_entry_by_symbol(struct hists *hists, struct hist_entry *he); static bool hists__filter_entry_by_socket(struct hists *hists, struct hist_entry *he); +static bool hists__filter_entry_by_parallelism(struct hists *hists, + struct hist_entry *he); u16 hists__col_len(struct hists *hists, enum hist_column col) { @@ -153,8 +159,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) } if (h->mem_info) { - if (h->mem_info->daddr.ms.sym) { - symlen = (int)h->mem_info->daddr.ms.sym->namelen + 4 + if 
(mem_info__daddr(h->mem_info)->ms.sym) { + symlen = (int)mem_info__daddr(h->mem_info)->ms.sym->namelen + 4 + unresolved_col_width + 2; hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); @@ -168,8 +174,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) symlen); } - if (h->mem_info->iaddr.ms.sym) { - symlen = (int)h->mem_info->iaddr.ms.sym->namelen + 4 + if (mem_info__iaddr(h->mem_info)->ms.sym) { + symlen = (int)mem_info__iaddr(h->mem_info)->ms.sym->namelen + 4 + unresolved_col_width + 2; hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL, symlen); @@ -179,8 +185,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) symlen); } - if (h->mem_info->daddr.ms.map) { - symlen = dso__name_len(map__dso(h->mem_info->daddr.ms.map)); + if (mem_info__daddr(h->mem_info)->ms.map) { + symlen = dso__name_len(map__dso(mem_info__daddr(h->mem_info)->ms.map)); hists__new_col_len(hists, HISTC_MEM_DADDR_DSO, symlen); } else { @@ -203,6 +209,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_CGROUP, 6); hists__new_col_len(hists, HISTC_CGROUP_ID, 20); + hists__new_col_len(hists, HISTC_PARALLELISM, 11); hists__new_col_len(hists, HISTC_CPU, 3); hists__new_col_len(hists, HISTC_SOCKET, 6); hists__new_col_len(hists, HISTC_MEM_LOCKED, 6); @@ -217,6 +224,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13); hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13); hists__new_col_len(hists, HISTC_ADDR, BITS_PER_LONG / 4 + 2); + hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_PREDICTED, 9); + hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_ABORT, 5); + hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_CYCLES, 6); if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); @@ -295,9 +305,10 @@ static long hist_time(unsigned long htime) return htime; } -static void he_stat__add_period(struct he_stat *he_stat, u64 period) +static void he_stat__add_period(struct he_stat *he_stat, u64 period, u64 latency) { he_stat->period += period; + he_stat->latency += latency; he_stat->nr_events += 1; } @@ -308,14 +319,84 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->period_us += src->period_us; dest->period_guest_sys += src->period_guest_sys; dest->period_guest_us += src->period_guest_us; + dest->weight1 += src->weight1; + dest->weight2 += src->weight2; + dest->weight3 += src->weight3; dest->nr_events += src->nr_events; + dest->latency += src->latency; } static void he_stat__decay(struct he_stat *he_stat) { he_stat->period = (he_stat->period * 7) / 8; he_stat->nr_events = (he_stat->nr_events * 7) / 8; - /* XXX need decay for weight too? 
*/ + he_stat->weight1 = (he_stat->weight1 * 7) / 8; + he_stat->weight2 = (he_stat->weight2 * 7) / 8; + he_stat->weight3 = (he_stat->weight3 * 7) / 8; + he_stat->latency = (he_stat->latency * 7) / 8; +} + +static int hists__update_mem_stat(struct hists *hists, struct hist_entry *he, + struct mem_info *mi, u64 period) +{ + if (hists->nr_mem_stats == 0) + return 0; + + if (he->mem_stat == NULL) { + he->mem_stat = calloc(hists->nr_mem_stats, sizeof(*he->mem_stat)); + if (he->mem_stat == NULL) + return -1; + } + + for (int i = 0; i < hists->nr_mem_stats; i++) { + int idx = mem_stat_index(hists->mem_stat_types[i], + mem_info__const_data_src(mi)->val); + + assert(0 <= idx && idx < MEM_STAT_LEN); + he->mem_stat[i].entries[idx] += period; + hists->mem_stat_total[i].entries[idx] += period; + } + return 0; +} + +static void hists__add_mem_stat(struct hists *hists, struct hist_entry *dst, + struct hist_entry *src) +{ + if (hists->nr_mem_stats == 0) + return; + + for (int i = 0; i < hists->nr_mem_stats; i++) { + for (int k = 0; k < MEM_STAT_LEN; k++) + dst->mem_stat[i].entries[k] += src->mem_stat[i].entries[k]; + } +} + +static int hists__clone_mem_stat(struct hists *hists, struct hist_entry *dst, + struct hist_entry *src) +{ + if (hists->nr_mem_stats == 0) + return 0; + + dst->mem_stat = calloc(hists->nr_mem_stats, sizeof(*dst->mem_stat)); + if (dst->mem_stat == NULL) + return -1; + + for (int i = 0; i < hists->nr_mem_stats; i++) { + for (int k = 0; k < MEM_STAT_LEN; k++) + dst->mem_stat[i].entries[k] = src->mem_stat[i].entries[k]; + } + return 0; +} + +static void hists__decay_mem_stat(struct hists *hists, struct hist_entry *he) +{ + if (hists->nr_mem_stats == 0) + return; + + for (int i = 0; i < hists->nr_mem_stats; i++) { + for (int k = 0; k < MEM_STAT_LEN; k++) + he->mem_stat[i].entries[k] = (he->mem_stat[i].entries[k] * 7) / 8; + } } static void hists__delete_entry(struct hists *hists, struct hist_entry *he); @@ -323,7 +404,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he); static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) { u64 prev_period = he->stat.period; - u64 diff; + u64 prev_latency = he->stat.latency; if (prev_period == 0) return true; @@ -332,13 +413,18 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) if (symbol_conf.cumulate_callchain) he_stat__decay(he->stat_acc); decay_callchain(he->callchain); - - diff = prev_period - he->stat.period; + hists__decay_mem_stat(hists, he); if (!he->depth) { - hists->stats.total_period -= diff; - if (!he->filtered) - hists->stats.total_non_filtered_period -= diff; + u64 period_diff = prev_period - he->stat.period; + u64 latency_diff = prev_latency - he->stat.latency; + + hists->stats.total_period -= period_diff; + hists->stats.total_latency -= latency_diff; + if (!he->filtered) { + hists->stats.total_non_filtered_period -= period_diff; + hists->stats.total_non_filtered_latency -= latency_diff; + } } if (!he->leaf) { @@ -353,7 +439,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) } } - return he->stat.period == 0; + return he->stat.period == 0 && he->stat.latency == 0; } static void hists__delete_entry(struct hists *hists, struct hist_entry *he) @@ -466,13 +552,16 @@ static int hist_entry__init(struct hist_entry *he, memcpy(he->branch_info, template->branch_info, sizeof(*he->branch_info)); + he->branch_info->from.ms.maps = maps__get(he->branch_info->from.ms.maps); he->branch_info->from.ms.map = map__get(he->branch_info->from.ms.map); + 
he->branch_info->to.ms.maps = maps__get(he->branch_info->to.ms.maps); he->branch_info->to.ms.map = map__get(he->branch_info->to.ms.map); } if (he->mem_info) { - he->mem_info->iaddr.ms.map = map__get(he->mem_info->iaddr.ms.map); - he->mem_info->daddr.ms.map = map__get(he->mem_info->daddr.ms.map); + he->mem_info = mem_info__clone(template->mem_info); + if (he->mem_info == NULL) + goto err_infos; } if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) @@ -491,8 +580,8 @@ static int hist_entry__init(struct hist_entry *he, } if (symbol_conf.res_sample) { - he->res_samples = calloc(sizeof(struct res_sample), - symbol_conf.res_sample); + he->res_samples = calloc(symbol_conf.res_sample, + sizeof(struct res_sample)); if (!he->res_samples) goto err_srcline; } @@ -515,17 +604,14 @@ err_rawdata: err_infos: if (he->branch_info) { - map__put(he->branch_info->from.ms.map); - map__put(he->branch_info->to.ms.map); + map_symbol__exit(&he->branch_info->from.ms); + map_symbol__exit(&he->branch_info->to.ms); zfree(&he->branch_info); } - if (he->mem_info) { - map__put(he->mem_info->iaddr.ms.map); - map__put(he->mem_info->daddr.ms.map); - } + if (he->mem_info) + mem_info__zput(he->mem_info); err: - maps__zput(he->ms.maps); - map__zput(he->ms.map); + map_symbol__exit(&he->ms); zfree(&he->stat_acc); return -ENOMEM; } @@ -567,25 +653,27 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, he = NULL; } } - return he; } -static u8 symbol__parent_filter(const struct symbol *parent) +static filter_mask_t symbol__parent_filter(const struct symbol *parent) { if (symbol_conf.exclude_other && parent == NULL) return 1 << HIST_FILTER__PARENT; return 0; } -static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period) +static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period, u64 latency) { if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain) return; he->hists->callchain_period += period; - if (!he->filtered) + he->hists->callchain_latency += latency; + if (!he->filtered) { he->hists->callchain_non_filtered_period += period; + he->hists->callchain_non_filtered_latency += latency; + } } static struct hist_entry *hists__findnew_entry(struct hists *hists, @@ -598,6 +686,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, struct hist_entry *he; int64_t cmp; u64 period = entry->stat.period; + u64 latency = entry->stat.latency; bool leftmost = true; p = &hists->entries_in->rb_root.rb_node; @@ -615,19 +704,13 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, cmp = hist_entry__cmp(he, entry); if (!cmp) { if (sample_self) { - he_stat__add_period(&he->stat, period); - hist_entry__add_callchain_period(he, period); + he_stat__add_stat(&he->stat, &entry->stat); + hist_entry__add_callchain_period(he, period, latency); } if (symbol_conf.cumulate_callchain) - he_stat__add_period(he->stat_acc, period); - - /* - * This mem info was allocated from sample__resolve_mem - * and will not be used anymore. - */ - mem_info__zput(entry->mem_info); + he_stat__add_period(he->stat_acc, period, latency); - block_info__zput(entry->block_info); + block_info__delete(entry->block_info); kvm_info__zput(entry->kvm_info); @@ -637,7 +720,12 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, * mis-adjust symbol addresses when computing * the history counter to increment. 
*/ - if (he->ms.map != entry->ms.map) { + if (hists__has(hists, sym) && he->ms.map != entry->ms.map) { + if (he->ms.sym) { + u64 addr = he->ms.sym->start; + he->ms.sym = map__find_symbol(entry->ms.map, addr); + } + map__put(he->ms.map); he->ms.map = map__get(entry->ms.map); } @@ -657,7 +745,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, return NULL; if (sample_self) - hist_entry__add_callchain_period(he, period); + hist_entry__add_callchain_period(he, period, latency); hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); @@ -667,6 +755,10 @@ out: he_stat__add_cpumode_period(&he->stat, al->cpumode, period); if (symbol_conf.cumulate_callchain) he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period); + if (hists__update_mem_stat(hists, he, entry->mem_info, period) < 0) { + hist_entry__delete(he); + return NULL; + } return he; } @@ -729,9 +821,14 @@ __hists__add_entry(struct hists *hists, .ip = al->addr, .level = al->level, .code_page_size = sample->code_page_size, + .parallelism = al->parallelism, .stat = { .nr_events = 1, .period = sample->period, + .weight1 = sample->weight, + .weight2 = sample->ins_lat, + .weight3 = sample->weight3, + .latency = al->latency, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent) | al->filtered, @@ -747,7 +844,7 @@ __hists__add_entry(struct hists *hists, .time = hist_time(sample->time), .weight = sample->weight, .ins_lat = sample->ins_lat, - .p_stage_cyc = sample->p_stage_cyc, + .weight3 = sample->weight3, .simd_flags = sample->simd_flags, }, *he = hists__findnew_entry(hists, &entry, al, sample_self); @@ -826,7 +923,7 @@ iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al) if (mi == NULL) return -ENOMEM; - iter->priv = mi; + iter->mi = mi; return 0; } @@ -834,7 +931,7 @@ static int iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al) { u64 cost; - struct mem_info *mi = iter->priv; + struct mem_info *mi = iter->mi; struct hists *hists = evsel__hists(iter->evsel); struct perf_sample *sample = iter->sample; struct hist_entry *he; @@ -881,12 +978,7 @@ iter_finish_mem_entry(struct hist_entry_iter *iter, err = hist_entry__append_callchain(he, iter->sample); out: - /* - * We don't need to free iter->priv (mem_info) here since the mem info - * was either already freed in hists__findnew_entry() or passed to a - * new hist entry by hist_entry__new(). 
- */ - iter->priv = NULL; + mem_info__zput(iter->mi); iter->he = NULL; return err; @@ -905,7 +997,7 @@ iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al iter->curr = 0; iter->total = sample->branch_stack->nr; - iter->priv = bi; + iter->bi = bi; return 0; } @@ -919,7 +1011,7 @@ iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused, static int iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) { - struct branch_info *bi = iter->priv; + struct branch_info *bi = iter->bi; int i = iter->curr; if (bi == NULL) @@ -948,7 +1040,7 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a int i = iter->curr; int err = 0; - bi = iter->priv; + bi = iter->bi; if (iter->hide_unresolved && !(bi[i].from.ms.sym && bi[i].to.ms.sym)) goto out; @@ -965,19 +1057,34 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a if (he == NULL) return -ENOMEM; - hists__inc_nr_samples(hists, he->filtered); - out: iter->he = he; iter->curr++; return err; } +static void branch_info__exit(struct branch_info *bi) +{ + map_symbol__exit(&bi->from.ms); + map_symbol__exit(&bi->to.ms); + zfree_srcline(&bi->srcline_from); + zfree_srcline(&bi->srcline_to); +} + static int iter_finish_branch_entry(struct hist_entry_iter *iter, struct addr_location *al __maybe_unused) { - zfree(&iter->priv); + struct evsel *evsel = iter->evsel; + struct hists *hists = evsel__hists(evsel); + + for (int i = 0; i < iter->total; i++) + branch_info__exit(&iter->bi[i]); + + if (iter->he) + hists__inc_nr_samples(hists, iter->he->filtered); + + zfree(&iter->bi); iter->he = NULL; return iter->curr >= iter->total ? 0 : -1; @@ -1045,7 +1152,7 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter, if (he_cache == NULL) return -ENOMEM; - iter->priv = he_cache; + iter->he_cache = he_cache; iter->curr = 0; return 0; @@ -1058,7 +1165,7 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter, struct evsel *evsel = iter->evsel; struct hists *hists = evsel__hists(evsel); struct perf_sample *sample = iter->sample; - struct hist_entry **he_cache = iter->priv; + struct hist_entry **he_cache = iter->he_cache; struct hist_entry *he; int err = 0; @@ -1116,7 +1223,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, { struct evsel *evsel = iter->evsel; struct perf_sample *sample = iter->sample; - struct hist_entry **he_cache = iter->priv; + struct hist_entry **he_cache = iter->he_cache; struct hist_entry *he; struct hist_entry he_tmp = { .hists = evsel__hists(evsel), @@ -1182,7 +1289,9 @@ static int iter_finish_cumulative_entry(struct hist_entry_iter *iter, struct addr_location *al __maybe_unused) { - zfree(&iter->priv); + mem_info__zput(iter->mi); + zfree(&iter->bi); + zfree(&iter->he_cache); iter->he = NULL; return 0; @@ -1272,19 +1381,35 @@ out: return err; } -int64_t -hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) +static int64_t +hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left, + struct hist_entry *right, unsigned long fn_offset, + bool ignore_dynamic, bool ignore_skipped) { struct hists *hists = left->hists; struct perf_hpp_fmt *fmt; - int64_t cmp = 0; + perf_hpp_fmt_cmp_t *fn; + int64_t cmp; - hists__for_each_sort_list(hists, fmt) { - if (perf_hpp__is_dynamic_entry(fmt) && + /* + * Never collapse filtered and non-filtered entries. + * Note this is not the same as having an extra (invisible) fmt + * that corresponds to the filtered status. 
+ */ + cmp = (int64_t)!!left->filtered - (int64_t)!!right->filtered; + if (cmp) + return cmp; + + perf_hpp_list__for_each_sort_list(hpp_list, fmt) { + if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) && !perf_hpp__defined_dynamic_entry(fmt, hists)) continue; - cmp = fmt->cmp(fmt, left, right); + if (ignore_skipped && perf_hpp__should_skip(fmt, hists)) + continue; + + fn = (void *)fmt + fn_offset; + cmp = (*fn)(fmt, left, right); if (cmp) break; } @@ -1293,49 +1418,65 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) } int64_t -hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) +hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) { - struct hists *hists = left->hists; - struct perf_hpp_fmt *fmt; - int64_t cmp = 0; + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, + offsetof(struct perf_hpp_fmt, cmp), true, false); +} - hists__for_each_sort_list(hists, fmt) { - if (perf_hpp__is_dynamic_entry(fmt) && - !perf_hpp__defined_dynamic_entry(fmt, hists)) - continue; +static int64_t +hist_entry__sort(struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, + offsetof(struct perf_hpp_fmt, sort), false, true); +} - cmp = fmt->collapse(fmt, left, right); - if (cmp) - break; - } +int64_t +hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_impl(left->hists->hpp_list, left, right, + offsetof(struct perf_hpp_fmt, collapse), true, false); +} - return cmp; +static int64_t +hist_entry__collapse_hierarchy(struct perf_hpp_list *hpp_list, + struct hist_entry *left, + struct hist_entry *right) +{ + return hist_entry__cmp_impl(hpp_list, left, right, + offsetof(struct perf_hpp_fmt, collapse), false, false); } void hist_entry__delete(struct hist_entry *he) { struct hist_entry_ops *ops = he->ops; + if (symbol_conf.report_hierarchy) { + struct rb_root *root = &he->hroot_out.rb_root; + struct hist_entry *child, *tmp; + + rbtree_postorder_for_each_entry_safe(child, tmp, root, rb_node) + hist_entry__delete(child); + + *root = RB_ROOT; + } + thread__zput(he->thread); - maps__zput(he->ms.maps); - map__zput(he->ms.map); + map_symbol__exit(&he->ms); if (he->branch_info) { - map__zput(he->branch_info->from.ms.map); - map__zput(he->branch_info->to.ms.map); - zfree_srcline(&he->branch_info->srcline_from); - zfree_srcline(&he->branch_info->srcline_to); + branch_info__exit(he->branch_info); zfree(&he->branch_info); } if (he->mem_info) { - map__zput(he->mem_info->iaddr.ms.map); - map__zput(he->mem_info->daddr.ms.map); + map_symbol__exit(&mem_info__iaddr(he->mem_info)->ms); + map_symbol__exit(&mem_info__daddr(he->mem_info)->ms); mem_info__zput(he->mem_info); } if (he->block_info) - block_info__zput(he->block_info); + block_info__delete(he->block_info); if (he->kvm_info) kvm_info__zput(he->kvm_info); @@ -1348,6 +1489,7 @@ void hist_entry__delete(struct hist_entry *he) free_callchain(he->callchain); zfree(&he->trace_output); zfree(&he->raw_data); + zfree(&he->mem_stat); ops->free(he); } @@ -1410,6 +1552,10 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he, if (symbol_conf.sym_list == NULL) return; break; + case HIST_FILTER__PARALLELISM: + if (__bitmap_weight(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1) == 0) + return; + break; case HIST_FILTER__PARENT: case HIST_FILTER__GUEST: case HIST_FILTER__HOST: @@ -1468,6 +1614,9 @@ static void hist_entry__apply_hierarchy_filters(struct hist_entry *he) 
hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL, perf_hpp__is_sym_entry); + hist_entry__check_and_remove_filter(he, HIST_FILTER__PARALLELISM, + perf_hpp__is_parallelism_entry); + hists__apply_filters(he->hists, he); } @@ -1487,16 +1636,10 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, while (*p != NULL) { parent = *p; iter = rb_entry(parent, struct hist_entry, rb_node_in); - - cmp = 0; - perf_hpp_list__for_each_sort_list(hpp_list, fmt) { - cmp = fmt->collapse(fmt, iter, he); - if (cmp) - break; - } - + cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he); if (!cmp) { he_stat__add_stat(&iter->stat, &he->stat); + hists__add_mem_stat(hists, iter, he); return iter; } @@ -1538,6 +1681,11 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, new->srcfile = NULL; } + if (hists__clone_mem_stat(hists, new, he) < 0) { + hist_entry__delete(new); + return NULL; + } + rb_link_node(&new->rb_node_in, parent, p); rb_insert_color_cached(&new->rb_node_in, root, leftmost); return new; @@ -1620,6 +1768,7 @@ static int hists__collapse_insert_entry(struct hists *hists, he_stat__add_stat(&iter->stat, &he->stat); if (symbol_conf.cumulate_callchain) he_stat__add_stat(iter->stat_acc, he->stat_acc); + hists__add_mem_stat(hists, iter, he); if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) { struct callchain_cursor *cursor = get_tls_callchain_cursor(); @@ -1671,6 +1820,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he) hists__filter_entry_by_thread(hists, he); hists__filter_entry_by_symbol(hists, he); hists__filter_entry_by_socket(hists, he); + hists__filter_entry_by_parallelism(hists, he); } int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) @@ -1714,34 +1864,18 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) return 0; } -static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b) -{ - struct hists *hists = a->hists; - struct perf_hpp_fmt *fmt; - int64_t cmp = 0; - - hists__for_each_sort_list(hists, fmt) { - if (perf_hpp__should_skip(fmt, a->hists)) - continue; - - cmp = fmt->sort(fmt, a, b); - if (cmp) - break; - } - - return cmp; -} - static void hists__reset_filter_stats(struct hists *hists) { hists->nr_non_filtered_entries = 0; hists->stats.total_non_filtered_period = 0; + hists->stats.total_non_filtered_latency = 0; } void hists__reset_stats(struct hists *hists) { hists->nr_entries = 0; hists->stats.total_period = 0; + hists->stats.total_latency = 0; hists__reset_filter_stats(hists); } @@ -1750,6 +1884,7 @@ static void hists__inc_filter_stats(struct hists *hists, struct hist_entry *h) { hists->nr_non_filtered_entries++; hists->stats.total_non_filtered_period += h->stat.period; + hists->stats.total_non_filtered_latency += h->stat.latency; } void hists__inc_stats(struct hists *hists, struct hist_entry *h) @@ -1759,6 +1894,7 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h) hists->nr_entries++; hists->stats.total_period += h->stat.period; + hists->stats.total_latency += h->stat.latency; } static void hierarchy_recalc_total_periods(struct hists *hists) @@ -1770,6 +1906,8 @@ static void hierarchy_recalc_total_periods(struct hists *hists) hists->stats.total_period = 0; hists->stats.total_non_filtered_period = 0; + hists->stats.total_latency = 0; + hists->stats.total_non_filtered_latency = 0; /* * recalculate total period using top-level entries only @@ -1781,8 +1919,11 @@ static void hierarchy_recalc_total_periods(struct hists 
*hists) node = rb_next(node); hists->stats.total_period += he->stat.period; - if (!he->filtered) + hists->stats.total_latency += he->stat.latency; + if (!he->filtered) { hists->stats.total_non_filtered_period += he->stat.period; + hists->stats.total_non_filtered_latency += he->stat.latency; + } } } @@ -2130,7 +2271,7 @@ static bool hists__filter_entry_by_dso(struct hists *hists, struct hist_entry *he) { if (hists->dso_filter != NULL && - (he->ms.map == NULL || map__dso(he->ms.map) != hists->dso_filter)) { + (he->ms.map == NULL || !RC_CHK_EQUAL(map__dso(he->ms.map), hists->dso_filter))) { he->filtered |= (1 << HIST_FILTER__DSO); return true; } @@ -2142,7 +2283,7 @@ static bool hists__filter_entry_by_thread(struct hists *hists, struct hist_entry *he) { if (hists->thread_filter != NULL && - RC_CHK_ACCESS(he->thread) != RC_CHK_ACCESS(hists->thread_filter)) { + !RC_CHK_EQUAL(he->thread, hists->thread_filter)) { he->filtered |= (1 << HIST_FILTER__THREAD); return true; } @@ -2175,6 +2316,16 @@ static bool hists__filter_entry_by_socket(struct hists *hists, return false; } +static bool hists__filter_entry_by_parallelism(struct hists *hists, + struct hist_entry *he) +{ + if (test_bit(he->parallelism, hists->parallelism_filter)) { + he->filtered |= (1 << HIST_FILTER__PARALLELISM); + return true; + } + return false; +} + typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he); static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter) @@ -2344,6 +2495,16 @@ void hists__filter_by_socket(struct hists *hists) hists__filter_entry_by_socket); } +void hists__filter_by_parallelism(struct hists *hists) +{ + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__PARALLELISM, + hists->parallelism_filter); + else + hists__filter_by_type(hists, HIST_FILTER__PARALLELISM, + hists__filter_entry_by_parallelism); +} + void events_stats__inc(struct events_stats *stats, u32 type) { ++stats->nr_events[0]; @@ -2372,6 +2533,11 @@ void hists__inc_nr_lost_samples(struct hists *hists, u32 lost) hists->stats.nr_lost_samples += lost; } +void hists__inc_nr_dropped_samples(struct hists *hists, u32 lost) +{ + hists->stats.nr_dropped_samples += lost; +} + static struct hist_entry *hists__add_dummy_entry(struct hists *hists, struct hist_entry *pair) { @@ -2428,21 +2594,15 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists, struct rb_node **p; struct rb_node *parent = NULL; struct hist_entry *he; - struct perf_hpp_fmt *fmt; bool leftmost = true; p = &root->rb_root.rb_node; while (*p != NULL) { - int64_t cmp = 0; + int64_t cmp; parent = *p; he = rb_entry(parent, struct hist_entry, rb_node_in); - - perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { - cmp = fmt->collapse(fmt, he, pair); - if (cmp) - break; - } + cmp = hist_entry__collapse_hierarchy(he->hpp_list, he, pair); if (!cmp) goto out; @@ -2500,16 +2660,10 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *roo while (n) { struct hist_entry *iter; - struct perf_hpp_fmt *fmt; - int64_t cmp = 0; + int64_t cmp; iter = rb_entry(n, struct hist_entry, rb_node_in); - perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { - cmp = fmt->collapse(fmt, iter, he); - if (cmp) - break; - } - + cmp = hist_entry__collapse_hierarchy(he->hpp_list, iter, he); if (cmp < 0) n = n->rb_left; else if (cmp > 0) @@ -2669,15 +2823,13 @@ int hists__unlink(struct hists *hists) void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, struct perf_sample *sample, bool 
nonany_branch_mode, - u64 *total_cycles) + u64 *total_cycles, struct evsel *evsel) { struct branch_info *bi; struct branch_entry *entries = perf_sample__branch_entries(sample); /* If we have branch cycles always annotate them. */ if (bs && bs->nr && entries[0].flags.cycles) { - int i; - bi = sample__resolve_bstack(sample, al); if (bi) { struct addr_map_symbol *prev = NULL; @@ -2692,39 +2844,50 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, * Note that perf stores branches reversed from * program order! */ - for (i = bs->nr - 1; i >= 0; i--) { + for (int i = bs->nr - 1; i >= 0; i--) { addr_map_symbol__account_cycles(&bi[i].from, nonany_branch_mode ? NULL : prev, - bi[i].flags.cycles); + bi[i].flags.cycles, evsel, + bi[i].branch_stack_cntr); prev = &bi[i].to; if (total_cycles) *total_cycles += bi[i].flags.cycles; } + for (unsigned int i = 0; i < bs->nr; i++) { + map_symbol__exit(&bi[i].to.ms); + map_symbol__exit(&bi[i].from.ms); + } free(bi); } } } -size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp, - bool skip_empty) +size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp) { struct evsel *pos; size_t ret = 0; evlist__for_each_entry(evlist, pos) { struct hists *hists = evsel__hists(pos); + u64 total_samples = hists->stats.nr_samples; - if (skip_empty && !hists->stats.nr_samples && !hists->stats.nr_lost_samples) + total_samples += hists->stats.nr_lost_samples; + total_samples += hists->stats.nr_dropped_samples; + + if (symbol_conf.skip_empty && total_samples == 0) continue; ret += fprintf(fp, "%s stats:\n", evsel__name(pos)); if (hists->stats.nr_samples) - ret += fprintf(fp, "%16s events: %10d\n", + ret += fprintf(fp, "%20s events: %10d\n", "SAMPLE", hists->stats.nr_samples); if (hists->stats.nr_lost_samples) - ret += fprintf(fp, "%16s events: %10d\n", + ret += fprintf(fp, "%20s events: %10d\n", "LOST_SAMPLES", hists->stats.nr_lost_samples); + if (hists->stats.nr_dropped_samples) + ret += fprintf(fp, "%20s events: %10d\n", + "LOST_SAMPLES (BPF)", hists->stats.nr_dropped_samples); } return ret; @@ -2737,6 +2900,12 @@ u64 hists__total_period(struct hists *hists) hists->stats.total_period; } +u64 hists__total_latency(struct hists *hists) +{ + return symbol_conf.filter_relative ? 
hists->stats.total_non_filtered_latency : + hists->stats.total_latency; +} + int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq) { char unit; @@ -2808,7 +2977,7 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh } if (dso) printed += scnprintf(bf + printed, size - printed, - ", DSO: %s", dso->short_name); + ", DSO: %s", dso__short_name(dso)); if (socket_id > -1) printed += scnprintf(bf + printed, size - printed, ", Processor Socket: %d", socket_id); @@ -2848,6 +3017,7 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) hists->entries = RB_ROOT_CACHED; mutex_init(&hists->lock); hists->socket_filter = -1; + hists->parallelism_filter = symbol_conf.parallelism_filter; hists->hpp_list = hpp_list; INIT_LIST_HEAD(&hists->hpp_formats); return 0; @@ -2882,6 +3052,8 @@ static void hists_evsel__exit(struct evsel *evsel) struct perf_hpp_list_node *node, *tmp; hists__delete_all_entries(hists); + zfree(&hists->mem_stat_types); + zfree(&hists->mem_stat_total); list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) { perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index afc9f1c7f4dc..1d5ea632ca4e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -4,21 +4,23 @@ #include <linux/rbtree.h> #include <linux/types.h> -#include "evsel.h" +#include "callchain.h" #include "color.h" #include "events_stats.h" +#include "evsel.h" +#include "map_symbol.h" +#include "mem-events.h" #include "mutex.h" +#include "sample.h" +#include "spark.h" +#include "stat.h" -struct hist_entry; -struct hist_entry_ops; struct addr_location; -struct map_symbol; struct mem_info; struct kvm_info; struct branch_info; struct branch_stack; struct block_info; -struct symbol; struct ui_progress; enum hist_filter { @@ -30,17 +32,22 @@ enum hist_filter { HIST_FILTER__HOST, HIST_FILTER__SOCKET, HIST_FILTER__C2C, + HIST_FILTER__PARALLELISM, }; +typedef u16 filter_mask_t; + enum hist_column { HISTC_SYMBOL, HISTC_TIME, HISTC_DSO, HISTC_THREAD, + HISTC_TGID, HISTC_COMM, HISTC_CGROUP_ID, HISTC_CGROUP, HISTC_PARENT, + HISTC_PARALLELISM, HISTC_CPU, HISTC_SOCKET, HISTC_SRCLINE, @@ -82,12 +89,26 @@ enum hist_column { HISTC_ADDR_TO, HISTC_ADDR, HISTC_SIMD, + HISTC_TYPE, + HISTC_TYPE_OFFSET, + HISTC_SYMBOL_OFFSET, + HISTC_TYPE_CACHELINE, + HISTC_CALLCHAIN_BRANCH_PREDICTED, + HISTC_CALLCHAIN_BRANCH_ABORT, + HISTC_CALLCHAIN_BRANCH_CYCLES, HISTC_NR_COLS, /* Last entry */ }; struct thread; struct dso; +#define MEM_STAT_LEN 8 + +struct he_mem_stat { + /* meaning of entries depends on enum mem_stat_type */ + u64 entries[MEM_STAT_LEN]; +}; + struct hists { struct rb_root_cached entries_in_array[2]; struct rb_root_cached *entries_in; @@ -97,10 +118,13 @@ struct hists { u64 nr_non_filtered_entries; u64 callchain_period; u64 callchain_non_filtered_period; + u64 callchain_latency; + u64 callchain_non_filtered_latency; struct thread *thread_filter; const struct dso *dso_filter; const char *uid_filter_str; const char *symbol_filter_str; + unsigned long *parallelism_filter; struct mutex lock; struct hists_stats stats; u64 event_stream; @@ -110,6 +134,9 @@ struct hists { struct perf_hpp_list *hpp_list; struct list_head hpp_formats; int nr_hpp_node; + int nr_mem_stats; + enum mem_stat_type *mem_stat_types; + struct he_mem_stat *mem_stat_total; }; #define hists__has(__h, __f) (__h)->hpp_list->__f @@ -128,18 +155,20 @@ struct hist_entry_iter { int total; int curr; - bool hide_unresolved; - 
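
The widened filter mask above works as a per-entry bitmask: each hists__filter_entry_by_*() helper sets one bit of he->filtered when its criterion rejects the entry, and the entry is shown only while the whole mask is zero (filter_mask_t in the header above is u16, wide enough for all nine bits now that PARALLELISM exists). A minimal, self-contained sketch of that logic follows; the ex_* names are illustrative stand-ins for the real enum, not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Illustrative mirror of enum hist_filter above. */
enum ex_hist_filter {
	EX_FILTER__DSO,
	EX_FILTER__THREAD,
	EX_FILTER__PARENT,
	EX_FILTER__SYMBOL,
	EX_FILTER__GUEST,
	EX_FILTER__HOST,
	EX_FILTER__SOCKET,
	EX_FILTER__C2C,
	EX_FILTER__PARALLELISM,
};

typedef uint16_t ex_filter_mask_t;

int main(void)
{
	ex_filter_mask_t filtered = 0;

	/* Each rejecting filter sets its own bit, as the
	 * hists__filter_entry_by_*() helpers do. */
	filtered |= (1 << EX_FILTER__DSO);
	filtered |= (1 << EX_FILTER__PARALLELISM);
	printf("visible: %s\n", filtered == 0 ? "yes" : "no");

	/* Lifting one filter clears only its bit; the entry stays
	 * hidden until every bit is gone. */
	filtered &= (ex_filter_mask_t)~(1 << EX_FILTER__DSO);
	printf("visible: %s\n", filtered == 0 ? "yes" : "no");
	return 0;
}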
struct evsel *evsel; struct perf_sample *sample; struct hist_entry *he; struct symbol *parent; - void *priv; + + struct mem_info *mi; + struct branch_info *bi; + struct hist_entry **he_cache; const struct hist_iter_ops *ops; /* user-defined callback function (optional) */ int (*add_entry_cb)(struct hist_entry_iter *iter, struct addr_location *al, bool single, void *arg); + bool hide_unresolved; }; extern const struct hist_iter_ops hist_iter_normal; @@ -147,6 +176,171 @@ extern const struct hist_iter_ops hist_iter_branch; extern const struct hist_iter_ops hist_iter_mem; extern const struct hist_iter_ops hist_iter_cumulative; +struct res_sample { + u64 time; + int cpu; + int tid; +}; + +struct he_stat { + u64 period; + /* + * Period re-scaled from CPU time to wall-clock time (divided by the + * parallelism at the time of the sample). This represents effect of + * the event on latency rather than CPU consumption. + */ + u64 latency; + u64 period_sys; + u64 period_us; + u64 period_guest_sys; + u64 period_guest_us; + u64 weight1; + u64 weight2; + u64 weight3; + u32 nr_events; +}; + +struct namespace_id { + u64 dev; + u64 ino; +}; + +struct hist_entry_diff { + bool computed; + union { + /* PERF_HPP__DELTA */ + double period_ratio_delta; + + /* PERF_HPP__RATIO */ + double period_ratio; + + /* HISTC_WEIGHTED_DIFF */ + s64 wdiff; + + /* PERF_HPP_DIFF__CYCLES */ + s64 cycles; + }; + struct stats stats; + unsigned long svals[NUM_SPARKS]; +}; + +struct hist_entry_ops { + void *(*new)(size_t size); + void (*free)(void *ptr); +}; + +/** + * struct hist_entry - histogram entry + * + * @row_offset - offset from the first callchain expanded to appear on screen + * @nr_rows - rows expanded in callchain, recalculated on folding/unfolding + */ +struct hist_entry { + struct rb_node rb_node_in; + struct rb_node rb_node; + union { + struct list_head node; + struct list_head head; + } pairs; + struct he_stat stat; + struct he_stat *stat_acc; + struct he_mem_stat *mem_stat; + struct map_symbol ms; + struct thread *thread; + struct comm *comm; + struct namespace_id cgroup_id; + u64 cgroup; + u64 ip; + u64 transaction; + u64 code_page_size; + u64 weight; + u64 ins_lat; + /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */ + u64 weight3; + s32 socket; + s32 cpu; + int parallelism; + int mem_type_off; + u8 cpumode; + u8 depth; + struct simd_flags simd_flags; + + /* We are added by hists__add_dummy_entry. */ + bool dummy; + bool leaf; + + char level; + filter_mask_t filtered; + + u16 callchain_size; + union { + /* + * Since perf diff only supports the stdio output, TUI + * fields are only accessed from perf report (or perf + * top). So make it a union to reduce memory usage. 
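
The latency field documented in struct he_stat above is the sample period rescaled by the parallelism observed at sample time, so CPU time spent concurrently on N CPUs contributes only 1/N of it to wall-clock latency. A hypothetical sketch of that rescaling (the helper name is illustrative; the actual division happens where samples are accounted, not in this header):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper illustrating the he_stat::latency rule: CPU time
 * is divided by the parallelism seen when the sample was taken. */
static uint64_t ex_sample_latency(uint64_t period, int parallelism)
{
	return parallelism > 1 ? period / parallelism : period;
}

int main(void)
{
	/* 4ms of CPU time across 4 busy CPUs adds ~1ms of latency... */
	printf("%llu\n", (unsigned long long)ex_sample_latency(4000000, 4));
	/* ...while single-threaded CPU time converts one-to-one. */
	printf("%llu\n", (unsigned long long)ex_sample_latency(4000000, 1));
	return 0;
}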
+ */ + struct hist_entry_diff diff; + struct /* for TUI */ { + u16 row_offset; + u16 nr_rows; + bool init_have_children; + bool unfolded; + bool has_children; + bool has_no_entry; + }; + }; + char *srcline; + char *srcfile; + struct symbol *parent; + struct branch_info *branch_info; + long time; + struct hists *hists; + struct mem_info *mem_info; + struct block_info *block_info; + struct kvm_info *kvm_info; + void *raw_data; + u32 raw_size; + int num_res; + struct res_sample *res_samples; + void *trace_output; + struct perf_hpp_list *hpp_list; + struct hist_entry *parent_he; + struct hist_entry_ops *ops; + struct annotated_data_type *mem_type; + union { + /* this is for hierarchical entry structure */ + struct { + struct rb_root_cached hroot_in; + struct rb_root_cached hroot_out; + }; /* non-leaf entries */ + struct rb_root sorted_chain; /* leaf entry has callchains */ + }; + struct callchain_root callchain[0]; /* must be last member */ +}; + +static __pure inline bool hist_entry__has_callchains(struct hist_entry *he) +{ + return he->callchain_size != 0; +} + +static inline bool hist_entry__has_pairs(struct hist_entry *he) +{ + return !list_empty(&he->pairs.node); +} + +static inline struct hist_entry *hist_entry__next_pair(struct hist_entry *he) +{ + if (hist_entry__has_pairs(he)) + return list_entry(he->pairs.node.next, struct hist_entry, pairs.node); + return NULL; +} + +static inline void hist_entry__add_pair(struct hist_entry *pair, + struct hist_entry *he) +{ + list_add_tail(&pair->pairs.node, &he->pairs.head); +} + struct hist_entry *hists__add_entry(struct hists *hists, struct addr_location *al, struct symbol *parent, @@ -176,13 +370,13 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, struct perf_hpp; struct perf_hpp_fmt; -int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); -int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__transaction_len(void); int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, struct hists *hists); int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, struct perf_hpp_fmt *fmt, int printed); +int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, size_t size, + unsigned int width); void hist_entry__delete(struct hist_entry *he); typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg); @@ -202,27 +396,30 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows); struct hist_entry *hists__get_entry(struct hists *hists, int idx); u64 hists__total_period(struct hists *hists); +u64 hists__total_latency(struct hists *hists); void hists__reset_stats(struct hists *hists); void hists__inc_stats(struct hists *hists, struct hist_entry *h); void hists__inc_nr_events(struct hists *hists); void hists__inc_nr_samples(struct hists *hists, bool filtered); void hists__inc_nr_lost_samples(struct hists *hists, u32 lost); +void hists__inc_nr_dropped_samples(struct hists *hists, u32 lost); size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, int max_cols, float min_pcnt, FILE *fp, bool ignore_callchains); -size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp, - bool skip_empty); +size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp); void hists__filter_by_dso(struct hists *hists); void hists__filter_by_thread(struct hists *hists); void hists__filter_by_symbol(struct hists *hists); void hists__filter_by_socket(struct hists *hists); +void 
hists__filter_by_parallelism(struct hists *hists); static inline bool hists__has_filter(struct hists *hists) { return hists->thread_filter || hists->dso_filter || - hists->symbol_filter_str || (hists->socket_filter > -1); + hists->symbol_filter_str || (hists->socket_filter > -1) || + hists->parallelism_filter; } u16 hists__col_len(struct hists *hists, enum hist_column col); @@ -235,6 +432,20 @@ void hists__match(struct hists *leader, struct hists *other); int hists__link(struct hists *leader, struct hists *other); int hists__unlink(struct hists *hists); +static inline float hist_entry__get_percent_limit(struct hist_entry *he) +{ + u64 period = he->stat.period; + u64 total_period = hists__total_period(he->hists); + + if (unlikely(total_period == 0)) + return 0; + + if (symbol_conf.cumulate_callchain) + period = he->stat_acc->period; + + return period * 100.0 / total_period; +} + struct hists_evsel { struct evsel evsel; struct hists hists; @@ -270,6 +481,9 @@ struct perf_hpp { bool skip; }; +typedef int64_t (*perf_hpp_fmt_cmp_t)( + struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *); + struct perf_hpp_fmt { const char *name; int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, @@ -281,12 +495,9 @@ struct perf_hpp_fmt { struct hist_entry *he); int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he); - int64_t (*cmp)(struct perf_hpp_fmt *fmt, - struct hist_entry *a, struct hist_entry *b); - int64_t (*collapse)(struct perf_hpp_fmt *fmt, - struct hist_entry *a, struct hist_entry *b); - int64_t (*sort)(struct perf_hpp_fmt *fmt, - struct hist_entry *a, struct hist_entry *b); + perf_hpp_fmt_cmp_t cmp; + perf_hpp_fmt_cmp_t collapse; + perf_hpp_fmt_cmp_t sort; bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); void (*free)(struct perf_hpp_fmt *fmt); @@ -367,24 +578,37 @@ extern struct perf_hpp_fmt perf_hpp__format[]; enum { /* Matches perf_hpp__format array. 
*/ PERF_HPP__OVERHEAD, + PERF_HPP__LATENCY, PERF_HPP__OVERHEAD_SYS, PERF_HPP__OVERHEAD_US, PERF_HPP__OVERHEAD_GUEST_SYS, PERF_HPP__OVERHEAD_GUEST_US, PERF_HPP__OVERHEAD_ACC, + PERF_HPP__LATENCY_ACC, PERF_HPP__SAMPLES, PERF_HPP__PERIOD, + PERF_HPP__WEIGHT1, + PERF_HPP__WEIGHT2, + PERF_HPP__WEIGHT3, + PERF_HPP__MEM_STAT_OP, + PERF_HPP__MEM_STAT_CACHE, + PERF_HPP__MEM_STAT_MEMORY, + PERF_HPP__MEM_STAT_SNOOP, + PERF_HPP__MEM_STAT_DTLB, PERF_HPP__MAX_INDEX }; void perf_hpp__init(void); -void perf_hpp__cancel_cumulate(void); +void perf_hpp__cancel_cumulate(struct evlist *evlist); +void perf_hpp__cancel_latency(struct evlist *evlist); void perf_hpp__setup_output_field(struct perf_hpp_list *list); void perf_hpp__reset_output_field(struct perf_hpp_list *list); void perf_hpp__append_sort_keys(struct perf_hpp_list *list); int perf_hpp__setup_hists_formats(struct perf_hpp_list *list, struct evlist *evlist); +int perf_hpp__alloc_mem_stats(struct perf_hpp_list *list, + struct evlist *evlist); bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); @@ -397,6 +621,7 @@ bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_parallelism_entry(struct perf_hpp_fmt *fmt); struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt); @@ -420,16 +645,28 @@ void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists); void perf_hpp__set_user_width(const char *width_list_str); void hists__reset_column_width(struct hists *hists); +enum perf_hpp_fmt_type { + PERF_HPP_FMT_TYPE__RAW, + PERF_HPP_FMT_TYPE__PERCENT, + PERF_HPP_FMT_TYPE__LATENCY, + PERF_HPP_FMT_TYPE__AVERAGE, +}; + typedef u64 (*hpp_field_fn)(struct hist_entry *he); typedef int (*hpp_callback_fn)(struct perf_hpp *hpp, bool front); typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...); int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he, hpp_field_fn get_field, - const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent); + const char *fmtstr, hpp_snprint_fn print_fn, + enum perf_hpp_fmt_type fmtype); int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he, hpp_field_fn get_field, - const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent); + const char *fmtstr, hpp_snprint_fn print_fn, + enum perf_hpp_fmt_type fmtype); +int hpp__fmt_mem_stat(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he, enum mem_stat_type mst, + const char *fmtstr, hpp_snprint_fn print_fn); static inline void advance_hpp(struct perf_hpp *hpp, int inc) { @@ -457,32 +694,36 @@ struct hist_browser_timer { int refresh; }; -struct annotation_options; -struct res_sample; - enum rstype { A_NORMAL, A_ASM, A_SOURCE }; -struct block_hist; +struct block_hist { + struct hists block_hists; + struct perf_hpp_list block_list; + struct perf_hpp_fmt block_fmt; + int block_idx; + bool valid; + struct hist_entry he; +}; + +#define NO_ADDR 0 #ifdef HAVE_SLANG_SUPPORT #include "../ui/keysyms.h" void attr_to_script(char *buf, struct perf_event_attr *attr); -int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *annotation_opts); +int __hist_entry__tui_annotate(struct hist_entry *he, struct map_symbol *ms, + struct evsel *evsel, + struct hist_browser_timer *hbt, u64 al_addr); int 
hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, - struct hist_browser_timer *hbt, - struct annotation_options *annotation_opts); + struct hist_browser_timer *hbt, u64 al_addr); int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, - float min_pcnt, struct perf_env *env, bool warn_lost_event, - struct annotation_options *annotation_options); + float min_pcnt, struct perf_env *env, bool warn_lost_event); int script_browse(const char *script_opt, struct evsel *evsel); @@ -492,8 +733,7 @@ int res_sample_browse(struct res_sample *res_samples, int num_res, void res_sample_init(void); int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, - float min_percent, struct perf_env *env, - struct annotation_options *annotation_opts); + float min_percent, struct perf_env *env); #else static inline int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, @@ -501,15 +741,15 @@ int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, struct hist_browser_timer *hbt __maybe_unused, float min_pcnt __maybe_unused, struct perf_env *env __maybe_unused, - bool warn_lost_event __maybe_unused, - struct annotation_options *annotation_options __maybe_unused) + bool warn_lost_event __maybe_unused) { return 0; } -static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused, - struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *annotation_options __maybe_unused) +static inline int __hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, + struct map_symbol *ms __maybe_unused, + struct evsel *evsel __maybe_unused, + struct hist_browser_timer *hbt __maybe_unused, + u64 al_addr __maybe_unused) { return 0; } @@ -517,7 +757,7 @@ static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused, static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, struct evsel *evsel __maybe_unused, struct hist_browser_timer *hbt __maybe_unused, - struct annotation_options *annotation_opts __maybe_unused) + u64 al_addr __maybe_unused) { return 0; } @@ -541,8 +781,7 @@ static inline void res_sample_init(void) {} static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused, struct evsel *evsel __maybe_unused, float min_percent __maybe_unused, - struct perf_env *env __maybe_unused, - struct annotation_options *annotation_opts __maybe_unused) + struct perf_env *env __maybe_unused) { return 0; } @@ -558,7 +797,7 @@ unsigned int hists__overhead_width(struct hists *hists); void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, struct perf_sample *sample, bool nonany_branch_mode, - u64 *total_cycles); + u64 *total_cycles, struct evsel *evsel); struct option; int parse_filter_percentage(const struct option *opt, const char *arg, int unset); diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c new file mode 100644 index 000000000000..279d6b1a47f0 --- /dev/null +++ b/tools/perf/util/hwmon_pmu.c @@ -0,0 +1,835 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "counts.h" +#include "debug.h" +#include "evsel.h" +#include "hashmap.h" +#include "hwmon_pmu.h" +#include "pmu.h" +#include <internal/xyarray.h> +#include <internal/threadmap.h> +#include <perf/threadmap.h> +#include <sys/types.h> +#include <assert.h> +#include <ctype.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <api/fs/fs.h> +#include 
<api/io.h> +#include <api/io_dir.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/zalloc.h> + +/** Strings that correspond to enum hwmon_type. */ +static const char * const hwmon_type_strs[HWMON_TYPE_MAX] = { + NULL, + "cpu", + "curr", + "energy", + "fan", + "humidity", + "in", + "intrusion", + "power", + "pwm", + "temp", +}; +#define LONGEST_HWMON_TYPE_STR "intrusion" + +/** Strings that correspond to enum hwmon_item. */ +static const char * const hwmon_item_strs[HWMON_ITEM__MAX] = { + NULL, + "accuracy", + "alarm", + "auto_channels_temp", + "average", + "average_highest", + "average_interval", + "average_interval_max", + "average_interval_min", + "average_lowest", + "average_max", + "average_min", + "beep", + "cap", + "cap_hyst", + "cap_max", + "cap_min", + "crit", + "crit_hyst", + "div", + "emergency", + "emergency_hist", + "enable", + "fault", + "freq", + "highest", + "input", + "label", + "lcrit", + "lcrit_hyst", + "lowest", + "max", + "max_hyst", + "min", + "min_hyst", + "mod", + "offset", + "pulses", + "rated_max", + "rated_min", + "reset_history", + "target", + "type", + "vid", +}; +#define LONGEST_HWMON_ITEM_STR "average_interval_max" + +static const char *const hwmon_units[HWMON_TYPE_MAX] = { + NULL, + "V", /* cpu */ + "A", /* curr */ + "J", /* energy */ + "rpm", /* fan */ + "%", /* humidity */ + "V", /* in */ + "", /* intrusion */ + "W", /* power */ + "Hz", /* pwm */ + "'C", /* temp */ +}; + +struct hwmon_pmu { + struct perf_pmu pmu; + struct hashmap events; + char *hwmon_dir; +}; + +/** + * struct hwmon_pmu_event_value: Value in hwmon_pmu->events. + * + * Hwmon files are of the form <type><number>_<item> and may have a suffix + * _alarm. + */ +struct hwmon_pmu_event_value { + /** @items: which item files are present. */ + DECLARE_BITMAP(items, HWMON_ITEM__MAX); + /** @alarm_items: which item files are present. */ + DECLARE_BITMAP(alarm_items, HWMON_ITEM__MAX); + /** @label: contents of <type><number>_label if present. */ + char *label; + /** @name: name computed from label of the form <type>_<label>. 
*/ + char *name; +}; + +bool perf_pmu__is_hwmon(const struct perf_pmu *pmu) +{ + return pmu && pmu->type >= PERF_PMU_TYPE_HWMON_START && + pmu->type <= PERF_PMU_TYPE_HWMON_END; +} + +bool evsel__is_hwmon(const struct evsel *evsel) +{ + return perf_pmu__is_hwmon(evsel->pmu); +} + +static size_t hwmon_pmu__event_hashmap_hash(long key, void *ctx __maybe_unused) +{ + return ((union hwmon_pmu_event_key)key).type_and_num; +} + +static bool hwmon_pmu__event_hashmap_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return ((union hwmon_pmu_event_key)key1).type_and_num == + ((union hwmon_pmu_event_key)key2).type_and_num; +} + +static int hwmon_strcmp(const void *a, const void *b) +{ + const char *sa = a; + const char * const *sb = b; + + return strcmp(sa, *sb); +} + +bool parse_hwmon_filename(const char *filename, + enum hwmon_type *type, + int *number, + enum hwmon_item *item, + bool *alarm) +{ + char fn_type[24]; + const char **elem; + const char *fn_item = NULL; + size_t fn_item_len; + + assert(strlen(LONGEST_HWMON_TYPE_STR) < sizeof(fn_type)); + strlcpy(fn_type, filename, sizeof(fn_type)); + for (size_t i = 0; fn_type[i] != '\0'; i++) { + if (fn_type[i] >= '0' && fn_type[i] <= '9') { + fn_type[i] = '\0'; + *number = strtoul(&filename[i], (char **)&fn_item, 10); + if (*fn_item == '_') + fn_item++; + break; + } + if (fn_type[i] == '_') { + fn_type[i] = '\0'; + *number = -1; + fn_item = &filename[i + 1]; + break; + } + } + if (fn_item == NULL || fn_type[0] == '\0' || (item != NULL && fn_item[0] == '\0')) { + pr_debug3("hwmon_pmu: not a hwmon file '%s'\n", filename); + return false; + } + elem = bsearch(&fn_type, hwmon_type_strs + 1, ARRAY_SIZE(hwmon_type_strs) - 1, + sizeof(hwmon_type_strs[0]), hwmon_strcmp); + if (!elem) { + pr_debug3("hwmon_pmu: not a hwmon type '%s' in file name '%s'\n", + fn_type, filename); + return false; + } + + *type = elem - &hwmon_type_strs[0]; + if (!item) + return true; + + *alarm = false; + fn_item_len = strlen(fn_item); + if (fn_item_len > 6 && !strcmp(&fn_item[fn_item_len - 6], "_alarm")) { + assert(strlen(LONGEST_HWMON_ITEM_STR) < sizeof(fn_type)); + strlcpy(fn_type, fn_item, fn_item_len - 5); + fn_item = fn_type; + *alarm = true; + } + elem = bsearch(fn_item, hwmon_item_strs + 1, ARRAY_SIZE(hwmon_item_strs) - 1, + sizeof(hwmon_item_strs[0]), hwmon_strcmp); + if (!elem) { + pr_debug3("hwmon_pmu: not a hwmon item '%s' in file name '%s'\n", + fn_item, filename); + return false; + } + *item = elem - &hwmon_item_strs[0]; + return true; +} + +static void fix_name(char *p) +{ + char *s = strchr(p, '\n'); + + if (s) + *s = '\0'; + + while (*p != '\0') { + if (strchr(" :,/\n\t", *p)) + *p = '_'; + else + *p = tolower(*p); + p++; + } +} + +static int hwmon_pmu__read_events(struct hwmon_pmu *pmu) +{ + int err = 0; + struct hashmap_entry *cur, *tmp; + size_t bkt; + struct io_dirent64 *ent; + struct io_dir dir; + + if (pmu->pmu.sysfs_aliases_loaded) + return 0; + + /* Use openat so that the directory contents are refreshed. 
*/ + io_dir__init(&dir, open(pmu->hwmon_dir, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + + if (dir.dirfd < 0) + return -ENOENT; + + while ((ent = io_dir__readdir(&dir)) != NULL) { + enum hwmon_type type; + int number; + enum hwmon_item item; + bool alarm; + union hwmon_pmu_event_key key = { .type_and_num = 0 }; + struct hwmon_pmu_event_value *value; + + if (ent->d_type != DT_REG) + continue; + + if (!parse_hwmon_filename(ent->d_name, &type, &number, &item, &alarm)) { + pr_debug3("Not a hwmon file '%s'\n", ent->d_name); + continue; + } + key.num = number; + key.type = type; + if (!hashmap__find(&pmu->events, key.type_and_num, &value)) { + value = zalloc(sizeof(*value)); + if (!value) { + err = -ENOMEM; + goto err_out; + } + err = hashmap__add(&pmu->events, key.type_and_num, value); + if (err) { + free(value); + err = -ENOMEM; + goto err_out; + } + } + __set_bit(item, alarm ? value->alarm_items : value->items); + if (item == HWMON_ITEM_LABEL) { + char buf[128]; + int fd = openat(dir.dirfd, ent->d_name, O_RDONLY); + ssize_t read_len; + + if (fd < 0) + continue; + + read_len = read(fd, buf, sizeof(buf)); + + while (read_len > 0 && buf[read_len - 1] == '\n') + read_len--; + + if (read_len > 0) + buf[read_len] = '\0'; + + if (buf[0] == '\0') { + pr_debug("hwmon_pmu: empty label file %s %s\n", + pmu->pmu.name, ent->d_name); + close(fd); + continue; + } + value->label = strdup(buf); + if (!value->label) { + pr_debug("hwmon_pmu: memory allocation failure\n"); + close(fd); + continue; + } + snprintf(buf, sizeof(buf), "%s_%s", hwmon_type_strs[type], value->label); + fix_name(buf); + value->name = strdup(buf); + if (!value->name) + pr_debug("hwmon_pmu: memory allocation failure\n"); + close(fd); + } + } + if (hashmap__size(&pmu->events) == 0) + pr_debug2("hwmon_pmu: %s has no events\n", pmu->pmu.name); + + hashmap__for_each_entry_safe((&pmu->events), cur, tmp, bkt) { + union hwmon_pmu_event_key key = { + .type_and_num = cur->key, + }; + struct hwmon_pmu_event_value *value = cur->pvalue; + + if (!test_bit(HWMON_ITEM_INPUT, value->items)) { + pr_debug("hwmon_pmu: %s removing event '%s%d' that has no input file\n", + pmu->pmu.name, hwmon_type_strs[key.type], key.num); + hashmap__delete(&pmu->events, key.type_and_num, &key, &value); + zfree(&value->label); + zfree(&value->name); + free(value); + } + } + pmu->pmu.sysfs_aliases_loaded = true; + +err_out: + close(dir.dirfd); + return err; +} + +struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, const char *hwmon_dir, + const char *sysfs_name, const char *name) +{ + char buf[64]; + struct hwmon_pmu *hwm; + __u32 type = PERF_PMU_TYPE_HWMON_START + strtoul(sysfs_name + 5, NULL, 10); + + if (type > PERF_PMU_TYPE_HWMON_END) { + pr_err("Unable to encode hwmon type from %s in valid PMU type\n", sysfs_name); + return NULL; + } + + snprintf(buf, sizeof(buf), "hwmon_%s", name); + fix_name(buf + 6); + + hwm = zalloc(sizeof(*hwm)); + if (!hwm) + return NULL; + + if (perf_pmu__init(&hwm->pmu, type, buf) != 0) { + perf_pmu__delete(&hwm->pmu); + return NULL; + } + + hwm->hwmon_dir = strdup(hwmon_dir); + if (!hwm->hwmon_dir) { + perf_pmu__delete(&hwm->pmu); + return NULL; + } + hwm->pmu.alias_name = strdup(sysfs_name); + if (!hwm->pmu.alias_name) { + perf_pmu__delete(&hwm->pmu); + return NULL; + } + hwm->pmu.cpus = perf_cpu_map__new_int(0); + if (!hwm->pmu.cpus) { + perf_pmu__delete(&hwm->pmu); + return NULL; + } + INIT_LIST_HEAD(&hwm->pmu.format); + INIT_LIST_HEAD(&hwm->pmu.caps); + hashmap__init(&hwm->events, hwmon_pmu__event_hashmap_hash, + 
hwmon_pmu__event_hashmap_equal, /*ctx=*/NULL); + + list_add_tail(&hwm->pmu.list, pmus); + return &hwm->pmu; +} + +void hwmon_pmu__exit(struct perf_pmu *pmu) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + struct hashmap_entry *cur, *tmp; + size_t bkt; + + hashmap__for_each_entry_safe((&hwm->events), cur, tmp, bkt) { + struct hwmon_pmu_event_value *value = cur->pvalue; + + zfree(&value->label); + zfree(&value->name); + free(value); + } + hashmap__clear(&hwm->events); + zfree(&hwm->hwmon_dir); +} + +static size_t hwmon_pmu__describe_items(struct hwmon_pmu *hwm, char *out_buf, size_t out_buf_len, + union hwmon_pmu_event_key key, + const unsigned long *items, bool is_alarm) +{ + size_t bit; + char buf[64]; + size_t len = 0; + int dir = open(hwm->hwmon_dir, O_CLOEXEC | O_DIRECTORY | O_RDONLY); + + if (dir < 0) + return 0; + + for_each_set_bit(bit, items, HWMON_ITEM__MAX) { + int fd; + + if (bit == HWMON_ITEM_LABEL || bit == HWMON_ITEM_INPUT) + continue; + + snprintf(buf, sizeof(buf), "%s%d_%s%s", + hwmon_type_strs[key.type], + key.num, + hwmon_item_strs[bit], + is_alarm ? "_alarm" : ""); + fd = openat(dir, buf, O_RDONLY); + if (fd > 0) { + ssize_t read_len = read(fd, buf, sizeof(buf)); + + while (read_len > 0 && buf[read_len - 1] == '\n') + read_len--; + + if (read_len > 0) { + long long val; + + buf[read_len] = '\0'; + val = strtoll(buf, /*endptr=*/NULL, 10); + len += snprintf(out_buf + len, out_buf_len - len, "%s%s%s=%g%s", + len == 0 ? " " : ", ", + hwmon_item_strs[bit], + is_alarm ? "_alarm" : "", + (double)val / 1000.0, + hwmon_units[key.type]); + } + close(fd); + } + } + close(dir); + return len; +} + +int hwmon_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + struct hashmap_entry *cur; + size_t bkt; + + if (hwmon_pmu__read_events(hwm)) + return false; + + hashmap__for_each_entry((&hwm->events), cur, bkt) { + static const char *const hwmon_scale_units[HWMON_TYPE_MAX] = { + NULL, + "0.001V", /* cpu */ + "0.001A", /* curr */ + "0.001J", /* energy */ + "1rpm", /* fan */ + "0.001%", /* humidity */ + "0.001V", /* in */ + NULL, /* intrusion */ + "0.001W", /* power */ + "1Hz", /* pwm */ + "0.001'C", /* temp */ + }; + static const char *const hwmon_desc[HWMON_TYPE_MAX] = { + NULL, + "CPU core reference voltage", /* cpu */ + "Current", /* curr */ + "Cumulative energy use", /* energy */ + "Fan", /* fan */ + "Humidity", /* humidity */ + "Voltage", /* in */ + "Chassis intrusion detection", /* intrusion */ + "Power use", /* power */ + "Pulse width modulation fan control", /* pwm */ + "Temperature", /* temp */ + }; + char alias_buf[64]; + char desc_buf[256]; + char encoding_buf[128]; + union hwmon_pmu_event_key key = { + .type_and_num = cur->key, + }; + struct hwmon_pmu_event_value *value = cur->pvalue; + struct pmu_event_info info = { + .pmu = pmu, + .name = value->name, + .alias = alias_buf, + .scale_unit = hwmon_scale_units[key.type], + .desc = desc_buf, + .long_desc = NULL, + .encoding_desc = encoding_buf, + .topic = "hwmon", + .pmu_name = pmu->name, + .event_type_desc = "Hwmon event", + }; + int ret; + size_t len; + + len = snprintf(alias_buf, sizeof(alias_buf), "%s%d", + hwmon_type_strs[key.type], key.num); + if (!info.name) { + info.name = info.alias; + info.alias = NULL; + } + + len = snprintf(desc_buf, sizeof(desc_buf), "%s in unit %s named %s.", + hwmon_desc[key.type], + pmu->name + 6, + value->label ?: info.name); + + len += hwmon_pmu__describe_items(hwm, desc_buf + 
len, sizeof(desc_buf) - len, + key, value->items, /*is_alarm=*/false); + + len += hwmon_pmu__describe_items(hwm, desc_buf + len, sizeof(desc_buf) - len, + key, value->alarm_items, /*is_alarm=*/true); + + snprintf(encoding_buf, sizeof(encoding_buf), "%s/config=0x%lx/", + pmu->name, cur->key); + + ret = cb(state, &info); + if (ret) + return ret; + } + return 0; +} + +size_t hwmon_pmu__num_events(struct perf_pmu *pmu) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + + hwmon_pmu__read_events(hwm); + return hashmap__size(&hwm->events); +} + +bool hwmon_pmu__have_event(struct perf_pmu *pmu, const char *name) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + enum hwmon_type type; + int number; + union hwmon_pmu_event_key key = { .type_and_num = 0 }; + struct hashmap_entry *cur; + size_t bkt; + + if (!parse_hwmon_filename(name, &type, &number, /*item=*/NULL, /*is_alarm=*/NULL)) + return false; + + if (hwmon_pmu__read_events(hwm)) + return false; + + key.type = type; + key.num = number; + if (hashmap__find(&hwm->events, key.type_and_num, /*value=*/NULL)) + return true; + if (key.num != -1) + return false; + /* Item is of form <type>_ which means we should match <type>_<label>. */ + hashmap__for_each_entry((&hwm->events), cur, bkt) { + struct hwmon_pmu_event_value *value = cur->pvalue; + + key.type_and_num = cur->key; + if (key.type == type && value->name && !strcasecmp(name, value->name)) + return true; + } + return false; +} + +static int hwmon_pmu__config_term(const struct hwmon_pmu *hwm, + struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) +{ + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) { + enum hwmon_type type; + int number; + + if (parse_hwmon_filename(term->config, &type, &number, + /*item=*/NULL, /*is_alarm=*/NULL)) { + if (number == -1) { + /* + * Item is of form <type>_ which means we should + * match <type>_<label>. + */ + struct hashmap_entry *cur; + size_t bkt; + + attr->config = 0; + hashmap__for_each_entry((&hwm->events), cur, bkt) { + union hwmon_pmu_event_key key = { + .type_and_num = cur->key, + }; + struct hwmon_pmu_event_value *value = cur->pvalue; + + if (key.type == type && value->name && + !strcasecmp(term->config, value->name)) { + attr->config = key.type_and_num; + break; + } + } + if (attr->config == 0) + return -EINVAL; + } else { + union hwmon_pmu_event_key key = { + .type_and_num = 0, + }; + + key.type = type; + key.num = number; + attr->config = key.type_and_num; + } + return 0; + } + } + if (err) { + char *err_str; + + parse_events_error__handle(err, term->err_val, + asprintf(&err_str, + "unexpected hwmon event term (%s) %s", + parse_events__term_type_str(term->type_term), + term->config) < 0 + ?
strdup("unexpected hwmon event term") + : err_str, + NULL); + } + return -EINVAL; +} + +int hwmon_pmu__config_terms(const struct perf_pmu *pmu, + struct perf_event_attr *attr, + struct parse_events_terms *terms, + struct parse_events_error *err) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + struct parse_events_term *term; + int ret; + + ret = hwmon_pmu__read_events(hwm); + if (ret) + return ret; + + list_for_each_entry(term, &terms->terms, list) { + if (hwmon_pmu__config_term(hwm, attr, term, err)) + return -EINVAL; + } + + return 0; + +} + +int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_info *info, + struct parse_events_error *err) +{ + struct parse_events_term *term = + list_first_entry(&terms->terms, struct parse_events_term, list); + + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) { + enum hwmon_type type; + int number; + + if (parse_hwmon_filename(term->config, &type, &number, + /*item=*/NULL, /*is_alarm=*/NULL)) { + info->unit = hwmon_units[type]; + if (type == HWMON_TYPE_FAN || type == HWMON_TYPE_PWM || + type == HWMON_TYPE_INTRUSION) + info->scale = 1; + else + info->scale = 0.001; + } + return 0; + } + if (err) { + char *err_str; + + parse_events_error__handle(err, term->err_val, + asprintf(&err_str, + "unexpected hwmon event term (%s) %s", + parse_events__term_type_str(term->type_term), + term->config) < 0 + ? strdup("unexpected hwmon event term") + : err_str, + NULL); + } + return -EINVAL; +} + +int perf_pmus__read_hwmon_pmus(struct list_head *pmus) +{ + char *line = NULL; + struct io_dirent64 *class_hwmon_ent; + struct io_dir class_hwmon_dir; + char buf[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return 0; + + scnprintf(buf, sizeof(buf), "%s/class/hwmon/", sysfs); + io_dir__init(&class_hwmon_dir, open(buf, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + + if (class_hwmon_dir.dirfd < 0) + return 0; + + while ((class_hwmon_ent = io_dir__readdir(&class_hwmon_dir)) != NULL) { + size_t line_len; + int hwmon_dir, name_fd; + struct io io; + char buf2[128]; + + if (class_hwmon_ent->d_type != DT_LNK) + continue; + + scnprintf(buf, sizeof(buf), "%s/class/hwmon/%s", sysfs, class_hwmon_ent->d_name); + hwmon_dir = open(buf, O_DIRECTORY); + if (hwmon_dir == -1) { + pr_debug("hwmon_pmu: not a directory: '%s/class/hwmon/%s'\n", + sysfs, class_hwmon_ent->d_name); + continue; + } + name_fd = openat(hwmon_dir, "name", O_RDONLY); + if (name_fd == -1) { + pr_debug("hwmon_pmu: failure to open '%s/class/hwmon/%s/name'\n", + sysfs, class_hwmon_ent->d_name); + close(hwmon_dir); + continue; + } + io__init(&io, name_fd, buf2, sizeof(buf2)); + if (io__getline(&io, &line, &line_len) > 0 && line[line_len - 1] == '\n') + line[line_len - 1] = '\0'; + hwmon_pmu__new(pmus, buf, class_hwmon_ent->d_name, line); + close(name_fd); + close(hwmon_dir); + } + free(line); + close(class_hwmon_dir.dirfd); + return 0; +} + +#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) + +int evsel__hwmon_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx) +{ + struct hwmon_pmu *hwm = container_of(evsel->pmu, struct hwmon_pmu, pmu); + union hwmon_pmu_event_key key = { + .type_and_num = evsel->core.attr.config, + }; + int idx = 0, thread = 0, nthreads, err = 0; + int dir = open(hwm->hwmon_dir, O_CLOEXEC | O_DIRECTORY | O_RDONLY); + + if (dir < 0) + return -errno; + + nthreads = perf_thread_map__nr(threads); + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { + for 
(thread = 0; thread < nthreads; thread++) { + char buf[64]; + int fd; + + snprintf(buf, sizeof(buf), "%s%d_input", + hwmon_type_strs[key.type], key.num); + + fd = openat(dir, buf, O_RDONLY); + FD(evsel, idx, thread) = fd; + if (fd < 0) { + err = -errno; + goto out_close; + } + } + } + close(dir); + return 0; +out_close: + if (err) + threads->err_thread = thread; + + do { + while (--thread >= 0) { + if (FD(evsel, idx, thread) >= 0) + close(FD(evsel, idx, thread)); + FD(evsel, idx, thread) = -1; + } + thread = nthreads; + } while (--idx >= 0); + close(dir); + return err; +} + +int evsel__hwmon_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) +{ + char buf[32]; + int fd; + ssize_t len; + struct perf_counts_values *count, *old_count = NULL; + + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + + count = perf_counts(evsel->counts, cpu_map_idx, thread); + fd = FD(evsel, cpu_map_idx, thread); + len = pread(fd, buf, sizeof(buf), 0); + if (len <= 0) { + count->lost++; + return -EINVAL; + } + buf[len] = '\0'; + if (old_count) { + count->val = old_count->val + strtoll(buf, NULL, 10); + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + } else { + count->val = strtoll(buf, NULL, 10); + count->run++; + count->ena++; + } + return 0; +} diff --git a/tools/perf/util/hwmon_pmu.h b/tools/perf/util/hwmon_pmu.h new file mode 100644 index 000000000000..d1e403c8b70b --- /dev/null +++ b/tools/perf/util/hwmon_pmu.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __HWMON_PMU_H +#define __HWMON_PMU_H + +#include "pmu.h" +#include <stdbool.h> + +struct list_head; +struct perf_thread_map; + +/** + * enum hwmon_type: + * + * As described in Documentation/hwmon/sysfs-interface.rst hwmon events are + * defined over multiple files of the form <type><num>_<item>. This enum + * captures potential <type> values. + * + * This enum is exposed for testing. + */ +enum hwmon_type { + HWMON_TYPE_NONE, + + HWMON_TYPE_CPU, + HWMON_TYPE_CURR, + HWMON_TYPE_ENERGY, + HWMON_TYPE_FAN, + HWMON_TYPE_HUMIDITY, + HWMON_TYPE_IN, + HWMON_TYPE_INTRUSION, + HWMON_TYPE_POWER, + HWMON_TYPE_PWM, + HWMON_TYPE_TEMP, + + HWMON_TYPE_MAX +}; + +/** + * enum hwmon_item: + * + * Similar to enum hwmon_type but describes the item part of a sysfs filename. + * + * This enum is exposed for testing. 
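
As the comment above describes, every hwmon event is assembled from sysfs files named <type><number>_<item>. A self-contained sketch of that decomposition, separate from the in-tree parse_hwmon_filename() decoder shown earlier (the splitting code here is a simplified stand-in, not the patch's implementation):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Split "temp1_input" into type "temp", number 1, item "input". */
int main(void)
{
	const char *filename = "temp1_input";
	size_t type_len = strcspn(filename, "0123456789");
	char *item;
	long number = strtol(filename + type_len, &item, 10);

	if (*item == '_')
		item++;
	printf("type=%.*s number=%ld item=%s\n",
	       (int)type_len, filename, number, item);
	return 0;
}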
+ */ +enum hwmon_item { + HWMON_ITEM_NONE, + + HWMON_ITEM_ACCURACY, + HWMON_ITEM_ALARM, + HWMON_ITEM_AUTO_CHANNELS_TEMP, + HWMON_ITEM_AVERAGE, + HWMON_ITEM_AVERAGE_HIGHEST, + HWMON_ITEM_AVERAGE_INTERVAL, + HWMON_ITEM_AVERAGE_INTERVAL_MAX, + HWMON_ITEM_AVERAGE_INTERVAL_MIN, + HWMON_ITEM_AVERAGE_LOWEST, + HWMON_ITEM_AVERAGE_MAX, + HWMON_ITEM_AVERAGE_MIN, + HWMON_ITEM_BEEP, + HWMON_ITEM_CAP, + HWMON_ITEM_CAP_HYST, + HWMON_ITEM_CAP_MAX, + HWMON_ITEM_CAP_MIN, + HWMON_ITEM_CRIT, + HWMON_ITEM_CRIT_HYST, + HWMON_ITEM_DIV, + HWMON_ITEM_EMERGENCY, + HWMON_ITEM_EMERGENCY_HIST, + HWMON_ITEM_ENABLE, + HWMON_ITEM_FAULT, + HWMON_ITEM_FREQ, + HWMON_ITEM_HIGHEST, + HWMON_ITEM_INPUT, + HWMON_ITEM_LABEL, + HWMON_ITEM_LCRIT, + HWMON_ITEM_LCRIT_HYST, + HWMON_ITEM_LOWEST, + HWMON_ITEM_MAX, + HWMON_ITEM_MAX_HYST, + HWMON_ITEM_MIN, + HWMON_ITEM_MIN_HYST, + HWMON_ITEM_MOD, + HWMON_ITEM_OFFSET, + HWMON_ITEM_PULSES, + HWMON_ITEM_RATED_MAX, + HWMON_ITEM_RATED_MIN, + HWMON_ITEM_RESET_HISTORY, + HWMON_ITEM_TARGET, + HWMON_ITEM_TYPE, + HWMON_ITEM_VID, + + HWMON_ITEM__MAX, +}; + +/** + * union hwmon_pmu_event_key: Key for hwmon_pmu->events as such each key + * represents an event. + * union is exposed for testing to ensure problems are avoided on big + * endian machines. + * + * Related hwmon files start <type><number> that this key represents. + */ +union hwmon_pmu_event_key { + long type_and_num; + struct { + int num :16; + enum hwmon_type type :8; + }; +}; + +bool perf_pmu__is_hwmon(const struct perf_pmu *pmu); +bool evsel__is_hwmon(const struct evsel *evsel); + +/** + * parse_hwmon_filename() - Parse filename into constituent parts. + * + * @filename: To be parsed, of the form <type><number>_<item>. + * @type: The type defined from the parsed file name. + * @number: The number of the type, for example there may be more than 1 fan. + * @item: A hwmon <type><number> may have multiple associated items. + * @alarm: Is the filename for an alarm value? + * + * An example of a hwmon filename is "temp1_input". The type is temp for a + * temperature value. The number is 1. The item within the file is an input + * value - the temperature itself. This file doesn't contain an alarm value. + * + * Exposed for testing. + */ +bool parse_hwmon_filename(const char *filename, + enum hwmon_type *type, + int *number, + enum hwmon_item *item, + bool *alarm); + +/** + * hwmon_pmu__new() - Allocate and construct a hwmon PMU. + * + * @pmus: The list of PMUs to be added to. + * @hwmon_dir: The path to a hwmon directory. + * @sysfs_name: Name of the hwmon sysfs directory like hwmon0. + * @name: The contents of the "name" file in the hwmon directory. + * + * Exposed for testing. Regular construction should happen via + * perf_pmus__read_hwmon_pmus. 
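
union hwmon_pmu_event_key, documented above, overlays the num and type bitfields on the single long that serves both as the events hashmap key and as perf_event_attr.config. A small sketch of that round trip; the ex_* names and the TEMP value are assumptions for the demo, not taken from the patch:

#include <stdio.h>

enum ex_hwmon_type { EX_HWMON_TYPE_NONE, EX_HWMON_TYPE_TEMP = 10 };

/* Illustrative mirror of union hwmon_pmu_event_key. */
union ex_event_key {
	long type_and_num;
	struct {
		int num :16;
		enum ex_hwmon_type type :8;
	};
};

int main(void)
{
	union ex_event_key key = { .type_and_num = 0 };

	key.type = EX_HWMON_TYPE_TEMP;	/* the "temp" sensor class */
	key.num = 1;			/* as in temp1_input */

	/* The packed long is what ends up in attr.config and back. */
	printf("config=0x%lx type=%d num=%d\n",
	       key.type_and_num, (int)key.type, key.num);
	return 0;
}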
+ */ +struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, const char *hwmon_dir, + const char *sysfs_name, const char *name); +void hwmon_pmu__exit(struct perf_pmu *pmu); + +int hwmon_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb); +size_t hwmon_pmu__num_events(struct perf_pmu *pmu); +bool hwmon_pmu__have_event(struct perf_pmu *pmu, const char *name); +int hwmon_pmu__config_terms(const struct perf_pmu *pmu, + struct perf_event_attr *attr, + struct parse_events_terms *terms, + struct parse_events_error *err); +int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_info *info, + struct parse_events_error *err); + +int perf_pmus__read_hwmon_pmus(struct list_head *pmus); + + +int evsel__hwmon_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx); +int evsel__hwmon_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread); + +#endif /* __HWMON_PMU_H */ diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h index 7d99a084e82d..6f1b9f6b2466 100644 --- a/tools/perf/util/include/dwarf-regs.h +++ b/tools/perf/util/include/dwarf-regs.h @@ -1,23 +1,133 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PERF_DWARF_REGS_H_ #define _PERF_DWARF_REGS_H_ +#include "annotate.h" +#include <elf.h> -#ifdef HAVE_DWARF_SUPPORT -const char *get_arch_regstr(unsigned int n); -/* - * get_dwarf_regstr - Returns ftrace register string from DWARF regnum - * n: DWARF register number - * machine: ELF machine signature (EM_*) +#ifndef EM_AARCH64 +#define EM_AARCH64 183 /* ARM 64 bit */ +#endif + +#ifndef EM_CSKY +#define EM_CSKY 252 /* C-SKY */ +#endif +#ifndef EF_CSKY_ABIV1 +#define EF_CSKY_ABIV1 0X10000000 +#endif +#ifndef EF_CSKY_ABIV2 +#define EF_CSKY_ABIV2 0X20000000 +#endif + +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 /* LoongArch */ +#endif + +/* EM_HOST gives the ELF machine for host, EF_HOST gives additional flags. */ +#if defined(__x86_64__) + #define EM_HOST EM_X86_64 +#elif defined(__i386__) + #define EM_HOST EM_386 +#elif defined(__aarch64__) + #define EM_HOST EM_AARCH64 +#elif defined(__arm__) + #define EM_HOST EM_ARM +#elif defined(__alpha__) + #define EM_HOST EM_ALPHA +#elif defined(__arc__) + #define EM_HOST EM_ARC +#elif defined(__AVR__) + #define EM_HOST EM_AVR +#elif defined(__AVR32__) + #define EM_HOST EM_AVR32 +#elif defined(__bfin__) + #define EM_HOST EM_BLACKFIN +#elif defined(__csky__) + #define EM_HOST EM_CSKY + #if defined(__CSKYABIV2__) + #define EF_HOST EF_CSKY_ABIV2 + #else + #define EF_HOST EF_CSKY_ABIV1 + #endif +#elif defined(__cris__) + #define EM_HOST EM_CRIS +#elif defined(__hppa__) // HP PA-RISC + #define EM_HOST EM_PARISC +#elif defined(__loongarch__) + #define EM_HOST EM_LOONGARCH +#elif defined(__mips__) + #define EM_HOST EM_MIPS +#elif defined(__m32r__) + #define EM_HOST EM_M32R +#elif defined(__microblaze__) + #define EM_HOST EM_MICROBLAZE +#elif defined(__MSP430__) + #define EM_HOST EM_MSP430 +#elif defined(__powerpc64__) + #define EM_HOST EM_PPC64 +#elif defined(__powerpc__) + #define EM_HOST EM_PPC +#elif defined(__riscv) + #define EM_HOST EM_RISCV +#elif defined(__s390x__) + #define EM_HOST EM_S390 +#elif defined(__sh__) + #define EM_HOST EM_SH +#elif defined(__sparc64__) || defined(__sparc__) + #define EM_HOST EM_SPARC +#elif defined(__xtensa__) + #define EM_HOST EM_XTENSA +#else + /* Unknown host ELF machine type. 
*/ + #define EM_HOST EM_NONE +#endif + +#if !defined(EF_HOST) + #define EF_HOST 0 +#endif + +#define DWARF_REG_PC 0xd3af9c /* random number */ +#define DWARF_REG_FB 0xd3affb /* random number */ + +#ifdef HAVE_LIBDW_SUPPORT +const char *get_csky_regstr(unsigned int n, unsigned int flags); + +/** + * get_dwarf_regstr() - Returns ftrace register string from DWARF regnum. + * @n: DWARF register number. + * @machine: ELF machine signature (EM_*). + * @flags: ELF flags for things like ABI differences. */ -const char *get_dwarf_regstr(unsigned int n, unsigned int machine); +const char *get_dwarf_regstr(unsigned int n, unsigned int machine, unsigned int flags); + +int get_x86_regnum(const char *name); + +#if !defined(__x86_64__) && !defined(__i386__) +int get_arch_regnum(const char *name); #endif -#ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET /* - * Arch should support fetching the offset of a register in pt_regs - * by its name. See kernel's regs_query_register_offset in - * arch/xxx/kernel/ptrace.c. + * get_dwarf_regnum - Returns DWARF regnum from register name + * name: architecture register name + * machine: ELF machine signature (EM_*) */ -int regs_query_register_offset(const char *name); +int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags); + +void get_powerpc_regs(u32 raw_insn, int is_source, struct annotated_op_loc *op_loc); + +#else /* HAVE_LIBDW_SUPPORT */ + +static inline int get_dwarf_regnum(const char *name __maybe_unused, + unsigned int machine __maybe_unused, + unsigned int flags __maybe_unused) +{ + return -1; +} + +static inline void get_powerpc_regs(u32 raw_insn __maybe_unused, int is_source __maybe_unused, + struct annotated_op_loc *op_loc __maybe_unused) +{ + return; +} #endif + #endif diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index 75e2248416f5..34e2fdfe7300 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -115,8 +115,12 @@ SYM_ALIAS(alias, name, SYM_T_FUNC, SYM_L_WEAK) #endif +#ifndef SYM_FUNC_ALIAS_MEMFUNC +#define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS +#endif + // In the kernel sources (include/linux/cfi_types.h), this has a different -// definition when CONFIG_CFI_CLANG is used, for tools/ just use the !clang +// definition when CONFIG_CFI is used, for tools/ just use the !cfi // definition: #ifndef SYM_TYPED_START #define SYM_TYPED_START(name, linkage, align...) 
\ @@ -128,4 +132,8 @@ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif +#ifndef SYM_PIC_ALIAS +#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_T_FUNC, SYM_L_GLOBAL) +#endif + #endif /* PERF_LINUX_LINKAGE_H_ */ diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index ec1b3bd9f530..382255393fb3 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -100,7 +100,7 @@ static void intel_bts_dump(struct intel_bts *bts __maybe_unused, else sz = len; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < sz; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < br_sz; i++) @@ -275,12 +275,13 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, int ret; struct intel_bts *bts = btsq->bts; union perf_event event; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; if (bts->synth_opts.initial_skip && bts->num_events++ <= bts->synth_opts.initial_skip) return 0; + perf_sample__init(&sample, /*all=*/true); sample.ip = le64_to_cpu(branch->from); sample.cpumode = intel_bts_cpumode(bts, sample.ip); sample.pid = btsq->pid; @@ -312,6 +313,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, pr_err("Intel BTS: failed to deliver branch event, error %d\n", ret); + perf_sample__exit(&sample); return ret; } @@ -591,7 +593,7 @@ static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp) static int intel_bts_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, - struct perf_tool *tool) + const struct perf_tool *tool) { struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, auxtrace); @@ -634,7 +636,7 @@ static int intel_bts_process_event(struct perf_session *session, static int intel_bts_process_auxtrace_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, auxtrace); @@ -675,7 +677,7 @@ static int intel_bts_process_auxtrace_event(struct perf_session *session, } static int intel_bts_flush(struct perf_session *session, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, auxtrace); @@ -737,35 +739,6 @@ static bool intel_bts_evsel_is_auxtrace(struct perf_session *session, return evsel->core.attr.type == bts->pmu_type; } -struct intel_bts_synth { - struct perf_tool dummy_tool; - struct perf_session *session; -}; - -static int intel_bts_event_synth(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample __maybe_unused, - struct machine *machine __maybe_unused) -{ - struct intel_bts_synth *intel_bts_synth = - container_of(tool, struct intel_bts_synth, dummy_tool); - - return perf_session__deliver_synth_event(intel_bts_synth->session, - event, NULL); -} - -static int intel_bts_synth_event(struct perf_session *session, - struct perf_event_attr *attr, u64 id) -{ - struct intel_bts_synth intel_bts_synth; - - memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth)); - intel_bts_synth.session = session; - - return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1, - &id, intel_bts_event_synth); -} - static int intel_bts_synth_events(struct intel_bts *bts, struct perf_session *session) { @@ -804,9 +777,7 @@ static int 
intel_bts_synth_events(struct intel_bts *bts, attr.sample_id_all = evsel->core.attr.sample_id_all; attr.read_format = evsel->core.attr.read_format; - id = evsel->core.id[0] + 1000000000; - if (!id) - id = 1; + id = auxtrace_synth_id_range_start(evsel); if (bts->synth_opts.branches) { attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; @@ -814,7 +785,7 @@ static int intel_bts_synth_events(struct intel_bts *bts, attr.sample_type |= PERF_SAMPLE_ADDR; pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", id, (u64)attr.sample_type); - err = intel_bts_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) { pr_err("%s: failed to synthesize 'branches' event type\n", __func__); @@ -837,7 +808,7 @@ static int intel_bts_synth_events(struct intel_bts *bts, static const char * const intel_bts_info_fmts[] = { [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n", - [INTEL_BTS_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_BTS_TIME_MULT] = " Time Multiplier %"PRIu64"\n", [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n", [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index b41c2e9c6f88..8fd7e4330044 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -1,4 +1,4 @@ -perf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o +perf-util-y += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o inat_tables_script = $(srctree)/tools/arch/x86/tools/gen-insn-attr-x86.awk inat_tables_maps = $(srctree)/tools/arch/x86/lib/x86-opcode-map.txt @@ -7,16 +7,20 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table $(call rule_mkdir) @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ -# Busybox's diff doesn't have -I, avoid warning in the case +perf-util-y += inat.o insn.o -$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c +$(OUTPUT)util/intel-pt-decoder/inat.o: $(srctree)/tools/arch/x86/lib/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(call rule_mkdir) $(call if_changed_dep,cc_o_c) -CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder +CFLAGS_inat.o += -I$(OUTPUT)util/intel-pt-decoder + +$(OUTPUT)util/intel-pt-decoder/insn.o: $(srctree)/tools/arch/x86/lib/insn.c + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) ifeq ($(CC_NO_CLANG), 1) - CFLAGS_intel-pt-insn-decoder.o += -Wno-override-init + CFLAGS_insn.o += -Wno-override-init endif -CFLAGS_intel-pt-insn-decoder.o += -Wno-packed +CFLAGS_insn.o += -Wno-packed diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index b450178e3420..e733f6b1f7ac 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1319,6 +1319,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder, bool no_tip) bool ret = false; decoder->state.type &= ~INTEL_PT_BRANCH; + decoder->state.insn_op = INTEL_PT_OP_OTHER; + decoder->state.insn_len = 0; if (decoder->set_fup_cfe_ip || decoder->set_fup_cfe) { bool ip = decoder->set_fup_cfe_ip; diff --git 
a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index c5d57027ec23..72c7a4e15d61 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -11,9 +11,6 @@ #include <byteswap.h> #include "../../../arch/x86/include/asm/insn.h" -#include "../../../arch/x86/lib/inat.c" -#include "../../../arch/x86/lib/insn.c" - #include "event.h" #include "intel-pt-insn-decoder.h" @@ -35,7 +32,7 @@ static void intel_pt_insn_decoder(struct insn *insn, intel_pt_insn->rel = 0; intel_pt_insn->emulated_ptwrite = false; - if (insn_is_avx(insn)) { + if (insn_is_avx_or_xop(insn)) { intel_pt_insn->op = INTEL_PT_OP_OTHER; intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH; intel_pt_insn->length = insn->length; @@ -92,6 +89,15 @@ static void intel_pt_insn_decoder(struct insn *insn, op = INTEL_PT_OP_JCC; branch = INTEL_PT_BR_CONDITIONAL; break; + case 0xa1: + if (insn_is_rex2(insn)) { /* jmpabs */ + intel_pt_insn->op = INTEL_PT_OP_JMP; + /* jmpabs causes a TIP packet like an indirect branch */ + intel_pt_insn->branch = INTEL_PT_BR_INDIRECT; + intel_pt_insn->length = insn->length; + return; + } + break; case 0xc2: /* near ret */ case 0xc3: /* near ret */ case 0xca: /* far ret */ @@ -200,12 +206,13 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, return 0; } -int arch_is_branch(const unsigned char *buf, size_t len, int x86_64) +int arch_is_uncond_branch(const unsigned char *buf, size_t len, int x86_64) { struct intel_pt_insn in; if (intel_pt_get_insn(buf, len, x86_64, &in) < 0) return -1; - return in.branch != INTEL_PT_BR_NO_BRANCH; + return in.branch == INTEL_PT_BR_UNCONDITIONAL || + in.branch == INTEL_PT_BR_INDIRECT; } const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused, diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index af9710622a1f..94fb16cf9e0c 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -8,7 +8,9 @@ #include <string.h> #include <endian.h> #include <byteswap.h> +#include <linux/kernel.h> #include <linux/compiler.h> +#include <linux/unaligned.h> #include "intel-pt-pkt-decoder.h" @@ -17,17 +19,11 @@ #define BIT63 ((uint64_t)1 << 63) #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define le16_to_cpu bswap_16 -#define le32_to_cpu bswap_32 -#define le64_to_cpu bswap_64 #define memcpy_le64(d, s, n) do { \ memcpy((d), (s), (n)); \ *(d) = le64_to_cpu(*(d)); \ } while (0) #else -#define le16_to_cpu -#define le32_to_cpu -#define le64_to_cpu #define memcpy_le64 memcpy #endif @@ -83,7 +79,7 @@ static int intel_pt_get_long_tnt(const unsigned char *buf, size_t len, if (len < 8) return INTEL_PT_NEED_MORE_BYTES; - payload = le64_to_cpu(*(uint64_t *)buf); + payload = get_unaligned_le64(buf); for (count = 47; count; count--) { if (payload & BIT63) @@ -124,26 +120,21 @@ static int intel_pt_get_cbr(const unsigned char *buf, size_t len, if (len < 4) return INTEL_PT_NEED_MORE_BYTES; packet->type = INTEL_PT_CBR; - packet->payload = le16_to_cpu(*(uint16_t *)(buf + 2)); + packet->payload = get_unaligned_le16(buf + 2); return 4; } static int intel_pt_get_vmcs(const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) { - unsigned int count = (52 - 5) >> 3; - - if (count < 1 || count > 7) - return INTEL_PT_BAD_PACKET; - - if (len < count + 2) + if (len < 7) return INTEL_PT_NEED_MORE_BYTES; 
packet->type = INTEL_PT_VMCS; - packet->count = count; - memcpy_le64(&packet->payload, buf + 2, count); + packet->count = 5; + memcpy_le64(&packet->payload, buf + 2, 5); - return count + 2; + return 7; } static int intel_pt_get_ovf(struct intel_pt_pkt *packet) @@ -199,7 +190,7 @@ static int intel_pt_get_mnt(const unsigned char *buf, size_t len, if (len < 11) return INTEL_PT_NEED_MORE_BYTES; packet->type = INTEL_PT_MNT; - memcpy_le64(&packet->payload, buf + 3, 8); + packet->payload = get_unaligned_le64(buf + 3); return 11; } @@ -228,12 +219,12 @@ static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len, case 0: if (len < 6) return INTEL_PT_NEED_MORE_BYTES; - packet->payload = le32_to_cpu(*(uint32_t *)(buf + 2)); + packet->payload = get_unaligned_le32(buf + 2); return 6; case 1: if (len < 10) return INTEL_PT_NEED_MORE_BYTES; - packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2)); + packet->payload = get_unaligned_le64(buf + 2); return 10; default: return INTEL_PT_BAD_PACKET; @@ -258,7 +249,7 @@ static int intel_pt_get_mwait(const unsigned char *buf, size_t len, if (len < 10) return INTEL_PT_NEED_MORE_BYTES; packet->type = INTEL_PT_MWAIT; - packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2)); + packet->payload = get_unaligned_le64(buf + 2); return 10; } @@ -311,7 +302,7 @@ static int intel_pt_get_bip_8(const unsigned char *buf, size_t len, return INTEL_PT_NEED_MORE_BYTES; packet->type = INTEL_PT_BIP; packet->count = buf[0] >> 3; - memcpy_le64(&packet->payload, buf + 1, 8); + packet->payload = get_unaligned_le64(buf + 1); return 9; } @@ -350,7 +341,7 @@ static int intel_pt_get_evd(const unsigned char *buf, size_t len, packet->type = INTEL_PT_EVD; packet->count = buf[2] & 0x3f; packet->payload = buf[3]; - memcpy_le64(&packet->payload, buf + 3, 8); + packet->payload = get_unaligned_le64(buf + 3); return 11; } @@ -465,13 +456,13 @@ static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte, if (len < 3) return INTEL_PT_NEED_MORE_BYTES; ip_len = 2; - packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1)); + packet->payload = get_unaligned_le16(buf + 1); break; case 2: if (len < 5) return INTEL_PT_NEED_MORE_BYTES; ip_len = 4; - packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1)); + packet->payload = get_unaligned_le32(buf + 1); break; case 3: case 4: @@ -484,7 +475,7 @@ static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte, if (len < 9) return INTEL_PT_NEED_MORE_BYTES; ip_len = 8; - packet->payload = le64_to_cpu(*(uint64_t *)(buf + 1)); + packet->payload = get_unaligned_le64(buf + 1); break; default: return INTEL_PT_BAD_PACKET; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index dbf0bc71a63b..fc9eec8b54b8 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -127,6 +127,7 @@ struct intel_pt { bool single_pebs; bool sample_pebs; + int pebs_data_src_fmt; struct evsel *pebs_evsel; u64 evt_sample_type; @@ -175,6 +176,7 @@ enum switch_state { struct intel_pt_pebs_event { struct evsel *evsel; u64 id; + int data_src_fmt; }; struct intel_pt_queue { @@ -249,7 +251,7 @@ static void intel_pt_dump(struct intel_pt *pt __maybe_unused, else pkt_len = 1; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < pkt_len; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < 16; i++) @@ -598,15 +600,15 @@ static struct auxtrace_cache *intel_pt_cache(struct dso *dso, struct auxtrace_cache *c; unsigned int bits; - if (dso->auxtrace_cache) - return 
dso->auxtrace_cache; + if (dso__auxtrace_cache(dso)) + return dso__auxtrace_cache(dso); bits = intel_pt_cache_size(dso, machine); /* Ignoring cache creation failure */ c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200); - dso->auxtrace_cache = c; + dso__set_auxtrace_cache(dso, c); return c; } @@ -650,7 +652,7 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) if (!c) return NULL; - return auxtrace_cache__lookup(dso->auxtrace_cache, offset); + return auxtrace_cache__lookup(dso__auxtrace_cache(dso), offset); } static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine, @@ -661,7 +663,7 @@ static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine, if (!c) return; - auxtrace_cache__remove(dso->auxtrace_cache, offset); + auxtrace_cache__remove(dso__auxtrace_cache(dso), offset); } static inline bool intel_pt_guest_kernel_ip(uint64_t ip) @@ -764,6 +766,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, addr_location__init(&al); intel_pt_insn->length = 0; + intel_pt_insn->op = INTEL_PT_OP_OTHER; if (to_ip && *ip == to_ip) goto out_no_cache; @@ -820,8 +823,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, } dso = map__dso(al.map); - if (dso->data.status == DSO_DATA_STATUS_ERROR && - dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) { + if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR && + dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) { ret = -ENOENT; goto out_ret; } @@ -854,7 +857,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, /* Load maps to ensure dso->is_64_bit has been updated */ map__load(al.map); - x86_64 = dso->is_64_bit; + x86_64 = dso__is_64_bit(dso); while (1) { len = dso__data_read_offset(dso, machine, @@ -898,6 +901,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, if (to_ip && *ip == to_ip) { intel_pt_insn->length = 0; + intel_pt_insn->op = INTEL_PT_OP_OTHER; goto out_no_cache; } @@ -1008,7 +1012,7 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data) offset = map__map_ip(al.map, ip); - res = intel_pt_match_pgd_ip(ptq->pt, ip, offset, map__dso(al.map)->long_name); + res = intel_pt_match_pgd_ip(ptq->pt, ip, offset, dso__long_name(map__dso(al.map))); addr_location__exit(&al); return res; } @@ -1512,9 +1516,11 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq) } else if (ptq->state->flags & INTEL_PT_ASYNC) { if (!ptq->state->to_ip) ptq->flags = PERF_IP_FLAG_BRANCH | + PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_TRACE_END; else if (ptq->state->from_nr && !ptq->state->to_nr) ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | + PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_VMEXIT; else ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | @@ -1760,12 +1766,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct dummy_branch_stack { u64 nr; u64 hw_idx; struct branch_entry entries; } dummy_bs; + int ret; if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) return 0; @@ -1773,6 +1780,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_b_sample(pt, ptq, event, &sample); sample.id = ptq->pt->branches_id; @@ -1802,8 +1810,10 @@ static int intel_pt_synth_branch_sample(struct 
intel_pt_queue *ptq) ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; } - return intel_pt_deliver_synth_event(pt, event, &sample, + perf_sample__exit(&sample); + ret = intel_pt_deliver_synth_event(pt, event, &sample, pt->branches_sample_type); + return ret; } static void intel_pt_prep_sample(struct intel_pt *pt, @@ -1831,11 +1841,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_sample(pt, ptq, event, &sample); sample.id = ptq->pt->instructions_id; @@ -1855,16 +1867,19 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) ptq->last_insn_cnt = ptq->state->tot_insn_cnt; - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->instructions_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->instructions_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; u64 period = 0; + int ret; if (ptq->sample_ipc) period = ptq->ipc_cyc_cnt - ptq->last_cy_cyc_cnt; @@ -1872,6 +1887,7 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq) if (!period || intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_sample(pt, ptq, event, &sample); sample.id = ptq->pt->cycles_id; @@ -1883,25 +1899,31 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq) ptq->last_cy_insn_cnt = ptq->ipc_insn_cnt; ptq->last_cy_cyc_cnt = ptq->ipc_cyc_cnt; - return intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_sample(pt, ptq, event, &sample); sample.id = ptq->pt->transactions_id; sample.stream_id = ptq->pt->transactions_id; - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->transactions_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->transactions_sample_type); + perf_sample__exit(&sample); + return ret; } static void intel_pt_prep_p_sample(struct intel_pt *pt, @@ -1949,15 +1971,17 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_cbr raw; u32 flags; + int ret; if (intel_pt_skip_cbr_event(pt)) return 0; ptq->cbr_seen = ptq->state->cbr; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->cbr_id; @@ -1971,20 +1995,24 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret 
= intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_psb raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->psb_id; @@ -1997,20 +2025,24 @@ static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_mwait raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->mwait_id; @@ -2022,20 +2054,24 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_pwre raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->pwre_id; @@ -2047,20 +2083,24 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_exstop raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->exstop_id; @@ -2072,20 +2112,24 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event 
*event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_pwrx raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->pwrx_id; @@ -2097,8 +2141,10 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } /* @@ -2228,19 +2274,160 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack, } } -static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id) +#define P(a, b) PERF_MEM_S(a, b) +#define OP_LH (P(OP, LOAD) | P(LVL, HIT)) +#define LEVEL(x) P(LVLNUM, x) +#define REM P(REMOTE, REMOTE) +#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) + +#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) + +/* Based on kernel __intel_pmu_pebs_data_source_grt() and pebs_data_source */ +static const u64 pebs_data_source_grt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* RAM hit|SNP Hit */ + OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* Remote L3 hit|SNP Hit */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* RAM hit|SNP None or Miss */ + OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* Remote RAM hit|SNP None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel __intel_pmu_pebs_data_source_cmt() and pebs_data_source */ +static const u64 pebs_data_source_cmt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 
hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* RAM hit|SNP Hit */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE), /* Remote L3 hit|SNP Hit */ + OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD), /* RAM hit|SNP None or Miss */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM), /* Remote RAM hit|SNP None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel pebs_set_tlb_lock() */ +static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) +{ + /* + * TLB access + * 0 = did not miss 2nd level TLB + * 1 = missed 2nd level TLB + */ + if (tlb) + *val |= P(TLB, MISS) | P(TLB, L2); + else + *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + /* locked prefix */ + if (lock) + *val |= P(LOCK, LOCKED); +} + +/* Based on kernel __grt_latency_data() */ +static u64 intel_pt_grt_latency_data(u8 dse, bool tlb, bool lock, bool blk, + const u64 *pebs_data_source) +{ + u64 val; + + dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK; + val = pebs_data_source[dse]; + + pebs_set_tlb_lock(&val, tlb, lock); + + if (blk) + val |= P(BLK, DATA); + else + val |= P(BLK, NA); + + return val; +} + +/* Default value for data source */ +#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ + PERF_MEM_S(LVL, NA) |\ + PERF_MEM_S(SNOOP, NA) |\ + PERF_MEM_S(LOCK, NA) |\ + PERF_MEM_S(TLB, NA) |\ + PERF_MEM_S(LVLNUM, NA)) + +enum DATA_SRC_FORMAT { + DATA_SRC_FORMAT_ERR = -1, + DATA_SRC_FORMAT_NA = 0, + DATA_SRC_FORMAT_GRT = 1, + DATA_SRC_FORMAT_CMT = 2, +}; + +/* Based on kernel grt_latency_data() and cmt_latency_data */ +static u64 intel_pt_get_data_src(u64 mem_aux_info, int data_src_fmt) +{ + switch (data_src_fmt) { + case DATA_SRC_FORMAT_GRT: { + union { + u64 val; + struct { + unsigned int dse:4; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:25; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_grt); + } + case DATA_SRC_FORMAT_CMT: { + union { + u64 val; + struct { + unsigned int dse:5; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:24; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_cmt); + } + default: + return PERF_MEM_NA; + } +} + +static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, + u64 id, int data_src_fmt) { const struct intel_pt_blk_items *items = &ptq->state->items; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; union perf_event *event = ptq->event_buf; struct intel_pt *pt = ptq->pt; u64 sample_type = evsel->core.attr.sample_type; u8 cpumode; - u64 regs[8 * sizeof(sample.intr_regs.mask)]; + u64 regs[8 * sizeof(sample.intr_regs->mask)]; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_a_sample(ptq, event, &sample); sample.id = id; @@ -2287,15 +2474,16 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse items->mask[INTEL_PT_XMM_POS])) { u64 regs_mask = evsel->core.attr.sample_regs_intr; u64 *pos; + struct regs_dump *intr_regs = perf_sample__intr_regs(&sample); - sample.intr_regs.abi = items->is_32_bit ? + intr_regs->abi = items->is_32_bit ? 
PERF_SAMPLE_REGS_ABI_32 : PERF_SAMPLE_REGS_ABI_64; - sample.intr_regs.regs = regs; + intr_regs->regs = regs; - pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask); + pos = intel_pt_add_gp_regs(intr_regs, regs, items, regs_mask); - intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask); + intel_pt_add_xmm(intr_regs, pos, items, regs_mask); } if (sample_type & PERF_SAMPLE_BRANCH_STACK) { @@ -2346,6 +2534,18 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse } } + if (sample_type & PERF_SAMPLE_DATA_SRC) { + if (items->has_mem_aux_info && data_src_fmt) { + if (data_src_fmt < 0) { + pr_err("Intel PT missing data_src info\n"); + return -1; + } + sample.data_src = intel_pt_get_data_src(items->mem_aux_info, data_src_fmt); + } else { + sample.data_src = PERF_MEM_NA; + } + } + if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) { u64 ax = items->has_rax ? items->rax : 0; /* Refer kernel's intel_hsw_transaction() */ @@ -2357,16 +2557,19 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse sample.transaction = txn; } - return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; struct evsel *evsel = pt->pebs_evsel; + int data_src_fmt = pt->pebs_data_src_fmt; u64 id = evsel->core.id[0]; - return intel_pt_do_synth_pebs_sample(ptq, evsel, id); + return intel_pt_do_synth_pebs_sample(ptq, evsel, id, data_src_fmt); } static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) @@ -2391,7 +2594,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) hw_id); return intel_pt_synth_single_pebs_sample(ptq); } - err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id); + err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id, pe->data_src_fmt); if (err) return err; } @@ -2403,16 +2606,17 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct { struct perf_synth_intel_evt cfe; struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS]; } raw; - int i; + int i, ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->evt_id; @@ -2434,20 +2638,24 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq) ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->evt_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->evt_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_iflag_chg raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->iflag_chg_id; @@ -2467,8 +2675,10 @@ static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); 
sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->iflag_chg_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->iflag_chg_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, @@ -3351,6 +3561,49 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +/* + * Events with data_src are identified by L1_Hit_Indication + * refer https://github.com/intel/perfmon + */ +static int intel_pt_data_src_fmt(struct intel_pt *pt, struct evsel *evsel) +{ + struct perf_env *env = pt->machine->env; + int fmt = DATA_SRC_FORMAT_NA; + + if (!env->cpuid) + return DATA_SRC_FORMAT_ERR; + + /* + * PEBS-via-PT is only supported on E-core non-hybrid. Of those only + * Gracemont and Crestmont have data_src. Check for: + * Alderlake N (Gracemont) + * Sierra Forest (Crestmont) + * Grand Ridge (Crestmont) + */ + + if (!strncmp(env->cpuid, "GenuineIntel,6,190,", 19)) + fmt = DATA_SRC_FORMAT_GRT; + + if (!strncmp(env->cpuid, "GenuineIntel,6,175,", 19) || + !strncmp(env->cpuid, "GenuineIntel,6,182,", 19)) + fmt = DATA_SRC_FORMAT_CMT; + + if (fmt == DATA_SRC_FORMAT_NA) + return fmt; + + /* + * Only data_src events are: + * mem-loads event=0xd0,umask=0x5 + * mem-stores event=0xd0,umask=0x6 + */ + if (evsel->core.attr.type == PERF_TYPE_RAW && + ((evsel->core.attr.config & 0xffff) == 0x5d0 || + (evsel->core.attr.config & 0xffff) == 0x6d0)) + return fmt; + + return DATA_SRC_FORMAT_NA; +} + static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) @@ -3371,6 +3624,7 @@ static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, ptq->pebs[hw_id].evsel = evsel; ptq->pebs[hw_id].id = sample->id; + ptq->pebs[hw_id].data_src_fmt = intel_pt_data_src_fmt(pt, evsel); return 0; } @@ -3414,7 +3668,7 @@ static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) } dso = map__dso(al.map); - if (!dso || !dso->auxtrace_cache) + if (!dso || !dso__auxtrace_cache(dso)) continue; offset = map__map_ip(al.map, addr); @@ -3434,7 +3688,7 @@ static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) } else { intel_pt_cache_invalidate(dso, machine, offset); intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n", - dso->long_name, addr); + dso__long_name(dso), addr); } } out: @@ -3445,7 +3699,7 @@ out: static int intel_pt_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, - struct perf_tool *tool) + const struct perf_tool *tool) { struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, auxtrace); @@ -3529,7 +3783,7 @@ static int intel_pt_process_event(struct perf_session *session, return err; } -static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool) +static int intel_pt_flush(struct perf_session *session, const struct perf_tool *tool) { struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, auxtrace); @@ -3596,7 +3850,7 @@ static bool intel_pt_evsel_is_auxtrace(struct perf_session *session, static int intel_pt_process_auxtrace_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, auxtrace); @@ -3655,37 +3909,15 @@ static int intel_pt_queue_data(struct perf_session *session, 
data_offset, timestamp); } -struct intel_pt_synth { - struct perf_tool dummy_tool; - struct perf_session *session; -}; - -static int intel_pt_event_synth(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample __maybe_unused, - struct machine *machine __maybe_unused) -{ - struct intel_pt_synth *intel_pt_synth = - container_of(tool, struct intel_pt_synth, dummy_tool); - - return perf_session__deliver_synth_event(intel_pt_synth->session, event, - NULL); -} - static int intel_pt_synth_event(struct perf_session *session, const char *name, struct perf_event_attr *attr, u64 id) { - struct intel_pt_synth intel_pt_synth; int err; pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n", name, id, (u64)attr->sample_type); - memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth)); - intel_pt_synth.session = session; - - err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, - &id, intel_pt_event_synth); + err = perf_session__deliver_synth_attr_event(session, attr, id); if (err) pr_err("%s: failed to synthesize '%s' event type\n", __func__, name); @@ -3755,9 +3987,7 @@ static int intel_pt_synth_events(struct intel_pt *pt, attr.sample_id_all = evsel->core.attr.sample_id_all; attr.read_format = evsel->core.attr.read_format; - id = evsel->core.id[0] + 1000000000; - if (!id) - id = 1; + id = auxtrace_synth_id_range_start(evsel); if (pt->synth_opts.branches) { attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; @@ -3942,6 +4172,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) } pt->single_pebs = true; pt->sample_pebs = true; + pt->pebs_data_src_fmt = intel_pt_data_src_fmt(pt, evsel); pt->pebs_evsel = evsel; } } @@ -4128,7 +4359,7 @@ static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt) static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", - [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_PT_TIME_MULT] = " Time Multiplier %"PRIu64"\n", [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h index c7d6068e3a6b..18fd0be52e6c 100644 --- a/tools/perf/util/intel-pt.h +++ b/tools/perf/util/intel-pt.h @@ -42,6 +42,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err); int intel_pt_process_auxtrace_info(union perf_event *event, struct perf_session *session); -struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu); +void intel_pt_pmu_default_config(const struct perf_pmu *intel_pt_pmu, + struct perf_event_attr *attr); #endif diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c new file mode 100644 index 000000000000..3c958d738ca6 --- /dev/null +++ b/tools/perf/util/intel-tpebs.c @@ -0,0 +1,663 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * intel_tpebs.c: Intel TPEBS support + */ + +#include <api/fs/fs.h> +#include <sys/param.h> +#include <subcmd/run-command.h> +#include <thread.h> +#include "intel-tpebs.h" +#include <linux/list.h> +#include <linux/zalloc.h> +#include <linux/err.h> +#include "sample.h" +#include "counts.h" +#include "debug.h" +#include "evlist.h" +#include "evsel.h" +#include "mutex.h" +#include "session.h" +#include "stat.h" +#include "tool.h" +#include "cpumap.h" +#include "metricgroup.h" +#include "stat.h" +#include <sys/stat.h> +#include <sys/file.h> +#include <errno.h> +#include 
<poll.h> +#include <math.h> + +#define PERF_DATA "-" + +bool tpebs_recording; +enum tpebs_mode tpebs_mode; +static LIST_HEAD(tpebs_results); +static pthread_t tpebs_reader_thread; +static struct child_process tpebs_cmd; +static int control_fd[2], ack_fd[2]; +static struct mutex tpebs_mtx; + +struct tpebs_retire_lat { + struct list_head nd; + /** @evsel: The evsel that opened the retire_lat event. */ + struct evsel *evsel; + /** @event: Event passed to perf record. */ + char *event; + /** @stats: Recorded retirement latency stats. */ + struct stats stats; + /** @last: Last retirement latency read. */ + uint64_t last; + /* Has the event been sent to perf record? */ + bool started; +}; + +static void tpebs_mtx_init(void) +{ + mutex_init(&tpebs_mtx); +} + +static struct mutex *tpebs_mtx_get(void) +{ + static pthread_once_t tpebs_mtx_once = PTHREAD_ONCE_INIT; + + pthread_once(&tpebs_mtx_once, tpebs_mtx_init); + return &tpebs_mtx; +} + +static struct tpebs_retire_lat *tpebs_retire_lat__find(struct evsel *evsel) + EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get()); + +static int evsel__tpebs_start_perf_record(struct evsel *evsel) +{ + const char **record_argv; + int tpebs_event_size = 0, i = 0, ret; + char control_fd_buf[32]; + char cpumap_buf[50]; + struct tpebs_retire_lat *t; + + list_for_each_entry(t, &tpebs_results, nd) + tpebs_event_size++; + + record_argv = malloc((10 + 2 * tpebs_event_size) * sizeof(*record_argv)); + if (!record_argv) + return -ENOMEM; + + record_argv[i++] = "perf"; + record_argv[i++] = "record"; + record_argv[i++] = "-W"; + record_argv[i++] = "--synth=no"; + + scnprintf(control_fd_buf, sizeof(control_fd_buf), "--control=fd:%d,%d", + control_fd[0], ack_fd[1]); + record_argv[i++] = control_fd_buf; + + record_argv[i++] = "-o"; + record_argv[i++] = PERF_DATA; + + if (!perf_cpu_map__is_any_cpu_or_is_empty(evsel->evlist->core.user_requested_cpus)) { + cpu_map__snprint(evsel->evlist->core.user_requested_cpus, cpumap_buf, + sizeof(cpumap_buf)); + record_argv[i++] = "-C"; + record_argv[i++] = cpumap_buf; + } + + list_for_each_entry(t, &tpebs_results, nd) { + record_argv[i++] = "-e"; + record_argv[i++] = t->event; + } + record_argv[i++] = NULL; + assert(i == 10 + 2 * tpebs_event_size || i == 8 + 2 * tpebs_event_size); + /* Note, no workload given so system wide is implied. */ + + assert(tpebs_cmd.pid == 0); + tpebs_cmd.argv = record_argv; + tpebs_cmd.out = -1; + ret = start_command(&tpebs_cmd); + zfree(&tpebs_cmd.argv); + list_for_each_entry(t, &tpebs_results, nd) + t->started = true; + + return ret; +} + +static bool is_child_pid(pid_t parent, pid_t child) +{ + if (parent < 0 || child < 0) + return false; + + while (true) { + char path[PATH_MAX]; + char line[256]; + FILE *fp; + +new_child: + if (parent == child) + return true; + + if (child <= 0) + return false; + + scnprintf(path, sizeof(path), "%s/%d/status", procfs__mountpoint(), child); + fp = fopen(path, "r"); + if (!fp) { + /* Presumably the process went away. Assume not a child. */ + return false; + } + while (fgets(line, sizeof(line), fp) != NULL) { + if (strncmp(line, "PPid:", 5) == 0) { + fclose(fp); + if (sscanf(line + 5, "%d", &child) != 1) { + /* Unexpected error parsing. */ + return false; + } + goto new_child; + } + } + /* Unexpected EOF. 
*/ + fclose(fp); + return false; + } +} + +static bool should_ignore_sample(const struct perf_sample *sample, const struct tpebs_retire_lat *t) +{ + pid_t workload_pid, sample_pid = sample->pid; + + /* + * During evlist__purge the evlist will be removed prior to the + * evsel__exit calling evsel__tpebs_close and taking the + * tpebs_mtx. Avoid a segfault by ignoring samples in this case. + */ + if (t->evsel->evlist == NULL) + return true; + + workload_pid = t->evsel->evlist->workload.pid; + if (workload_pid < 0 || workload_pid == sample_pid) + return false; + + if (!t->evsel->core.attr.inherit) + return true; + + return !is_child_pid(workload_pid, sample_pid); +} + +static int process_sample_event(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct evsel *evsel, + struct machine *machine __maybe_unused) +{ + struct tpebs_retire_lat *t; + + mutex_lock(tpebs_mtx_get()); + if (tpebs_cmd.pid == 0) { + /* Record has terminated. */ + mutex_unlock(tpebs_mtx_get()); + return 0; + } + t = tpebs_retire_lat__find(evsel); + if (!t) { + mutex_unlock(tpebs_mtx_get()); + return -EINVAL; + } + if (should_ignore_sample(sample, t)) { + mutex_unlock(tpebs_mtx_get()); + return 0; + } + /* + * Need to handle per core results? We are assuming average retire + * latency value will be used. Save the number of samples and the sum of + * retire latency value for each event. + */ + t->last = sample->weight3; + update_stats(&t->stats, sample->weight3); + mutex_unlock(tpebs_mtx_get()); + return 0; +} + +static int process_feature_event(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + if (event->feat.feat_id < HEADER_LAST_FEATURE) + return perf_event__process_feature(session, event); + return 0; +} + +static void *__sample_reader(void *arg __maybe_unused) +{ + struct perf_session *session; + struct perf_data data = { + .mode = PERF_DATA_MODE_READ, + .path = PERF_DATA, + .file.fd = tpebs_cmd.out, + }; + struct perf_tool tool; + + perf_tool__init(&tool, /*ordered_events=*/false); + tool.sample = process_sample_event; + tool.feature = process_feature_event; + tool.attr = perf_event__process_attr; + + session = perf_session__new(&data, &tool); + if (IS_ERR(session)) + return NULL; + perf_session__process_events(session); + perf_session__delete(session); + + return NULL; +} + +static int tpebs_send_record_cmd(const char *msg) EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get()) +{ + struct pollfd pollfd = { .events = POLLIN, }; + int ret, len, retries = 0; + char ack_buf[8]; + + /* Check if the command exited before the send, done with the lock held. */ + if (tpebs_cmd.pid == 0) + return 0; + + /* + * Let go of the lock while sending/receiving as blocking can starve the + * sample reading thread. + */ + mutex_unlock(tpebs_mtx_get()); + + /* Send perf record command.*/ + len = strlen(msg); + ret = write(control_fd[1], msg, len); + if (ret != len) { + pr_err("perf record control write control message '%s' failed\n", msg); + ret = -EPIPE; + goto out; + } + + if (!strcmp(msg, EVLIST_CTL_CMD_STOP_TAG)) { + ret = 0; + goto out; + } + + /* Wait for an ack. */ + pollfd.fd = ack_fd[0]; + + /* + * We need this poll to ensure the ack_fd PIPE will not hang + * when perf record failed for any reason. The timeout value + * 3000ms is an empirical selection. 
+ */ +again: + if (!poll(&pollfd, 1, 500)) { + if (check_if_command_finished(&tpebs_cmd)) { + ret = 0; + goto out; + } + + if (retries++ < 6) + goto again; + pr_err("tpebs failed: perf record ack timeout for '%s'\n", msg); + ret = -ETIMEDOUT; + goto out; + } + + if (!(pollfd.revents & POLLIN)) { + if (check_if_command_finished(&tpebs_cmd)) { + ret = 0; + goto out; + } + + pr_err("tpebs failed: did not receive an ack for '%s'\n", msg); + ret = -EPIPE; + goto out; + } + + ret = read(ack_fd[0], ack_buf, sizeof(ack_buf)); + if (ret > 0) + ret = strcmp(ack_buf, EVLIST_CTL_CMD_ACK_TAG); + else + pr_err("tpebs: perf record control ack failed\n"); +out: + /* Re-take lock as expected by caller. */ + mutex_lock(tpebs_mtx_get()); + return ret; +} + +/* + * tpebs_stop - stop the sample data read thread and the perf record process. + */ +static int tpebs_stop(void) EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get()) +{ + int ret = 0; + + /* Like evsel__tpebs_open(), we should only run tpebs_stop() once. */ + if (tpebs_cmd.pid != 0) { + tpebs_send_record_cmd(EVLIST_CTL_CMD_STOP_TAG); + tpebs_cmd.pid = 0; + mutex_unlock(tpebs_mtx_get()); + pthread_join(tpebs_reader_thread, NULL); + mutex_lock(tpebs_mtx_get()); + close(control_fd[0]); + close(control_fd[1]); + close(ack_fd[0]); + close(ack_fd[1]); + close(tpebs_cmd.out); + ret = finish_command(&tpebs_cmd); + tpebs_cmd.pid = 0; + if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL) + ret = 0; + } + return ret; +} + +/** + * evsel__tpebs_event() - Create string event encoding to pass to `perf record`. + */ +static int evsel__tpebs_event(struct evsel *evsel, char **event) +{ + char *name, *modifier; + int ret; + + name = strdup(evsel->name); + if (!name) + return -ENOMEM; + + modifier = strrchr(name, 'R'); + if (!modifier) { + ret = -EINVAL; + goto out; + } + *modifier = 'p'; + modifier = strchr(name, ':'); + if (!modifier) + modifier = strrchr(name, '/'); + if (!modifier) { + ret = -EINVAL; + goto out; + } + *modifier = '\0'; + if (asprintf(event, "%s/name=tpebs_event_%p/%s", name, evsel, modifier + 1) > 0) + ret = 0; + else + ret = -ENOMEM; +out: + if (ret) + pr_err("Tpebs event modifier broken '%s'\n", evsel->name); + free(name); + return ret; +} + +static struct tpebs_retire_lat *tpebs_retire_lat__new(struct evsel *evsel) +{ + struct tpebs_retire_lat *result = zalloc(sizeof(*result)); + int ret; + + if (!result) + return NULL; + + ret = evsel__tpebs_event(evsel, &result->event); + if (ret) { + free(result); + return NULL; + } + result->evsel = evsel; + return result; +} + +static void tpebs_retire_lat__delete(struct tpebs_retire_lat *r) +{ + zfree(&r->event); + free(r); +} + +static struct tpebs_retire_lat *tpebs_retire_lat__find(struct evsel *evsel) +{ + struct tpebs_retire_lat *t; + unsigned long num; + const char *evsel_name; + + /* + * Evsels will match for evlist with the retirement latency event. The + * name with "tpebs_event_" prefix will be present on events being read + * from `perf record`. + */ + if (evsel__is_retire_lat(evsel)) { + list_for_each_entry(t, &tpebs_results, nd) { + if (t->evsel == evsel) + return t; + } + return NULL; + } + evsel_name = strstr(evsel->name, "tpebs_event_"); + if (!evsel_name) { + /* Unexpected that the perf record should have other events. 
*/ + return NULL; + } + errno = 0; + num = strtoull(evsel_name + 12, NULL, 16); + if (errno) { + pr_err("Bad evsel for tpebs find '%s'\n", evsel->name); + return NULL; + } + list_for_each_entry(t, &tpebs_results, nd) { + if ((unsigned long)t->evsel == num) + return t; + } + return NULL; +} + +/** + * evsel__tpebs_prepare - create tpebs data structures ready for opening. + * @evsel: retire_latency evsel, all evsels on its list will be prepared. + */ +static int evsel__tpebs_prepare(struct evsel *evsel) +{ + struct evsel *pos; + struct tpebs_retire_lat *tpebs_event; + + mutex_lock(tpebs_mtx_get()); + tpebs_event = tpebs_retire_lat__find(evsel); + if (tpebs_event) { + /* evsel, or an identically named one, was already prepared. */ + mutex_unlock(tpebs_mtx_get()); + return 0; + } + tpebs_event = tpebs_retire_lat__new(evsel); + if (!tpebs_event) { + mutex_unlock(tpebs_mtx_get()); + return -ENOMEM; + } + list_add_tail(&tpebs_event->nd, &tpebs_results); + mutex_unlock(tpebs_mtx_get()); + + /* + * Eagerly prepare all other evsels on the list to try to ensure that by + * open they are all known. + */ + evlist__for_each_entry(evsel->evlist, pos) { + int ret; + + if (pos == evsel || !pos->retire_lat) + continue; + + ret = evsel__tpebs_prepare(pos); + if (ret) + return ret; + } + return 0; +} + +/** + * evsel__tpebs_open - starts tpebs execution. + * @evsel: retire_latency evsel, all evsels on its list will be selected. Each + * evsel is sampled to get the average retire_latency value. + */ +int evsel__tpebs_open(struct evsel *evsel) +{ + int ret; + bool tpebs_empty; + + /* We should only start recording when tpebs_recording is enabled. */ + if (!tpebs_recording) + return 0; + /* Only start the events once. */ + if (tpebs_cmd.pid != 0) { + struct tpebs_retire_lat *t; + bool valid; + + mutex_lock(tpebs_mtx_get()); + t = tpebs_retire_lat__find(evsel); + valid = t && t->started; + mutex_unlock(tpebs_mtx_get()); + /* May fail as the event wasn't started. */ + return valid ? 0 : -EBUSY; + } + + ret = evsel__tpebs_prepare(evsel); + if (ret) + return ret; + + mutex_lock(tpebs_mtx_get()); + tpebs_empty = list_empty(&tpebs_results); + if (!tpebs_empty) { + /* Create control and ack fds for --control */ + if (pipe(control_fd) < 0) { + pr_err("tpebs: Failed to create control fifo"); + ret = -1; + goto out; + } + if (pipe(ack_fd) < 0) { + pr_err("tpebs: Failed to create ack fifo"); + ret = -1; + goto out; + } + + ret = evsel__tpebs_start_perf_record(evsel); + if (ret) + goto out; + + if (pthread_create(&tpebs_reader_thread, /*attr=*/NULL, __sample_reader, + /*arg=*/NULL)) { + kill(tpebs_cmd.pid, SIGTERM); + close(tpebs_cmd.out); + pr_err("Could not create thread to process sample data.\n"); + ret = -1; + goto out; + } + ret = tpebs_send_record_cmd(EVLIST_CTL_CMD_ENABLE_TAG); + } +out: + if (ret) { + struct tpebs_retire_lat *t = tpebs_retire_lat__find(evsel); + + list_del_init(&t->nd); + tpebs_retire_lat__delete(t); + } + mutex_unlock(tpebs_mtx_get()); + return ret; +} + +int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread) +{ + struct perf_counts_values *count, *old_count = NULL; + struct tpebs_retire_lat *t; + uint64_t val; + int ret; + + /* Only set retire_latency value to the first CPU and thread. 
*/ + if (cpu_map_idx != 0 || thread != 0) + return 0; + + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + + count = perf_counts(evsel->counts, cpu_map_idx, thread); + + mutex_lock(tpebs_mtx_get()); + t = tpebs_retire_lat__find(evsel); + /* + * If reading the first tpebs result, send a ping to the record + * process. Allow the sample reader a chance to read by releasing and + * reacquiring the lock. + */ + if (t && &t->nd == tpebs_results.next) { + ret = tpebs_send_record_cmd(EVLIST_CTL_CMD_PING_TAG); + mutex_unlock(tpebs_mtx_get()); + if (ret) + return ret; + mutex_lock(tpebs_mtx_get()); + } + if (t == NULL || t->stats.n == 0) { + /* No sample data, use default. */ + if (tpebs_recording) { + pr_warning_once( + "Using precomputed retirement latency data as no samples\n"); + } + val = 0; + switch (tpebs_mode) { + case TPEBS_MODE__MIN: + val = rint(evsel->retirement_latency.min); + break; + case TPEBS_MODE__MAX: + val = rint(evsel->retirement_latency.max); + break; + default: + case TPEBS_MODE__LAST: + case TPEBS_MODE__MEAN: + val = rint(evsel->retirement_latency.mean); + break; + } + } else { + switch (tpebs_mode) { + case TPEBS_MODE__MIN: + val = t->stats.min; + break; + case TPEBS_MODE__MAX: + val = t->stats.max; + break; + case TPEBS_MODE__LAST: + val = t->last; + break; + default: + case TPEBS_MODE__MEAN: + val = rint(t->stats.mean); + break; + } + } + mutex_unlock(tpebs_mtx_get()); + + if (old_count) { + count->val = old_count->val + val; + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + } else { + count->val = val; + count->run++; + count->ena++; + } + return 0; +} + +/** + * evsel__tpebs_close() - delete tpebs related data. If the last event, stop the + * created thread and process by calling tpebs_stop(). + * + * This function is called in evsel__close() to be symmetric with + * evsel__tpebs_open() being called in evsel__open(). 
+ */ +void evsel__tpebs_close(struct evsel *evsel) +{ + struct tpebs_retire_lat *t; + + mutex_lock(tpebs_mtx_get()); + t = tpebs_retire_lat__find(evsel); + if (t) { + list_del_init(&t->nd); + tpebs_retire_lat__delete(t); + + if (list_empty(&tpebs_results)) + tpebs_stop(); + } + mutex_unlock(tpebs_mtx_get()); +} diff --git a/tools/perf/util/intel-tpebs.h b/tools/perf/util/intel-tpebs.h new file mode 100644 index 000000000000..9475e2e6ea74 --- /dev/null +++ b/tools/perf/util/intel-tpebs.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * intel_tpebs.h: Intel TPEBS support + */ +#ifndef __INTEL_TPEBS_H +#define __INTEL_TPEBS_H + +struct evlist; +struct evsel; + +enum tpebs_mode { + TPEBS_MODE__MEAN, + TPEBS_MODE__MIN, + TPEBS_MODE__MAX, + TPEBS_MODE__LAST, +}; + +extern bool tpebs_recording; +extern enum tpebs_mode tpebs_mode; + +int evsel__tpebs_open(struct evsel *evsel); +void evsel__tpebs_close(struct evsel *evsel); +int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread); + +#endif /* __INTEL_TPEBS_H */ diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h index fb810e1b2de7..f4037203e9ec 100644 --- a/tools/perf/util/jit.h +++ b/tools/perf/util/jit.h @@ -5,7 +5,8 @@ #include <data.h> int jit_process(struct perf_session *session, struct perf_data *output, - struct machine *machine, char *filename, pid_t pid, pid_t tid, u64 *nbytes); + struct machine *machine, const char *filename, + pid_t pid, pid_t tid, u64 *nbytes); int jit_inject_record(const char *filename); diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 6b2b96c16ccd..f00814e37de9 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -14,9 +14,9 @@ #include <sys/mman.h> #include <linux/stringify.h> -#include "build-id.h" #include "event.h" #include "debug.h" +#include "dso.h" #include "evlist.h" #include "namespaces.h" #include "symbol.h" @@ -233,7 +233,8 @@ jit_open(struct jit_buf_desc *jd, const char *name) /* * keep dirname for generating files and mmap records */ - strcpy(jd->dir, name); + strncpy(jd->dir, name, PATH_MAX); + jd->dir[PATH_MAX - 1] = '\0'; dirname(jd->dir); free(buf); @@ -424,7 +425,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) { struct perf_sample sample; union perf_event *event; - struct perf_tool *tool = jd->session->tool; + const struct perf_tool *tool = jd->session->tool; uint64_t code, addr; uintptr_t uaddr; char *filename; @@ -516,7 +517,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) * create pseudo sample to induce dso hit increment * use first address as sample address */ - memset(&sample, 0, sizeof(sample)); + perf_sample__init(&sample, /*all=*/true); sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = pid; sample.tid = tid; @@ -531,10 +532,26 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) /* * mark dso as used to generate buildid in the header */ - if (!ret) - build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); - + if (!ret) { + struct dso_id dso_id = { + { + .maj = event->mmap2.maj, + .min = event->mmap2.min, + .ino = event->mmap2.ino, + .ino_generation = event->mmap2.ino_generation, + }, + .mmap2_valid = true, + .mmap2_ino_generation_valid = true, + }; + struct dso *dso = machine__findnew_dso_id(jd->machine, filename, &dso_id); + + if (dso) + dso__set_hit(dso); + + dso__put(dso); + } out: + perf_sample__exit(&sample); free(event); return ret; } @@ -543,7 +560,7 @@ static int jit_repipe_code_move(struct 
jit_buf_desc *jd, union jr_entry *jr) { struct perf_sample sample; union perf_event *event; - struct perf_tool *tool = jd->session->tool; + const struct perf_tool *tool = jd->session->tool; char *filename; size_t size; struct stat st; @@ -611,7 +628,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) * create pseudo sample to induce dso hit increment * use first address as sample address */ - memset(&sample, 0, sizeof(sample)); + perf_sample__init(&sample, /*all=*/true); sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = pid; sample.tid = tid; @@ -620,12 +637,13 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) ret = perf_event__process_mmap2(tool, event, &sample, jd->machine); if (ret) - return ret; + goto out; ret = jit_inject_event(jd, event); if (!ret) build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); - +out: + perf_sample__exit(&sample); return ret; } @@ -675,6 +693,7 @@ jit_repipe_unwinding_info(struct jit_buf_desc *jd, union jr_entry *jr) jd->eh_frame_hdr_size = jr->unwinding.eh_frame_hdr_size; jd->unwinding_size = jr->unwinding.unwinding_size; jd->unwinding_mapped_size = jr->unwinding.mapped_size; + free(jd->unwinding_data); jd->unwinding_data = unwinding_data; return 0; @@ -709,7 +728,7 @@ jit_process_dump(struct jit_buf_desc *jd) } static int -jit_inject(struct jit_buf_desc *jd, char *path) +jit_inject(struct jit_buf_desc *jd, const char *path) { int ret; @@ -736,7 +755,7 @@ jit_inject(struct jit_buf_desc *jd, char *path) * as captured in the RECORD_MMAP record */ static int -jit_detect(char *mmap_name, pid_t pid, struct nsinfo *nsi) +jit_detect(const char *mmap_name, pid_t pid, struct nsinfo *nsi, bool *in_pidns) { char *p; char *end = NULL; @@ -772,11 +791,16 @@ jit_detect(char *mmap_name, pid_t pid, struct nsinfo *nsi) if (!end) return -1; + *in_pidns = pid == nsinfo__nstgid(nsi); /* * pid does not match mmap pid * pid==0 in system-wide mode (synthesized) + * + * If the pid in the file name is equal to the nstgid, then + * the agent ran inside a container and perf outside the + * container, so record it for further use in jit_inject(). 
*/ - if (pid && pid2 != nsinfo__nstgid(nsi)) + if (pid && !(pid2 == pid || *in_pidns)) return -1; /* * validate suffix @@ -820,7 +844,7 @@ int jit_process(struct perf_session *session, struct perf_data *output, struct machine *machine, - char *filename, + const char *filename, pid_t pid, pid_t tid, u64 *nbytes) @@ -829,6 +853,7 @@ jit_process(struct perf_session *session, struct nsinfo *nsi; struct evsel *first; struct jit_buf_desc jd; + bool in_pidns = false; int ret; thread = machine__findnew_thread(machine, pid, tid); @@ -843,7 +868,7 @@ jit_process(struct perf_session *session, /* * first, detect marker mmap (i.e., the jitdump mmap) */ - if (jit_detect(filename, pid, nsi)) { + if (jit_detect(filename, pid, nsi, &in_pidns)) { nsinfo__put(nsi); /* @@ -865,6 +890,9 @@ jit_process(struct perf_session *session, jd.machine = machine; jd.nsi = nsi; + if (in_pidns) + nsinfo__set_in_pidns(nsi); + /* * track sample_type to compute id_all layout * perf sets the same sample type to all events as of now diff --git a/tools/perf/util/kvm-stat.c b/tools/perf/util/kvm-stat.c new file mode 100644 index 000000000000..38ace736db5c --- /dev/null +++ b/tools/perf/util/kvm-stat.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "debug.h" +#include "evsel.h" +#include "kvm-stat.h" + +#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT) + +bool kvm_exit_event(struct evsel *evsel) +{ + return evsel__name_is(evsel, kvm_exit_trace); +} + +void exit_event_get_key(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->info = 0; + key->key = evsel__intval(evsel, sample, kvm_exit_reason); +} + + +bool exit_event_begin(struct evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + if (kvm_exit_event(evsel)) { + exit_event_get_key(evsel, sample, key); + return true; + } + + return false; +} + +bool kvm_entry_event(struct evsel *evsel) +{ + return evsel__name_is(evsel, kvm_entry_trace); +} + +bool exit_event_end(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return kvm_entry_event(evsel); +} + +static const char *get_exit_reason(struct perf_kvm_stat *kvm, + struct exit_reasons_table *tbl, + u64 exit_code) +{ + while (tbl->reason != NULL) { + if (tbl->exit_code == exit_code) + return tbl->reason; + tbl++; + } + + pr_err("unknown kvm exit code:%lld on %s\n", + (unsigned long long)exit_code, kvm->exit_reasons_isa); + return "UNKNOWN"; +} + +void exit_event_decode_key(struct perf_kvm_stat *kvm, + struct event_key *key, + char *decode) +{ + const char *exit_reason = get_exit_reason(kvm, key->exit_reasons, + key->key); + + scnprintf(decode, KVM_EVENT_NAME_LEN, "%s", exit_reason); +} + +#endif diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index 3e9ac754c3d1..a356b839c2ee 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -10,6 +10,7 @@ #include "symbol.h" #include "record.h" +#include <errno.h> #include <stdlib.h> #include <linux/zalloc.h> @@ -115,6 +116,8 @@ struct kvm_reg_events_ops { struct kvm_events_ops *ops; }; +#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT) + void exit_event_get_key(struct evsel *evsel, struct perf_sample *sample, struct event_key *key); @@ -127,6 +130,7 @@ bool exit_event_end(struct evsel *evsel, void exit_event_decode_key(struct perf_kvm_stat *kvm, struct event_key *key, char *decode); +#endif bool kvm_exit_event(struct evsel *evsel); bool kvm_entry_event(struct evsel *evsel); @@ -187,5 +191,15 @@ 
static inline struct kvm_info *kvm_info__new(void) #define kvm_info__zput(ki) do { } while (0) #endif /* HAVE_KVM_STAT_SUPPORT */ +#define STRDUP_FAIL_EXIT(s) \ + ({ char *_p; \ + _p = strdup(s); \ + if (!_p) { \ + ret = -ENOMEM; \ + goto EXIT; \ + } \ + _p; \ + }) + extern int kvm_add_default_arch_event(int *argc, const char **argv); #endif /* __PERF_KVM_STAT_H */ diff --git a/tools/perf/util/kwork.h b/tools/perf/util/kwork.h index 53b7327550b8..db00269b73f2 100644 --- a/tools/perf/util/kwork.h +++ b/tools/perf/util/kwork.h @@ -1,6 +1,7 @@ #ifndef PERF_UTIL_KWORK_H #define PERF_UTIL_KWORK_H +#include "perf.h" #include "util/tool.h" #include "util/time-utils.h" @@ -16,6 +17,7 @@ enum kwork_class_type { KWORK_CLASS_IRQ, KWORK_CLASS_SOFTIRQ, KWORK_CLASS_WORKQUEUE, + KWORK_CLASS_SCHED, KWORK_CLASS_MAX, }; @@ -23,6 +25,7 @@ enum kwork_report_type { KWORK_REPORT_RUNTIME, KWORK_REPORT_LATENCY, KWORK_REPORT_TIMEHIST, + KWORK_REPORT_TOP, }; enum kwork_trace_type { @@ -91,6 +94,7 @@ struct kwork_atom_page { DECLARE_BITMAP(bitmap, NR_ATOM_PER_PAGE); }; +struct perf_kwork; struct kwork_class; struct kwork_work { /* @@ -127,6 +131,13 @@ struct kwork_work { u64 max_latency_start; u64 max_latency_end; u64 total_latency; + + /* + * top report + */ + u32 cpu_usage; + u32 tgid; + bool is_kthread; }; struct kwork_class { @@ -142,8 +153,10 @@ struct kwork_class { int (*class_init)(struct kwork_class *class, struct perf_session *session); - void (*work_init)(struct kwork_class *class, + void (*work_init)(struct perf_kwork *kwork, + struct kwork_class *class, struct kwork_work *work, + enum kwork_trace_type src_type, struct evsel *evsel, struct perf_sample *sample, struct machine *machine); @@ -152,7 +165,6 @@ struct kwork_class { char *buf, int len); }; -struct perf_kwork; struct trace_kwork_handler { int (*raise_event)(struct perf_kwork *kwork, struct kwork_class *class, struct evsel *evsel, @@ -165,6 +177,23 @@ struct trace_kwork_handler { int (*exit_event)(struct perf_kwork *kwork, struct kwork_class *class, struct evsel *evsel, struct perf_sample *sample, struct machine *machine); + + int (*sched_switch_event)(struct perf_kwork *kwork, + struct kwork_class *class, struct evsel *evsel, + struct perf_sample *sample, struct machine *machine); +}; + +struct __top_cpus_runtime { + u64 load; + u64 idle; + u64 irq; + u64 softirq; + u64 total; +}; + +struct kwork_top_stat { + DECLARE_BITMAP(all_cpus_bitmap, MAX_NR_CPUS); + struct __top_cpus_runtime *cpus_runtime; }; struct perf_kwork { @@ -218,12 +247,19 @@ struct perf_kwork { u64 all_runtime; u64 all_count; u64 nr_skipped_events[KWORK_TRACE_MAX + 1]; -}; -struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork, + /* + * perf kwork top data + */ + struct kwork_top_stat top_stat; + + /* Add work callback. 
*/ + struct kwork_work *(*add_work)(struct perf_kwork *kwork, struct kwork_class *class, struct kwork_work *key); +}; + #ifdef HAVE_BPF_SKEL int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork); @@ -233,6 +269,13 @@ void perf_kwork__report_cleanup_bpf(void); void perf_kwork__trace_start(void); void perf_kwork__trace_finish(void); +int perf_kwork__top_prepare_bpf(struct perf_kwork *kwork); +int perf_kwork__top_read_bpf(struct perf_kwork *kwork); +void perf_kwork__top_cleanup_bpf(void); + +void perf_kwork__top_start(void); +void perf_kwork__top_finish(void); + #else /* !HAVE_BPF_SKEL */ static inline int @@ -252,6 +295,23 @@ static inline void perf_kwork__report_cleanup_bpf(void) {} static inline void perf_kwork__trace_start(void) {} static inline void perf_kwork__trace_finish(void) {} +static inline int +perf_kwork__top_prepare_bpf(struct perf_kwork *kwork __maybe_unused) +{ + return -1; +} + +static inline int +perf_kwork__top_read_bpf(struct perf_kwork *kwork __maybe_unused) +{ + return -1; +} + +static inline void perf_kwork__top_cleanup_bpf(void) {} + +static inline void perf_kwork__top_start(void) {} +static inline void perf_kwork__top_finish(void) {} + #endif /* HAVE_BPF_SKEL */ #endif /* PERF_UTIL_KWORK_H */ diff --git a/tools/perf/util/libbfd.c b/tools/perf/util/libbfd.c new file mode 100644 index 000000000000..cc0c474cbfaa --- /dev/null +++ b/tools/perf/util/libbfd.c @@ -0,0 +1,643 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "libbfd.h" +#include "annotate.h" +#include "bpf-event.h" +#include "bpf-utils.h" +#include "debug.h" +#include "dso.h" +#include "env.h" +#include "map.h" +#include "srcline.h" +#include "symbol.h" +#include "symbol_conf.h" +#include "util.h" +#include <tools/dis-asm-compat.h> +#ifdef HAVE_LIBBPF_SUPPORT +#include <bpf/bpf.h> +#include <bpf/btf.h> +#endif +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#define PACKAGE "perf" +#include <bfd.h> + +/* + * Implement addr2line using libbfd. 
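+ * + * Rough flow of the code below: bfd_openr() the DSO once and cache + * the handle in struct a2l_data on the dso; per query, + * bfd_map_over_sections() finds the section covering the address, + * bfd_find_nearest_line() resolves file/function/line, and + * bfd_find_inliner_info() walks any inline chain.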
+ */ +struct a2l_data { + const char *input; + u64 addr; + + bool found; + const char *filename; + const char *funcname; + unsigned int line; + + bfd *abfd; + asymbol **syms; +}; + +static bool perf_bfd_lock(void *bfd_mutex) +{ + mutex_lock(bfd_mutex); + return true; +} + +static bool perf_bfd_unlock(void *bfd_mutex) +{ + mutex_unlock(bfd_mutex); + return true; +} + +static void perf_bfd_init(void) +{ + static struct mutex bfd_mutex; + + mutex_init_recursive(&bfd_mutex); + + if (bfd_init() != BFD_INIT_MAGIC) { + pr_err("Error initializing libbfd\n"); + return; + } + if (!bfd_thread_init(perf_bfd_lock, perf_bfd_unlock, &bfd_mutex)) + pr_err("Error initializing libbfd threading\n"); +} + +static void ensure_bfd_init(void) +{ + static pthread_once_t bfd_init_once = PTHREAD_ONCE_INIT; + + pthread_once(&bfd_init_once, perf_bfd_init); +} + +static int bfd_error(const char *string) +{ + const char *errmsg; + + errmsg = bfd_errmsg(bfd_get_error()); + fflush(stdout); + + if (string) + pr_debug("%s: %s\n", string, errmsg); + else + pr_debug("%s\n", errmsg); + + return -1; +} + +static int slurp_symtab(bfd *abfd, struct a2l_data *a2l) +{ + long storage; + long symcount; + asymbol **syms; + bfd_boolean dynamic = FALSE; + + if ((bfd_get_file_flags(abfd) & HAS_SYMS) == 0) + return bfd_error(bfd_get_filename(abfd)); + + storage = bfd_get_symtab_upper_bound(abfd); + if (storage == 0L) { + storage = bfd_get_dynamic_symtab_upper_bound(abfd); + dynamic = TRUE; + } + if (storage < 0L) + return bfd_error(bfd_get_filename(abfd)); + + syms = malloc(storage); + if (dynamic) + symcount = bfd_canonicalize_dynamic_symtab(abfd, syms); + else + symcount = bfd_canonicalize_symtab(abfd, syms); + + if (symcount < 0) { + free(syms); + return bfd_error(bfd_get_filename(abfd)); + } + + a2l->syms = syms; + return 0; +} + +static void find_address_in_section(bfd *abfd, asection *section, void *data) +{ + bfd_vma pc, vma; + bfd_size_type size; + struct a2l_data *a2l = data; + flagword flags; + + if (a2l->found) + return; + +#ifdef bfd_get_section_flags + flags = bfd_get_section_flags(abfd, section); +#else + flags = bfd_section_flags(section); +#endif + if ((flags & SEC_ALLOC) == 0) + return; + + pc = a2l->addr; +#ifdef bfd_get_section_vma + vma = bfd_get_section_vma(abfd, section); +#else + vma = bfd_section_vma(section); +#endif +#ifdef bfd_get_section_size + size = bfd_get_section_size(section); +#else + size = bfd_section_size(section); +#endif + + if (pc < vma || pc >= vma + size) + return; + + a2l->found = bfd_find_nearest_line(abfd, section, a2l->syms, pc - vma, + &a2l->filename, &a2l->funcname, + &a2l->line); + + if (a2l->filename && !strlen(a2l->filename)) + a2l->filename = NULL; +} + +static struct a2l_data *addr2line_init(const char *path) +{ + bfd *abfd; + struct a2l_data *a2l = NULL; + + ensure_bfd_init(); + abfd = bfd_openr(path, NULL); + if (abfd == NULL) + return NULL; + + if (!bfd_check_format(abfd, bfd_object)) + goto out; + + a2l = zalloc(sizeof(*a2l)); + if (a2l == NULL) + goto out; + + a2l->abfd = abfd; + a2l->input = strdup(path); + if (a2l->input == NULL) + goto out; + + if (slurp_symtab(abfd, a2l)) + goto out; + + return a2l; + +out: + if (a2l) { + zfree((char **)&a2l->input); + free(a2l); + } + bfd_close(abfd); + return NULL; +} + +static void addr2line_cleanup(struct a2l_data *a2l) +{ + if (a2l->abfd) + bfd_close(a2l->abfd); + zfree((char **)&a2l->input); + zfree(&a2l->syms); + free(a2l); +} + +static int inline_list__append_dso_a2l(struct dso *dso, + struct inline_node *node, + struct symbol *sym) +{ 
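+	/* + * Reached only after bfd_find_nearest_line() or + * bfd_find_inliner_info() filled a2l->funcname, a2l->filename and + * a2l->line for the frame being appended. + */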
+ struct a2l_data *a2l = dso__a2l(dso); + struct symbol *inline_sym = new_inline_sym(dso, sym, a2l->funcname); + char *srcline = NULL; + + if (a2l->filename) + srcline = srcline_from_fileline(a2l->filename, a2l->line); + + return inline_list__append(inline_sym, srcline, node); +} + +int libbfd__addr2line(const char *dso_name, u64 addr, + char **file, unsigned int *line, struct dso *dso, + bool unwind_inlines, struct inline_node *node, + struct symbol *sym) +{ + int ret = 0; + struct a2l_data *a2l = dso__a2l(dso); + + if (!a2l) { + a2l = addr2line_init(dso_name); + dso__set_a2l(dso, a2l); + } + + if (a2l == NULL) { + if (!symbol_conf.disable_add2line_warn) + pr_warning("addr2line_init failed for %s\n", dso_name); + return 0; + } + + a2l->addr = addr; + a2l->found = false; + + bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l); + + if (!a2l->found) + return 0; + + if (unwind_inlines) { + int cnt = 0; + + if (node && inline_list__append_dso_a2l(dso, node, sym)) + return 0; + + while (bfd_find_inliner_info(a2l->abfd, &a2l->filename, + &a2l->funcname, &a2l->line) && + cnt++ < MAX_INLINE_NEST) { + + if (a2l->filename && !strlen(a2l->filename)) + a2l->filename = NULL; + + if (node != NULL) { + if (inline_list__append_dso_a2l(dso, node, sym)) + return 0; + // found at least one inline frame + ret = 1; + } + } + } + + if (file) { + *file = a2l->filename ? strdup(a2l->filename) : NULL; + ret = *file ? 1 : 0; + } + + if (line) + *line = a2l->line; + + return ret; +} + +void dso__free_a2l_libbfd(struct dso *dso) +{ + struct a2l_data *a2l = dso__a2l(dso); + + if (!a2l) + return; + + addr2line_cleanup(a2l); + + dso__set_a2l(dso, NULL); +} + +static int bfd_symbols__cmpvalue(const void *a, const void *b) +{ + const asymbol *as = *(const asymbol **)a, *bs = *(const asymbol **)b; + + if (bfd_asymbol_value(as) != bfd_asymbol_value(bs)) + return bfd_asymbol_value(as) - bfd_asymbol_value(bs); + + return bfd_asymbol_name(as)[0] - bfd_asymbol_name(bs)[0]; +} + +static int bfd2elf_binding(asymbol *symbol) +{ + if (symbol->flags & BSF_WEAK) + return STB_WEAK; + if (symbol->flags & BSF_GLOBAL) + return STB_GLOBAL; + if (symbol->flags & BSF_LOCAL) + return STB_LOCAL; + return -1; +} + +int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) +{ + int err = -1; + long symbols_size, symbols_count, i; + asection *section; + asymbol **symbols, *sym; + struct symbol *symbol; + bfd *abfd; + u64 start, len; + + ensure_bfd_init(); + abfd = bfd_openr(debugfile, NULL); + if (!abfd) + return -1; + + if (!bfd_check_format(abfd, bfd_object)) { + pr_debug2("%s: cannot read %s bfd file.\n", __func__, + dso__long_name(dso)); + goto out_close; + } + + if (bfd_get_flavour(abfd) == bfd_target_elf_flavour) + goto out_close; + + symbols_size = bfd_get_symtab_upper_bound(abfd); + if (symbols_size == 0) { + bfd_close(abfd); + return 0; + } + + if (symbols_size < 0) + goto out_close; + + symbols = malloc(symbols_size); + if (!symbols) + goto out_close; + + symbols_count = bfd_canonicalize_symtab(abfd, symbols); + if (symbols_count < 0) + goto out_free; + + section = bfd_get_section_by_name(abfd, ".text"); + if (section) { + for (i = 0; i < symbols_count; ++i) { + if (!strcmp(bfd_asymbol_name(symbols[i]), "__ImageBase") || + !strcmp(bfd_asymbol_name(symbols[i]), "__image_base__")) + break; + } + if (i < symbols_count) { + /* PE symbols can only have 4 bytes, so use .text high bits */ + u64 text_offset = (section->vma - (u32)section->vma) + + (u32)bfd_asymbol_value(symbols[i]); + dso__set_text_offset(dso, text_offset); 
+ dso__set_text_end(dso, (section->vma - text_offset) + section->size); + } else { + dso__set_text_offset(dso, section->vma - section->filepos); + dso__set_text_end(dso, section->filepos + section->size); + } + } + + qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue); + +#ifdef bfd_get_section +#define bfd_asymbol_section bfd_get_section +#endif + for (i = 0; i < symbols_count; ++i) { + sym = symbols[i]; + section = bfd_asymbol_section(sym); + if (bfd2elf_binding(sym) < 0) + continue; + + while (i + 1 < symbols_count && + bfd_asymbol_section(symbols[i + 1]) == section && + bfd2elf_binding(symbols[i + 1]) < 0) + i++; + + if (i + 1 < symbols_count && + bfd_asymbol_section(symbols[i + 1]) == section) + len = symbols[i + 1]->value - sym->value; + else + len = section->size - sym->value; + + start = bfd_asymbol_value(sym) - dso__text_offset(dso); + symbol = symbol__new(start, len, bfd2elf_binding(sym), STT_FUNC, + bfd_asymbol_name(sym)); + if (!symbol) + goto out_free; + + symbols__insert(dso__symbols(dso), symbol); + } +#ifdef bfd_get_section +#undef bfd_asymbol_section +#endif + + symbols__fixup_end(dso__symbols(dso), false); + symbols__fixup_duplicate(dso__symbols(dso)); + dso__set_adjust_symbols(dso, true); + + err = 0; +out_free: + free(symbols); +out_close: + bfd_close(abfd); + return err; +} + +int libbfd__read_build_id(const char *filename, struct build_id *bid) +{ + size_t size = sizeof(bid->data); + int err = -1, fd; + bfd *abfd; + + if (!filename) + return -EFAULT; + if (!is_regular_file(filename)) + return -EWOULDBLOCK; + + fd = open(filename, O_RDONLY); + if (fd < 0) + return -1; + + ensure_bfd_init(); + abfd = bfd_fdopenr(filename, /*target=*/NULL, fd); + if (!abfd) + return -1; + + if (!bfd_check_format(abfd, bfd_object)) { + pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename); + goto out_close; + } + + if (!abfd->build_id || abfd->build_id->size > size) + goto out_close; + + memcpy(bid->data, abfd->build_id->data, abfd->build_id->size); + memset(bid->data + abfd->build_id->size, 0, size - abfd->build_id->size); + err = bid->size = abfd->build_id->size; + +out_close: + bfd_close(abfd); + return err; +} + +int libbfd_filename__read_debuglink(const char *filename, char *debuglink, + size_t size) +{ + int err = -1; + asection *section; + bfd *abfd; + + ensure_bfd_init(); + abfd = bfd_openr(filename, NULL); + if (!abfd) + return -1; + + if (!bfd_check_format(abfd, bfd_object)) { + pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename); + goto out_close; + } + + section = bfd_get_section_by_name(abfd, ".gnu_debuglink"); + if (!section) + goto out_close; + + if (section->size > size) + goto out_close; + + if (!bfd_get_section_contents(abfd, section, debuglink, 0, + section->size)) + goto out_close; + + err = 0; + +out_close: + bfd_close(abfd); + return err; +} + +int symbol__disassemble_bpf_libbfd(struct symbol *sym __maybe_unused, + struct annotate_args *args __maybe_unused) +{ +#ifdef HAVE_LIBBPF_SUPPORT + struct annotation *notes = symbol__annotation(sym); + struct bpf_prog_linfo *prog_linfo = NULL; + struct bpf_prog_info_node *info_node; + int len = sym->end - sym->start; + disassembler_ftype disassemble; + struct map *map = args->ms.map; + struct perf_bpil *info_linear; + struct disassemble_info info; + struct dso *dso = map__dso(map); + int pc = 0, count, sub_id; + struct btf *btf = NULL; + char tpath[PATH_MAX]; + size_t buf_size; + int nr_skip = 0; + char *buf; + bfd *bfdf; + int ret; + FILE *s; + + if (dso__binary_type(dso) != 
DSO_BINARY_TYPE__BPF_PROG_INFO) + return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE; + + pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__, + sym->name, sym->start, sym->end - sym->start); + + memset(tpath, 0, sizeof(tpath)); + perf_exe(tpath, sizeof(tpath)); + + ensure_bfd_init(); + bfdf = bfd_openr(tpath, NULL); + if (bfdf == NULL) + abort(); + + if (!bfd_check_format(bfdf, bfd_object)) + abort(); + + s = open_memstream(&buf, &buf_size); + if (!s) { + ret = errno; + goto out; + } + init_disassemble_info_compat(&info, s, + (fprintf_ftype) fprintf, + fprintf_styled); + info.arch = bfd_get_arch(bfdf); + info.mach = bfd_get_mach(bfdf); + + info_node = perf_env__find_bpf_prog_info(dso__bpf_prog(dso)->env, + dso__bpf_prog(dso)->id); + if (!info_node) { + ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF; + goto out; + } + info_linear = info_node->info_linear; + sub_id = dso__bpf_prog(dso)->sub_id; + + info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns); + info.buffer_length = info_linear->info.jited_prog_len; + + if (info_linear->info.nr_line_info) + prog_linfo = bpf_prog_linfo__new(&info_linear->info); + + if (info_linear->info.btf_id) { + struct btf_node *node; + + node = perf_env__find_btf(dso__bpf_prog(dso)->env, + info_linear->info.btf_id); + if (node) + btf = btf__new((__u8 *)(node->data), + node->data_size); + } + + disassemble_init_for_target(&info); + +#ifdef DISASM_FOUR_ARGS_SIGNATURE + disassemble = disassembler(info.arch, + bfd_big_endian(bfdf), + info.mach, + bfdf); +#else + disassemble = disassembler(bfdf); +#endif + if (disassemble == NULL) + abort(); + + fflush(s); + do { + const struct bpf_line_info *linfo = NULL; + struct disasm_line *dl; + size_t prev_buf_size; + const char *srcline; + u64 addr; + + addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id]; + count = disassemble(pc, &info); + + if (prog_linfo) + linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo, + addr, sub_id, + nr_skip); + + if (linfo && btf) { + srcline = btf__name_by_offset(btf, linfo->line_off); + nr_skip++; + } else + srcline = NULL; + + fprintf(s, "\n"); + prev_buf_size = buf_size; + fflush(s); + + if (!annotate_opts.hide_src_code && srcline) { + args->offset = -1; + args->line = strdup(srcline); + args->line_nr = 0; + args->fileloc = NULL; + args->ms.sym = sym; + dl = disasm_line__new(args); + if (dl) { + annotation_line__add(&dl->al, + ¬es->src->source); + } + } + + args->offset = pc; + args->line = buf + prev_buf_size; + args->line_nr = 0; + args->fileloc = NULL; + args->ms.sym = sym; + dl = disasm_line__new(args); + if (dl) + annotation_line__add(&dl->al, ¬es->src->source); + + pc += count; + } while (count > 0 && pc < len); + + ret = 0; +out: + free(prog_linfo); + btf__free(btf); + fclose(s); + bfd_close(bfdf); + return ret; +#else + return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF; +#endif +} diff --git a/tools/perf/util/libbfd.h b/tools/perf/util/libbfd.h new file mode 100644 index 000000000000..953886f3d62f --- /dev/null +++ b/tools/perf/util/libbfd.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_LIBBFD_H +#define __PERF_LIBBFD_H + +#include <linux/compiler.h> +#include <linux/types.h> +#include <stdbool.h> +#include <stddef.h> + +struct annotate_args; +struct build_id; +struct dso; +struct inline_node; +struct symbol; + +#ifdef HAVE_LIBBFD_SUPPORT +int libbfd__addr2line(const char *dso_name, u64 addr, + char **file, unsigned int *line, struct dso *dso, + bool unwind_inlines, struct inline_node *node, + struct 
symbol *sym); + + +void dso__free_a2l_libbfd(struct dso *dso); + +int symbol__disassemble_libbfd(const char *filename, struct symbol *sym, + struct annotate_args *args); + +int libbfd__read_build_id(const char *filename, struct build_id *bid); + +int libbfd_filename__read_debuglink(const char *filename, char *debuglink, size_t size); + +int symbol__disassemble_bpf_libbfd(struct symbol *sym, struct annotate_args *args); + +#else // !defined(HAVE_LIBBFD_SUPPORT) +#include "annotate.h" + +static inline int libbfd__addr2line(const char *dso_name __always_unused, + u64 addr __always_unused, + char **file __always_unused, + unsigned int *line __always_unused, + struct dso *dso __always_unused, + bool unwind_inlines __always_unused, + struct inline_node *node __always_unused, + struct symbol *sym __always_unused) +{ + return -1; +} + + +static inline void dso__free_a2l_libbfd(struct dso *dso __always_unused) +{ +} + +static inline int symbol__disassemble_libbfd(const char *filename __always_unused, + struct symbol *sym __always_unused, + struct annotate_args *args __always_unused) +{ + return -1; +} + +static inline int libbfd__read_build_id(const char *filename __always_unused, + struct build_id *bid __always_unused) +{ + return -1; +} + +static inline int libbfd_filename__read_debuglink(const char *filename __always_unused, + char *debuglink __always_unused, + size_t size __always_unused) +{ + return -1; +} + +static inline int symbol__disassemble_bpf_libbfd(struct symbol *sym __always_unused, + struct annotate_args *args __always_unused) +{ + return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF; +} + +#endif // defined(HAVE_LIBBFD_SUPPORT) + +#endif /* __PERF_LIBBFD_H */ diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c index 014d82159656..37ecef0c53b9 100644 --- a/tools/perf/util/libunwind/arm64.c +++ b/tools/perf/util/libunwind/arm64.c @@ -18,8 +18,6 @@ * defined before including "unwind.h" */ #define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arm64_reg_id(regnum) -#define LIBUNWIND__ARCH_REG_IP PERF_REG_ARM64_PC -#define LIBUNWIND__ARCH_REG_SP PERF_REG_ARM64_SP #include "unwind.h" #include "libunwind-aarch64.h" diff --git a/tools/perf/util/libunwind/x86_32.c b/tools/perf/util/libunwind/x86_32.c index b2b92d030aef..1697dece1b74 100644 --- a/tools/perf/util/libunwind/x86_32.c +++ b/tools/perf/util/libunwind/x86_32.c @@ -18,8 +18,6 @@ * defined before including "unwind.h" */ #define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__x86_reg_id(regnum) -#define LIBUNWIND__ARCH_REG_IP PERF_REG_X86_IP -#define LIBUNWIND__ARCH_REG_SP PERF_REG_X86_SP #include "unwind.h" #include "libunwind-x86.h" diff --git a/tools/perf/util/llvm-c-helpers.cpp b/tools/perf/util/llvm-c-helpers.cpp new file mode 100644 index 000000000000..004081bd12c9 --- /dev/null +++ b/tools/perf/util/llvm-c-helpers.cpp @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Must come before the linux/compiler.h include, which defines several + * macros (e.g. noinline) that conflict with compiler builtins used + * by LLVM. 
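+ * (noinline, for instance, is a plain #define in the tools copy of + * linux/compiler.h and would get substituted inside the LLVM headers + * if they were included after it.)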
+ */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" /* Needed for LLVM <= 15 */ +#include <llvm/DebugInfo/Symbolize/Symbolize.h> +#include <llvm/Support/TargetSelect.h> +#pragma GCC diagnostic pop + +#include <inttypes.h> +#include <stdio.h> +#include <sys/types.h> +#include <linux/compiler.h> +extern "C" { +#include <linux/zalloc.h> +} +#include "llvm-c-helpers.h" + +extern "C" +char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name); + +using namespace llvm; +using llvm::symbolize::LLVMSymbolizer; + +/* + * Allocate a static LLVMSymbolizer, which will live to the end of the program. + * Unlike the bfd paths, LLVMSymbolizer has its own cache, so we do not need + * to store anything in the dso struct. + */ +static LLVMSymbolizer *get_symbolizer() +{ + static LLVMSymbolizer *instance = nullptr; + if (instance == nullptr) { + LLVMSymbolizer::Options opts; + /* + * LLVM sometimes demangles slightly differently from the rest + * of the code, and this mismatch can cause new_inline_sym() + * to get confused and mark a non-inline symbol as inlined + * (since the name does not properly match up with base_sym). + * Thus, disable the demangling and let the rest of the code + * handle it. + */ + opts.Demangle = false; + instance = new LLVMSymbolizer(opts); + } + return instance; +} + +/* Returns 0 on error, 1 on success. */ +static int extract_file_and_line(const DILineInfo &line_info, char **file, + unsigned int *line) +{ + if (file) { + if (line_info.FileName == "<invalid>") { + /* Match the convention of libbfd. */ + *file = nullptr; + } else { + /* The caller expects to get something it can free(). */ + *file = strdup(line_info.FileName.c_str()); + if (*file == nullptr) + return 0; + } + } + if (line) + *line = line_info.Line; + return 1; +} + +extern "C" +int llvm_addr2line(const char *dso_name, u64 addr, + char **file, unsigned int *line, + bool unwind_inlines, + llvm_a2l_frame **inline_frames) +{ + LLVMSymbolizer *symbolizer = get_symbolizer(); + object::SectionedAddress sectioned_addr = { + addr, + object::SectionedAddress::UndefSection + }; + + if (unwind_inlines) { + Expected<DIInliningInfo> res_or_err = + symbolizer->symbolizeInlinedCode(dso_name, + sectioned_addr); + if (!res_or_err) + return 0; + unsigned num_frames = res_or_err->getNumberOfFrames(); + if (num_frames == 0) + return 0; + + if (extract_file_and_line(res_or_err->getFrame(0), + file, line) == 0) + return 0; + + *inline_frames = (llvm_a2l_frame *)calloc( + num_frames, sizeof(**inline_frames)); + if (*inline_frames == nullptr) + return 0; + + for (unsigned i = 0; i < num_frames; ++i) { + const DILineInfo &src = res_or_err->getFrame(i); + + llvm_a2l_frame &dst = (*inline_frames)[i]; + if (src.FileName == "<invalid>") + /* Match the convention of libbfd. 
*/ + dst.filename = nullptr; + else + dst.filename = strdup(src.FileName.c_str()); + dst.funcname = strdup(src.FunctionName.c_str()); + dst.line = src.Line; + + if (dst.filename == nullptr || + dst.funcname == nullptr) { + for (unsigned j = 0; j <= i; ++j) { + zfree(&(*inline_frames)[j].filename); + zfree(&(*inline_frames)[j].funcname); + } + zfree(inline_frames); + return 0; + } + } + + return num_frames; + } else { + if (inline_frames) + *inline_frames = nullptr; + + Expected<DILineInfo> res_or_err = + symbolizer->symbolizeCode(dso_name, sectioned_addr); + if (!res_or_err) + return 0; + return extract_file_and_line(*res_or_err, file, line); + } +} + +static char * +make_symbol_relative_string(struct dso *dso, const char *sym_name, + u64 addr, u64 base_addr) +{ + if (!strcmp(sym_name, "<invalid>")) + return NULL; + + char *demangled = dso__demangle_sym(dso, 0, sym_name); + if (base_addr && base_addr != addr) { + char buf[256]; + snprintf(buf, sizeof(buf), "%s+0x%" PRIx64, + demangled ? demangled : sym_name, addr - base_addr); + free(demangled); + return strdup(buf); + } else { + if (demangled) + return demangled; + else + return strdup(sym_name); + } +} + +extern "C" +char *llvm_name_for_code(struct dso *dso, const char *dso_name, u64 addr) +{ + LLVMSymbolizer *symbolizer = get_symbolizer(); + object::SectionedAddress sectioned_addr = { + addr, + object::SectionedAddress::UndefSection + }; + Expected<DILineInfo> res_or_err = + symbolizer->symbolizeCode(dso_name, sectioned_addr); + if (!res_or_err) { + return NULL; + } + return make_symbol_relative_string( + dso, res_or_err->FunctionName.c_str(), + addr, res_or_err->StartAddress ? *res_or_err->StartAddress : 0); +} + +extern "C" +char *llvm_name_for_data(struct dso *dso, const char *dso_name, u64 addr) +{ + LLVMSymbolizer *symbolizer = get_symbolizer(); + object::SectionedAddress sectioned_addr = { + addr, + object::SectionedAddress::UndefSection + }; + Expected<DIGlobal> res_or_err = + symbolizer->symbolizeData(dso_name, sectioned_addr); + if (!res_or_err) { + return NULL; + } + return make_symbol_relative_string( + dso, res_or_err->Name.c_str(), + addr, res_or_err->Start); +} diff --git a/tools/perf/util/llvm-c-helpers.h b/tools/perf/util/llvm-c-helpers.h new file mode 100644 index 000000000000..d2b99637a28a --- /dev/null +++ b/tools/perf/util/llvm-c-helpers.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_LLVM_C_HELPERS +#define __PERF_LLVM_C_HELPERS 1 + +/* + * Helpers to call into LLVM C++ code from C, for the parts that do not have + * C APIs. + */ + +#include <linux/compiler.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct dso; + +struct llvm_a2l_frame { + char* filename; + char* funcname; + unsigned int line; +}; + +/* + * Implement addr2line() using libLLVM. LLVM is a C++ API, and + * many of the linux/ headers cannot be included in a C++ compile unit, + * so we need to make a little bridge code here. llvm_addr2line() will + * convert the inline frame information from LLVM's internal structures + * and put them into a flat array given in inline_frames. The caller + * is then responsible for taking that array and convert it into perf's + * regular inline frame structures (which depend on e.g. struct list_head). + * + * If the address could not be resolved, or an error occurred (e.g. OOM), + * returns 0. Otherwise, returns the number of inline frames (which means 1 + * if the address was not part of an inlined function). 
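+ * (A return of 3, for example, means the address sits two inline + * levels deep.)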
If unwind_inlines + * is set and the return code is nonzero, inline_frames will be set to + * a newly allocated array with that length. The caller is then responsible + * for freeing both the strings and the array itself. + */ +int llvm_addr2line(const char* dso_name, + u64 addr, + char** file, + unsigned int* line, + bool unwind_inlines, + struct llvm_a2l_frame** inline_frames); + +/* + * Simple symbolizers for addresses; will convert something like + * 0x12345 to "func+0x123". Will return NULL if no symbol was found. + * + * The returned value must be freed by the caller, with free(). + */ +char *llvm_name_for_code(struct dso *dso, const char *dso_name, u64 addr); +char *llvm_name_for_data(struct dso *dso, const char *dso_name, u64 addr); + +#ifdef __cplusplus +} +#endif + +#endif /* __PERF_LLVM_C_HELPERS */ diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c deleted file mode 100644 index c6c9c2228578..000000000000 --- a/tools/perf/util/llvm-utils.c +++ /dev/null @@ -1,612 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com> - * Copyright (C) 2015, Huawei Inc. - */ - -#include <errno.h> -#include <limits.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <linux/err.h> -#include <linux/string.h> -#include <linux/zalloc.h> -#include "debug.h" -#include "llvm-utils.h" -#include "config.h" -#include "util.h" -#include <sys/wait.h> -#include <subcmd/exec-cmd.h> - -#define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ - "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ - "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \ - "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \ - "-Wno-unused-value -Wno-pointer-sign " \ - "-working-directory $WORKING_DIR " \ - "-c \"$CLANG_SOURCE\" --target=bpf $CLANG_EMIT_LLVM -g -O2 -o - $LLVM_OPTIONS_PIPE" - -struct llvm_param llvm_param = { - .clang_path = "clang", - .llc_path = "llc", - .clang_bpf_cmd_template = CLANG_BPF_CMD_DEFAULT_TEMPLATE, - .clang_opt = NULL, - .opts = NULL, - .kbuild_dir = NULL, - .kbuild_opts = NULL, - .user_set_param = false, -}; - -static void version_notice(void); - -int perf_llvm_config(const char *var, const char *value) -{ - if (!strstarts(var, "llvm.")) - return 0; - var += sizeof("llvm.") - 1; - - if (!strcmp(var, "clang-path")) - llvm_param.clang_path = strdup(value); - else if (!strcmp(var, "clang-bpf-cmd-template")) - llvm_param.clang_bpf_cmd_template = strdup(value); - else if (!strcmp(var, "clang-opt")) - llvm_param.clang_opt = strdup(value); - else if (!strcmp(var, "kbuild-dir")) - llvm_param.kbuild_dir = strdup(value); - else if (!strcmp(var, "kbuild-opts")) - llvm_param.kbuild_opts = strdup(value); - else if (!strcmp(var, "dump-obj")) - llvm_param.dump_obj = !!perf_config_bool(var, value); - else if (!strcmp(var, "opts")) - llvm_param.opts = strdup(value); - else { - pr_debug("Invalid LLVM config option: %s\n", value); - return -1; - } - llvm_param.user_set_param = true; - return 0; -} - -static int -search_program(const char *def, const char *name, - char *output) -{ - char *env, *path, *tmp = NULL; - char buf[PATH_MAX]; - int ret; - - output[0] = '\0'; - if (def && def[0] != '\0') { - if (def[0] == '/') { - if (access(def, F_OK) == 0) { - strlcpy(output, def, PATH_MAX); - return 0; - } - } else if (def[0] != '\0') - name = def; - } - - env = getenv("PATH"); - if (!env) - return -1; - env = strdup(env); - if (!env) - return -1; - - ret = -ENOENT; - path = strtok_r(env, ":", &tmp); - while (path) { - scnprintf(buf, sizeof(buf), 
"%s/%s", path, name); - if (access(buf, F_OK) == 0) { - strlcpy(output, buf, PATH_MAX); - ret = 0; - break; - } - path = strtok_r(NULL, ":", &tmp); - } - - free(env); - return ret; -} - -static int search_program_and_warn(const char *def, const char *name, - char *output) -{ - int ret = search_program(def, name, output); - - if (ret) { - pr_err("ERROR:\tunable to find %s.\n" - "Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n" - " \tand '%s-path' option in [llvm] section of ~/.perfconfig.\n", - name, name); - version_notice(); - } - return ret; -} - -#define READ_SIZE 4096 -static int -read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz) -{ - int err = 0; - void *buf = NULL; - FILE *file = NULL; - size_t read_sz = 0, buf_sz = 0; - char serr[STRERR_BUFSIZE]; - - file = popen(cmd, "r"); - if (!file) { - pr_err("ERROR: unable to popen cmd: %s\n", - str_error_r(errno, serr, sizeof(serr))); - return -EINVAL; - } - - while (!feof(file) && !ferror(file)) { - /* - * Make buf_sz always have obe byte extra space so we - * can put '\0' there. - */ - if (buf_sz - read_sz < READ_SIZE + 1) { - void *new_buf; - - buf_sz = read_sz + READ_SIZE + 1; - new_buf = realloc(buf, buf_sz); - - if (!new_buf) { - pr_err("ERROR: failed to realloc memory\n"); - err = -ENOMEM; - goto errout; - } - - buf = new_buf; - } - read_sz += fread(buf + read_sz, 1, READ_SIZE, file); - } - - if (buf_sz - read_sz < 1) { - pr_err("ERROR: internal error\n"); - err = -EINVAL; - goto errout; - } - - if (ferror(file)) { - pr_err("ERROR: error occurred when reading from pipe: %s\n", - str_error_r(errno, serr, sizeof(serr))); - err = -EIO; - goto errout; - } - - err = WEXITSTATUS(pclose(file)); - file = NULL; - if (err) { - err = -EINVAL; - goto errout; - } - - /* - * If buf is string, give it terminal '\0' to make our life - * easier. If buf is not string, that '\0' is out of space - * indicated by read_sz so caller won't even notice it. - */ - ((char *)buf)[read_sz] = '\0'; - - if (!p_buf) - free(buf); - else - *p_buf = buf; - - if (p_read_sz) - *p_read_sz = read_sz; - return 0; - -errout: - if (file) - pclose(file); - free(buf); - if (p_buf) - *p_buf = NULL; - if (p_read_sz) - *p_read_sz = 0; - return err; -} - -static inline void -force_set_env(const char *var, const char *value) -{ - if (value) { - setenv(var, value, 1); - pr_debug("set env: %s=%s\n", var, value); - } else { - unsetenv(var); - pr_debug("unset env: %s\n", var); - } -} - -static void -version_notice(void) -{ - pr_err( -" \tLLVM 3.7 or newer is required. 
Which can be found from http://llvm.org\n" -" \tYou may want to try git trunk:\n" -" \t\tgit clone http://llvm.org/git/llvm.git\n" -" \t\t and\n" -" \t\tgit clone http://llvm.org/git/clang.git\n\n" -" \tOr fetch the latest clang/llvm 3.7 from pre-built llvm packages for\n" -" \tdebian/ubuntu:\n" -" \t\thttps://apt.llvm.org/\n\n" -" \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n" -" \toption in [llvm] section of ~/.perfconfig to:\n\n" -" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS \\\n" -" \t -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n" -" \t -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n" -" \t(Replace /path/to/llc with path to your llc)\n\n" -); -} - -static int detect_kbuild_dir(char **kbuild_dir) -{ - const char *test_dir = llvm_param.kbuild_dir; - const char *prefix_dir = ""; - const char *suffix_dir = ""; - - /* _UTSNAME_LENGTH is 65 */ - char release[128]; - - char *autoconf_path; - - int err; - - if (!test_dir) { - err = fetch_kernel_version(NULL, release, - sizeof(release)); - if (err) - return -EINVAL; - - test_dir = release; - prefix_dir = "/lib/modules/"; - suffix_dir = "/build"; - } - - err = asprintf(&autoconf_path, "%s%s%s/include/generated/autoconf.h", - prefix_dir, test_dir, suffix_dir); - if (err < 0) - return -ENOMEM; - - if (access(autoconf_path, R_OK) == 0) { - free(autoconf_path); - - err = asprintf(kbuild_dir, "%s%s%s", prefix_dir, test_dir, - suffix_dir); - if (err < 0) - return -ENOMEM; - return 0; - } - pr_debug("%s: Couldn't find \"%s\", missing kernel-devel package?.\n", - __func__, autoconf_path); - free(autoconf_path); - return -ENOENT; -} - -static const char *kinc_fetch_script = -"#!/usr/bin/env sh\n" -"if ! test -d \"$KBUILD_DIR\"\n" -"then\n" -" exit 1\n" -"fi\n" -"if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n" -"then\n" -" exit 1\n" -"fi\n" -"TMPDIR=`mktemp -d`\n" -"if test -z \"$TMPDIR\"\n" -"then\n" -" exit 1\n" -"fi\n" -"cat << EOF > $TMPDIR/Makefile\n" -"obj-y := dummy.o\n" -"\\$(obj)/%.o: \\$(src)/%.c\n" -"\t@echo -n \"\\$(NOSTDINC_FLAGS) \\$(LINUXINCLUDE) \\$(EXTRA_CFLAGS)\"\n" -"\t\\$(CC) -c -o \\$@ \\$<\n" -"EOF\n" -"touch $TMPDIR/dummy.c\n" -"make -s -C $KBUILD_DIR M=$TMPDIR $KBUILD_OPTS dummy.o 2>/dev/null\n" -"RET=$?\n" -"rm -rf $TMPDIR\n" -"exit $RET\n"; - -void llvm__get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts) -{ - static char *saved_kbuild_dir; - static char *saved_kbuild_include_opts; - int err; - - if (!kbuild_dir || !kbuild_include_opts) - return; - - *kbuild_dir = NULL; - *kbuild_include_opts = NULL; - - if (saved_kbuild_dir && saved_kbuild_include_opts && - !IS_ERR(saved_kbuild_dir) && !IS_ERR(saved_kbuild_include_opts)) { - *kbuild_dir = strdup(saved_kbuild_dir); - *kbuild_include_opts = strdup(saved_kbuild_include_opts); - - if (*kbuild_dir && *kbuild_include_opts) - return; - - zfree(kbuild_dir); - zfree(kbuild_include_opts); - /* - * Don't fall through: it may breaks saved_kbuild_dir and - * saved_kbuild_include_opts if detect them again when - * memory is low. 
- */ - return; - } - - if (llvm_param.kbuild_dir && !llvm_param.kbuild_dir[0]) { - pr_debug("[llvm.kbuild-dir] is set to \"\" deliberately.\n"); - pr_debug("Skip kbuild options detection.\n"); - goto errout; - } - - err = detect_kbuild_dir(kbuild_dir); - if (err) { - pr_warning( -"WARNING:\tunable to get correct kernel building directory.\n" -"Hint:\tSet correct kbuild directory using 'kbuild-dir' option in [llvm]\n" -" \tsection of ~/.perfconfig or set it to \"\" to suppress kbuild\n" -" \tdetection.\n\n"); - goto errout; - } - - pr_debug("Kernel build dir is set to %s\n", *kbuild_dir); - force_set_env("KBUILD_DIR", *kbuild_dir); - force_set_env("KBUILD_OPTS", llvm_param.kbuild_opts); - err = read_from_pipe(kinc_fetch_script, - (void **)kbuild_include_opts, - NULL); - if (err) { - pr_warning( -"WARNING:\tunable to get kernel include directories from '%s'\n" -"Hint:\tTry set clang include options using 'clang-bpf-cmd-template'\n" -" \toption in [llvm] section of ~/.perfconfig and set 'kbuild-dir'\n" -" \toption in [llvm] to \"\" to suppress this detection.\n\n", - *kbuild_dir); - - zfree(kbuild_dir); - goto errout; - } - - pr_debug("include option is set to %s\n", *kbuild_include_opts); - - saved_kbuild_dir = strdup(*kbuild_dir); - saved_kbuild_include_opts = strdup(*kbuild_include_opts); - - if (!saved_kbuild_dir || !saved_kbuild_include_opts) { - zfree(&saved_kbuild_dir); - zfree(&saved_kbuild_include_opts); - } - return; -errout: - saved_kbuild_dir = ERR_PTR(-EINVAL); - saved_kbuild_include_opts = ERR_PTR(-EINVAL); -} - -int llvm__get_nr_cpus(void) -{ - static int nr_cpus_avail = 0; - char serr[STRERR_BUFSIZE]; - - if (nr_cpus_avail > 0) - return nr_cpus_avail; - - nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF); - if (nr_cpus_avail <= 0) { - pr_err( -"WARNING:\tunable to get available CPUs in this system: %s\n" -" \tUse 128 instead.\n", str_error_r(errno, serr, sizeof(serr))); - nr_cpus_avail = 128; - } - return nr_cpus_avail; -} - -void llvm__dump_obj(const char *path, void *obj_buf, size_t size) -{ - char *obj_path = strdup(path); - FILE *fp; - char *p; - - if (!obj_path) { - pr_warning("WARNING: Not enough memory, skip object dumping\n"); - return; - } - - p = strrchr(obj_path, '.'); - if (!p || (strcmp(p, ".c") != 0)) { - pr_warning("WARNING: invalid llvm source path: '%s', skip object dumping\n", - obj_path); - goto out; - } - - p[1] = 'o'; - fp = fopen(obj_path, "wb"); - if (!fp) { - pr_warning("WARNING: failed to open '%s': %s, skip object dumping\n", - obj_path, strerror(errno)); - goto out; - } - - pr_debug("LLVM: dumping %s\n", obj_path); - if (fwrite(obj_buf, size, 1, fp) != 1) - pr_debug("WARNING: failed to write to file '%s': %s, skip object dumping\n", obj_path, strerror(errno)); - fclose(fp); -out: - free(obj_path); -} - -int llvm__compile_bpf(const char *path, void **p_obj_buf, - size_t *p_obj_buf_sz) -{ - size_t obj_buf_sz; - void *obj_buf = NULL; - int err, nr_cpus_avail; - unsigned int kernel_version; - char linux_version_code_str[64]; - const char *clang_opt = llvm_param.clang_opt; - char clang_path[PATH_MAX], llc_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64]; - char serr[STRERR_BUFSIZE]; - char *kbuild_dir = NULL, *kbuild_include_opts = NULL, - *perf_bpf_include_opts = NULL; - const char *template = llvm_param.clang_bpf_cmd_template; - char *pipe_template = NULL; - const char *opts = llvm_param.opts; - char *command_echo = NULL, *command_out; - char *libbpf_include_dir = system_path(LIBBPF_INCLUDE_DIR); - - if (path[0] != '-' && realpath(path, abspath) == 
NULL) { - err = errno; - pr_err("ERROR: problems with path %s: %s\n", - path, str_error_r(err, serr, sizeof(serr))); - return -err; - } - - if (!template) - template = CLANG_BPF_CMD_DEFAULT_TEMPLATE; - - err = search_program_and_warn(llvm_param.clang_path, - "clang", clang_path); - if (err) - return -ENOENT; - - /* - * This is an optional work. Even it fail we can continue our - * work. Needn't check error return. - */ - llvm__get_kbuild_opts(&kbuild_dir, &kbuild_include_opts); - - nr_cpus_avail = llvm__get_nr_cpus(); - snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d", - nr_cpus_avail); - - if (fetch_kernel_version(&kernel_version, NULL, 0)) - kernel_version = 0; - - snprintf(linux_version_code_str, sizeof(linux_version_code_str), - "0x%x", kernel_version); - if (asprintf(&perf_bpf_include_opts, "-I%s/", libbpf_include_dir) < 0) - goto errout; - force_set_env("NR_CPUS", nr_cpus_avail_str); - force_set_env("LINUX_VERSION_CODE", linux_version_code_str); - force_set_env("CLANG_EXEC", clang_path); - force_set_env("CLANG_OPTIONS", clang_opt); - force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts); - force_set_env("PERF_BPF_INC_OPTIONS", perf_bpf_include_opts); - force_set_env("WORKING_DIR", kbuild_dir ? : "."); - - if (opts) { - err = search_program_and_warn(llvm_param.llc_path, "llc", llc_path); - if (err) - goto errout; - - err = -ENOMEM; - if (asprintf(&pipe_template, "%s -emit-llvm | %s -march=bpf %s -filetype=obj -o -", - template, llc_path, opts) < 0) { - pr_err("ERROR:\tnot enough memory to setup command line\n"); - goto errout; - } - - template = pipe_template; - - } - - /* - * Since we may reset clang's working dir, path of source file - * should be transferred into absolute path, except we want - * stdin to be source file (testing). - */ - force_set_env("CLANG_SOURCE", - (path[0] == '-') ? path : abspath); - - pr_debug("llvm compiling command template: %s\n", template); - - /* - * Below, substitute control characters for values that can cause the - * echo to misbehave, then substitute the values back. 
- */ - err = -ENOMEM; - if (asprintf(&command_echo, "echo -n \a%s\a", template) < 0) - goto errout; - -#define SWAP_CHAR(a, b) do { if (*p == a) *p = b; } while (0) - for (char *p = command_echo; *p; p++) { - SWAP_CHAR('<', '\001'); - SWAP_CHAR('>', '\002'); - SWAP_CHAR('"', '\003'); - SWAP_CHAR('\'', '\004'); - SWAP_CHAR('|', '\005'); - SWAP_CHAR('&', '\006'); - SWAP_CHAR('\a', '"'); - } - err = read_from_pipe(command_echo, (void **) &command_out, NULL); - if (err) - goto errout; - - for (char *p = command_out; *p; p++) { - SWAP_CHAR('\001', '<'); - SWAP_CHAR('\002', '>'); - SWAP_CHAR('\003', '"'); - SWAP_CHAR('\004', '\''); - SWAP_CHAR('\005', '|'); - SWAP_CHAR('\006', '&'); - } -#undef SWAP_CHAR - pr_debug("llvm compiling command : %s\n", command_out); - - err = read_from_pipe(template, &obj_buf, &obj_buf_sz); - if (err) { - pr_err("ERROR:\tunable to compile %s\n", path); - pr_err("Hint:\tCheck error message shown above.\n"); - pr_err("Hint:\tYou can also pre-compile it into .o using:\n"); - pr_err(" \t\tclang --target=bpf -O2 -c %s\n", path); - pr_err(" \twith proper -I and -D options.\n"); - goto errout; - } - - free(command_echo); - free(command_out); - free(kbuild_dir); - free(kbuild_include_opts); - free(perf_bpf_include_opts); - free(libbpf_include_dir); - - if (!p_obj_buf) - free(obj_buf); - else - *p_obj_buf = obj_buf; - - if (p_obj_buf_sz) - *p_obj_buf_sz = obj_buf_sz; - return 0; -errout: - free(command_echo); - free(kbuild_dir); - free(kbuild_include_opts); - free(obj_buf); - free(perf_bpf_include_opts); - free(libbpf_include_dir); - free(pipe_template); - if (p_obj_buf) - *p_obj_buf = NULL; - if (p_obj_buf_sz) - *p_obj_buf_sz = 0; - return err; -} - -int llvm__search_clang(void) -{ - char clang_path[PATH_MAX]; - - return search_program_and_warn(llvm_param.clang_path, "clang", clang_path); -} diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h deleted file mode 100644 index 7878a0e3fa98..000000000000 --- a/tools/perf/util/llvm-utils.h +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com> - * Copyright (C) 2015, Huawei Inc. - */ -#ifndef __LLVM_UTILS_H -#define __LLVM_UTILS_H - -#include <stdbool.h> - -struct llvm_param { - /* Path of clang executable */ - const char *clang_path; - /* Path of llc executable */ - const char *llc_path; - /* - * Template of clang bpf compiling. 5 env variables - * can be used: - * $CLANG_EXEC: Path to clang. - * $CLANG_OPTIONS: Extra options to clang. - * $KERNEL_INC_OPTIONS: Kernel include directories. - * $WORKING_DIR: Kernel source directory. - * $CLANG_SOURCE: Source file to be compiled. - */ - const char *clang_bpf_cmd_template; - /* Will be filled in $CLANG_OPTIONS */ - const char *clang_opt; - /* - * If present it'll add -emit-llvm to $CLANG_OPTIONS to pipe - * the clang output to llc, useful for new llvm options not - * yet selectable via 'clang -mllvm option', such as -mattr=dwarfris - * in clang 6.0/llvm 7 - */ - const char *opts; - /* Where to find kbuild system */ - const char *kbuild_dir; - /* - * Arguments passed to make, like 'ARCH=arm' if doing cross - * compiling. Should not be used for dynamic compiling. - */ - const char *kbuild_opts; - /* - * Default is false. If set to true, write compiling result - * to object file. - */ - bool dump_obj; - /* - * Default is false. If one of the above fields is set by user - * explicitly then user_set_llvm is set to true. This is used - * for perf test. 
If user doesn't set anything in .perfconfig - * and clang is not found, don't trigger llvm test. - */ - bool user_set_param; -}; - -extern struct llvm_param llvm_param; -int perf_llvm_config(const char *var, const char *value); - -int llvm__compile_bpf(const char *path, void **p_obj_buf, size_t *p_obj_buf_sz); - -/* This function is for test__llvm() use only */ -int llvm__search_clang(void); - -/* Following functions are reused by builtin clang support */ -void llvm__get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts); -int llvm__get_nr_cpus(void); - -void llvm__dump_obj(const char *path, void *obj_buf, size_t size); -#endif diff --git a/tools/perf/util/llvm.c b/tools/perf/util/llvm.c new file mode 100644 index 000000000000..2ebf1f5f65bf --- /dev/null +++ b/tools/perf/util/llvm.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "llvm.h" +#include "annotate.h" +#include "debug.h" +#include "dso.h" +#include "map.h" +#include "namespaces.h" +#include "srcline.h" +#include "symbol.h" +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> +#include <linux/zalloc.h> + +#ifdef HAVE_LIBLLVM_SUPPORT +#include "llvm-c-helpers.h" +#include <llvm-c/Disassembler.h> +#include <llvm-c/Target.h> +#endif + +#ifdef HAVE_LIBLLVM_SUPPORT +static void free_llvm_inline_frames(struct llvm_a2l_frame *inline_frames, + int num_frames) +{ + if (inline_frames != NULL) { + for (int i = 0; i < num_frames; ++i) { + zfree(&inline_frames[i].filename); + zfree(&inline_frames[i].funcname); + } + zfree(&inline_frames); + } +} +#endif + +int llvm__addr2line(const char *dso_name __maybe_unused, u64 addr __maybe_unused, + char **file __maybe_unused, unsigned int *line __maybe_unused, + struct dso *dso __maybe_unused, bool unwind_inlines __maybe_unused, + struct inline_node *node __maybe_unused, struct symbol *sym __maybe_unused) +{ +#ifdef HAVE_LIBLLVM_SUPPORT + struct llvm_a2l_frame *inline_frames = NULL; + int num_frames = llvm_addr2line(dso_name, addr, file, line, + node && unwind_inlines, &inline_frames); + + if (num_frames == 0 || !inline_frames) { + /* Error, or we didn't want inlines. */ + return num_frames; + } + + for (int i = 0; i < num_frames; ++i) { + struct symbol *inline_sym = + new_inline_sym(dso, sym, inline_frames[i].funcname); + char *srcline = NULL; + + if (inline_frames[i].filename) { + srcline = + srcline_from_fileline(inline_frames[i].filename, + inline_frames[i].line); + } + if (inline_list__append(inline_sym, srcline, node) != 0) { + free_llvm_inline_frames(inline_frames, num_frames); + return 0; + } + } + free_llvm_inline_frames(inline_frames, num_frames); + + return num_frames; +#else + return -1; +#endif +} + +#ifdef HAVE_LIBLLVM_SUPPORT +static void init_llvm(void) +{ + static bool init; + + if (!init) { + LLVMInitializeAllTargetInfos(); + LLVMInitializeAllTargetMCs(); + LLVMInitializeAllDisassemblers(); + init = true; + } +} + +/* + * Whenever LLVM wants to resolve an address into a symbol, it calls this + * callback. We don't ever actually _return_ anything (in particular, because + * it puts quotation marks around what we return), but we use this as a hint + * that there is a branch or PC-relative address in the expression that we + * should add some textual annotation for after the instruction. The caller + * will use this information to add the actual annotation. 
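+ * + * The storage fields are zeroed before every LLVMDisasmInstruction() + * call, so a nonzero branch_addr or pcrel_load_addr afterwards means + * the instruction just printed referenced that address.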
+ */ +struct symbol_lookup_storage { + u64 branch_addr; + u64 pcrel_load_addr; +}; + +static const char * +symbol_lookup_callback(void *disinfo, uint64_t value, + uint64_t *ref_type, + uint64_t address __maybe_unused, + const char **ref __maybe_unused) +{ + struct symbol_lookup_storage *storage = disinfo; + + if (*ref_type == LLVMDisassembler_ReferenceType_In_Branch) + storage->branch_addr = value; + else if (*ref_type == LLVMDisassembler_ReferenceType_In_PCrel_Load) + storage->pcrel_load_addr = value; + *ref_type = LLVMDisassembler_ReferenceType_InOut_None; + return NULL; +} +#endif + +int symbol__disassemble_llvm(const char *filename, struct symbol *sym, + struct annotate_args *args __maybe_unused) +{ +#ifdef HAVE_LIBLLVM_SUPPORT + struct annotation *notes = symbol__annotation(sym); + struct map *map = args->ms.map; + struct dso *dso = map__dso(map); + u64 start = map__rip_2objdump(map, sym->start); + /* Malloc-ed buffer containing instructions read from disk. */ + u8 *code_buf = NULL; + /* Pointer to code to be disassembled. */ + const u8 *buf; + u64 buf_len; + u64 pc; + bool is_64bit; + char disasm_buf[2048]; + size_t disasm_len; + struct disasm_line *dl; + LLVMDisasmContextRef disasm = NULL; + struct symbol_lookup_storage storage; + char *line_storage = NULL; + size_t line_storage_len = 0; + int ret = -1; + + if (args->options->objdump_path) + return -1; + + buf = dso__read_symbol(dso, filename, map, sym, + &code_buf, &buf_len, &is_64bit); + if (buf == NULL) + return errno; + + init_llvm(); + if (arch__is(args->arch, "x86")) { + const char *triplet = is_64bit ? "x86_64-pc-linux" : "i686-pc-linux"; + + disasm = LLVMCreateDisasm(triplet, &storage, /*tag_type=*/0, + /*get_op_info=*/NULL, symbol_lookup_callback); + } else { + char triplet[64]; + + scnprintf(triplet, sizeof(triplet), "%s-linux-gnu", + args->arch->name); + disasm = LLVMCreateDisasm(triplet, &storage, /*tag_type=*/0, + /*get_op_info=*/NULL, symbol_lookup_callback); + } + + if (disasm == NULL) + goto err; + + if (args->options->disassembler_style && + !strcmp(args->options->disassembler_style, "intel")) + LLVMSetDisasmOptions(disasm, + LLVMDisassembler_Option_AsmPrinterVariant); + + /* + * This needs to be set after AsmPrinterVariant, due to a bug in LLVM; + * setting AsmPrinterVariant makes a new instruction printer, making it + * forget about the PrintImmHex flag (which is applied before if both + * are given to the same call). + */ + LLVMSetDisasmOptions(disasm, LLVMDisassembler_Option_PrintImmHex); + + /* add the function address and name */ + scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", + start, sym->name); + + args->offset = -1; + args->line = disasm_buf; + args->line_nr = 0; + args->fileloc = NULL; + args->ms.sym = sym; + + dl = disasm_line__new(args); + if (dl == NULL) + goto err; + + annotation_line__add(&dl->al, ¬es->src->source); + + pc = start; + for (u64 offset = 0; offset < buf_len; ) { + unsigned int ins_len; + + storage.branch_addr = 0; + storage.pcrel_load_addr = 0; + + /* + * LLVM's API has the code be disassembled as non-const, cast + * here as we may be disassembling from mapped read-only memory. 
+ */ + ins_len = LLVMDisasmInstruction(disasm, (u8 *)(buf + offset), + buf_len - offset, pc, + disasm_buf, sizeof(disasm_buf)); + if (ins_len == 0) + goto err; + disasm_len = strlen(disasm_buf); + + if (storage.branch_addr != 0) { + char *name = llvm_name_for_code(dso, filename, + storage.branch_addr); + if (name != NULL) { + disasm_len += scnprintf(disasm_buf + disasm_len, + sizeof(disasm_buf) - + disasm_len, + " <%s>", name); + free(name); + } + } + if (storage.pcrel_load_addr != 0) { + char *name = llvm_name_for_data(dso, filename, + storage.pcrel_load_addr); + disasm_len += scnprintf(disasm_buf + disasm_len, + sizeof(disasm_buf) - disasm_len, + " # %#"PRIx64, + storage.pcrel_load_addr); + if (name) { + disasm_len += scnprintf(disasm_buf + disasm_len, + sizeof(disasm_buf) - + disasm_len, + " <%s>", name); + free(name); + } + } + + args->offset = offset; + args->line = expand_tabs(disasm_buf, &line_storage, + &line_storage_len); + args->line_nr = 0; + args->fileloc = NULL; + args->ms.sym = sym; + + llvm_addr2line(filename, pc, &args->fileloc, + (unsigned int *)&args->line_nr, false, NULL); + + dl = disasm_line__new(args); + if (dl == NULL) + goto err; + + annotation_line__add(&dl->al, ¬es->src->source); + + free(args->fileloc); + pc += ins_len; + offset += ins_len; + } + + ret = 0; + +err: + LLVMDisasmDispose(disasm); + free(code_buf); + free(line_storage); + return ret; +#else // HAVE_LIBLLVM_SUPPORT + pr_debug("The LLVM disassembler isn't linked in for %s in %s\n", + sym->name, filename); + return -1; +#endif +} diff --git a/tools/perf/util/llvm.h b/tools/perf/util/llvm.h new file mode 100644 index 000000000000..57f6bafb24bb --- /dev/null +++ b/tools/perf/util/llvm.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_LLVM_H +#define __PERF_LLVM_H + +#include <stdbool.h> +#include <linux/types.h> + +struct annotate_args; +struct dso; +struct inline_node; +struct symbol; + +int llvm__addr2line(const char *dso_name, u64 addr, + char **file, unsigned int *line, struct dso *dso, + bool unwind_inlines, struct inline_node *node, + struct symbol *sym); + +int symbol__disassemble_llvm(const char *filename, struct symbol *sym, + struct annotate_args *args); + +#endif /* __PERF_LLVM_H */ diff --git a/tools/perf/util/lock-contention.c b/tools/perf/util/lock-contention.c new file mode 100644 index 000000000000..92e7b7b572a2 --- /dev/null +++ b/tools/perf/util/lock-contention.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "debug.h" +#include "env.h" +#include "lock-contention.h" +#include "machine.h" +#include "symbol.h" + +#include <limits.h> +#include <string.h> + +#include <linux/hash.h> +#include <linux/zalloc.h> + +#define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS) +#define lockhashentry(key) (lockhash_table + __lockhashfn((key))) + +struct callstack_filter { + struct list_head list; + char name[]; +}; + +static LIST_HEAD(callstack_filters); +struct hlist_head *lockhash_table; + +int parse_call_stack(const struct option *opt __maybe_unused, const char *str, + int unset __maybe_unused) +{ + char *s, *tmp, *tok; + int ret = 0; + + s = strdup(str); + if (s == NULL) + return -1; + + for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { + struct callstack_filter *entry; + + entry = malloc(sizeof(*entry) + strlen(tok) + 1); + if (entry == NULL) { + pr_err("Memory allocation failure\n"); + free(s); + return -1; + } + + strcpy(entry->name, tok); + list_add_tail(&entry->list, &callstack_filters); + } + + free(s); + 
return ret; +} + +bool needs_callstack(void) +{ + return !list_empty(&callstack_filters); +} + +struct lock_stat *lock_stat_find(u64 addr) +{ + struct hlist_head *entry = lockhashentry(addr); + struct lock_stat *ret; + + hlist_for_each_entry(ret, entry, hash_entry) { + if (ret->addr == addr) + return ret; + } + return NULL; +} + +struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags) +{ + struct hlist_head *entry = lockhashentry(addr); + struct lock_stat *ret, *new; + + hlist_for_each_entry(ret, entry, hash_entry) { + if (ret->addr == addr) + return ret; + } + + new = zalloc(sizeof(struct lock_stat)); + if (!new) + goto alloc_failed; + + new->addr = addr; + new->name = strdup(name); + if (!new->name) { + free(new); + goto alloc_failed; + } + + new->flags = flags; + new->wait_time_min = ULLONG_MAX; + + hlist_add_head(&new->hash_entry, entry); + return new; + +alloc_failed: + pr_err("memory allocation failed\n"); + return NULL; +} + +bool match_callstack_filter(struct machine *machine, u64 *callstack, int max_stack_depth) +{ + struct map *kmap; + struct symbol *sym; + u64 ip; + const char *arch = perf_env__arch(machine->env); + + if (list_empty(&callstack_filters)) + return true; + + for (int i = 0; i < max_stack_depth; i++) { + struct callstack_filter *filter; + + /* + * On powerpc, the callchain saved by the kernel always includes + * the first three entries as the NIP (next instruction pointer), + * LR (link register), and the contents of the LR save area in the + * second stack frame. In certain scenarios it's possible to have + * invalid kernel instruction addresses in either LR or the second + * stack frame's LR. In that case, the kernel will store that address as + * zero. + * + * The check below continues to walk the callstack, + * in case the first or second callstack entry has a 0 + * address on powerpc. 
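+ * + * That is, a zero entry at index 1 or 2 keeps the walk going on + * powerpc, while a zero at any other index, or on any other arch, + * stops it.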
+ */ + if (!callstack || (!callstack[i] && (strcmp(arch, "powerpc") || + (i != 1 && i != 2)))) + break; + + ip = callstack[i]; + sym = machine__find_kernel_symbol(machine, ip, &kmap); + if (sym == NULL) + continue; + + list_for_each_entry(filter, &callstack_filters, list) { + if (strstr(sym->name, filter->name)) + return true; + } + } + return false; +} diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index fa16532c971c..59c94190b092 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -9,9 +9,19 @@ struct lock_filter { int nr_types; int nr_addrs; int nr_syms; + int nr_cgrps; + int nr_slabs; unsigned int *types; unsigned long *addrs; char **syms; + u64 *cgrps; + char **slabs; +}; + +struct lock_delay { + char *sym; + unsigned long addr; + unsigned long time; }; struct lock_stat { @@ -65,10 +75,11 @@ struct lock_stat { */ #define MAX_LOCK_DEPTH 48 -struct lock_stat *lock_stat_find(u64 addr); -struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags); +/* based on kernel/lockdep.c */ +#define LOCKHASH_BITS 12 +#define LOCKHASH_SIZE (1UL << LOCKHASH_BITS) -bool match_callstack_filter(struct machine *machine, u64 *callstack); +extern struct hlist_head *lockhash_table; /* * struct lock_seq_stat: @@ -135,23 +146,38 @@ struct lock_contention { struct machine *machine; struct hlist_head *result; struct lock_filter *filters; + struct lock_delay *delays; struct lock_contention_fails fails; + struct rb_root cgroups; + void *btf; unsigned long map_nr_entries; int max_stack; int stack_skip; int aggr_mode; int owner; int nr_filtered; + int nr_delays; bool save_callstack; }; -#ifdef HAVE_BPF_SKEL +struct option; +int parse_call_stack(const struct option *opt, const char *str, int unset); +bool needs_callstack(void); + +struct lock_stat *lock_stat_find(u64 addr); +struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags); + +bool match_callstack_filter(struct machine *machine, u64 *callstack, int max_stack_depth); + +#ifdef HAVE_BPF_SKEL int lock_contention_prepare(struct lock_contention *con); int lock_contention_start(void); int lock_contention_stop(void); int lock_contention_read(struct lock_contention *con); -int lock_contention_finish(void); +int lock_contention_finish(struct lock_contention *con); + +struct lock_stat *pop_owner_stack_trace(struct lock_contention *con); #else /* !HAVE_BPF_SKEL */ @@ -162,13 +188,21 @@ static inline int lock_contention_prepare(struct lock_contention *con __maybe_un static inline int lock_contention_start(void) { return 0; } static inline int lock_contention_stop(void) { return 0; } -static inline int lock_contention_finish(void) { return 0; } +static inline int lock_contention_finish(struct lock_contention *con __maybe_unused) +{ + return 0; +} static inline int lock_contention_read(struct lock_contention *con __maybe_unused) { return 0; } +static inline struct lock_stat *pop_owner_stack_trace(struct lock_contention *con __maybe_unused) +{ + return NULL; +} + #endif /* HAVE_BPF_SKEL */ #endif /* PERF_LOCK_CONTENTION_H */ diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c index 51424cdc3b68..c355757ed391 100644 --- a/tools/perf/util/lzma.c +++ b/tools/perf/util/lzma.c @@ -32,7 +32,7 @@ static const char *lzma_strerror(lzma_ret ret) } } -int lzma_decompress_to_file(const char *input, int output_fd) +int lzma_decompress_stream_to_file(FILE *infile, int output_fd) { lzma_action action = LZMA_RUN; lzma_stream strm = LZMA_STREAM_INIT; @@ -41,20 +41,11 @@ int 
lzma_decompress_to_file(const char *input, int output_fd) u8 buf_in[BUFSIZE]; u8 buf_out[BUFSIZE]; - FILE *infile; - - infile = fopen(input, "rb"); - if (!infile) { - pr_err("lzma: fopen failed on %s: '%s'\n", - input, strerror(errno)); - return -1; - } ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED); if (ret != LZMA_OK) { - pr_err("lzma: lzma_stream_decoder failed %s (%d)\n", - lzma_strerror(ret), ret); - goto err_fclose; + pr_debug("lzma: lzma_stream_decoder failed %s (%d)\n", lzma_strerror(ret), ret); + return err; } strm.next_in = NULL; @@ -68,7 +59,7 @@ int lzma_decompress_to_file(const char *input, int output_fd) strm.avail_in = fread(buf_in, 1, sizeof(buf_in), infile); if (ferror(infile)) { - pr_err("lzma: read error: %s\n", strerror(errno)); + pr_debug("lzma: read error: %s\n", strerror(errno)); goto err_lzma_end; } @@ -82,7 +73,7 @@ int lzma_decompress_to_file(const char *input, int output_fd) ssize_t write_size = sizeof(buf_out) - strm.avail_out; if (writen(output_fd, buf_out, write_size) != write_size) { - pr_err("lzma: write error: %s\n", strerror(errno)); + pr_debug("lzma: write error: %s\n", strerror(errno)); goto err_lzma_end; } @@ -94,7 +85,7 @@ int lzma_decompress_to_file(const char *input, int output_fd) if (ret == LZMA_STREAM_END) break; - pr_err("lzma: failed %s\n", lzma_strerror(ret)); + pr_debug("lzma: failed %s\n", lzma_strerror(ret)); goto err_lzma_end; } } @@ -102,11 +93,25 @@ int lzma_decompress_to_file(const char *input, int output_fd) err = 0; err_lzma_end: lzma_end(&strm); -err_fclose: - fclose(infile); return err; } +int lzma_decompress_to_file(const char *input, int output_fd) +{ + FILE *infile; + int ret; + + infile = fopen(input, "rb"); + if (!infile) { + pr_debug("lzma: fopen failed on %s: '%s'\n", input, strerror(errno)); + return -1; + } + + ret = lzma_decompress_stream_to_file(infile, output_fd); + fclose(infile); + return ret; +} + bool lzma_is_compressed(const char *input) { int fd = open(input, O_RDONLY); @@ -115,7 +120,7 @@ bool lzma_is_compressed(const char *input) ssize_t rc; if (fd < 0) - return -1; + return false; rc = read(fd, buf, sizeof(buf)); close(fd); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index f4cb41ee23cd..841b711d970e 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -16,9 +16,11 @@ #include "map_symbol.h" #include "branch.h" #include "mem-events.h" +#include "mem-info.h" #include "path.h" #include "srcline.h" #include "symbol.h" +#include "synthetic-events.h" #include "sort.h" #include "strlist.h" #include "target.h" @@ -36,6 +38,7 @@ #include <internal/lib.h> // page_size #include "cgroup.h" #include "arm64-frame-pointer-unwind-support.h" +#include <api/io_dir.h> #include <linux/ctype.h> #include <symbol/kallsyms.h> @@ -43,51 +46,11 @@ #include <linux/string.h> #include <linux/zalloc.h> -static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd, - struct thread *th, bool lock); - static struct dso *machine__kernel_dso(struct machine *machine) { return map__dso(machine->vmlinux_map); } -static void dsos__init(struct dsos *dsos) -{ - INIT_LIST_HEAD(&dsos->head); - dsos->root = RB_ROOT; - init_rwsem(&dsos->lock); -} - -static void machine__threads_init(struct machine *machine) -{ - int i; - - for (i = 0; i < THREADS__TABLE_SIZE; i++) { - struct threads *threads = &machine->threads[i]; - threads->entries = RB_ROOT_CACHED; - init_rwsem(&threads->lock); - threads->nr = 0; - INIT_LIST_HEAD(&threads->dead); - threads->last_match = NULL; - } -} - 
-static int thread_rb_node__cmp_tid(const void *key, const struct rb_node *nd) -{ - int to_find = (int) *((pid_t *)key); - - return to_find - (int)thread__tid(rb_entry(nd, struct thread_rb_node, rb_node)->thread); -} - -static struct thread_rb_node *thread_rb_node__find(const struct thread *th, - struct rb_root *tree) -{ - pid_t to_find = thread__tid(th); - struct rb_node *nd = rb_find(&to_find, tree, thread_rb_node__cmp_tid); - - return rb_entry(nd, struct thread_rb_node, rb_node); -} - static int machine__set_mmap_name(struct machine *machine) { if (machine__is_host(machine)) @@ -121,7 +84,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) RB_CLEAR_NODE(&machine->rb_node); dsos__init(&machine->dsos); - machine__threads_init(machine); + threads__init(&machine->threads); machine->vdso_info = NULL; machine->env = NULL; @@ -133,6 +96,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->comm_exec = false; machine->kernel_start = 0; machine->vmlinux_map = NULL; + /* There is no initial context switch in, so we start at 1. */ + machine->parallelism = 1; machine->root_dir = strdup(root_dir); if (machine->root_dir == NULL) @@ -164,26 +129,62 @@ out: return 0; } -struct machine *machine__new_host(void) +static struct machine *__machine__new_host(struct perf_env *host_env, bool kernel_maps) { struct machine *machine = malloc(sizeof(*machine)); - if (machine != NULL) { - machine__init(machine, "", HOST_KERNEL_ID); + if (!machine) + return NULL; - if (machine__create_kernel_maps(machine) < 0) - goto out_delete; + machine__init(machine, "", HOST_KERNEL_ID); + + if (kernel_maps && machine__create_kernel_maps(machine) < 0) { + free(machine); + return NULL; } + machine->env = host_env; + return machine; +} + +struct machine *machine__new_host(struct perf_env *host_env) +{ + return __machine__new_host(host_env, /*kernel_maps=*/true); +} + +static int mmap_handler(const struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + return machine__process_mmap2_event(machine, event, sample); +} + +static int machine__init_live(struct machine *machine, pid_t pid) +{ + union perf_event event; + + memset(&event, 0, sizeof(event)); + return perf_event__synthesize_mmap_events(NULL, &event, pid, pid, + mmap_handler, machine, true); +} + +struct machine *machine__new_live(struct perf_env *host_env, bool kernel_maps, pid_t pid) +{ + struct machine *machine = __machine__new_host(host_env, kernel_maps); + + if (!machine) + return NULL; + if (machine__init_live(machine, pid)) { + machine__delete(machine); + return NULL; + } return machine; -out_delete: - free(machine); - return NULL; } -struct machine *machine__new_kallsyms(void) +struct machine *machine__new_kallsyms(struct perf_env *host_env) { - struct machine *machine = machine__new_host(); + struct machine *machine = machine__new_host(host_env); /* * FIXME: * 1) We should switch to machine__load_kallsyms(), i.e. 
not explicitly @@ -198,51 +199,13 @@ struct machine *machine__new_kallsyms(void) return machine; } -static void dsos__purge(struct dsos *dsos) -{ - struct dso *pos, *n; - - down_write(&dsos->lock); - - list_for_each_entry_safe(pos, n, &dsos->head, node) { - RB_CLEAR_NODE(&pos->rb_node); - pos->root = NULL; - list_del_init(&pos->node); - dso__put(pos); - } - - up_write(&dsos->lock); -} - -static void dsos__exit(struct dsos *dsos) -{ - dsos__purge(dsos); - exit_rwsem(&dsos->lock); -} - void machine__delete_threads(struct machine *machine) { - struct rb_node *nd; - int i; - - for (i = 0; i < THREADS__TABLE_SIZE; i++) { - struct threads *threads = &machine->threads[i]; - down_write(&threads->lock); - nd = rb_first_cached(&threads->entries); - while (nd) { - struct thread_rb_node *trb = rb_entry(nd, struct thread_rb_node, rb_node); - - nd = rb_next(nd); - __machine__remove_thread(machine, trb, trb->thread, false); - } - up_write(&threads->lock); - } + threads__remove_all_threads(&machine->threads); } void machine__exit(struct machine *machine) { - int i; - if (machine == NULL) return; @@ -255,12 +218,7 @@ void machine__exit(struct machine *machine) zfree(&machine->current_tid); zfree(&machine->kallsyms_filename); - machine__delete_threads(machine); - for (i = 0; i < THREADS__TABLE_SIZE; i++) { - struct threads *threads = &machine->threads[i]; - - exit_rwsem(&threads->lock); - } + threads__exit(&machine->threads); } void machine__delete(struct machine *machine) @@ -441,7 +399,7 @@ static struct thread *findnew_guest_code(struct machine *machine, return NULL; /* Assume maps are set up if there are any */ - if (maps__nr_maps(thread__maps(thread))) + if (!maps__empty(thread__maps(thread))) return thread; host_thread = machine__find_thread(host_machine, -1, pid); @@ -454,7 +412,7 @@ static struct thread *findnew_guest_code(struct machine *machine, * Guest code can be found in hypervisor process at the same address * so copy host maps. 
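* (The maps__copy_from() call below copies the host entries into the guest * thread's own maps rather than sharing the host maps object, so the two * can diverge afterwards.)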
*/ - err = maps__clone(thread, thread__maps(host_thread)); + err = maps__copy_from(thread__maps(thread), thread__maps(host_thread)); thread__put(host_thread); if (err) goto out_err; @@ -527,7 +485,7 @@ static void machine__update_thread_pid(struct machine *machine, if (thread__pid(th) == thread__tid(th)) return; - leader = __machine__findnew_thread(machine, thread__pid(th), thread__pid(th)); + leader = machine__findnew_thread(machine, thread__pid(th), thread__pid(th)); if (!leader) goto out_err; @@ -562,159 +520,55 @@ out_err: } /* - * Front-end cache - TID lookups come in blocks, - * so most of the time we dont have to look up - * the full rbtree: - */ -static struct thread* -__threads__get_last_match(struct threads *threads, struct machine *machine, - int pid, int tid) -{ - struct thread *th; - - th = threads->last_match; - if (th != NULL) { - if (thread__tid(th) == tid) { - machine__update_thread_pid(machine, th, pid); - return thread__get(th); - } - thread__put(threads->last_match); - threads->last_match = NULL; - } - - return NULL; -} - -static struct thread* -threads__get_last_match(struct threads *threads, struct machine *machine, - int pid, int tid) -{ - struct thread *th = NULL; - - if (perf_singlethreaded) - th = __threads__get_last_match(threads, machine, pid, tid); - - return th; -} - -static void -__threads__set_last_match(struct threads *threads, struct thread *th) -{ - thread__put(threads->last_match); - threads->last_match = thread__get(th); -} - -static void -threads__set_last_match(struct threads *threads, struct thread *th) -{ - if (perf_singlethreaded) - __threads__set_last_match(threads, th); -} - -/* * Caller must eventually drop thread->refcnt returned with a successful * lookup/new thread inserted. */ -static struct thread *____machine__findnew_thread(struct machine *machine, - struct threads *threads, - pid_t pid, pid_t tid, - bool create) +static struct thread *__machine__findnew_thread(struct machine *machine, + pid_t pid, + pid_t tid, + bool create) { - struct rb_node **p = &threads->entries.rb_root.rb_node; - struct rb_node *parent = NULL; - struct thread *th; - struct thread_rb_node *nd; - bool leftmost = true; + struct thread *th = threads__find(&machine->threads, tid); + bool created; - th = threads__get_last_match(threads, machine, pid, tid); - if (th) + if (th) { + machine__update_thread_pid(machine, th, pid); return th; - - while (*p != NULL) { - parent = *p; - th = rb_entry(parent, struct thread_rb_node, rb_node)->thread; - - if (thread__tid(th) == tid) { - threads__set_last_match(threads, th); - machine__update_thread_pid(machine, th, pid); - return thread__get(th); - } - - if (tid < thread__tid(th)) - p = &(*p)->rb_left; - else { - p = &(*p)->rb_right; - leftmost = false; - } } - if (!create) return NULL; - th = thread__new(pid, tid); - if (th == NULL) - return NULL; - - nd = malloc(sizeof(*nd)); - if (nd == NULL) { - thread__put(th); - return NULL; - } - nd->thread = th; - - rb_link_node(&nd->rb_node, parent, p); - rb_insert_color_cached(&nd->rb_node, &threads->entries, leftmost); - /* - * We have to initialize maps separately after rb tree is updated. - * - * The reason is that we call machine__findnew_thread within - * thread__init_maps to find the thread leader and that would screwed - * the rb tree. 
- */ - if (thread__init_maps(th, machine)) { - pr_err("Thread init failed thread %d\n", pid); - rb_erase_cached(&nd->rb_node, &threads->entries); - RB_CLEAR_NODE(&nd->rb_node); - free(nd); - thread__put(th); - return NULL; - } - /* - * It is now in the rbtree, get a ref - */ - threads__set_last_match(threads, th); - ++threads->nr; - - return thread__get(th); -} + th = threads__findnew(&machine->threads, pid, tid, &created); + if (created) { + /* + * We have to initialize maps separately after rb tree is + * updated. + * + * The reason is that we call machine__findnew_thread within + * thread__init_maps to find the thread leader and that would + * screw up the rb tree. + */ + if (thread__init_maps(th, machine)) { + pr_err("Thread init failed thread %d\n", pid); + threads__remove(&machine->threads, th); + thread__put(th); + return NULL; + } + } else + machine__update_thread_pid(machine, th, pid); -struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) -{ - return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true); + return th; } -struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, - pid_t tid) +struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) { - struct threads *threads = machine__threads(machine, tid); - struct thread *th; - - down_write(&threads->lock); - th = __machine__findnew_thread(machine, pid, tid); - up_write(&threads->lock); - return th; + return __machine__findnew_thread(machine, pid, tid, /*create=*/true); } struct thread *machine__find_thread(struct machine *machine, pid_t pid, pid_t tid) { - struct threads *threads = machine__threads(machine, tid); - struct thread *th; - - down_read(&threads->lock); - th = ____machine__findnew_thread(machine, threads, pid, tid, false); - up_read(&threads->lock); - return th; + return __machine__findnew_thread(machine, pid, tid, /*create=*/false); } /* @@ -828,36 +682,12 @@ int machine__process_lost_event(struct machine *machine __maybe_unused, int machine__process_lost_samples_event(struct machine *machine __maybe_unused, union perf_event *event, struct perf_sample *sample) { - dump_printf(": id:%" PRIu64 ": lost samples :%" PRI_lu64 "\n", - sample->id, event->lost_samples.lost); + dump_printf(": id:%" PRIu64 ": lost samples :%" PRI_lu64 "%s\n", + sample->id, event->lost_samples.lost, + event->header.misc & PERF_RECORD_MISC_LOST_SAMPLES_BPF ? " (BPF)" : ""); return 0; } -static struct dso *machine__findnew_module_dso(struct machine *machine, - struct kmod_path *m, - const char *filename) -{ - struct dso *dso; - - down_write(&machine->dsos.lock); - - dso = __dsos__find(&machine->dsos, m->name, true); - if (!dso) { - dso = __dsos__addnew(&machine->dsos, m->name); - if (dso == NULL) - goto out_unlock; - - dso__set_module_info(dso, m, machine); - dso__set_long_name(dso, strdup(filename), true); - dso->kernel = DSO_SPACE__KERNEL; - } - - dso__get(dso); -out_unlock: - up_write(&machine->dsos.lock); - return dso; -} - int machine__process_aux_event(struct machine *machine __maybe_unused, union perf_event *event) { @@ -885,8 +715,11 @@ int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unus int machine__process_switch_event(struct machine *machine __maybe_unused, union perf_event *event) { + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + if (dump_trace) perf_event__fprintf_switch(event, stdout); + machine->parallelism += out ? 
-1 : 1; return 0; } @@ -895,9 +728,8 @@ static int machine__process_ksymbol_register(struct machine *machine, struct perf_sample *sample __maybe_unused) { struct symbol *sym; - struct dso *dso; + struct dso *dso = NULL; struct map *map = maps__find(machine__kernel_maps(machine), event->ksymbol.addr); - bool put_map = false; int err = 0; if (!map) { @@ -907,28 +739,21 @@ static int machine__process_ksymbol_register(struct machine *machine, err = -ENOMEM; goto out; } - dso->kernel = DSO_SPACE__KERNEL; + dso__set_kernel(dso, DSO_SPACE__KERNEL); map = map__new2(0, dso); - dso__put(dso); if (!map) { err = -ENOMEM; goto out; } - /* - * The inserted map has a get on it, we need to put to release - * the reference count here, but do it after all accesses are - * done. - */ - put_map = true; if (event->ksymbol.ksym_type == PERF_RECORD_KSYMBOL_TYPE_OOL) { - dso->binary_type = DSO_BINARY_TYPE__OOL; - dso->data.file_size = event->ksymbol.len; + dso__set_binary_type(dso, DSO_BINARY_TYPE__OOL); + dso__data(dso)->file_size = event->ksymbol.len; dso__set_loaded(dso); } map__set_start(map, event->ksymbol.addr); map__set_end(map, map__start(map) + event->ksymbol.len); - err = maps__insert(machine__kernel_maps(machine), map); + err = maps__fixup_overlap_and_insert(machine__kernel_maps(machine), map); if (err) { err = -ENOMEM; goto out; @@ -937,11 +762,11 @@ static int machine__process_ksymbol_register(struct machine *machine, dso__set_loaded(dso); if (is_bpf_image(event->ksymbol.name)) { - dso->binary_type = DSO_BINARY_TYPE__BPF_IMAGE; + dso__set_binary_type(dso, DSO_BINARY_TYPE__BPF_IMAGE); dso__set_long_name(dso, "", false); } } else { - dso = map__dso(map); + dso = dso__get(map__dso(map)); } sym = symbol__new(map__map_ip(map, map__start(map)), @@ -953,8 +778,8 @@ static int machine__process_ksymbol_register(struct machine *machine, } dso__insert_symbol(dso, sym); out: - if (put_map) - map__put(map); + map__put(map); + dso__put(dso); return err; } @@ -969,7 +794,7 @@ static int machine__process_ksymbol_unregister(struct machine *machine, if (!map) return 0; - if (RC_CHK_ACCESS(map) != RC_CHK_ACCESS(machine->vmlinux_map)) + if (!RC_CHK_EQUAL(map, machine->vmlinux_map)) maps__remove(machine__kernel_maps(machine), map); else { struct dso *dso = map__dso(map); @@ -978,7 +803,7 @@ static int machine__process_ksymbol_unregister(struct machine *machine, if (sym) dso__delete_symbol(dso, sym); } - + map__put(map); return 0; } @@ -989,6 +814,10 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused, if (dump_trace) perf_event__fprintf_ksymbol(event, stdout); + /* no need to process non-JIT BPF as it cannot get samples */ + if (event->ksymbol.len == 0) + return 0; + if (event->ksymbol.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER) return machine__process_ksymbol_unregister(machine, event, sample); @@ -1006,11 +835,11 @@ int machine__process_text_poke(struct machine *machine, union perf_event *event, perf_event__fprintf_text_poke(event, machine, stdout); if (!event->text_poke.new_len) - return 0; + goto out; if (cpumode != PERF_RECORD_MISC_KERNEL) { pr_debug("%s: unsupported cpumode - ignoring\n", __func__); - return 0; + goto out; } if (dso) { @@ -1033,7 +862,8 @@ int machine__process_text_poke(struct machine *machine, union perf_event *event, pr_debug("Failed to find kernel text poke address map for %#" PRI_lx64 "\n", event->text_poke.addr); } - +out: + map__put(map); return 0; } @@ -1048,7 +878,7 @@ static struct map *machine__addnew_module_map(struct machine *machine, u64 start if 
(kmod_path__parse_name(&m, filename)) return NULL; - dso = machine__findnew_module_dso(machine, &m, filename); + dso = dsos__findnew_module_dso(&machine->dsos, machine, &m, filename); if (dso == NULL) goto out; @@ -1072,11 +902,11 @@ out: size_t machines__fprintf_dsos(struct machines *machines, FILE *fp) { struct rb_node *nd; - size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp); + size_t ret = dsos__fprintf(&machines->host.dsos, fp); for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret += __dsos__fprintf(&pos->dsos.head, fp); + ret += dsos__fprintf(&pos->dsos, fp); } return ret; @@ -1085,7 +915,7 @@ size_t machines__fprintf_dsos(struct machines *machines, FILE *fp) size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm) { - return __dsos__fprintf_buildid(&m->dsos.head, fp, skip, parm); + return dsos__fprintf_buildid(&m->dsos, fp, skip, parm); } size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, @@ -1101,49 +931,30 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, return ret; } -size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) -{ - int i; - size_t printed = 0; - struct dso *kdso = machine__kernel_dso(machine); - - if (kdso->has_build_id) { - char filename[PATH_MAX]; - if (dso__build_id_filename(kdso, filename, sizeof(filename), - false)) - printed += fprintf(fp, "[0] %s\n", filename); - } +struct machine_fprintf_cb_args { + FILE *fp; + size_t printed; +}; - for (i = 0; i < vmlinux_path__nr_entries; ++i) - printed += fprintf(fp, "[%d] %s\n", - i + kdso->has_build_id, vmlinux_path[i]); +static int machine_fprintf_cb(struct thread *thread, void *data) +{ + struct machine_fprintf_cb_args *args = data; - return printed; + /* TODO: handle fprintf errors. 
*/ + args->printed += thread__fprintf(thread, args->fp); + return 0; } size_t machine__fprintf(struct machine *machine, FILE *fp) { - struct rb_node *nd; - size_t ret; - int i; - - for (i = 0; i < THREADS__TABLE_SIZE; i++) { - struct threads *threads = &machine->threads[i]; - - down_read(&threads->lock); - - ret = fprintf(fp, "Threads: %u\n", threads->nr); - - for (nd = rb_first_cached(&threads->entries); nd; - nd = rb_next(nd)) { - struct thread *pos = rb_entry(nd, struct thread_rb_node, rb_node)->thread; - - ret += thread__fprintf(pos, fp); - } + struct machine_fprintf_cb_args args = { + .fp = fp, + .printed = 0, + }; + size_t ret = fprintf(fp, "Threads: %zu\n", threads__nr(&machine->threads)); - up_read(&threads->lock); - } - return ret; + machine__for_each_thread(machine, machine_fprintf_cb, &args); + return ret + args.printed; } static struct dso *machine__get_kernel(struct machine *machine) @@ -1166,7 +977,7 @@ static struct dso *machine__get_kernel(struct machine *machine) DSO_SPACE__KERNEL_GUEST); } - if (kernel != NULL && (!kernel->has_build_id)) + if (kernel != NULL && (!dso__has_build_id(kernel))) dso__read_running_kernel_build_id(kernel, machine); return kernel; @@ -1215,7 +1026,9 @@ static int machine__get_running_kernel_start(struct machine *machine, *start = addr; - err = kallsyms__get_function_start(filename, "_etext", &addr); + err = kallsyms__get_symbol_start(filename, "_edata", &addr); + if (err) + err = kallsyms__get_symbol_start(filename, "_etext", &addr); if (!err) *end = addr; @@ -1284,33 +1097,47 @@ static u64 find_entry_trampoline(struct dso *dso) #define X86_64_CPU_ENTRY_AREA_SIZE 0x2c000 #define X86_64_ENTRY_TRAMPOLINE 0x6000 +struct machine__map_x86_64_entry_trampolines_args { + struct maps *kmaps; + bool found; +}; + +static int machine__map_x86_64_entry_trampolines_cb(struct map *map, void *data) +{ + struct machine__map_x86_64_entry_trampolines_args *args = data; + struct map *dest_map; + struct kmap *kmap = __map__kmap(map); + + if (!kmap || !is_entry_trampoline(kmap->name)) + return 0; + + dest_map = maps__find(args->kmaps, map__pgoff(map)); + if (RC_CHK_ACCESS(dest_map) != RC_CHK_ACCESS(map)) + map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map))); + + map__put(dest_map); + args->found = true; + return 0; +} + /* Map x86_64 PTI entry trampolines */ int machine__map_x86_64_entry_trampolines(struct machine *machine, struct dso *kernel) { - struct maps *kmaps = machine__kernel_maps(machine); + struct machine__map_x86_64_entry_trampolines_args args = { + .kmaps = machine__kernel_maps(machine), + .found = false, + }; int nr_cpus_avail, cpu; - bool found = false; - struct map_rb_node *rb_node; u64 pgoff; /* * In the vmlinux case, pgoff is a virtual address which must now be * mapped to a vmlinux offset. 
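* machine__map_x86_64_entry_trampolines_cb() above does that: it looks up * the kernel map containing the trampoline map's pgoff (a kernel virtual * address) and rewrites pgoff via map__map_ip() as an address relative to * that map's DSO.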
*/ - maps__for_each_entry(kmaps, rb_node) { - struct map *dest_map, *map = rb_node->map; - struct kmap *kmap = __map__kmap(map); - - if (!kmap || !is_entry_trampoline(kmap->name)) - continue; + maps__for_each_map(args.kmaps, machine__map_x86_64_entry_trampolines_cb, &args); - dest_map = maps__find(kmaps, map__pgoff(map)); - if (dest_map != map) - map__set_pgoff(map, map__map_ip(dest_map, map__pgoff(map))); - found = true; - } - if (found || machine->trampolines_mapped) + if (args.found || machine->trampolines_mapped) return 0; pgoff = find_entry_trampoline(kernel); @@ -1358,8 +1185,7 @@ __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) if (machine->vmlinux_map == NULL) return -ENOMEM; - map__set_map_ip(machine->vmlinux_map, identity__map_ip); - map__set_unmap_ip(machine->vmlinux_map, identity__map_ip); + map__set_mapping_type(machine->vmlinux_map, MAPPING_TYPE__IDENTITY); return maps__insert(machine__kernel_maps(machine), machine->vmlinux_map); } @@ -1516,8 +1342,8 @@ static char *get_kernel_version(const char *root_dir) static bool is_kmod_dso(struct dso *dso) { - return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || - dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE; + return dso__symtab_type(dso) == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || + dso__symtab_type(dso) == DSO_BINARY_TYPE__GUEST_KMODULE; } static int maps__set_module_path(struct maps *maps, const char *path, struct kmod_path *m) @@ -1530,8 +1356,10 @@ static int maps__set_module_path(struct maps *maps, const char *path, struct kmo return 0; long_name = strdup(path); - if (long_name == NULL) + if (long_name == NULL) { + map__put(map); return -ENOMEM; + } dso = map__dso(map); dso__set_long_name(dso, long_name, true); @@ -1542,34 +1370,31 @@ static int maps__set_module_path(struct maps *maps, const char *path, struct kmo * we need to update the symtab_type if needed. */ if (m->comp && is_kmod_dso(dso)) { - dso->symtab_type++; - dso->comp = m->comp; + dso__set_symtab_type(dso, dso__symtab_type(dso)+1); + dso__set_comp(dso, m->comp); } - + map__put(map); return 0; } -static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, int depth) +static int maps__set_modules_path_dir(struct maps *maps, char *path, size_t path_size, int depth) { - struct dirent *dent; - DIR *dir = opendir(dir_name); + struct io_dirent64 *dent; + struct io_dir iod; + size_t root_len = strlen(path); int ret = 0; - if (!dir) { - pr_debug("%s: cannot open %s dir\n", __func__, dir_name); + io_dir__init(&iod, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (iod.dirfd < 0) { + pr_debug("%s: cannot open %s dir\n", __func__, path); return -1; } - - while ((dent = readdir(dir)) != NULL) { - char path[PATH_MAX]; - struct stat st; - - /*sshfs might return bad dent->d_type, so we have to stat*/ - path__join(path, sizeof(path), dir_name, dent->d_name); - if (stat(path, &st)) - continue; - - if (S_ISDIR(st.st_mode)) { + /* Bounds check, should never happen. */ + if (root_len >= path_size) + return -1; + path[root_len++] = '/'; + while ((dent = io_dir__readdir(&iod)) != NULL) { + if (io_dir__is_dir(&iod, dent)) { if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) continue; @@ -1581,7 +1406,12 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i continue; } - ret = maps__set_modules_path_dir(maps, path, depth + 1); + /* Bounds check, should never happen. 
*/ + if (root_len + strlen(dent->d_name) >= path_size) + continue; + + strcpy(path + root_len, dent->d_name); + ret = maps__set_modules_path_dir(maps, path, path_size, depth + 1); if (ret < 0) goto out; } else { @@ -1591,9 +1421,14 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i if (ret) goto out; - if (m.kmod) - ret = maps__set_module_path(maps, path, &m); + if (m.kmod) { + /* Bounds check, should never happen. */ + if (root_len + strlen(dent->d_name) < path_size) { + strcpy(path + root_len, dent->d_name); + ret = maps__set_module_path(maps, path, &m); + } + } zfree(&m.name); if (ret) @@ -1602,7 +1437,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i } out: - closedir(dir); + close(iod.dirfd); return ret; } @@ -1619,7 +1454,8 @@ static int machine__set_modules_path(struct machine *machine) machine->root_dir, version); free(version); - return maps__set_modules_path_dir(machine__kernel_maps(machine), modules_path, 0); + return maps__set_modules_path_dir(machine__kernel_maps(machine), + modules_path, sizeof(modules_path), 0); } int __weak arch__fix_module_text_start(u64 *start __maybe_unused, u64 *size __maybe_unused, @@ -1696,8 +1532,8 @@ static int machine__update_kernel_mmap(struct machine *machine, updated = map__get(orig); machine->vmlinux_map = updated; - machine__set_kernel_mmap(machine, start, end); maps__remove(machine__kernel_maps(machine), orig); + machine__set_kernel_mmap(machine, start, end); err = maps__insert(machine__kernel_maps(machine), updated); map__put(orig); @@ -1749,29 +1585,30 @@ int machine__create_kernel_maps(struct machine *machine) if (end == ~0ULL) { /* update end address of the kernel map using adjacent module address */ - struct map_rb_node *rb_node = maps__find_node(machine__kernel_maps(machine), - machine__kernel_map(machine)); - struct map_rb_node *next = map_rb_node__next(rb_node); + struct map *next = maps__find_next_entry(machine__kernel_maps(machine), + machine__kernel_map(machine)); - if (next) - machine__set_kernel_mmap(machine, start, map__start(next->map)); + if (next) { + machine__set_kernel_mmap(machine, start, map__start(next)); + map__put(next); + } } + maps__fixup_end(machine__kernel_maps(machine)); + out_put: dso__put(kernel); return ret; } -static bool machine__uses_kcore(struct machine *machine) +static int machine__uses_kcore_cb(struct dso *dso, void *data __maybe_unused) { - struct dso *dso; - - list_for_each_entry(dso, &machine->dsos.head, node) { - if (dso__is_kcore(dso)) - return true; - } + return dso__is_kcore(dso) ? 1 : 0; +} - return false; +static bool machine__uses_kcore(struct machine *machine) +{ + return dsos__for_each_dso(&machine->dsos, machine__uses_kcore_cb, NULL) != 0 ? true : false; } static bool perf_event__is_extra_kernel_mmap(struct machine *machine, @@ -1838,53 +1675,20 @@ static int machine__process_kernel_mmap_event(struct machine *machine, * Should be there already, from the build-id table in * the header. */ - struct dso *kernel = NULL; - struct dso *dso; - - down_read(&machine->dsos.lock); - - list_for_each_entry(dso, &machine->dsos.head, node) { - - /* - * The cpumode passed to is_kernel_module is not the - * cpumode of *this* event. If we insist on passing - * correct cpumode to is_kernel_module, we should - * record the cpumode when we adding this dso to the - * linked list. - * - * However we don't really need passing correct - * cpumode. 
We know the correct cpumode must be kernel - * mode (if not, we should not link it onto kernel_dsos - * list). - * - * Therefore, we pass PERF_RECORD_MISC_CPUMODE_UNKNOWN. - * is_kernel_module() treats it as a kernel cpumode. - */ - - if (!dso->kernel || - is_kernel_module(dso->long_name, - PERF_RECORD_MISC_CPUMODE_UNKNOWN)) - continue; - - - kernel = dso__get(dso); - break; - } - - up_read(&machine->dsos.lock); + struct dso *kernel = dsos__find_kernel_dso(&machine->dsos); if (kernel == NULL) kernel = machine__findnew_dso(machine, machine->mmap_name); if (kernel == NULL) goto out_problem; - kernel->kernel = dso_space; + dso__set_kernel(kernel, dso_space); if (__machine__create_kernel_maps(machine, kernel) < 0) { dso__put(kernel); goto out_problem; } - if (strstr(kernel->long_name, "vmlinux")) + if (strstr(dso__long_name(kernel), "vmlinux")) dso__set_short_name(kernel, "[kernel.vmlinux]", false); if (machine__update_kernel_mmap(machine, xm->start, xm->end) < 0) { @@ -1927,21 +1731,21 @@ int machine__process_mmap2_event(struct machine *machine, { struct thread *thread; struct map *map; - struct dso_id dso_id = { - .maj = event->mmap2.maj, - .min = event->mmap2.min, - .ino = event->mmap2.ino, - .ino_generation = event->mmap2.ino_generation, - }; - struct build_id __bid, *bid = NULL; + struct dso_id dso_id = dso_id_empty; int ret = 0; if (dump_trace) perf_event__fprintf_mmap2(event, stdout); if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { - bid = &__bid; - build_id__init(bid, event->mmap2.build_id, event->mmap2.build_id_size); + build_id__init(&dso_id.build_id, event->mmap2.build_id, event->mmap2.build_id_size); + } else { + dso_id.maj = event->mmap2.maj; + dso_id.min = event->mmap2.min; + dso_id.ino = event->mmap2.ino; + dso_id.ino_generation = event->mmap2.ino_generation; + dso_id.mmap2_valid = true; + dso_id.mmap2_ino_generation_valid = true; } if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL || @@ -1953,7 +1757,7 @@ int machine__process_mmap2_event(struct machine *machine, }; strlcpy(xm.name, event->mmap2.filename, KMAP_NAME_LEN); - ret = machine__process_kernel_mmap_event(machine, &xm, bid); + ret = machine__process_kernel_mmap_event(machine, &xm, &dso_id.build_id); if (ret < 0) goto out_problem; return 0; @@ -1967,7 +1771,7 @@ int machine__process_mmap2_event(struct machine *machine, map = map__new(machine, event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, &dso_id, event->mmap2.prot, - event->mmap2.flags, bid, + event->mmap2.flags, event->mmap2.filename, thread); if (map == NULL) @@ -2025,8 +1829,8 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event prot = PROT_EXEC; map = map__new(machine, event->mmap.start, - event->mmap.len, event->mmap.pgoff, - NULL, prot, 0, NULL, event->mmap.filename, thread); + event->mmap.len, event->mmap.pgoff, + &dso_id_empty, prot, /*flags=*/0, event->mmap.filename, thread); if (map == NULL) goto out_problem_map; @@ -2048,36 +1852,9 @@ out_problem: return 0; } -static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd, - struct thread *th, bool lock) -{ - struct threads *threads = machine__threads(machine, thread__tid(th)); - - if (!nd) - nd = thread_rb_node__find(th, &threads->entries.rb_root); - - if (threads->last_match && RC_CHK_ACCESS(threads->last_match) == RC_CHK_ACCESS(th)) - threads__set_last_match(threads, NULL); - - if (lock) - down_write(&threads->lock); - - BUG_ON(refcount_read(thread__refcnt(th)) == 0); - - thread__put(nd->thread); - rb_erase_cached(&nd->rb_node, 
&threads->entries); - RB_CLEAR_NODE(&nd->rb_node); - --threads->nr; - - free(nd); - - if (lock) - up_write(&threads->lock); -} - void machine__remove_thread(struct machine *machine, struct thread *th) { - return __machine__remove_thread(machine, NULL, th, true); + return threads__remove(&machine->threads, th); } int machine__process_fork_event(struct machine *machine, union perf_event *event, @@ -2156,9 +1933,15 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event if (dump_trace) perf_event__fprintf_task(event, stdout); - if (thread != NULL) - thread__put(thread); - + /* There is no context switch out before exit, so we decrement here. */ + machine->parallelism--; + if (thread != NULL) { + if (symbol_conf.keep_exited_threads) + thread__set_exited(thread, /*exited=*/true); + else + machine__remove_thread(machine, thread); + } + thread__put(thread); return 0; } @@ -2211,9 +1994,7 @@ int machine__process_event(struct machine *machine, union perf_event *event, static bool symbol__match_regex(struct symbol *sym, regex_t *regex) { - if (!regexec(regex, sym->name, 0, NULL, 0)) - return true; - return false; + return regexec(regex, sym->name, 0, NULL, 0) == 0; } static void ip__resolve_ams(struct thread *thread, @@ -2230,7 +2011,7 @@ static void ip__resolve_ams(struct thread *thread, * Thus, we have to try consecutively until we find a match * or else, the symbol is unknown */ - thread__find_cpumode_addr_location(thread, ip, &al); + thread__find_cpumode_addr_location(thread, ip, /*symbols=*/true, &al); ams->addr = ip; ams->al_addr = al.addr; @@ -2272,11 +2053,11 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, if (!mi) return NULL; - ip__resolve_ams(al->thread, &mi->iaddr, sample->ip); - ip__resolve_data(al->thread, al->cpumode, &mi->daddr, + ip__resolve_ams(al->thread, mem_info__iaddr(mi), sample->ip); + ip__resolve_data(al->thread, al->cpumode, mem_info__daddr(mi), sample->addr, sample->phys_addr, sample->data_page_size); - mi->data_src.val = sample->data_src; + mem_info__data_src(mi)->val = sample->data_src; return mi; } @@ -2291,14 +2072,14 @@ static char *callchain_srcline(struct map_symbol *ms, u64 ip) return srcline; dso = map__dso(map); - srcline = srcline__tree_find(&dso->srclines, ip); + srcline = srcline__tree_find(dso__srclines(dso), ip); if (!srcline) { bool show_sym = false; bool show_addr = callchain_param.key == CCKEY_ADDRESS; srcline = get_srcline(dso, map__rip_2objdump(map, ip), ms->sym, show_sym, show_addr, ip); - srcline__tree_insert(&dso->srclines, ip, srcline); + srcline__tree_insert(dso__srclines(dso), ip, srcline); } return srcline; @@ -2318,7 +2099,8 @@ static int add_callchain_ip(struct thread *thread, bool branch, struct branch_flags *flags, struct iterations *iter, - u64 branch_from) + u64 branch_from, + bool symbols) { struct map_symbol ms = {}; struct addr_location al; @@ -2331,7 +2113,7 @@ static int add_callchain_ip(struct thread *thread, al.sym = NULL; al.srcline = NULL; if (!cpumode) { - thread__find_cpumode_addr_location(thread, ip, &al); + thread__find_cpumode_addr_location(thread, ip, symbols, &al); } else { if (ip >= PERF_CONTEXT_MAX) { switch (ip) { @@ -2342,6 +2124,7 @@ static int add_callchain_ip(struct thread *thread, *cpumode = PERF_RECORD_MISC_KERNEL; break; case PERF_CONTEXT_USER: + case PERF_CONTEXT_USER_DEFERRED: *cpumode = PERF_RECORD_MISC_USER; break; default: @@ -2357,7 +2140,10 @@ static int add_callchain_ip(struct thread *thread, } goto out; } - thread__find_symbol(thread, *cpumode, ip, &al); + if 
(symbols) + thread__find_symbol(thread, *cpumode, ip, &al); + else + thread__find_map(thread, *cpumode, ip, &al); } if (al.sym != NULL) { @@ -2390,8 +2176,7 @@ static int add_callchain_ip(struct thread *thread, iter_cycles, branch_from, srcline); out: addr_location__exit(&al); - maps__put(ms.maps); - map__put(ms.map); + map_symbol__exit(&ms); return err; } @@ -2401,6 +2186,7 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, unsigned int i; const struct branch_stack *bs = sample->branch_stack; struct branch_entry *entries = perf_sample__branch_entries(sample); + u64 *branch_stack_cntr = sample->branch_stack_cntr; struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info)); if (!bi) @@ -2410,6 +2196,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, ip__resolve_ams(al->thread, &bi[i].to, entries[i].to); ip__resolve_ams(al->thread, &bi[i].from, entries[i].from); bi[i].flags = entries[i].flags; + if (branch_stack_cntr) + bi[i].branch_stack_cntr = branch_stack_cntr[i]; } return bi; } @@ -2484,7 +2272,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread, struct symbol **parent, struct addr_location *root_al, u64 branch_from, - bool callee, int end) + bool callee, int end, + bool symbols) { struct ip_callchain *chain = sample->callchain; u8 cpumode = PERF_RECORD_MISC_USER; @@ -2494,7 +2283,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread, for (i = 0; i < end + 1; i++) { err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, chain->ips[i], - false, NULL, NULL, branch_from); + false, NULL, NULL, branch_from, + symbols); if (err) return err; } @@ -2504,7 +2294,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread, for (i = end; i >= 0; i--) { err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, chain->ips[i], - false, NULL, NULL, branch_from); + false, NULL, NULL, branch_from, + symbols); if (err) return err; } @@ -2530,8 +2321,12 @@ static void save_lbr_cursor_node(struct thread *thread, cursor->curr = cursor->first; else cursor->curr = cursor->curr->next; + + map_symbol__exit(&lbr_stitch->prev_lbr_cursor[idx].ms); memcpy(&lbr_stitch->prev_lbr_cursor[idx], cursor->curr, sizeof(struct callchain_cursor_node)); + lbr_stitch->prev_lbr_cursor[idx].ms.maps = maps__get(cursor->curr->ms.maps); + lbr_stitch->prev_lbr_cursor[idx].ms.map = map__get(cursor->curr->ms.map); lbr_stitch->prev_lbr_cursor[idx].valid = true; cursor->pos++; @@ -2543,7 +2338,8 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread, struct symbol **parent, struct addr_location *root_al, u64 *branch_from, - bool callee) + bool callee, + bool symbols) { struct branch_stack *lbr_stack = sample->branch_stack; struct branch_entry *entries = perf_sample__branch_entries(sample); @@ -2576,7 +2372,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread, err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, true, flags, NULL, - *branch_from); + *branch_from, symbols); if (err) return err; @@ -2601,7 +2397,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread, err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, true, flags, NULL, - *branch_from); + *branch_from, symbols); if (err) return err; save_lbr_cursor_node(thread, cursor, i); @@ -2616,22 +2412,24 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread, err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, true, flags, NULL, - *branch_from); + *branch_from, symbols); if (err) return err; 
save_lbr_cursor_node(thread, cursor, i); } - /* Add LBR ip from first entries.to */ - ip = entries[0].to; - flags = &entries[0].flags; - *branch_from = entries[0].from; - err = add_callchain_ip(thread, cursor, parent, - root_al, &cpumode, ip, - true, flags, NULL, - *branch_from); - if (err) - return err; + if (lbr_nr > 0) { + /* Add LBR ip from first entries.to */ + ip = entries[0].to; + flags = &entries[0].flags; + *branch_from = entries[0].from; + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + true, flags, NULL, + *branch_from, symbols); + if (err) + return err; + } return 0; } @@ -2740,6 +2538,9 @@ static bool has_stitched_lbr(struct thread *thread, memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i], sizeof(struct callchain_cursor_node)); + stitch_node->cursor.ms.maps = maps__get(lbr_stitch->prev_lbr_cursor[i].ms.maps); + stitch_node->cursor.ms.map = map__get(lbr_stitch->prev_lbr_cursor[i].ms.map); + if (callee) list_add(&stitch_node->node, &lbr_stitch->lists); else @@ -2763,6 +2564,8 @@ static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr) if (!thread__lbr_stitch(thread)->prev_lbr_cursor) goto free_lbr_stitch; + thread__lbr_stitch(thread)->prev_lbr_cursor_size = max_lbr + 1; + INIT_LIST_HEAD(&thread__lbr_stitch(thread)->lists); INIT_LIST_HEAD(&thread__lbr_stitch(thread)->free_lists); @@ -2790,7 +2593,8 @@ static int resolve_lbr_callchain_sample(struct thread *thread, struct symbol **parent, struct addr_location *root_al, int max_stack, - unsigned int max_lbr) + unsigned int max_lbr, + bool symbols) { bool callee = (callchain_param.order == ORDER_CALLEE); struct ip_callchain *chain = sample->callchain; @@ -2818,8 +2622,12 @@ static int resolve_lbr_callchain_sample(struct thread *thread, max_lbr, callee); if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) { - list_replace_init(&lbr_stitch->lists, - &lbr_stitch->free_lists); + struct stitch_list *stitch_node; + + list_for_each_entry(stitch_node, &lbr_stitch->lists, node) + map_symbol__exit(&stitch_node->cursor.ms); + + list_splice_init(&lbr_stitch->lists, &lbr_stitch->free_lists); } memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample)); } @@ -2828,12 +2636,12 @@ static int resolve_lbr_callchain_sample(struct thread *thread, /* Add kernel ip */ err = lbr_callchain_add_kernel_ip(thread, cursor, sample, parent, root_al, branch_from, - true, i); + true, i, symbols); if (err) goto error; err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent, - root_al, &branch_from, true); + root_al, &branch_from, true, symbols); if (err) goto error; @@ -2850,14 +2658,14 @@ static int resolve_lbr_callchain_sample(struct thread *thread, goto error; } err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent, - root_al, &branch_from, false); + root_al, &branch_from, false, symbols); if (err) goto error; /* Add kernel ip */ err = lbr_callchain_add_kernel_ip(thread, cursor, sample, parent, root_al, branch_from, - false, i); + false, i, symbols); if (err) goto error; } @@ -2871,7 +2679,7 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, struct callchain_cursor *cursor, struct symbol **parent, struct addr_location *root_al, - u8 *cpumode, int ent) + u8 *cpumode, int ent, bool symbols) { int err = 0; @@ -2881,7 +2689,7 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, if (ip >= PERF_CONTEXT_MAX) { err = add_callchain_ip(thread, cursor, parent, root_al, cpumode, ip, - false, NULL, NULL, 0); + false, NULL, NULL, 0, symbols); 
break; } } @@ -2903,7 +2711,8 @@ static int thread__resolve_callchain_sample(struct thread *thread, struct perf_sample *sample, struct symbol **parent, struct addr_location *root_al, - int max_stack) + int max_stack, + bool symbols) { struct branch_stack *branch = sample->branch_stack; struct branch_entry *entries = perf_sample__branch_entries(sample); @@ -2923,7 +2732,8 @@ static int thread__resolve_callchain_sample(struct thread *thread, err = resolve_lbr_callchain_sample(thread, cursor, sample, parent, root_al, max_stack, - !env ? 0 : env->max_branches); + !env ? 0 : env->max_branches, + symbols); if (err) return (err < 0) ? err : 0; } @@ -2988,13 +2798,14 @@ static int thread__resolve_callchain_sample(struct thread *thread, root_al, NULL, be[i].to, true, &be[i].flags, - NULL, be[i].from); + NULL, be[i].from, symbols); - if (!err) + if (!err) { err = add_callchain_ip(thread, cursor, parent, root_al, NULL, be[i].from, true, &be[i].flags, - &iter[i], 0); + &iter[i], 0, symbols); + } if (err == -EINVAL) break; if (err) @@ -3010,7 +2821,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, check_calls: if (chain && callchain_param.order != ORDER_CALLEE) { err = find_prev_cpumode(chain, thread, cursor, parent, root_al, - &cpumode, chain->nr - first_call); + &cpumode, chain->nr - first_call, symbols); if (err) return (err < 0) ? err : 0; } @@ -3032,7 +2843,7 @@ check_calls: ++nr_entries; else if (callchain_param.order != ORDER_CALLEE) { err = find_prev_cpumode(chain, thread, cursor, parent, - root_al, &cpumode, j); + root_al, &cpumode, j, symbols); if (err) return (err < 0) ? err : 0; continue; @@ -3059,8 +2870,8 @@ check_calls: if (leaf_frame_caller && leaf_frame_caller != ip) { err = add_callchain_ip(thread, cursor, parent, - root_al, &cpumode, leaf_frame_caller, - false, NULL, NULL, 0); + root_al, &cpumode, leaf_frame_caller, + false, NULL, NULL, 0, symbols); if (err) return (err < 0) ? err : 0; } @@ -3068,7 +2879,7 @@ check_calls: err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, - false, NULL, NULL, 0); + false, NULL, NULL, 0, symbols); if (err) return (err < 0) ? err : 0; @@ -3095,12 +2906,12 @@ static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms addr = map__rip_2objdump(map, addr); dso = map__dso(map); - inline_node = inlines__tree_find(&dso->inlined_nodes, addr); + inline_node = inlines__tree_find(dso__inlined_nodes(dso), addr); if (!inline_node) { inline_node = dso__parse_addr_inlines(dso, addr, sym); if (!inline_node) return ret; - inlines__tree_insert(&dso->inlined_nodes, inline_node); + inlines__tree_insert(dso__inlined_nodes(dso), inline_node); } ilist_ms = (struct map_symbol) { @@ -3115,8 +2926,7 @@ static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms if (ret != 0) return ret; } - map__put(ilist_ms.map); - maps__put(ilist_ms.maps); + map_symbol__exit(&ilist_ms); return ret; } @@ -3149,7 +2959,7 @@ static int thread__resolve_callchain_unwind(struct thread *thread, struct callchain_cursor *cursor, struct evsel *evsel, struct perf_sample *sample, - int max_stack) + int max_stack, bool symbols) { /* Can we do dwarf post unwind? */ if (!((evsel->core.attr.sample_type & PERF_SAMPLE_REGS_USER) && @@ -3157,21 +2967,25 @@ static int thread__resolve_callchain_unwind(struct thread *thread, return 0; /* Bail out if nothing was captured. 
*/ - if ((!sample->user_regs.regs) || - (!sample->user_stack.size)) + if (!sample->user_regs || !sample->user_regs->regs || + !sample->user_stack.size) return 0; + if (!symbols) + pr_debug("Not resolving symbols with an unwinder isn't currently supported\n"); + return unwind__get_entries(unwind_entry, cursor, thread, sample, max_stack, false); } -int thread__resolve_callchain(struct thread *thread, - struct callchain_cursor *cursor, - struct evsel *evsel, - struct perf_sample *sample, - struct symbol **parent, - struct addr_location *root_al, - int max_stack) +int __thread__resolve_callchain(struct thread *thread, + struct callchain_cursor *cursor, + struct evsel *evsel, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack, + bool symbols) { int ret = 0; @@ -3184,22 +2998,22 @@ int thread__resolve_callchain(struct thread *thread, ret = thread__resolve_callchain_sample(thread, cursor, evsel, sample, parent, root_al, - max_stack); + max_stack, symbols); if (ret) return ret; ret = thread__resolve_callchain_unwind(thread, cursor, evsel, sample, - max_stack); + max_stack, symbols); } else { ret = thread__resolve_callchain_unwind(thread, cursor, evsel, sample, - max_stack); + max_stack, symbols); if (ret) return ret; ret = thread__resolve_callchain_sample(thread, cursor, evsel, sample, parent, root_al, - max_stack); + max_stack, symbols); } return ret; @@ -3209,23 +3023,7 @@ int machine__for_each_thread(struct machine *machine, int (*fn)(struct thread *thread, void *p), void *priv) { - struct threads *threads; - struct rb_node *nd; - int rc = 0; - int i; - - for (i = 0; i < THREADS__TABLE_SIZE; i++) { - threads = &machine->threads[i]; - for (nd = rb_first_cached(&threads->entries); nd; - nd = rb_next(nd)) { - struct thread_rb_node *trb = rb_entry(nd, struct thread_rb_node, rb_node); - - rc = fn(trb->thread, priv); - if (rc != 0) - return rc; - } - } - return rc; + return threads__for_each_thread(&machine->threads, fn, priv); } int machines__for_each_thread(struct machines *machines, @@ -3249,6 +3047,36 @@ int machines__for_each_thread(struct machines *machines, return rc; } + +static int thread_list_cb(struct thread *thread, void *data) +{ + struct list_head *list = data; + struct thread_list *entry = malloc(sizeof(*entry)); + + if (!entry) + return -ENOMEM; + + entry->thread = thread__get(thread); + list_add_tail(&entry->list, list); + return 0; +} + +int machine__thread_list(struct machine *machine, struct list_head *list) +{ + return machine__for_each_thread(machine, thread_list_cb, list); +} + +void thread_list__delete(struct list_head *list) +{ + struct thread_list *pos, *next; + + list_for_each_entry_safe(pos, next, list, list) { + thread__zput(pos->thread); + list_del(&pos->list); + free(pos); + } +} + pid_t machine__get_current_tid(struct machine *machine, int cpu) { if (cpu < 0 || (size_t)cpu >= machine->current_tid_sz) @@ -3357,14 +3185,15 @@ out: return addr_cpumode; } -struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id) +struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, + const struct dso_id *id) { return dsos__findnew_id(&machine->dsos, filename, id); } struct dso *machine__findnew_dso(struct machine *machine, const char *filename) { - return machine__findnew_dso_id(machine, filename, NULL); + return machine__findnew_dso_id(machine, filename, &dso_id_empty); } char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) 
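A minimal caller-side sketch of the machine__thread_list() / thread_list__delete() pair added above (LIST_HEAD() and thread__tid() come from perf's existing headers, not from this diff; error handling is elided):

	LIST_HEAD(threads);
	struct thread_list *pos;

	/* Snapshot all threads; each node takes a thread__get() reference. */
	if (machine__thread_list(machine, &threads) == 0) {
		list_for_each_entry(pos, &threads, list)
			printf("tid %d\n", thread__tid(pos->thread));
	}
	/* Drop the references and free the nodes, even after a partial failure. */
	thread_list__delete(&threads);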
@@ -3376,36 +3205,40 @@ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, ch if (sym == NULL) return NULL; - *modp = __map__is_kmodule(map) ? (char *)map__dso(map)->short_name : NULL; + *modp = __map__is_kmodule(map) ? (char *)dso__short_name(map__dso(map)) : NULL; *addrp = map__unmap_ip(map, sym->start); return sym->name; } +struct machine__for_each_dso_cb_args { + struct machine *machine; + machine__dso_t fn; + void *priv; +}; + +static int machine__for_each_dso_cb(struct dso *dso, void *data) +{ + struct machine__for_each_dso_cb_args *args = data; + + return args->fn(dso, args->machine, args->priv); +} + int machine__for_each_dso(struct machine *machine, machine__dso_t fn, void *priv) { - struct dso *pos; - int err = 0; + struct machine__for_each_dso_cb_args args = { + .machine = machine, + .fn = fn, + .priv = priv, + }; - list_for_each_entry(pos, &machine->dsos.head, node) { - if (fn(pos, machine, priv)) - err = -1; - } - return err; + return dsos__for_each_dso(&machine->dsos, machine__for_each_dso_cb, &args); } int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn, void *priv) { struct maps *maps = machine__kernel_maps(machine); - struct map_rb_node *pos; - int err = 0; - maps__for_each_entry(maps, pos) { - err = fn(pos->map, priv); - if (err != 0) { - break; - } - } - return err; + return maps__for_each_map(maps, fn, priv); } bool machine__is_lock_function(struct machine *machine, u64 addr) @@ -3431,6 +3264,17 @@ bool machine__is_lock_function(struct machine *machine, u64 addr) sym = machine__find_kernel_symbol_by_name(machine, "__lock_text_end", &kmap); machine->lock.text_end = map__unmap_ip(kmap, sym->start); + + sym = machine__find_kernel_symbol_by_name(machine, "__traceiter_contention_begin", &kmap); + if (sym) { + machine->traceiter.text_start = map__unmap_ip(kmap, sym->start); + machine->traceiter.text_end = map__unmap_ip(kmap, sym->end); + } + sym = machine__find_kernel_symbol_by_name(machine, "trace_contention_begin", &kmap); + if (sym) { + machine->trace.text_start = map__unmap_ip(kmap, sym->start); + machine->trace.text_end = map__unmap_ip(kmap, sym->end); + } } /* failed to get kernel symbols */ @@ -3445,5 +3289,23 @@ bool machine__is_lock_function(struct machine *machine, u64 addr) if (machine->lock.text_start <= addr && addr < machine->lock.text_end) return true; + /* traceiter functions currently don't have their own section + * but we consider them lock functions + */ + if (machine->traceiter.text_start != 0) { + if (machine->traceiter.text_start <= addr && addr < machine->traceiter.text_end) + return true; + } + + if (machine->trace.text_start != 0) { + if (machine->trace.text_start <= addr && addr < machine->trace.text_end) + return true; + } + return false; } + +int machine__hit_all_dsos(struct machine *machine) +{ + return dsos__hit_all(&machine->dsos); +} diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index d034ecaf89c1..22a42c5825fa 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -7,6 +7,7 @@ #include "maps.h" #include "dsos.h" #include "rwsem.h" +#include "threads.h" struct addr_location; struct branch_stack; @@ -28,17 +29,6 @@ extern const char *ref_reloc_sym_names[]; struct vdso_info; -#define THREADS__TABLE_BITS 8 -#define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS) - -struct threads { - struct rb_root_cached entries; - struct rw_semaphore lock; - unsigned int nr; - struct list_head dead; - struct thread *last_match; -}; - struct machine { struct rb_node 
rb_node; pid_t pid; @@ -49,7 +39,7 @@ struct machine { char *root_dir; char *mmap_name; char *kallsyms_filename; - struct threads threads[THREADS__TABLE_SIZE]; + struct threads threads; struct vdso_info *vdso_info; struct perf_env *env; struct dsos dsos; @@ -59,7 +49,13 @@ struct machine { struct { u64 text_start; u64 text_end; - } sched, lock; + } sched, lock, traceiter, trace; + /* + * The current parallelism level (number of threads that run on CPUs). + * This value can be less than 1, or larger than the total number + * of CPUs, if events are poorly ordered. + */ + int parallelism; pid_t *current_tid; size_t current_tid_sz; union { /* Tool specific area */ @@ -70,12 +66,6 @@ struct machine { bool trampolines_mapped; }; -static inline struct threads *machine__threads(struct machine *machine, pid_t tid) -{ - /* Cast it to handle tid == -1 */ - return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE]; -} - /* * The main kernel (vmlinux) map */ @@ -179,8 +169,9 @@ struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid); void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size); void machines__set_comm_exec(struct machines *machines, bool comm_exec); -struct machine *machine__new_host(void); -struct machine *machine__new_kallsyms(void); +struct machine *machine__new_host(struct perf_env *host_env); +struct machine *machine__new_kallsyms(struct perf_env *host_env); +struct machine *machine__new_live(struct perf_env *host_env, bool kernel_maps, pid_t pid); int machine__init(struct machine *machine, const char *root_dir, pid_t pid); void machine__exit(struct machine *machine); void machine__delete_threads(struct machine *machine); @@ -194,13 +185,32 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, struct callchain_cursor; -int thread__resolve_callchain(struct thread *thread, - struct callchain_cursor *cursor, - struct evsel *evsel, - struct perf_sample *sample, - struct symbol **parent, - struct addr_location *root_al, - int max_stack); +int __thread__resolve_callchain(struct thread *thread, + struct callchain_cursor *cursor, + struct evsel *evsel, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack, + bool symbols); + +static inline int thread__resolve_callchain(struct thread *thread, + struct callchain_cursor *cursor, + struct evsel *evsel, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack) +{ + return __thread__resolve_callchain(thread, + cursor, + evsel, + sample, + parent, + root_al, + max_stack, + /*symbols=*/true); +} /* * Default guest kernel is defined by parameter --guestkallsyms @@ -221,10 +231,10 @@ bool machine__is(struct machine *machine, const char *arch); bool machine__normalized_is(struct machine *machine, const char *arch); int machine__nr_cpus_avail(struct machine *machine); -struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); -struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id); +struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, + const struct dso_id *id); struct dso *machine__findnew_dso(struct machine *machine, const char *filename); size_t machine__fprintf(struct machine *machine, FILE *fp); @@ -263,8 +273,6 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid); int 
machines__create_guest_kernel_maps(struct machines *machines); void machines__destroy_kernel_maps(struct machines *machines); -size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); - typedef int (*machine__dso_t)(struct dso *dso, struct machine *machine, void *priv); int machine__for_each_dso(struct machine *machine, machine__dso_t fn, @@ -281,6 +289,16 @@ int machines__for_each_thread(struct machines *machines, int (*fn)(struct thread *thread, void *p), void *priv); +struct thread_list { + struct list_head list; + struct thread *thread; +}; + +/* Make a list of struct thread_list based on threads in the machine. */ +int machine__thread_list(struct machine *machine, struct list_head *list); +/* Free up the nodes within the thread_list list. */ +void thread_list__delete(struct list_head *list); + pid_t machine__get_current_tid(struct machine *machine, int cpu); int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, pid_t tid); @@ -313,4 +331,6 @@ int machine__map_x86_64_entry_trampolines(struct machine *machine, int machine__resolve(struct machine *machine, struct addr_location *al, struct perf_sample *sample); +int machine__hit_all_dsos(struct machine *machine); + #endif /* __PERF_MACHINE_H */ diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index f64b83004421..41cdddc987ee 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -102,22 +102,26 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) return false; } -void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso) +static void map__init(struct map *map, u64 start, u64 end, u64 pgoff, + struct dso *dso, u32 prot, u32 flags) { map__set_start(map, start); map__set_end(map, end); map__set_pgoff(map, pgoff); - map__set_reloc(map, 0); + assert(map__reloc(map) == 0); map__set_dso(map, dso__get(dso)); - map__set_map_ip(map, map__dso_map_ip); - map__set_unmap_ip(map, map__dso_unmap_ip); - map__set_erange_warned(map, false); refcount_set(map__refcnt(map), 1); + RC_CHK_ACCESS(map)->prot = prot; + RC_CHK_ACCESS(map)->flags = flags; + map__set_mapping_type(map, MAPPING_TYPE__DSO); + assert(map__erange_warned(map) == false); + assert(map__priv(map) == false); + assert(map__hit(map) == false); } struct map *map__new(struct machine *machine, u64 start, u64 len, - u64 pgoff, struct dso_id *id, - u32 prot, u32 flags, struct build_id *bid, + u64 pgoff, const struct dso_id *id, + u32 prot, u32 flags, char *filename, struct thread *thread) { struct map *result; @@ -125,18 +129,16 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, struct nsinfo *nsi = NULL; struct nsinfo *nnsi; - map = malloc(sizeof(*map)); + map = zalloc(sizeof(*map)); if (ADD_RC_CHK(result, map)) { char newfilename[PATH_MAX]; - struct dso *dso, *header_bid_dso; + struct dso *dso; int anon, no_dso, vdso, android; android = is_android_lib(filename); anon = is_anon_memory(filename) || flags & MAP_HUGETLB; vdso = is_vdso_map(filename); no_dso = is_no_dso_memory(filename); - map->prot = prot; - map->flags = flags; nsi = nsinfo__get(thread__nsinfo(thread)); if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) { @@ -169,10 +171,11 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (dso == NULL) goto out_delete; - map__init(result, start, start + len, pgoff, dso); + assert(!dso__kernel(dso)); + map__init(result, start, start + len, pgoff, dso, prot, flags); if (anon || no_dso) { - map->map_ip = map->unmap_ip = identity__map_ip; + map->mapping_type = 
MAPPING_TYPE__IDENTITY; /* * Set memory without DSO as loaded. All map__find_* @@ -182,27 +185,24 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (!(prot & PROT_EXEC)) dso__set_loaded(dso); } - mutex_lock(&dso->lock); - nsinfo__put(dso->nsinfo); - dso->nsinfo = nsi; - mutex_unlock(&dso->lock); + mutex_lock(dso__lock(dso)); + dso__set_nsinfo(dso, nsi); + mutex_unlock(dso__lock(dso)); - if (build_id__is_defined(bid)) { - dso__set_build_id(dso, bid); - } else { + if (!build_id__is_defined(&id->build_id)) { /* * If the mmap event had no build ID, search for an existing dso from the * build ID header by name. Otherwise only the dso loaded at the time of * reading the header will have the build ID set and all future mmaps will * have it missing. */ - down_read(&machine->dsos.lock); - header_bid_dso = __dsos__find(&machine->dsos, filename, false); - up_read(&machine->dsos.lock); - if (header_bid_dso && header_bid_dso->header_build_id) { - dso__set_build_id(dso, &header_bid_dso->bid); - dso->header_build_id = 1; + struct dso *header_bid_dso = dsos__find(&machine->dsos, filename, false); + + if (header_bid_dso && dso__header_build_id(header_bid_dso)) { + dso__set_build_id(dso, dso__bid(header_bid_dso)); + dso__set_header_build_id(dso, 1); } + dso__put(header_bid_dso); } dso__put(dso); } @@ -223,12 +223,10 @@ struct map *map__new2(u64 start, struct dso *dso) struct map *result; RC_STRUCT(map) *map; - map = calloc(1, sizeof(*map) + (dso->kernel ? sizeof(struct kmap) : 0)); + map = calloc(1, sizeof(*map) + (dso__kernel(dso) ? sizeof(struct kmap) : 0)); if (ADD_RC_CHK(result, map)) { - /* - * ->end will be filled after we load all the symbols - */ - map__init(result, start, 0, 0, dso); + /* ->end will be filled after we load all the symbols. */ + map__init(result, start, /*end=*/0, /*pgoff=*/0, dso, /*prot=*/0, /*flags=*/0); } return result; @@ -236,7 +234,7 @@ struct map *map__new2(u64 start, struct dso *dso) bool __map__is_kernel(const struct map *map) { - if (!map__dso(map)->kernel) + if (!dso__kernel(map__dso(map))) return false; return machine__kernel_map(maps__machine(map__kmaps((struct map *)map))) == map; } @@ -253,7 +251,7 @@ bool __map__is_bpf_prog(const struct map *map) const char *name; struct dso *dso = map__dso(map); - if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) + if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) return true; /* @@ -261,7 +259,7 @@ bool __map__is_bpf_prog(const struct map *map) * type of DSO_BINARY_TYPE__BPF_PROG_INFO. In such cases, we can * guess the type based on name. */ - name = dso->short_name; + name = dso__short_name(dso); return name && (strstr(name, "bpf_prog_") == name); } @@ -270,7 +268,7 @@ bool __map__is_bpf_image(const struct map *map) const char *name; struct dso *dso = map__dso(map); - if (dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE) + if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) return true; /* @@ -278,7 +276,7 @@ bool __map__is_bpf_image(const struct map *map) * type of DSO_BINARY_TYPE__BPF_IMAGE. In such cases, we can * guess the type based on name. 
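* (e.g. BPF trampoline or dispatcher images; see is_bpf_image()).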
*/ - name = dso->short_name; + name = dso__short_name(dso); return name && is_bpf_image(name); } @@ -286,7 +284,7 @@ bool __map__is_ool(const struct map *map) { const struct dso *dso = map__dso(map); - return dso && dso->binary_type == DSO_BINARY_TYPE__OOL; + return dso && dso__binary_type(dso) == DSO_BINARY_TYPE__OOL; } bool map__has_symbols(const struct map *map) @@ -317,7 +315,7 @@ void map__put(struct map *map) void map__fixup_start(struct map *map) { struct dso *dso = map__dso(map); - struct rb_root_cached *symbols = &dso->symbols; + struct rb_root_cached *symbols = dso__symbols(dso); struct rb_node *nd = rb_first_cached(symbols); if (nd != NULL) { @@ -330,7 +328,7 @@ void map__fixup_start(struct map *map) void map__fixup_end(struct map *map) { struct dso *dso = map__dso(map); - struct rb_root_cached *symbols = &dso->symbols; + struct rb_root_cached *symbols = dso__symbols(dso); struct rb_node *nd = rb_last(&symbols->rb_root); if (nd != NULL) { @@ -344,7 +342,7 @@ void map__fixup_end(struct map *map) int map__load(struct map *map) { struct dso *dso = map__dso(map); - const char *name = dso->long_name; + const char *name = dso__long_name(dso); int nr; if (dso__loaded(dso)) @@ -352,10 +350,10 @@ int map__load(struct map *map) nr = dso__load(dso, map); if (nr < 0) { - if (dso->has_build_id) { + if (dso__has_build_id(dso)) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(&dso->bid, sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); pr_debug("%s with build id %s not found", name, sbuild_id); } else pr_debug("Failed to open %s", name); @@ -417,7 +415,7 @@ struct map *map__clone(struct map *from) size_t size = sizeof(RC_STRUCT(map)); struct dso *dso = map__dso(from); - if (dso && dso->kernel) + if (dso && dso__kernel(dso)) size += sizeof(struct kmap); map = memdup(RC_CHK_ACCESS(from), size); @@ -434,14 +432,14 @@ size_t map__fprintf(struct map *map, FILE *fp) const struct dso *dso = map__dso(map); return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s\n", - map__start(map), map__end(map), map__pgoff(map), dso->name); + map__start(map), map__end(map), map__pgoff(map), dso__name(dso)); } static bool prefer_dso_long_name(const struct dso *dso, bool print_off) { - return dso->long_name && + return dso__long_name(dso) && (symbol_conf.show_kernel_path || - (print_off && (dso->name[0] == '[' || dso__is_kcore(dso)))); + (print_off && (dso__name(dso)[0] == '[' || dso__is_kcore(dso)))); } static size_t __map__fprintf_dsoname(struct map *map, bool print_off, FILE *fp) @@ -452,9 +450,9 @@ static size_t __map__fprintf_dsoname(struct map *map, bool print_off, FILE *fp) if (dso) { if (prefer_dso_long_name(dso, print_off)) - dsoname = dso->long_name; + dsoname = dso__long_name(dso); else - dsoname = dso->name; + dsoname = dso__name(dso); } if (symbol_conf.pad_output_len_dso) { @@ -515,6 +513,8 @@ void srccode_state_free(struct srccode_state *state) state->line = 0; } +static const struct kmap *__map__const_kmap(const struct map *map); + /** * map__rip_2objdump - convert symbol start address to objdump address. 
* @map: memory map @@ -526,9 +526,9 @@ void srccode_state_free(struct srccode_state *state) * * Return: Address suitable for passing to "objdump --start-address=" */ -u64 map__rip_2objdump(struct map *map, u64 rip) +u64 map__rip_2objdump(const struct map *map, u64 rip) { - struct kmap *kmap = __map__kmap(map); + const struct kmap *kmap = __map__const_kmap(map); const struct dso *dso = map__dso(map); /* @@ -547,18 +547,14 @@ u64 map__rip_2objdump(struct map *map, u64 rip) } } - if (!dso->adjust_symbols) + if (!dso__adjust_symbols(dso)) return rip; - if (dso->rel) + if (dso__rel(dso)) return rip - map__pgoff(map); - /* - * kernel modules also have DSO_TYPE_USER in dso->kernel, - * but all kernel modules are ET_REL, so won't get here. - */ - if (dso->kernel == DSO_SPACE__USER) - return rip + dso->text_offset; + if (dso__kernel(dso) == DSO_SPACE__USER) + return rip + dso__text_offset(dso); return map__unmap_ip(map, rip) - map__reloc(map); } @@ -575,26 +571,39 @@ u64 map__rip_2objdump(struct map *map, u64 rip) * * Return: Memory address. */ -u64 map__objdump_2mem(struct map *map, u64 ip) +u64 map__objdump_2mem(const struct map *map, u64 ip) { const struct dso *dso = map__dso(map); - if (!dso->adjust_symbols) + if (!dso__adjust_symbols(dso)) return map__unmap_ip(map, ip); - if (dso->rel) + if (dso__rel(dso)) return map__unmap_ip(map, ip + map__pgoff(map)); - /* - * kernel modules also have DSO_TYPE_USER in dso->kernel, - * but all kernel modules are ET_REL, so won't get here. - */ - if (dso->kernel == DSO_SPACE__USER) - return map__unmap_ip(map, ip - dso->text_offset); + if (dso__kernel(dso) == DSO_SPACE__USER) + return map__unmap_ip(map, ip - dso__text_offset(dso)); return ip + map__reloc(map); } +/* convert objdump address to relative address. (To be removed) */ +u64 map__objdump_2rip(const struct map *map, u64 ip) +{ + const struct dso *dso = map__dso(map); + + if (!dso__adjust_symbols(dso)) + return ip; + + if (dso__rel(dso)) + return ip + map__pgoff(map); + + if (dso__kernel(dso) == DSO_SPACE__USER) + return ip - dso__text_offset(dso); + + return map__map_ip(map, ip + map__reloc(map)); +} + bool map__contains_symbol(const struct map *map, const struct symbol *sym) { u64 ip = map__unmap_ip(map, sym->start); @@ -606,7 +615,16 @@ struct kmap *__map__kmap(struct map *map) { const struct dso *dso = map__dso(map); - if (!dso || !dso->kernel) + if (!dso || !dso__kernel(dso)) + return NULL; + return (struct kmap *)(&RC_CHK_ACCESS(map)[1]); +} + +static const struct kmap *__map__const_kmap(const struct map *map) +{ + const struct dso *dso = map__dso(map); + + if (!dso || !dso__kernel(dso)) return NULL; return (struct kmap *)(&RC_CHK_ACCESS(map)[1]); } @@ -630,18 +648,3 @@ struct maps *map__kmaps(struct map *map) } return kmap->kmaps; } - -u64 map__dso_map_ip(const struct map *map, u64 ip) -{ - return ip - map__start(map) + map__pgoff(map); -} - -u64 map__dso_unmap_ip(const struct map *map, u64 ip) -{ - return ip + map__start(map) - map__pgoff(map); -} - -u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip) -{ - return ip; -} diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 1b53d53adc86..979b3e11b9bc 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -16,23 +16,26 @@ struct dso; struct maps; struct machine; +enum mapping_type { + /* map__map_ip/map__unmap_ip are given as offsets in the DSO. */ + MAPPING_TYPE__DSO, + /* map__map_ip/map__unmap_ip are just the given ip value. 
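+ * (selected in map__new() for anonymous or DSO-less memory).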
*/ + MAPPING_TYPE__IDENTITY, +}; + DECLARE_RC_STRUCT(map) { u64 start; u64 end; - bool erange_warned:1; - bool priv:1; - u32 prot; u64 pgoff; u64 reloc; - - /* ip -> dso rip */ - u64 (*map_ip)(const struct map *, u64); - /* dso rip -> ip */ - u64 (*unmap_ip)(const struct map *, u64); - struct dso *dso; refcount_t refcnt; + u32 prot; u32 flags; + enum mapping_type mapping_type:8; + bool erange_warned; + bool priv; + bool hit; }; struct kmap; @@ -41,38 +44,11 @@ struct kmap *__map__kmap(struct map *map); struct kmap *map__kmap(struct map *map); struct maps *map__kmaps(struct map *map); -/* ip -> dso rip */ -u64 map__dso_map_ip(const struct map *map, u64 ip); -/* dso rip -> ip */ -u64 map__dso_unmap_ip(const struct map *map, u64 ip); -/* Returns ip */ -u64 identity__map_ip(const struct map *map __maybe_unused, u64 ip); - static inline struct dso *map__dso(const struct map *map) { return RC_CHK_ACCESS(map)->dso; } -static inline u64 map__map_ip(const struct map *map, u64 ip) -{ - return RC_CHK_ACCESS(map)->map_ip(map, ip); -} - -static inline u64 map__unmap_ip(const struct map *map, u64 ip) -{ - return RC_CHK_ACCESS(map)->unmap_ip(map, ip); -} - -static inline void *map__map_ip_ptr(struct map *map) -{ - return RC_CHK_ACCESS(map)->map_ip; -} - -static inline void* map__unmap_ip_ptr(struct map *map) -{ - return RC_CHK_ACCESS(map)->unmap_ip; -} - static inline u64 map__start(const struct map *map) { return RC_CHK_ACCESS(map)->start; @@ -108,6 +84,11 @@ static inline bool map__priv(const struct map *map) return RC_CHK_ACCESS(map)->priv; } +static inline bool map__hit(const struct map *map) +{ + return RC_CHK_ACCESS(map)->hit; +} + static inline refcount_t *map__refcnt(struct map *map) { return &RC_CHK_ACCESS(map)->refcnt; @@ -123,11 +104,42 @@ static inline size_t map__size(const struct map *map) return map__end(map) - map__start(map); } +/* ip -> dso rip */ +static inline u64 map__dso_map_ip(const struct map *map, u64 ip) +{ + return ip - map__start(map) + map__pgoff(map); +} + +/* dso rip -> ip */ +static inline u64 map__dso_unmap_ip(const struct map *map, u64 rip) +{ + return rip + map__start(map) - map__pgoff(map); +} + +static inline u64 map__map_ip(const struct map *map, u64 ip_or_rip) +{ + if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO) + return map__dso_map_ip(map, ip_or_rip); + else + return ip_or_rip; +} + +static inline u64 map__unmap_ip(const struct map *map, u64 ip_or_rip) +{ + if ((RC_CHK_ACCESS(map)->mapping_type) == MAPPING_TYPE__DSO) + return map__dso_unmap_ip(map, ip_or_rip); + else + return ip_or_rip; +} + /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */ -u64 map__rip_2objdump(struct map *map, u64 rip); +u64 map__rip_2objdump(const struct map *map, u64 rip); /* objdump address -> memory address */ -u64 map__objdump_2mem(struct map *map, u64 ip); +u64 map__objdump_2mem(const struct map *map, u64 ip); + +/* objdump address -> rip */ +u64 map__objdump_2rip(const struct map *map, u64 ip); struct symbol; struct thread; @@ -160,15 +172,11 @@ struct thread; #define map__for_each_symbol_by_name(map, sym_name, pos, idx) \ __map__for_each_symbol_by_name(map, sym_name, (pos), idx) -void map__init(struct map *map, - u64 start, u64 end, u64 pgoff, struct dso *dso); - struct dso_id; -struct build_id; struct map *map__new(struct machine *machine, u64 start, u64 len, - u64 pgoff, struct dso_id *id, u32 prot, u32 flags, - struct build_id *bid, char *filename, struct thread *thread); + u64 pgoff, const struct dso_id *id, u32 prot, u32 flags, + char *filename, 
struct thread *thread); struct map *map__new2(u64 start, struct dso *dso); void map__delete(struct map *map); struct map *map__clone(struct map *map); @@ -279,14 +287,19 @@ static inline void map__set_reloc(struct map *map, u64 reloc) RC_CHK_ACCESS(map)->reloc = reloc; } -static inline void map__set_priv(struct map *map, int priv) +static inline void map__set_priv(struct map *map) +{ + RC_CHK_ACCESS(map)->priv = true; +} + +static inline void map__set_hit(struct map *map) { - RC_CHK_ACCESS(map)->priv = priv; + RC_CHK_ACCESS(map)->hit = true; } -static inline void map__set_erange_warned(struct map *map, bool erange_warned) +static inline void map__set_erange_warned(struct map *map) { - RC_CHK_ACCESS(map)->erange_warned = erange_warned; + RC_CHK_ACCESS(map)->erange_warned = true; } static inline void map__set_dso(struct map *map, struct dso *dso) @@ -294,13 +307,13 @@ static inline void map__set_dso(struct map *map, struct dso *dso) RC_CHK_ACCESS(map)->dso = dso; } -static inline void map__set_map_ip(struct map *map, u64 (*map_ip)(const struct map *map, u64 ip)) +static inline void map__set_mapping_type(struct map *map, enum mapping_type type) { - RC_CHK_ACCESS(map)->map_ip = map_ip; + RC_CHK_ACCESS(map)->mapping_type = type; } -static inline void map__set_unmap_ip(struct map *map, u64 (*unmap_ip)(const struct map *map, u64 rip)) +static inline enum mapping_type map__mapping_type(struct map *map) { - RC_CHK_ACCESS(map)->unmap_ip = unmap_ip; + return RC_CHK_ACCESS(map)->mapping_type; } #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/map_symbol.c b/tools/perf/util/map_symbol.c new file mode 100644 index 000000000000..6ad2960bc289 --- /dev/null +++ b/tools/perf/util/map_symbol.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "map_symbol.h" +#include "maps.h" +#include "map.h" + +void map_symbol__exit(struct map_symbol *ms) +{ + maps__zput(ms->maps); + map__zput(ms->map); +} + +void addr_map_symbol__exit(struct addr_map_symbol *ams) +{ + map_symbol__exit(&ams->ms); +} + +void map_symbol__copy(struct map_symbol *dst, struct map_symbol *src) +{ + dst->maps = maps__get(src->maps); + dst->map = map__get(src->map); + dst->sym = src->sym; +} + +void addr_map_symbol__copy(struct addr_map_symbol *dst, struct addr_map_symbol *src) +{ + map_symbol__copy(&dst->ms, &src->ms); + + dst->addr = src->addr; + dst->al_addr = src->al_addr; + dst->al_level = src->al_level; + dst->phys_addr = src->phys_addr; + dst->data_page_size = src->data_page_size; +} diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h index e08817b0c30f..e370bb32ed47 100644 --- a/tools/perf/util/map_symbol.h +++ b/tools/perf/util/map_symbol.h @@ -22,4 +22,11 @@ struct addr_map_symbol { u64 phys_addr; u64 data_page_size; }; + +void map_symbol__exit(struct map_symbol *ms); +void addr_map_symbol__exit(struct addr_map_symbol *ams); + +void map_symbol__copy(struct map_symbol *dst, struct map_symbol *src); +void addr_map_symbol__copy(struct addr_map_symbol *dst, struct addr_map_symbol *src); + #endif // __PERF_MAP_SYMBOL diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 233438c95b53..c321d4f4d846 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -6,158 +6,240 @@ #include "dso.h" #include "map.h" #include "maps.h" +#include "rwsem.h" #include "thread.h" #include "ui/ui.h" #include "unwind.h" +#include <internal/rc_check.h> -static void maps__init(struct maps *maps, struct machine *machine) +/* + * Locking/sorting note: + * + * Sorting is done with the write lock, iteration 
and binary searching happens + * under the read lock requiring being sorted. There is a race between sorting + * releasing the write lock and acquiring the read lock for iteration/searching + * where another thread could insert and break the sorting of the maps. In + * practice inserting maps should be rare meaning that the race shouldn't lead + * to live lock. Removal of maps doesn't break being sorted. + */ + +DECLARE_RC_STRUCT(maps) { + struct rw_semaphore lock; + /** + * @maps_by_address: array of maps sorted by their starting address if + * maps_by_address_sorted is true. + */ + struct map **maps_by_address; + /** + * @maps_by_name: optional array of maps sorted by their dso name if + * maps_by_name_sorted is true. + */ + struct map **maps_by_name; + struct machine *machine; +#ifdef HAVE_LIBUNWIND_SUPPORT + void *addr_space; + const struct unwind_libunwind_ops *unwind_libunwind_ops; +#endif + refcount_t refcnt; + /** + * @nr_maps: number of maps_by_address, and possibly maps_by_name, + * entries that contain maps. + */ + unsigned int nr_maps; + /** + * @nr_maps_allocated: number of entries in maps_by_address and possibly + * maps_by_name. + */ + unsigned int nr_maps_allocated; + /** + * @last_search_by_name_idx: cache of last found by name entry's index + * as frequent searches for the same dso name are common. + */ + unsigned int last_search_by_name_idx; + /** @maps_by_address_sorted: is maps_by_address sorted. */ + bool maps_by_address_sorted; + /** @maps_by_name_sorted: is maps_by_name sorted. */ + bool maps_by_name_sorted; + /** @ends_broken: does the map contain a map where end values are unset/unsorted? */ + bool ends_broken; +}; + +static void check_invariants(const struct maps *maps __maybe_unused) { - refcount_set(maps__refcnt(maps), 1); - init_rwsem(maps__lock(maps)); - RC_CHK_ACCESS(maps)->entries = RB_ROOT; - RC_CHK_ACCESS(maps)->machine = machine; - RC_CHK_ACCESS(maps)->last_search_by_name = NULL; - RC_CHK_ACCESS(maps)->nr_maps = 0; - RC_CHK_ACCESS(maps)->maps_by_name = NULL; +#ifndef NDEBUG + assert(RC_CHK_ACCESS(maps)->nr_maps <= RC_CHK_ACCESS(maps)->nr_maps_allocated); + for (unsigned int i = 0; i < RC_CHK_ACCESS(maps)->nr_maps; i++) { + struct map *map = RC_CHK_ACCESS(maps)->maps_by_address[i]; + + /* Check map is well-formed. */ + assert(map__end(map) == 0 || map__start(map) <= map__end(map)); + /* Expect at least 1 reference count. */ + assert(refcount_read(map__refcnt(map)) > 0); + + if (map__dso(map) && dso__kernel(map__dso(map))) + assert(RC_CHK_EQUAL(map__kmap(map)->kmaps, maps)); + + if (i > 0) { + struct map *prev = RC_CHK_ACCESS(maps)->maps_by_address[i - 1]; + + /* If addresses are sorted... */ + if (RC_CHK_ACCESS(maps)->maps_by_address_sorted) { + /* Maps should be in start address order. */ + assert(map__start(prev) <= map__start(map)); + /* + * If the ends of maps aren't broken (during + * construction) then they should be ordered + * too. + */ + if (!RC_CHK_ACCESS(maps)->ends_broken) { + assert(map__end(prev) <= map__end(map)); + assert(map__end(prev) <= map__start(map) || + map__start(prev) == map__start(map)); + } + } + } + } + if (RC_CHK_ACCESS(maps)->maps_by_name) { + for (unsigned int i = 0; i < RC_CHK_ACCESS(maps)->nr_maps; i++) { + struct map *map = RC_CHK_ACCESS(maps)->maps_by_name[i]; + + /* + * Maps by name maps should be in maps_by_address, so + * the reference count should be higher. 
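+ * (one reference from maps_by_address plus one from maps_by_name).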
+ */ + assert(refcount_read(map__refcnt(map)) > 1); + } + } +#endif } -static void __maps__free_maps_by_name(struct maps *maps) +static struct map **maps__maps_by_address(const struct maps *maps) { - /* - * Free everything to try to do it from the rbtree in the next search - */ - for (unsigned int i = 0; i < maps__nr_maps(maps); i++) - map__put(maps__maps_by_name(maps)[i]); - - zfree(&RC_CHK_ACCESS(maps)->maps_by_name); - RC_CHK_ACCESS(maps)->nr_maps_allocated = 0; + return RC_CHK_ACCESS(maps)->maps_by_address; } -static int __maps__insert(struct maps *maps, struct map *map) +static void maps__set_maps_by_address(struct maps *maps, struct map **new) { - struct rb_node **p = &maps__entries(maps)->rb_node; - struct rb_node *parent = NULL; - const u64 ip = map__start(map); - struct map_rb_node *m, *new_rb_node; - - new_rb_node = malloc(sizeof(*new_rb_node)); - if (!new_rb_node) - return -ENOMEM; + RC_CHK_ACCESS(maps)->maps_by_address = new; - RB_CLEAR_NODE(&new_rb_node->rb_node); - new_rb_node->map = map__get(map); +} - while (*p != NULL) { - parent = *p; - m = rb_entry(parent, struct map_rb_node, rb_node); - if (ip < map__start(m->map)) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } +static void maps__set_nr_maps_allocated(struct maps *maps, unsigned int nr_maps_allocated) +{ + RC_CHK_ACCESS(maps)->nr_maps_allocated = nr_maps_allocated; +} - rb_link_node(&new_rb_node->rb_node, parent, p); - rb_insert_color(&new_rb_node->rb_node, maps__entries(maps)); - return 0; +static void maps__set_nr_maps(struct maps *maps, unsigned int nr_maps) +{ + RC_CHK_ACCESS(maps)->nr_maps = nr_maps; } -int maps__insert(struct maps *maps, struct map *map) +/* Not in the header, to aid reference counting. */ +static struct map **maps__maps_by_name(const struct maps *maps) { - int err; - const struct dso *dso = map__dso(map); + return RC_CHK_ACCESS(maps)->maps_by_name; - down_write(maps__lock(maps)); - err = __maps__insert(maps, map); - if (err) - goto out; +} - ++RC_CHK_ACCESS(maps)->nr_maps; +static void maps__set_maps_by_name(struct maps *maps, struct map **new) +{ + RC_CHK_ACCESS(maps)->maps_by_name = new; - if (dso && dso->kernel) { - struct kmap *kmap = map__kmap(map); +} - if (kmap) - kmap->kmaps = maps; - else - pr_err("Internal error: kernel dso with non kernel map\n"); - } +static bool maps__maps_by_address_sorted(const struct maps *maps) +{ + return RC_CHK_ACCESS(maps)->maps_by_address_sorted; +} +static void maps__set_maps_by_address_sorted(struct maps *maps, bool value) +{ + RC_CHK_ACCESS(maps)->maps_by_address_sorted = value; +} - /* - * If we already performed some search by name, then we need to add the just - * inserted map and resort. 
- */ - if (maps__maps_by_name(maps)) { - if (maps__nr_maps(maps) > RC_CHK_ACCESS(maps)->nr_maps_allocated) { - int nr_allocate = maps__nr_maps(maps) * 2; - struct map **maps_by_name = realloc(maps__maps_by_name(maps), - nr_allocate * sizeof(map)); +static bool maps__maps_by_name_sorted(const struct maps *maps) +{ + return RC_CHK_ACCESS(maps)->maps_by_name_sorted; +} - if (maps_by_name == NULL) { - __maps__free_maps_by_name(maps); - err = -ENOMEM; - goto out; - } +static void maps__set_maps_by_name_sorted(struct maps *maps, bool value) +{ + RC_CHK_ACCESS(maps)->maps_by_name_sorted = value; +} - RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name; - RC_CHK_ACCESS(maps)->nr_maps_allocated = nr_allocate; - } - maps__maps_by_name(maps)[maps__nr_maps(maps) - 1] = map__get(map); - __maps__sort_by_name(maps); - } - out: - up_write(maps__lock(maps)); - return err; +struct machine *maps__machine(const struct maps *maps) +{ + return RC_CHK_ACCESS(maps)->machine; } -static void __maps__remove(struct maps *maps, struct map_rb_node *rb_node) +unsigned int maps__nr_maps(const struct maps *maps) { - rb_erase_init(&rb_node->rb_node, maps__entries(maps)); - map__put(rb_node->map); - free(rb_node); + return RC_CHK_ACCESS(maps)->nr_maps; } -void maps__remove(struct maps *maps, struct map *map) +refcount_t *maps__refcnt(struct maps *maps) { - struct map_rb_node *rb_node; + return &RC_CHK_ACCESS(maps)->refcnt; +} - down_write(maps__lock(maps)); - if (RC_CHK_ACCESS(maps)->last_search_by_name == map) - RC_CHK_ACCESS(maps)->last_search_by_name = NULL; - - rb_node = maps__find_node(maps, map); - assert(rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map)); - __maps__remove(maps, rb_node); - if (maps__maps_by_name(maps)) - __maps__free_maps_by_name(maps); - --RC_CHK_ACCESS(maps)->nr_maps; - up_write(maps__lock(maps)); +#ifdef HAVE_LIBUNWIND_SUPPORT +void *maps__addr_space(const struct maps *maps) +{ + return RC_CHK_ACCESS(maps)->addr_space; +} + +void maps__set_addr_space(struct maps *maps, void *addr_space) +{ + RC_CHK_ACCESS(maps)->addr_space = addr_space; } -static void __maps__purge(struct maps *maps) +const struct unwind_libunwind_ops *maps__unwind_libunwind_ops(const struct maps *maps) { - struct map_rb_node *pos, *next; + return RC_CHK_ACCESS(maps)->unwind_libunwind_ops; +} - if (maps__maps_by_name(maps)) - __maps__free_maps_by_name(maps); +void maps__set_unwind_libunwind_ops(struct maps *maps, const struct unwind_libunwind_ops *ops) +{ + RC_CHK_ACCESS(maps)->unwind_libunwind_ops = ops; +} +#endif - maps__for_each_entry_safe(maps, pos, next) { - rb_erase_init(&pos->rb_node, maps__entries(maps)); - map__put(pos->map); - free(pos); - } +static struct rw_semaphore *maps__lock(struct maps *maps) +{ + return &RC_CHK_ACCESS(maps)->lock; } -static void maps__exit(struct maps *maps) +static void maps__init(struct maps *maps, struct machine *machine) { - down_write(maps__lock(maps)); - __maps__purge(maps); - up_write(maps__lock(maps)); + init_rwsem(maps__lock(maps)); + RC_CHK_ACCESS(maps)->maps_by_address = NULL; + RC_CHK_ACCESS(maps)->maps_by_name = NULL; + RC_CHK_ACCESS(maps)->machine = machine; +#ifdef HAVE_LIBUNWIND_SUPPORT + RC_CHK_ACCESS(maps)->addr_space = NULL; + RC_CHK_ACCESS(maps)->unwind_libunwind_ops = NULL; +#endif + refcount_set(maps__refcnt(maps), 1); + RC_CHK_ACCESS(maps)->nr_maps = 0; + RC_CHK_ACCESS(maps)->nr_maps_allocated = 0; + RC_CHK_ACCESS(maps)->last_search_by_name_idx = 0; + RC_CHK_ACCESS(maps)->maps_by_address_sorted = true; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; } -bool 
maps__empty(struct maps *maps) +static void maps__exit(struct maps *maps) { - return !maps__first(maps); + struct map **maps_by_address = maps__maps_by_address(maps); + struct map **maps_by_name = maps__maps_by_name(maps); + + for (unsigned int i = 0; i < maps__nr_maps(maps); i++) { + map__zput(maps_by_address[i]); + if (maps_by_name) + map__zput(maps_by_name[i]); + } + zfree(&maps_by_address); + zfree(&maps_by_name); + unwind__finish_access(maps); } struct maps *maps__new(struct machine *machine) @@ -174,7 +256,6 @@ struct maps *maps__new(struct machine *machine) static void maps__delete(struct maps *maps) { maps__exit(maps); - unwind__finish_access(maps); RC_CHK_FREE(maps); } @@ -196,45 +277,398 @@ void maps__put(struct maps *maps) RC_CHK_PUT(maps); } -struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) +static void __maps__free_maps_by_name(struct maps *maps) { - struct map *map = maps__find(maps, addr); + if (!maps__maps_by_name(maps)) + return; - /* Ensure map is loaded before using map->map_ip */ - if (map != NULL && map__load(map) >= 0) { - if (mapp != NULL) - *mapp = map; - return map__find_symbol(map, map__map_ip(map, addr)); + /* + * Free everything to try to do it from the rbtree in the next search + */ + for (unsigned int i = 0; i < maps__nr_maps(maps); i++) + map__put(maps__maps_by_name(maps)[i]); + + zfree(&RC_CHK_ACCESS(maps)->maps_by_name); + + /* Consistent with maps__init(). When maps_by_name == NULL, maps_by_name_sorted == false */ + maps__set_maps_by_name_sorted(maps, false); +} + +static int map__start_cmp(const void *a, const void *b) +{ + const struct map *map_a = *(const struct map * const *)a; + const struct map *map_b = *(const struct map * const *)b; + u64 map_a_start = map__start(map_a); + u64 map_b_start = map__start(map_b); + + if (map_a_start == map_b_start) { + u64 map_a_end = map__end(map_a); + u64 map_b_end = map__end(map_b); + + if (map_a_end == map_b_end) { + /* Ensure maps with the same addresses have a fixed order. */ + if (RC_CHK_ACCESS(map_a) == RC_CHK_ACCESS(map_b)) + return 0; + return (intptr_t)RC_CHK_ACCESS(map_a) > (intptr_t)RC_CHK_ACCESS(map_b) + ? 1 : -1; + } + return map_a_end > map_b_end ? 1 : -1; } + return map_a_start > map_b_start ? 1 : -1; +} - return NULL; +static void __maps__sort_by_address(struct maps *maps) +{ + if (maps__maps_by_address_sorted(maps)) + return; + + qsort(maps__maps_by_address(maps), + maps__nr_maps(maps), + sizeof(struct map *), + map__start_cmp); + maps__set_maps_by_address_sorted(maps, true); } -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) +static void maps__sort_by_address(struct maps *maps) { - struct symbol *sym; - struct map_rb_node *pos; + down_write(maps__lock(maps)); + __maps__sort_by_address(maps); + up_write(maps__lock(maps)); +} - down_read(maps__lock(maps)); +static int map__strcmp(const void *a, const void *b) +{ + const struct map *map_a = *(const struct map * const *)a; + const struct map *map_b = *(const struct map * const *)b; + const struct dso *dso_a = map__dso(map_a); + const struct dso *dso_b = map__dso(map_b); + int ret = strcmp(dso__short_name(dso_a), dso__short_name(dso_b)); + + if (ret == 0 && RC_CHK_ACCESS(map_a) != RC_CHK_ACCESS(map_b)) { + /* Ensure distinct but name equal maps have an order. 
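+ * Falling back to the address comparator gives a total order, keeping qsort results deterministic.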
*/ + return map__start_cmp(a, b); + } + return ret; +} + +static int maps__sort_by_name(struct maps *maps) +{ + int err = 0; + + down_write(maps__lock(maps)); + if (!maps__maps_by_name_sorted(maps)) { + struct map **maps_by_name = maps__maps_by_name(maps); + + if (!maps_by_name) { + maps_by_name = malloc(RC_CHK_ACCESS(maps)->nr_maps_allocated * + sizeof(*maps_by_name)); + if (!maps_by_name) + err = -ENOMEM; + else { + struct map **maps_by_address = maps__maps_by_address(maps); + unsigned int n = maps__nr_maps(maps); + + maps__set_maps_by_name(maps, maps_by_name); + for (unsigned int i = 0; i < n; i++) + maps_by_name[i] = map__get(maps_by_address[i]); + } + } + if (!err) { + qsort(maps_by_name, + maps__nr_maps(maps), + sizeof(struct map *), + map__strcmp); + maps__set_maps_by_name_sorted(maps, true); + } + } + check_invariants(maps); + up_write(maps__lock(maps)); + return err; +} + +static unsigned int maps__by_address_index(const struct maps *maps, const struct map *map) +{ + struct map **maps_by_address = maps__maps_by_address(maps); + + if (maps__maps_by_address_sorted(maps)) { + struct map **mapp = + bsearch(&map, maps__maps_by_address(maps), maps__nr_maps(maps), + sizeof(*mapp), map__start_cmp); + + if (mapp) + return mapp - maps_by_address; + } else { + for (unsigned int i = 0; i < maps__nr_maps(maps); i++) { + if (RC_CHK_ACCESS(maps_by_address[i]) == RC_CHK_ACCESS(map)) + return i; + } + } + pr_err("Map missing from maps"); + return -1; +} + +static unsigned int maps__by_name_index(const struct maps *maps, const struct map *map) +{ + struct map **maps_by_name = maps__maps_by_name(maps); + + if (maps__maps_by_name_sorted(maps)) { + struct map **mapp = + bsearch(&map, maps_by_name, maps__nr_maps(maps), + sizeof(*mapp), map__strcmp); + + if (mapp) + return mapp - maps_by_name; + } else { + for (unsigned int i = 0; i < maps__nr_maps(maps); i++) { + if (RC_CHK_ACCESS(maps_by_name[i]) == RC_CHK_ACCESS(map)) + return i; + } + } + pr_err("Map missing from maps"); + return -1; +} - maps__for_each_entry(maps, pos) { - sym = map__find_symbol_by_name(pos->map, name); +static void map__set_kmap_maps(struct map *map, struct maps *maps) +{ + struct dso *dso; + + if (map == NULL) + return; + + dso = map__dso(map); + + if (dso && dso__kernel(dso)) { + struct kmap *kmap = map__kmap(map); + + if (kmap) + kmap->kmaps = maps; + else + pr_err("Internal error: kernel dso with non kernel map\n"); + } +} - if (sym == NULL) - continue; - if (!map__contains_symbol(pos->map, sym)) { - sym = NULL; - continue; +static int __maps__insert(struct maps *maps, struct map *new) +{ + struct map **maps_by_address = maps__maps_by_address(maps); + struct map **maps_by_name = maps__maps_by_name(maps); + unsigned int nr_maps = maps__nr_maps(maps); + unsigned int nr_allocate = RC_CHK_ACCESS(maps)->nr_maps_allocated; + + if (nr_maps + 1 > nr_allocate) { + nr_allocate = !nr_allocate ? 32 : nr_allocate * 2; + + maps_by_address = realloc(maps_by_address, nr_allocate * sizeof(new)); + if (!maps_by_address) + return -ENOMEM; + + maps__set_maps_by_address(maps, maps_by_address); + if (maps_by_name) { + maps_by_name = realloc(maps_by_name, nr_allocate * sizeof(new)); + if (!maps_by_name) { + /* + * If by name fails, just disable by name and it will + * recompute next time it is required. + */ + __maps__free_maps_by_name(maps); + } + maps__set_maps_by_name(maps, maps_by_name); } - if (mapp != NULL) - *mapp = pos->map; - goto out; + RC_CHK_ACCESS(maps)->nr_maps_allocated = nr_allocate; + } + /* Insert the value at the end. 
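+ * Both arrays take their own map__get() reference; sortedness is re-derived below.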
*/ + maps_by_address[nr_maps] = map__get(new); + map__set_kmap_maps(new, maps); + if (maps_by_name) + maps_by_name[nr_maps] = map__get(new); + + nr_maps++; + RC_CHK_ACCESS(maps)->nr_maps = nr_maps; + + /* + * Recompute if things are sorted. If things are inserted in a sorted + * manner, for example by processing /proc/pid/maps, then no + * sorting/resorting will be necessary. + */ + if (nr_maps == 1) { + /* If there's just 1 entry then maps are sorted. */ + maps__set_maps_by_address_sorted(maps, true); + maps__set_maps_by_name_sorted(maps, maps_by_name != NULL); + } else { + /* Sorted if maps were already sorted and this map starts after the last one. */ + maps__set_maps_by_address_sorted(maps, + maps__maps_by_address_sorted(maps) && + map__end(maps_by_address[nr_maps - 2]) <= map__start(new)); + maps__set_maps_by_name_sorted(maps, false); + } + if (map__end(new) < map__start(new)) + RC_CHK_ACCESS(maps)->ends_broken = true; + + return 0; +} + +int maps__insert(struct maps *maps, struct map *map) +{ + int ret; + + down_write(maps__lock(maps)); + ret = __maps__insert(maps, map); + check_invariants(maps); + up_write(maps__lock(maps)); + return ret; +} + +static void __maps__remove(struct maps *maps, struct map *map) +{ + struct map **maps_by_address = maps__maps_by_address(maps); + struct map **maps_by_name = maps__maps_by_name(maps); + unsigned int nr_maps = maps__nr_maps(maps); + unsigned int address_idx; + + /* Slide later mappings over the one to remove */ + address_idx = maps__by_address_index(maps, map); + map__put(maps_by_address[address_idx]); + memmove(&maps_by_address[address_idx], + &maps_by_address[address_idx + 1], + (nr_maps - address_idx - 1) * sizeof(*maps_by_address)); + + if (maps_by_name) { + unsigned int name_idx = maps__by_name_index(maps, map); + + map__put(maps_by_name[name_idx]); + memmove(&maps_by_name[name_idx], + &maps_by_name[name_idx + 1], + (nr_maps - name_idx - 1) * sizeof(*maps_by_name)); } - sym = NULL; -out: + --RC_CHK_ACCESS(maps)->nr_maps; +} + +void maps__remove(struct maps *maps, struct map *map) +{ + down_write(maps__lock(maps)); + __maps__remove(maps, map); + check_invariants(maps); + up_write(maps__lock(maps)); +} + +bool maps__empty(struct maps *maps) +{ + bool res; + + down_read(maps__lock(maps)); + res = maps__nr_maps(maps) == 0; up_read(maps__lock(maps)); - return sym; + + return res; +} + +bool maps__equal(struct maps *a, struct maps *b) +{ + return RC_CHK_EQUAL(a, b); +} + +int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data) +{ + bool done = false; + int ret = 0; + + /* See locking/sorting note. */ + while (!done) { + down_read(maps__lock(maps)); + if (maps__maps_by_address_sorted(maps)) { + /* + * maps__for_each_map callbacks may buggily/unsafely + * insert into maps_by_address. Deliberately reload + * maps__nr_maps and maps_by_address on each iteration + * to avoid using memory freed by maps__insert growing + * the array - this may cause maps to be skipped or + * repeated. 
+ */ + for (unsigned int i = 0; i < maps__nr_maps(maps); i++) { + struct map **maps_by_address = maps__maps_by_address(maps); + struct map *map = maps_by_address[i]; + + ret = cb(map, data); + if (ret) + break; + } + done = true; + } + up_read(maps__lock(maps)); + if (!done) + maps__sort_by_address(maps); + } + return ret; +} + +void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) +{ + struct map **maps_by_address; + + down_write(maps__lock(maps)); + + maps_by_address = maps__maps_by_address(maps); + for (unsigned int i = 0; i < maps__nr_maps(maps);) { + if (cb(maps_by_address[i], data)) + __maps__remove(maps, maps_by_address[i]); + else + i++; + } + check_invariants(maps); + up_write(maps__lock(maps)); +} + +struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) +{ + struct map *map = maps__find(maps, addr); + struct symbol *result = NULL; + + /* Ensure map is loaded before using map->map_ip */ + if (map != NULL && map__load(map) >= 0) + result = map__find_symbol(map, map__map_ip(map, addr)); + + if (mapp) + *mapp = map; + else + map__put(map); + + return result; +} + +struct maps__find_symbol_by_name_args { + struct map **mapp; + const char *name; + struct symbol *sym; +}; + +static int maps__find_symbol_by_name_cb(struct map *map, void *data) +{ + struct maps__find_symbol_by_name_args *args = data; + + args->sym = map__find_symbol_by_name(map, args->name); + if (!args->sym) + return 0; + + if (!map__contains_symbol(map, args->sym)) { + args->sym = NULL; + return 0; + } + + if (args->mapp != NULL) + *args->mapp = map__get(map); + return 1; +} + +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) +{ + struct maps__find_symbol_by_name_args args = { + .mapp = mapp, + .name = name, + .sym = NULL, + }; + + maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); + return args.sym; } int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) @@ -253,225 +687,657 @@ int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) return ams->ms.sym ? 0 : -1; } -size_t maps__fprintf(struct maps *maps, FILE *fp) -{ - size_t printed = 0; - struct map_rb_node *pos; +struct maps__fprintf_args { + FILE *fp; + size_t printed; +}; - down_read(maps__lock(maps)); +static int maps__fprintf_cb(struct map *map, void *data) +{ + struct maps__fprintf_args *args = data; - maps__for_each_entry(maps, pos) { - printed += fprintf(fp, "Map:"); - printed += map__fprintf(pos->map, fp); - if (verbose > 2) { - printed += dso__fprintf(map__dso(pos->map), fp); - printed += fprintf(fp, "--\n"); - } + args->printed += fprintf(args->fp, "Map:"); + args->printed += map__fprintf(map, args->fp); + if (verbose > 2) { + args->printed += dso__fprintf(map__dso(map), args->fp); + args->printed += fprintf(args->fp, "--\n"); } + return 0; +} - up_read(maps__lock(maps)); +size_t maps__fprintf(struct maps *maps, FILE *fp) +{ + struct maps__fprintf_args args = { + .fp = fp, + .printed = 0, + }; - return printed; + maps__for_each_map(maps, maps__fprintf_cb, &args); + + return args.printed; } -int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) +/* + * Find first map where end > map->start. + * Same as find_vma() in kernel. 
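+ * A binary search over maps_by_address, which must therefore be sorted.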
+ */ +static unsigned int first_ending_after(struct maps *maps, const struct map *map) { - struct rb_root *root; - struct rb_node *next, *first; - int err = 0; + struct map **maps_by_address = maps__maps_by_address(maps); + int low = 0, high = (int)maps__nr_maps(maps) - 1, first = high + 1; - down_write(maps__lock(maps)); + assert(maps__maps_by_address_sorted(maps)); + if (low <= high && map__end(maps_by_address[0]) > map__start(map)) + return 0; - root = maps__entries(maps); + while (low <= high) { + int mid = (low + high) / 2; + struct map *pos = maps_by_address[mid]; - /* - * Find first map where end > map->start. - * Same as find_vma() in kernel. - */ - next = root->rb_node; - first = NULL; - while (next) { - struct map_rb_node *pos = rb_entry(next, struct map_rb_node, rb_node); - - if (map__end(pos->map) > map__start(map)) { - first = next; - if (map__start(pos->map) <= map__start(map)) + if (map__end(pos) > map__start(map)) { + first = mid; + if (map__start(pos) <= map__start(map)) { + /* Entry overlaps map. */ break; - next = next->rb_left; + } + high = mid - 1; } else - next = next->rb_right; + low = mid + 1; } + return first; +} + +static int __maps__insert_sorted(struct maps *maps, unsigned int first_after_index, + struct map *new1, struct map *new2) +{ + struct map **maps_by_address = maps__maps_by_address(maps); + struct map **maps_by_name = maps__maps_by_name(maps); + unsigned int nr_maps = maps__nr_maps(maps); + unsigned int nr_allocate = RC_CHK_ACCESS(maps)->nr_maps_allocated; + unsigned int to_add = new2 ? 2 : 1; + + assert(maps__maps_by_address_sorted(maps)); + assert(first_after_index == nr_maps || + map__end(new1) <= map__start(maps_by_address[first_after_index])); + assert(!new2 || map__end(new1) <= map__start(new2)); + assert(first_after_index == nr_maps || !new2 || + map__end(new2) <= map__start(maps_by_address[first_after_index])); + + if (nr_maps + to_add > nr_allocate) { + nr_allocate = !nr_allocate ? 32 : nr_allocate * 2; + + maps_by_address = realloc(maps_by_address, nr_allocate * sizeof(new1)); + if (!maps_by_address) + return -ENOMEM; + + maps__set_maps_by_address(maps, maps_by_address); + if (maps_by_name) { + maps_by_name = realloc(maps_by_name, nr_allocate * sizeof(new1)); + if (!maps_by_name) { + /* + * If by name fails, just disable by name and it will + * recompute next time it is required. + */ + __maps__free_maps_by_name(maps); + } + maps__set_maps_by_name(maps, maps_by_name); + } + RC_CHK_ACCESS(maps)->nr_maps_allocated = nr_allocate; + } + memmove(&maps_by_address[first_after_index+to_add], + &maps_by_address[first_after_index], + (nr_maps - first_after_index) * sizeof(new1)); + maps_by_address[first_after_index] = map__get(new1); + if (maps_by_name) + maps_by_name[nr_maps] = map__get(new1); + if (new2) { + maps_by_address[first_after_index + 1] = map__get(new2); + if (maps_by_name) + maps_by_name[nr_maps + 1] = map__get(new2); + } + RC_CHK_ACCESS(maps)->nr_maps = nr_maps + to_add; + maps__set_maps_by_name_sorted(maps, false); + map__set_kmap_maps(new1, maps); + map__set_kmap_maps(new2, maps); + + check_invariants(maps); + return 0; +} + +/* + * Adds new to maps, if new overlaps existing entries then the existing maps are + * adjusted or removed so that new fits without overlapping any entries. + */ +static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) +{ + int err = 0; + FILE *fp = debug_file(); + unsigned int i, ni = INT_MAX; // Some gcc complain, but depends on maps_by_name... 
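+	/*
+	 * Flow: binary search for the first overlapping entry, clone 'pos'
+	 * into 'before'/'after' pieces whose pgoff keeps map__map_ip()
+	 * results unchanged, replace or remove 'pos', and finally insert
+	 * 'new' at its sorted position.
+	 */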
+ + if (!maps__maps_by_address_sorted(maps)) + __maps__sort_by_address(maps); - next = first; - while (next && !err) { - struct map_rb_node *pos = rb_entry(next, struct map_rb_node, rb_node); - next = rb_next(&pos->rb_node); + /* + * Iterate through entries where the end of the existing entry is + * greater-than the new map's start. + */ + for (i = first_ending_after(maps, new); i < maps__nr_maps(maps); ) { + struct map **maps_by_address = maps__maps_by_address(maps); + struct map **maps_by_name = maps__maps_by_name(maps); + struct map *pos = maps_by_address[i]; + struct map *before = NULL, *after = NULL; /* * Stop if current map starts after map->end. * Maps are ordered by start: next will not overlap for sure. */ - if (map__start(pos->map) >= map__end(map)) + if (map__start(pos) >= map__end(new)) break; - if (verbose >= 2) { - - if (use_browser) { - pr_debug("overlapping maps in %s (disable tui for more info)\n", - map__dso(map)->name); - } else { - fputs("overlapping maps:\n", fp); - map__fprintf(map, fp); - map__fprintf(pos->map, fp); - } + if (use_browser) { + pr_debug("overlapping maps in %s (disable tui for more info)\n", + dso__name(map__dso(new))); + } else if (verbose >= 2) { + pr_debug("overlapping maps:\n"); + map__fprintf(new, fp); + map__fprintf(pos, fp); } - rb_erase_init(&pos->rb_node, root); + if (maps_by_name) + ni = maps__by_name_index(maps, pos); + /* * Now check if we need to create new maps for areas not * overlapped by the new map: */ - if (map__start(map) > map__start(pos->map)) { - struct map *before = map__clone(pos->map); + if (map__start(new) > map__start(pos)) { + /* Map starts within existing map. Need to shorten the existing map. */ + before = map__clone(pos); if (before == NULL) { err = -ENOMEM; - goto put_map; - } - - map__set_end(before, map__start(map)); - err = __maps__insert(maps, before); - if (err) { - map__put(before); - goto put_map; + goto out_err; } + map__set_end(before, map__start(new)); if (verbose >= 2 && !use_browser) map__fprintf(before, fp); - map__put(before); } - - if (map__end(map) < map__end(pos->map)) { - struct map *after = map__clone(pos->map); + if (map__end(new) < map__end(pos)) { + /* The new map isn't as long as the existing map. */ + after = map__clone(pos); if (after == NULL) { + map__zput(before); err = -ENOMEM; - goto put_map; + goto out_err; } - map__set_start(after, map__end(map)); - map__add_pgoff(after, map__end(map) - map__start(pos->map)); - assert(map__map_ip(pos->map, map__end(map)) == - map__map_ip(after, map__end(map))); - err = __maps__insert(maps, after); - if (err) { - map__put(after); - goto put_map; - } + map__set_start(after, map__end(new)); + map__add_pgoff(after, map__end(new) - map__start(pos)); + assert(map__map_ip(pos, map__end(new)) == + map__map_ip(after, map__end(new))); + if (verbose >= 2 && !use_browser) map__fprintf(after, fp); + } + /* + * If adding one entry, for `before` or `after`, we can replace + * the existing entry. If both `before` and `after` are + * necessary than an insert is needed. If the existing entry + * entirely overlaps the existing entry it can just be removed. + */ + if (before) { + map__put(maps_by_address[i]); + maps_by_address[i] = before; + map__set_kmap_maps(before, maps); + + if (maps_by_name) { + map__put(maps_by_name[ni]); + maps_by_name[ni] = map__get(before); + } + + /* Maps are still ordered, go to next one. */ + i++; + if (after) { + /* + * 'before' and 'after' mean 'new' split the + * 'pos' mapping and therefore there are no + * later mappings. 
+ */ + err = __maps__insert_sorted(maps, i, new, after); + map__put(after); + check_invariants(maps); + return err; + } + check_invariants(maps); + } else if (after) { + /* + * 'after' means 'new' split 'pos' and there are no + * later mappings. + */ + map__put(maps_by_address[i]); + maps_by_address[i] = map__get(new); + map__set_kmap_maps(new, maps); + + if (maps_by_name) { + map__put(maps_by_name[ni]); + maps_by_name[ni] = map__get(new); + } + + err = __maps__insert_sorted(maps, i + 1, after, NULL); map__put(after); + check_invariants(maps); + return err; + } else { + struct map *next = NULL; + unsigned int nr_maps = maps__nr_maps(maps); + + if (i + 1 < nr_maps) + next = maps_by_address[i + 1]; + + if (!next || map__start(next) >= map__end(new)) { + /* + * Replace existing mapping and end knowing + * there aren't later overlapping or any + * mappings. + */ + map__put(maps_by_address[i]); + maps_by_address[i] = map__get(new); + map__set_kmap_maps(new, maps); + + if (maps_by_name) { + map__put(maps_by_name[ni]); + maps_by_name[ni] = map__get(new); + } + + check_invariants(maps); + return err; + } + /* + * pos fully covers the previous mapping so remove + * it. The following is an inlined version of + * maps__remove that reuses the already computed + * indices. + */ + map__put(maps_by_address[i]); + memmove(&maps_by_address[i], + &maps_by_address[i + 1], + (nr_maps - i - 1) * sizeof(*maps_by_address)); + + if (maps_by_name) { + map__put(maps_by_name[ni]); + memmove(&maps_by_name[ni], + &maps_by_name[ni + 1], + (nr_maps - ni - 1) * sizeof(*maps_by_name)); + } + --RC_CHK_ACCESS(maps)->nr_maps; + check_invariants(maps); + /* + * Maps are ordered but no need to increase `i` as the + * later maps were moved down. + */ } -put_map: - map__put(pos->map); - free(pos); } - up_write(maps__lock(maps)); + /* Add the map. */ + err = __maps__insert_sorted(maps, i, new, NULL); +out_err: return err; } -/* - * XXX This should not really _copy_ te maps, but refcount them. - */ -int maps__clone(struct thread *thread, struct maps *parent) +int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) { - struct maps *maps = thread__maps(thread); int err; - struct map_rb_node *rb_node; + down_write(maps__lock(maps)); + err = __maps__fixup_overlap_and_insert(maps, new); + up_write(maps__lock(maps)); + return err; +} + +int maps__copy_from(struct maps *dest, struct maps *parent) +{ + /* Note, if struct map were immutable then cloning could use ref counts. */ + struct map **parent_maps_by_address; + int err = 0; + unsigned int n; + + down_write(maps__lock(dest)); down_read(maps__lock(parent)); - maps__for_each_entry(parent, rb_node) { - struct map *new = map__clone(rb_node->map); + parent_maps_by_address = maps__maps_by_address(parent); + n = maps__nr_maps(parent); + if (maps__nr_maps(dest) == 0) { + /* No existing mappings so just copy from parent to avoid reallocs in insert. 
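+ * A single sorted insert places both 'new' and 'after'.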
*/ + unsigned int nr_maps_allocated = RC_CHK_ACCESS(parent)->nr_maps_allocated; + struct map **dest_maps_by_address = + malloc(nr_maps_allocated * sizeof(struct map *)); + struct map **dest_maps_by_name = NULL; - if (new == NULL) { + if (!dest_maps_by_address) err = -ENOMEM; - goto out_unlock; + else { + if (maps__maps_by_name(parent)) { + dest_maps_by_name = + malloc(nr_maps_allocated * sizeof(struct map *)); + } + + RC_CHK_ACCESS(dest)->maps_by_address = dest_maps_by_address; + RC_CHK_ACCESS(dest)->maps_by_name = dest_maps_by_name; + RC_CHK_ACCESS(dest)->nr_maps_allocated = nr_maps_allocated; } - err = unwind__prepare_access(maps, new, NULL); - if (err) - goto out_unlock; + for (unsigned int i = 0; !err && i < n; i++) { + struct map *pos = parent_maps_by_address[i]; + struct map *new = map__clone(pos); - err = maps__insert(maps, new); - if (err) - goto out_unlock; + if (!new) + err = -ENOMEM; + else { + err = unwind__prepare_access(dest, new, NULL); + if (!err) { + dest_maps_by_address[i] = new; + map__set_kmap_maps(new, dest); + if (dest_maps_by_name) + dest_maps_by_name[i] = map__get(new); + RC_CHK_ACCESS(dest)->nr_maps = i + 1; + } + } + if (err) + map__put(new); + } + maps__set_maps_by_address_sorted(dest, maps__maps_by_address_sorted(parent)); + if (!err) { + RC_CHK_ACCESS(dest)->last_search_by_name_idx = + RC_CHK_ACCESS(parent)->last_search_by_name_idx; + maps__set_maps_by_name_sorted(dest, + dest_maps_by_name && + maps__maps_by_name_sorted(parent)); + } else { + RC_CHK_ACCESS(dest)->last_search_by_name_idx = 0; + maps__set_maps_by_name_sorted(dest, false); + } + } else { + /* Unexpected copying to a maps containing entries. */ + for (unsigned int i = 0; !err && i < n; i++) { + struct map *pos = parent_maps_by_address[i]; + struct map *new = map__clone(pos); - map__put(new); + if (!new) + err = -ENOMEM; + else { + err = unwind__prepare_access(dest, new, NULL); + if (!err) + err = __maps__insert(dest, new); + } + map__put(new); + } } + check_invariants(dest); - err = 0; -out_unlock: up_read(maps__lock(parent)); + up_write(maps__lock(dest)); return err; } -struct map_rb_node *maps__find_node(struct maps *maps, struct map *map) +static int map__addr_cmp(const void *key, const void *entry) { - struct map_rb_node *rb_node; + const u64 ip = *(const u64 *)key; + const struct map *map = *(const struct map * const *)entry; - maps__for_each_entry(maps, rb_node) { - if (rb_node->RC_CHK_ACCESS(map) == RC_CHK_ACCESS(map)) - return rb_node; - } - return NULL; + if (ip < map__start(map)) + return -1; + if (ip >= map__end(map)) + return 1; + return 0; } struct map *maps__find(struct maps *maps, u64 ip) { - struct rb_node *p; - struct map_rb_node *m; + struct map *result = NULL; + bool done = false; + + /* See locking/sorting note. 
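+ * The clones are written in parent order, so the parent's sorted flags carry over.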
*/ + while (!done) { + down_read(maps__lock(maps)); + if (maps__maps_by_address_sorted(maps)) { + struct map **mapp = NULL; + struct map **maps_by_address = maps__maps_by_address(maps); + unsigned int nr_maps = maps__nr_maps(maps); + + if (maps_by_address && nr_maps) + mapp = bsearch(&ip, maps_by_address, nr_maps, sizeof(*mapp), + map__addr_cmp); + if (mapp) + result = map__get(*mapp); + done = true; + } + up_read(maps__lock(maps)); + if (!done) + maps__sort_by_address(maps); + } + return result; +} +static int map__strcmp_name(const void *name, const void *b) +{ + const struct dso *dso = map__dso(*(const struct map **)b); - down_read(maps__lock(maps)); + return strcmp(name, dso__short_name(dso)); +} - p = maps__entries(maps)->rb_node; - while (p != NULL) { - m = rb_entry(p, struct map_rb_node, rb_node); - if (ip < map__start(m->map)) - p = p->rb_left; - else if (ip >= map__end(m->map)) - p = p->rb_right; - else - goto out; +struct map *maps__find_by_name(struct maps *maps, const char *name) +{ + struct map *result = NULL; + bool done = false; + + /* See locking/sorting note. */ + while (!done) { + unsigned int i; + + down_read(maps__lock(maps)); + + /* First check last found entry. */ + i = RC_CHK_ACCESS(maps)->last_search_by_name_idx; + if (i < maps__nr_maps(maps) && maps__maps_by_name(maps)) { + struct dso *dso = map__dso(maps__maps_by_name(maps)[i]); + + if (dso && strcmp(dso__short_name(dso), name) == 0) { + result = map__get(maps__maps_by_name(maps)[i]); + done = true; + } + } + + /* Second search sorted array. */ + if (!done && maps__maps_by_name_sorted(maps)) { + struct map **mapp = + bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps), + sizeof(*mapp), map__strcmp_name); + + if (mapp) { + result = map__get(*mapp); + i = mapp - maps__maps_by_name(maps); + RC_CHK_ACCESS(maps)->last_search_by_name_idx = i; + } + done = true; + } + up_read(maps__lock(maps)); + if (!done) { + /* Sort and retry binary search. */ + if (maps__sort_by_name(maps)) { + /* + * Memory allocation failed do linear search + * through address sorted maps. + */ + struct map **maps_by_address; + unsigned int n; + + down_read(maps__lock(maps)); + maps_by_address = maps__maps_by_address(maps); + n = maps__nr_maps(maps); + for (i = 0; i < n; i++) { + struct map *pos = maps_by_address[i]; + struct dso *dso = map__dso(pos); + + if (dso && strcmp(dso__short_name(dso), name) == 0) { + result = map__get(pos); + break; + } + } + up_read(maps__lock(maps)); + done = true; + } + } + } + return result; +} + +struct map *maps__find_next_entry(struct maps *maps, struct map *map) +{ + unsigned int i; + struct map *result = NULL; + + down_read(maps__lock(maps)); + while (!maps__maps_by_address_sorted(maps)) { + up_read(maps__lock(maps)); + maps__sort_by_address(maps); + down_read(maps__lock(maps)); } + i = maps__by_address_index(maps, map); + if (++i < maps__nr_maps(maps)) + result = map__get(maps__maps_by_address(maps)[i]); - m = NULL; -out: up_read(maps__lock(maps)); - return m ? 
m->map : NULL; + return result; } -struct map_rb_node *maps__first(struct maps *maps) +void maps__fixup_end(struct maps *maps) { - struct rb_node *first = rb_first(maps__entries(maps)); + struct map **maps_by_address; + unsigned int n; - if (first) - return rb_entry(first, struct map_rb_node, rb_node); - return NULL; + down_write(maps__lock(maps)); + if (!maps__maps_by_address_sorted(maps)) + __maps__sort_by_address(maps); + + maps_by_address = maps__maps_by_address(maps); + n = maps__nr_maps(maps); + for (unsigned int i = 1; i < n; i++) { + struct map *prev = maps_by_address[i - 1]; + struct map *curr = maps_by_address[i]; + + if (!map__end(prev) || map__end(prev) > map__start(curr)) + map__set_end(prev, map__start(curr)); + } + + /* + * We still haven't the actual symbols, so guess the + * last map final address. + */ + if (n > 0 && !map__end(maps_by_address[n - 1])) + map__set_end(maps_by_address[n - 1], ~0ULL); + + RC_CHK_ACCESS(maps)->ends_broken = false; + check_invariants(maps); + + up_write(maps__lock(maps)); } -struct map_rb_node *map_rb_node__next(struct map_rb_node *node) +/* + * Merges map into maps by splitting the new map within the existing map + * regions. + */ +int maps__merge_in(struct maps *kmaps, struct map *new_map) { - struct rb_node *next; + unsigned int first_after_, kmaps__nr_maps; + struct map **kmaps_maps_by_address; + struct map **merged_maps_by_address; + unsigned int merged_nr_maps_allocated; + + /* First try under a read lock. */ + while (true) { + down_read(maps__lock(kmaps)); + if (maps__maps_by_address_sorted(kmaps)) + break; + + up_read(maps__lock(kmaps)); + + /* First after binary search requires sorted maps. Sort and try again. */ + maps__sort_by_address(kmaps); + } + first_after_ = first_ending_after(kmaps, new_map); + kmaps_maps_by_address = maps__maps_by_address(kmaps); + + if (first_after_ >= maps__nr_maps(kmaps) || + map__start(kmaps_maps_by_address[first_after_]) >= map__end(new_map)) { + /* No overlap so regular insert suffices. */ + up_read(maps__lock(kmaps)); + return maps__insert(kmaps, new_map); + } + up_read(maps__lock(kmaps)); - if (!node) - return NULL; + /* Plain insert with a read-lock failed, try again now with the write lock. */ + down_write(maps__lock(kmaps)); + if (!maps__maps_by_address_sorted(kmaps)) + __maps__sort_by_address(kmaps); + + first_after_ = first_ending_after(kmaps, new_map); + kmaps_maps_by_address = maps__maps_by_address(kmaps); + kmaps__nr_maps = maps__nr_maps(kmaps); + + if (first_after_ >= kmaps__nr_maps || + map__start(kmaps_maps_by_address[first_after_]) >= map__end(new_map)) { + /* No overlap so regular insert suffices. */ + int ret = __maps__insert(kmaps, new_map); + + check_invariants(kmaps); + up_write(maps__lock(kmaps)); + return ret; + } + /* Array to merge into, possibly 1 more for the sake of new_map. */ + merged_nr_maps_allocated = RC_CHK_ACCESS(kmaps)->nr_maps_allocated; + if (kmaps__nr_maps + 1 == merged_nr_maps_allocated) + merged_nr_maps_allocated++; + + merged_maps_by_address = malloc(merged_nr_maps_allocated * sizeof(*merged_maps_by_address)); + if (!merged_maps_by_address) { + up_write(maps__lock(kmaps)); + return -ENOMEM; + } + maps__set_maps_by_address(kmaps, merged_maps_by_address); + maps__set_maps_by_address_sorted(kmaps, true); + __maps__free_maps_by_name(kmaps); + maps__set_nr_maps_allocated(kmaps, merged_nr_maps_allocated); - next = rb_next(&node->rb_node); + /* Copy entries before the new_map that can't overlap. 
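Everything below first_after_ ends at or before map__start(new_map), by the definition of first_ending_after(), so those entries are retained unchanged.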
*/ + for (unsigned int i = 0; i < first_after_; i++) + merged_maps_by_address[i] = map__get(kmaps_maps_by_address[i]); - if (!next) - return NULL; + maps__set_nr_maps(kmaps, first_after_); - return rb_entry(next, struct map_rb_node, rb_node); + /* Add the new map, it will be split when the later overlapping mappings are added. */ + __maps__insert(kmaps, new_map); + + /* Insert mappings after new_map, splitting new_map in the process. */ + for (unsigned int i = first_after_; i < kmaps__nr_maps; i++) + __maps__fixup_overlap_and_insert(kmaps, kmaps_maps_by_address[i]); + + /* Copy the maps from merged into kmaps. */ + for (unsigned int i = 0; i < kmaps__nr_maps; i++) + map__zput(kmaps_maps_by_address[i]); + + free(kmaps_maps_by_address); + check_invariants(kmaps); + up_write(maps__lock(kmaps)); + return 0; +} + +void maps__load_first(struct maps *maps) +{ + down_read(maps__lock(maps)); + + if (maps__nr_maps(maps) > 0) + map__load(maps__maps_by_address(maps)[0]); + + up_read(maps__lock(maps)); } diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 83144e0645ed..d9aa62ed968a 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -3,50 +3,14 @@ #define __PERF_MAPS_H #include <linux/refcount.h> -#include <linux/rbtree.h> #include <stdio.h> #include <stdbool.h> #include <linux/types.h> -#include "rwsem.h" -#include <internal/rc_check.h> struct ref_reloc_sym; struct machine; struct map; struct maps; -struct thread; - -struct map_rb_node { - struct rb_node rb_node; - struct map *map; -}; - -struct map_rb_node *maps__first(struct maps *maps); -struct map_rb_node *map_rb_node__next(struct map_rb_node *node); -struct map_rb_node *maps__find_node(struct maps *maps, struct map *map); -struct map *maps__find(struct maps *maps, u64 addr); - -#define maps__for_each_entry(maps, map) \ - for (map = maps__first(maps); map; map = map_rb_node__next(map)) - -#define maps__for_each_entry_safe(maps, map, next) \ - for (map = maps__first(maps), next = map_rb_node__next(map); map; \ - map = next, next = map_rb_node__next(map)) - -DECLARE_RC_STRUCT(maps) { - struct rb_root entries; - struct rw_semaphore lock; - struct machine *machine; - struct map *last_search_by_name; - struct map **maps_by_name; - refcount_t refcnt; - unsigned int nr_maps; - unsigned int nr_maps_allocated; -#ifdef HAVE_LIBUNWIND_SUPPORT - void *addr_space; - const struct unwind_libunwind_ops *unwind_libunwind_ops; -#endif -}; #define KMAP_NAME_LEN 256 @@ -58,7 +22,7 @@ struct kmap { struct maps *maps__new(struct machine *machine); bool maps__empty(struct maps *maps); -int maps__clone(struct thread *thread, struct maps *parent); +int maps__copy_from(struct maps *maps, struct maps *parent); struct maps *maps__get(struct maps *maps); void maps__put(struct maps *maps); @@ -71,46 +35,22 @@ static inline void __maps__zput(struct maps **map) #define maps__zput(map) __maps__zput(&map) -static inline struct rb_root *maps__entries(struct maps *maps) -{ - return &RC_CHK_ACCESS(maps)->entries; -} +bool maps__equal(struct maps *a, struct maps *b); -static inline struct machine *maps__machine(struct maps *maps) -{ - return RC_CHK_ACCESS(maps)->machine; -} +/* Iterate over map calling cb for each entry. */ +int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data), void *data); +/* Iterate over map removing an entry if cb returns true. 
*/ +void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data); -static inline struct rw_semaphore *maps__lock(struct maps *maps) -{ - return &RC_CHK_ACCESS(maps)->lock; -} - -static inline struct map **maps__maps_by_name(struct maps *maps) -{ - return RC_CHK_ACCESS(maps)->maps_by_name; -} - -static inline unsigned int maps__nr_maps(const struct maps *maps) -{ - return RC_CHK_ACCESS(maps)->nr_maps; -} - -static inline refcount_t *maps__refcnt(struct maps *maps) -{ - return &RC_CHK_ACCESS(maps)->refcnt; -} +struct machine *maps__machine(const struct maps *maps); +unsigned int maps__nr_maps(const struct maps *maps); /* Test only. */ +refcount_t *maps__refcnt(struct maps *maps); /* Test only. */ #ifdef HAVE_LIBUNWIND_SUPPORT -static inline void *maps__addr_space(struct maps *maps) -{ - return RC_CHK_ACCESS(maps)->addr_space; -} - -static inline const struct unwind_libunwind_ops *maps__unwind_libunwind_ops(const struct maps *maps) -{ - return RC_CHK_ACCESS(maps)->unwind_libunwind_ops; -} +void *maps__addr_space(const struct maps *maps); +void maps__set_addr_space(struct maps *maps, void *addr_space); +const struct unwind_libunwind_ops *maps__unwind_libunwind_ops(const struct maps *maps); +void maps__set_unwind_libunwind_ops(struct maps *maps, const struct unwind_libunwind_ops *ops); #endif size_t maps__fprintf(struct maps *maps, FILE *fp); @@ -118,6 +58,7 @@ size_t maps__fprintf(struct maps *maps, FILE *fp); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp); @@ -125,12 +66,16 @@ struct addr_map_symbol; int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams); -int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp); +int maps__fixup_overlap_and_insert(struct maps *maps, struct map *new); struct map *maps__find_by_name(struct maps *maps, const char *name); +struct map *maps__find_next_entry(struct maps *maps, struct map *map); + int maps__merge_in(struct maps *kmaps, struct map *new_map); -void __maps__sort_by_name(struct maps *maps); +void maps__fixup_end(struct maps *maps); + +void maps__load_first(struct maps *maps); #endif // __PERF_MAPS_H diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index c07fe3a90722..0b49fce251fc 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -8,60 +8,138 @@ #include <unistd.h> #include <api/fs/fs.h> #include <linux/kernel.h> +#include "cpumap.h" #include "map_symbol.h" #include "mem-events.h" +#include "mem-info.h" #include "debug.h" +#include "evsel.h" #include "symbol.h" #include "pmu.h" #include "pmus.h" unsigned int perf_mem_events__loads_ldlat = 30; -#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } +#define E(t, n, s, l, a) { .tag = t, .name = n, .event_name = s, .ldlat = l, .aux_event = a } -static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { - E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "cpu/events/mem-loads"), - E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"), - E(NULL, NULL, NULL), +struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { + E("ldlat-loads", "%s/mem-loads,ldlat=%u/P", "mem-loads", true, 0), + E("ldlat-stores", "%s/mem-stores/P", "mem-stores", false, 0), + E(NULL, NULL, NULL, false, 
0), }; #undef E -static char mem_loads_name[100]; -static bool mem_loads_name__init; +bool perf_mem_record[PERF_MEM_EVENTS__MAX] = { 0 }; -struct perf_mem_event * __weak perf_mem_events__ptr(int i) +struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i) { - if (i >= PERF_MEM_EVENTS__MAX) + if (i >= PERF_MEM_EVENTS__MAX || !pmu) return NULL; - return &perf_mem_events[i]; + return &pmu->mem_events[i]; } -char * __weak perf_mem_events__name(int i, char *pmu_name __maybe_unused) +static struct perf_pmu *perf_pmus__scan_mem(struct perf_pmu *pmu) { - struct perf_mem_event *e = perf_mem_events__ptr(i); + while ((pmu = perf_pmus__scan(pmu)) != NULL) { + if (pmu->mem_events) + return pmu; + } + return NULL; +} + +struct perf_pmu *perf_mem_events_find_pmu(void) +{ + /* + * The current perf mem doesn't support per-PMU configuration. + * The exact same configuration is applied to all the + * mem_events supported PMUs. + * Return the first mem_events supported PMU. + * + * Notes: The only case which may support multiple mem_events + * supported PMUs is Intel hybrid. The exact same mem_events + * is shared among the PMUs. Only configure the first PMU + * is good enough as well. + */ + return perf_pmus__scan_mem(NULL); +} + +/** + * perf_pmu__mem_events_num_mem_pmus - Get the number of mem PMUs since the given pmu + * @pmu: Start pmu. If it's NULL, search the entire PMU list. + */ +int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu) +{ + int num = 0; - if (!e) + while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) + num++; + + return num; +} + +static const char *perf_pmu__mem_events_name(struct perf_pmu *pmu, int i, + char *buf, size_t buf_size) +{ + struct perf_mem_event *e; + + if (i >= PERF_MEM_EVENTS__MAX || !pmu) return NULL; - if (i == PERF_MEM_EVENTS__LOAD) { - if (!mem_loads_name__init) { - mem_loads_name__init = true; - scnprintf(mem_loads_name, sizeof(mem_loads_name), - e->name, perf_mem_events__loads_ldlat); + e = &pmu->mem_events[i]; + if (!e || !e->name) + return NULL; + + if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE) { + if (e->ldlat) { + if (!e->aux_event) { + /* ARM and Most of Intel */ + scnprintf(buf, buf_size, + e->name, pmu->name, + perf_mem_events__loads_ldlat); + } else { + /* Intel with mem-loads-aux event */ + scnprintf(buf, buf_size, + e->name, pmu->name, pmu->name, + perf_mem_events__loads_ldlat); + } + } else { + if (!e->aux_event) { + /* AMD and POWER */ + scnprintf(buf, buf_size, + e->name, pmu->name); + } else { + return NULL; + } } - return mem_loads_name; + return buf; } - return (char *)e->name; + if (i == PERF_MEM_EVENTS__STORE) { + scnprintf(buf, buf_size, + e->name, pmu->name); + return buf; + } + + return NULL; } -__weak bool is_mem_loads_aux_event(struct evsel *leader __maybe_unused) +bool is_mem_loads_aux_event(struct evsel *leader) { - return false; + struct perf_pmu *pmu = leader->pmu; + struct perf_mem_event *e; + + if (!pmu || !pmu->mem_events) + return false; + + e = &pmu->mem_events[PERF_MEM_EVENTS__LOAD]; + if (!e->aux_event) + return false; + + return leader->core.attr.config == e->aux_event; } -int perf_mem_events__parse(const char *str) +int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char *str) { char *tok, *saveptr = NULL; bool found = false; @@ -79,13 +157,13 @@ int perf_mem_events__parse(const char *str) while (tok) { for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - struct perf_mem_event *e = perf_mem_events__ptr(j); + struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j); if (!e->tag) 
continue; if (strstr(e->tag, tok)) - e->record = found = true; + perf_mem_record[j] = found = true; } tok = strtok_r(NULL, ",", &saveptr); @@ -100,16 +178,21 @@ int perf_mem_events__parse(const char *str) return -1; } -static bool perf_mem_event__supported(const char *mnt, char *sysfs_name) +static bool perf_pmu__mem_events_supported(const char *mnt, struct perf_pmu *pmu, + struct perf_mem_event *e) { char path[PATH_MAX]; struct stat st; - scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name); + if (!e->event_name) + return true; + + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/events/%s", mnt, pmu->name, e->event_name); + return !stat(path, &st); } -int perf_mem_events__init(void) +static int __perf_pmu__mem_events_init(struct perf_pmu *pmu) { const char *mnt = sysfs__mount(); bool found = false; @@ -119,9 +202,7 @@ int perf_mem_events__init(void) return -ENOENT; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - struct perf_mem_event *e = perf_mem_events__ptr(j); - char sysfs_name[100]; - struct perf_pmu *pmu = NULL; + struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j); /* * If the event entry isn't valid, skip initialization @@ -130,16 +211,7 @@ int perf_mem_events__init(void) if (!e->tag) continue; - /* - * Scan all PMUs not just core ones, since perf mem/c2c on - * platforms like AMD uses IBS OP PMU which is independent - * of core PMU. - */ - while ((pmu = perf_pmus__scan(pmu)) != NULL) { - scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, pmu->name); - e->supported |= perf_mem_event__supported(mnt, sysfs_name); - } - + e->supported |= perf_pmu__mem_events_supported(mnt, pmu, e); if (e->supported) found = true; } @@ -147,84 +219,104 @@ int perf_mem_events__init(void) return found ? 0 : -ENOENT; } -void perf_mem_events__list(void) +int perf_pmu__mem_events_init(void) +{ + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) { + if (__perf_pmu__mem_events_init(pmu)) + return -ENOENT; + } + + return 0; +} + +void perf_pmu__mem_events_list(struct perf_pmu *pmu) { int j; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - struct perf_mem_event *e = perf_mem_events__ptr(j); + char buf[128]; + struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j); fprintf(stderr, "%-*s%-*s%s", e->tag ? 13 : 0, e->tag ? : "", e->tag && verbose > 0 ? 25 : 0, - e->tag && verbose > 0 ? perf_mem_events__name(j, NULL) : "", + e->tag && verbose > 0 + ? perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf)) + : "", e->supported ? 
": available\n" : ""); } } -static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e, - int idx) +int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **event_name_storage_out) { const char *mnt = sysfs__mount(); - char sysfs_name[100]; struct perf_pmu *pmu = NULL; + int i = *argv_nr; + struct perf_cpu_map *cpu_map = NULL; + size_t event_name_storage_size = + perf_pmu__mem_events_num_mem_pmus(NULL) * PERF_MEM_EVENTS__MAX * 128; + size_t event_name_storage_remaining = event_name_storage_size; + char *event_name_storage = malloc(event_name_storage_size); + char *event_name_storage_ptr = event_name_storage; + + if (!event_name_storage) + return -ENOMEM; - while ((pmu = perf_pmus__scan(pmu)) != NULL) { - scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, - pmu->name); - if (!perf_mem_event__supported(mnt, sysfs_name)) { - pr_err("failed: event '%s' not supported\n", - perf_mem_events__name(idx, pmu->name)); - } - } -} - -int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, - char **rec_tmp, int *tmp_nr) -{ - int i = *argv_nr, k = 0; - struct perf_mem_event *e; - struct perf_pmu *pmu; - char *s; + *event_name_storage_out = NULL; + while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) { + for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + const char *s; + struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j); + int ret; - for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - e = perf_mem_events__ptr(j); - if (!e->record) - continue; + if (!perf_mem_record[j]) + continue; - if (perf_pmus__num_mem_pmus() == 1) { if (!e->supported) { + char buf[128]; + pr_err("failed: event '%s' not supported\n", - perf_mem_events__name(j, NULL)); + perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf))); + free(event_name_storage); return -1; } + s = perf_pmu__mem_events_name(pmu, j, event_name_storage_ptr, + event_name_storage_remaining); + if (!s || !perf_pmu__mem_events_supported(mnt, pmu, e)) + continue; + rec_argv[i++] = "-e"; - rec_argv[i++] = perf_mem_events__name(j, NULL); - } else { - if (!e->supported) { - perf_mem_events__print_unsupport_hybrid(e, j); - return -1; + rec_argv[i++] = event_name_storage_ptr; + event_name_storage_remaining -= strlen(event_name_storage_ptr) + 1; + event_name_storage_ptr += strlen(event_name_storage_ptr) + 1; + + ret = perf_cpu_map__merge(&cpu_map, pmu->cpus); + if (ret < 0) { + free(event_name_storage); + return ret; } + } + } - while ((pmu = perf_pmus__scan(pmu)) != NULL) { - rec_argv[i++] = "-e"; - s = perf_mem_events__name(j, pmu->name); - if (s) { - s = strdup(s); - if (!s) - return -1; - - rec_argv[i++] = s; - rec_tmp[k++] = s; - } - } + if (cpu_map) { + struct perf_cpu_map *online = cpu_map__online(); + + if (!perf_cpu_map__equal(cpu_map, online)) { + char buf[200]; + + cpu_map__snprint(cpu_map, buf, sizeof(buf)); + pr_warning("Memory events are enabled on a subset of CPUs: %s\n", buf); } + perf_cpu_map__put(online); + perf_cpu_map__put(cpu_map); } *argv_nr = i; - *tmp_nr = k; + *event_name_storage_out = event_name_storage; return 0; } @@ -238,7 +330,7 @@ static const char * const tlb_access[] = { "Fault", }; -int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +int perf_mem__tlb_scnprintf(char *out, size_t sz, const struct mem_info *mem_info) { size_t l = 0, i; u64 m = PERF_MEM_TLB_NA; @@ -248,7 +340,7 @@ int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info) out[0] = '\0'; if (mem_info) - m = mem_info->data_src.mem_dtlb; + m = 
mem_info__const_data_src(mem_info)->mem_dtlb; hit = m & PERF_MEM_TLB_HIT; miss = m & PERF_MEM_TLB_MISS; @@ -293,6 +385,12 @@ static const char * const mem_lvl[] = { }; static const char * const mem_lvlnum[] = { + [PERF_MEM_LVLNUM_L1] = "L1", + [PERF_MEM_LVLNUM_L2] = "L2", + [PERF_MEM_LVLNUM_L3] = "L3", + [PERF_MEM_LVLNUM_L4] = "L4", + [PERF_MEM_LVLNUM_L2_MHB] = "L2 MHB", + [PERF_MEM_LVLNUM_MSC] = "Memory-side Cache", [PERF_MEM_LVLNUM_UNC] = "Uncached", [PERF_MEM_LVLNUM_CXL] = "CXL", [PERF_MEM_LVLNUM_IO] = "I/O", @@ -316,13 +414,13 @@ static const char * const mem_hops[] = { "board", }; -static int perf_mem__op_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +static int perf_mem__op_scnprintf(char *out, size_t sz, const struct mem_info *mem_info) { u64 op = PERF_MEM_LOCK_NA; int l; if (mem_info) - op = mem_info->data_src.mem_op; + op = mem_info__const_data_src(mem_info)->mem_op; if (op & PERF_MEM_OP_NA) l = scnprintf(out, sz, "N/A"); @@ -340,7 +438,7 @@ static int perf_mem__op_scnprintf(char *out, size_t sz, struct mem_info *mem_inf return l; } -int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_info) { union perf_mem_data_src data_src; int printed = 0; @@ -355,7 +453,7 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) if (!mem_info) goto na; - data_src = mem_info->data_src; + data_src = *mem_info__const_data_src(mem_info); if (data_src.mem_lvl & PERF_MEM_LVL_HIT) memcpy(hit_miss, "hit", 3); @@ -375,7 +473,7 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) if (mem_lvlnum[lvl]) l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]); else - l += scnprintf(out + l, sz - l, "L%d", lvl); + l += scnprintf(out + l, sz - l, "Unknown level %d", lvl); l += scnprintf(out + l, sz - l, " %s", hit_miss); return l; @@ -422,7 +520,7 @@ static const char * const snoopx_access[] = { "Peer", }; -int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +int perf_mem__snp_scnprintf(char *out, size_t sz, const struct mem_info *mem_info) { size_t i, l = 0; u64 m = PERF_MEM_SNOOP_NA; @@ -431,7 +529,7 @@ int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info) out[0] = '\0'; if (mem_info) - m = mem_info->data_src.mem_snoop; + m = mem_info__const_data_src(mem_info)->mem_snoop; for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) { if (!(m & 0x1)) @@ -445,7 +543,7 @@ int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info) m = 0; if (mem_info) - m = mem_info->data_src.mem_snoopx; + m = mem_info__const_data_src(mem_info)->mem_snoopx; for (i = 0; m && i < ARRAY_SIZE(snoopx_access); i++, m >>= 1) { if (!(m & 0x1)) @@ -464,13 +562,13 @@ int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info) return l; } -int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +int perf_mem__lck_scnprintf(char *out, size_t sz, const struct mem_info *mem_info) { u64 mask = PERF_MEM_LOCK_NA; int l; if (mem_info) - mask = mem_info->data_src.mem_lock; + mask = mem_info__const_data_src(mem_info)->mem_lock; if (mask & PERF_MEM_LOCK_NA) l = scnprintf(out, sz, "N/A"); @@ -482,7 +580,7 @@ int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info) return l; } -int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +int perf_mem__blk_scnprintf(char *out, size_t sz, const struct mem_info *mem_info) { size_t l = 0; u64 mask = 
PERF_MEM_BLK_NA; @@ -491,7 +589,7 @@ int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info) out[0] = '\0'; if (mem_info) - mask = mem_info->data_src.mem_blk; + mask = mem_info__const_data_src(mem_info)->mem_blk; if (!mask || (mask & PERF_MEM_BLK_NA)) { l += scnprintf(out + l, sz - l, " N/A"); @@ -505,7 +603,7 @@ int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info) return l; } -int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +int perf_script__meminfo_scnprintf(char *out, size_t sz, const struct mem_info *mem_info) { int i = 0; @@ -527,8 +625,8 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_in int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) { - union perf_mem_data_src *data_src = &mi->data_src; - u64 daddr = mi->daddr.addr; + union perf_mem_data_src *data_src = mem_info__data_src(mi); + u64 daddr = mem_info__daddr(mi)->addr; u64 op = data_src->mem_op; u64 lvl = data_src->mem_lvl; u64 snoop = data_src->mem_snoop; @@ -582,7 +680,10 @@ do { \ if (lvl & P(LVL, LFB)) stats->ld_fbhit++; if (lvl & P(LVL, L1 )) stats->ld_l1hit++; if (lvl & P(LVL, L2)) { - stats->ld_l2hit++; + if (snoop & P(SNOOP, HITM)) + HITM_INC(lcl_hitm); + else + stats->ld_l2hit++; if (snoopx & P(SNOOPX, PEER)) PEER_INC(lcl_peer); @@ -655,7 +756,7 @@ do { \ return -1; } - if (!mi->daddr.ms.map || !mi->iaddr.ms.map) { + if (!mem_info__daddr(mi)->ms.map || !mem_info__iaddr(mi)->ms.map) { stats->nomap++; return -1; } @@ -701,3 +802,181 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add) stats->nomap += add->nomap; stats->noparse += add->noparse; } + +/* + * It returns an index in hist_entry->mem_stat array for the given val which + * represents a data-src based on the mem_stat_type. 
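+ * For example, with mst == PERF_MEM_STAT_OP a load-only data-src yields + * MEM_STAT_OP_LOAD, while an unrecognized mem_stat_type yields -1.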
+ */ +int mem_stat_index(const enum mem_stat_type mst, const u64 val) +{ + union perf_mem_data_src src = { + .val = val, + }; + + switch (mst) { + case PERF_MEM_STAT_OP: + switch (src.mem_op) { + case PERF_MEM_OP_LOAD: + return MEM_STAT_OP_LOAD; + case PERF_MEM_OP_STORE: + return MEM_STAT_OP_STORE; + case PERF_MEM_OP_LOAD | PERF_MEM_OP_STORE: + return MEM_STAT_OP_LDST; + default: + if (src.mem_op & PERF_MEM_OP_PFETCH) + return MEM_STAT_OP_PFETCH; + if (src.mem_op & PERF_MEM_OP_EXEC) + return MEM_STAT_OP_EXEC; + return MEM_STAT_OP_OTHER; + } + case PERF_MEM_STAT_CACHE: + switch (src.mem_lvl_num) { + case PERF_MEM_LVLNUM_L1: + return MEM_STAT_CACHE_L1; + case PERF_MEM_LVLNUM_L2: + return MEM_STAT_CACHE_L2; + case PERF_MEM_LVLNUM_L3: + return MEM_STAT_CACHE_L3; + case PERF_MEM_LVLNUM_L4: + return MEM_STAT_CACHE_L4; + case PERF_MEM_LVLNUM_LFB: + return MEM_STAT_CACHE_L1_BUF; + case PERF_MEM_LVLNUM_L2_MHB: + return MEM_STAT_CACHE_L2_BUF; + default: + return MEM_STAT_CACHE_OTHER; + } + case PERF_MEM_STAT_MEMORY: + switch (src.mem_lvl_num) { + case PERF_MEM_LVLNUM_MSC: + return MEM_STAT_MEMORY_MSC; + case PERF_MEM_LVLNUM_RAM: + return MEM_STAT_MEMORY_RAM; + case PERF_MEM_LVLNUM_UNC: + return MEM_STAT_MEMORY_UNC; + case PERF_MEM_LVLNUM_CXL: + return MEM_STAT_MEMORY_CXL; + case PERF_MEM_LVLNUM_IO: + return MEM_STAT_MEMORY_IO; + case PERF_MEM_LVLNUM_PMEM: + return MEM_STAT_MEMORY_PMEM; + default: + return MEM_STAT_MEMORY_OTHER; + } + case PERF_MEM_STAT_SNOOP: + switch (src.mem_snoop) { + case PERF_MEM_SNOOP_HIT: + return MEM_STAT_SNOOP_HIT; + case PERF_MEM_SNOOP_HITM: + return MEM_STAT_SNOOP_HITM; + case PERF_MEM_SNOOP_MISS: + return MEM_STAT_SNOOP_MISS; + default: + return MEM_STAT_SNOOP_OTHER; + } + case PERF_MEM_STAT_DTLB: + switch (src.mem_dtlb) { + case PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT: + return MEM_STAT_DTLB_L1_HIT; + case PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT: + return MEM_STAT_DTLB_L2_HIT; + case PERF_MEM_TLB_L1 | PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT: + return MEM_STAT_DTLB_ANY_HIT; + default: + if (src.mem_dtlb & PERF_MEM_TLB_MISS) + return MEM_STAT_DTLB_MISS; + return MEM_STAT_DTLB_OTHER; + } + default: + break; + } + return -1; +} + +/* To align output, returned string should be shorter than MEM_STAT_PRINT_LEN */ +const char *mem_stat_name(const enum mem_stat_type mst, const int idx) +{ + switch (mst) { + case PERF_MEM_STAT_OP: + switch (idx) { + case MEM_STAT_OP_LOAD: + return "Load"; + case MEM_STAT_OP_STORE: + return "Store"; + case MEM_STAT_OP_LDST: + return "Ld+St"; + case MEM_STAT_OP_PFETCH: + return "Pfetch"; + case MEM_STAT_OP_EXEC: + return "Exec"; + case MEM_STAT_OP_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_CACHE: + switch (idx) { + case MEM_STAT_CACHE_L1: + return "L1"; + case MEM_STAT_CACHE_L2: + return "L2"; + case MEM_STAT_CACHE_L3: + return "L3"; + case MEM_STAT_CACHE_L4: + return "L4"; + case MEM_STAT_CACHE_L1_BUF: + return "L1-buf"; + case MEM_STAT_CACHE_L2_BUF: + return "L2-buf"; + case MEM_STAT_CACHE_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_MEMORY: + switch (idx) { + case MEM_STAT_MEMORY_RAM: + return "RAM"; + case MEM_STAT_MEMORY_MSC: + return "MSC"; + case MEM_STAT_MEMORY_UNC: + return "Uncach"; + case MEM_STAT_MEMORY_CXL: + return "CXL"; + case MEM_STAT_MEMORY_IO: + return "IO"; + case MEM_STAT_MEMORY_PMEM: + return "PMEM"; + case MEM_STAT_MEMORY_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_SNOOP: + switch (idx) { + case MEM_STAT_SNOOP_HIT: + return "Hit"; + case MEM_STAT_SNOOP_HITM: + return "HitM"; + case 
MEM_STAT_SNOOP_MISS: + return "Miss"; + case MEM_STAT_SNOOP_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_DTLB: + switch (idx) { + case MEM_STAT_DTLB_L1_HIT: + return "L1-Hit"; + case MEM_STAT_DTLB_L2_HIT: + return "L2-Hit"; + case MEM_STAT_DTLB_ANY_HIT: + return "L?-Hit"; + case MEM_STAT_DTLB_MISS: + return "Miss"; + case MEM_STAT_DTLB_OTHER: + default: + return "Other"; + } + default: + break; + } + return "N/A"; +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 12372309d60e..5b98076904b0 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -3,27 +3,15 @@ #define __PERF_MEM_EVENTS_H #include <stdbool.h> -#include <stdint.h> -#include <stdio.h> #include <linux/types.h> -#include <linux/refcount.h> -#include <linux/perf_event.h> -#include "stat.h" -#include "evsel.h" struct perf_mem_event { - bool record; bool supported; + bool ldlat; + u32 aux_event; const char *tag; const char *name; - const char *sysfs_name; -}; - -struct mem_info { - struct addr_map_symbol iaddr; - struct addr_map_symbol daddr; - union perf_mem_data_src data_src; - refcount_t refcnt; + const char *event_name; }; enum { @@ -33,26 +21,33 @@ enum { PERF_MEM_EVENTS__MAX, }; +struct evsel; +struct mem_info; +struct perf_pmu; + extern unsigned int perf_mem_events__loads_ldlat; +extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX]; +extern bool perf_mem_record[PERF_MEM_EVENTS__MAX]; -int perf_mem_events__parse(const char *str); -int perf_mem_events__init(void); +int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char *str); +int perf_pmu__mem_events_init(void); -char *perf_mem_events__name(int i, char *pmu_name); -struct perf_mem_event *perf_mem_events__ptr(int i); +struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i); +struct perf_pmu *perf_mem_events_find_pmu(void); +int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu); bool is_mem_loads_aux_event(struct evsel *leader); -void perf_mem_events__list(void); +void perf_pmu__mem_events_list(struct perf_pmu *pmu); int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, - char **rec_tmp, int *tmp_nr); + char **event_name_storage_out); -int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); -int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); -int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); -int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info); -int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +int perf_mem__tlb_scnprintf(char *out, size_t sz, const struct mem_info *mem_info); +int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_info); +int perf_mem__snp_scnprintf(char *out, size_t sz, const struct mem_info *mem_info); +int perf_mem__lck_scnprintf(char *out, size_t sz, const struct mem_info *mem_info); +int perf_mem__blk_scnprintf(char *out, size_t sz, const struct mem_info *mem_info); -int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info); +int perf_script__meminfo_scnprintf(char *bf, size_t size, const struct mem_info *mem_info); struct c2c_stats { u32 nr_entries; @@ -94,4 +89,61 @@ struct hist_entry; int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi); void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add); +enum mem_stat_type { + PERF_MEM_STAT_OP, + PERF_MEM_STAT_CACHE, + PERF_MEM_STAT_MEMORY, + PERF_MEM_STAT_SNOOP, + 
PERF_MEM_STAT_DTLB, +}; + +#define MEM_STAT_PRINT_LEN 7 /* 1 space + 5 digits + 1 percent sign */ + +enum mem_stat_op { + MEM_STAT_OP_LOAD, + MEM_STAT_OP_STORE, + MEM_STAT_OP_LDST, + MEM_STAT_OP_PFETCH, + MEM_STAT_OP_EXEC, + MEM_STAT_OP_OTHER, +}; + +enum mem_stat_cache { + MEM_STAT_CACHE_L1, + MEM_STAT_CACHE_L2, + MEM_STAT_CACHE_L3, + MEM_STAT_CACHE_L4, + MEM_STAT_CACHE_L1_BUF, + MEM_STAT_CACHE_L2_BUF, + MEM_STAT_CACHE_OTHER, +}; + +enum mem_stat_memory { + MEM_STAT_MEMORY_RAM, + MEM_STAT_MEMORY_MSC, + MEM_STAT_MEMORY_UNC, + MEM_STAT_MEMORY_CXL, + MEM_STAT_MEMORY_IO, + MEM_STAT_MEMORY_PMEM, + MEM_STAT_MEMORY_OTHER, +}; + +enum mem_stat_snoop { + MEM_STAT_SNOOP_HIT, + MEM_STAT_SNOOP_HITM, + MEM_STAT_SNOOP_MISS, + MEM_STAT_SNOOP_OTHER, +}; + +enum mem_stat_dtlb { + MEM_STAT_DTLB_L1_HIT, + MEM_STAT_DTLB_L2_HIT, + MEM_STAT_DTLB_ANY_HIT, + MEM_STAT_DTLB_MISS, + MEM_STAT_DTLB_OTHER, +}; + +int mem_stat_index(const enum mem_stat_type mst, const u64 data_src); +const char *mem_stat_name(const enum mem_stat_type mst, const int idx); + #endif /* __PERF_MEM_EVENTS_H */ diff --git a/tools/perf/util/mem-info.c b/tools/perf/util/mem-info.c new file mode 100644 index 000000000000..d3efa9c139f2 --- /dev/null +++ b/tools/perf/util/mem-info.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/zalloc.h> +#include "mem-info.h" + +struct mem_info *mem_info__get(struct mem_info *mi) +{ + struct mem_info *result; + + if (RC_CHK_GET(result, mi)) + refcount_inc(mem_info__refcnt(mi)); + + return result; +} + +void mem_info__put(struct mem_info *mi) +{ + if (mi && refcount_dec_and_test(mem_info__refcnt(mi))) { + addr_map_symbol__exit(mem_info__iaddr(mi)); + addr_map_symbol__exit(mem_info__daddr(mi)); + RC_CHK_FREE(mi); + } else { + RC_CHK_PUT(mi); + } +} + +struct mem_info *mem_info__new(void) +{ + struct mem_info *result = NULL; + RC_STRUCT(mem_info) *mi = zalloc(sizeof(*mi)); + + if (ADD_RC_CHK(result, mi)) + refcount_set(mem_info__refcnt(result), 1); + + return result; +} + +struct mem_info *mem_info__clone(struct mem_info *mi) +{ + struct mem_info *result = mem_info__new(); + + if (result) { + addr_map_symbol__copy(mem_info__iaddr(result), mem_info__iaddr(mi)); + addr_map_symbol__copy(mem_info__daddr(result), mem_info__daddr(mi)); + mem_info__data_src(result)->val = mem_info__data_src(mi)->val; + } + + return result; +} diff --git a/tools/perf/util/mem-info.h b/tools/perf/util/mem-info.h new file mode 100644 index 000000000000..df75e94ed3d0 --- /dev/null +++ b/tools/perf/util/mem-info.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_MEM_INFO_H +#define __PERF_MEM_INFO_H + +#include <linux/refcount.h> +#include <linux/perf_event.h> +#include <internal/rc_check.h> +#include "map_symbol.h" + +DECLARE_RC_STRUCT(mem_info) { + struct addr_map_symbol iaddr; + struct addr_map_symbol daddr; + union perf_mem_data_src data_src; + refcount_t refcnt; +}; + +struct mem_info *mem_info__new(void); +struct mem_info *mem_info__clone(struct mem_info *mi); +struct mem_info *mem_info__get(struct mem_info *mi); +void mem_info__put(struct mem_info *mi); + +static inline void __mem_info__zput(struct mem_info **mi) +{ + mem_info__put(*mi); + *mi = NULL; +} + +#define mem_info__zput(mi) __mem_info__zput(&mi) + +static inline struct addr_map_symbol *mem_info__iaddr(struct mem_info *mi) +{ + return &RC_CHK_ACCESS(mi)->iaddr; +} + +static inline struct addr_map_symbol *mem_info__daddr(struct mem_info *mi) +{ + return &RC_CHK_ACCESS(mi)->daddr; +} + +static inline union perf_mem_data_src 
*mem_info__data_src(struct mem_info *mi) +{ + return &RC_CHK_ACCESS(mi)->data_src; +} + +static inline const union perf_mem_data_src *mem_info__const_data_src(const struct mem_info *mi) +{ + return &RC_CHK_ACCESS(mi)->data_src; +} + +static inline refcount_t *mem_info__refcnt(struct mem_info *mi) +{ + return &RC_CHK_ACCESS(mi)->refcnt; +} + +#endif /* __PERF_MEM_INFO_H */ diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index a6a5ed44a679..25c75fdbfc52 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -14,6 +14,7 @@ #include "pmus.h" #include "print-events.h" #include "smt.h" +#include "tool_pmu.h" #include "expr.h" #include "rblist.h" #include <string.h> @@ -44,6 +45,8 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events, if (!metric_events) return NULL; + if (evsel && evsel->metric_leader) + me.evsel = evsel->metric_leader; nd = rblist__find(metric_events, &me); if (nd) return container_of(nd, struct metric_event, nd); @@ -100,7 +103,7 @@ static void metric_event_delete(struct rblist *rblist __maybe_unused, free(me); } -static void metricgroup__rblist_init(struct rblist *metric_events) +void metricgroup__rblist_init(struct rblist *metric_events) { rblist__init(metric_events); metric_events->node_cmp = metric_event_cmp; @@ -149,6 +152,8 @@ struct metric { * Should events of the metric be grouped? */ bool group_events; + /** Show events even if in the Default metric group. */ + bool default_show_events; /** * Parsed events for the metric. Optional as events may be taken from a * different metric whose group contains all the IDs necessary for this @@ -176,7 +181,7 @@ static void metric__watchdog_constraint_hint(const char *name, bool foot) " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); } -static bool metric__group_events(const struct pmu_metric *pm) +static bool metric__group_events(const struct pmu_metric *pm, bool metric_no_threshold) { switch (pm->event_grouping) { case MetricNoGroupEvents: @@ -188,6 +193,13 @@ static bool metric__group_events(const struct pmu_metric *pm) return false; case MetricNoGroupEventsSmt: return !smt_on(); + case MetricNoGroupEventsThresholdAndNmi: + if (metric_no_threshold) + return true; + if (!sysctl__nmi_watchdog_enabled()) + return true; + metric__watchdog_constraint_hint(pm->metric_name, /*foot=*/false); + return false; case MetricGroupEvents: default: return true; @@ -209,6 +221,7 @@ static void metric__free(struct metric *m) static struct metric *metric__new(const struct pmu_metric *pm, const char *modifier, bool metric_no_group, + bool metric_no_threshold, int runtime, const char *user_requested_cpu_list, bool system_wide) @@ -225,7 +238,7 @@ static struct metric *metric__new(const struct pmu_metric *pm, m->pmu = pm->pmu ?: "cpu"; m->metric_name = pm->metric_name; - m->default_metricgroup_name = pm->default_metricgroup_name; + m->default_metricgroup_name = pm->default_metricgroup_name ?: ""; m->modifier = NULL; if (modifier) { m->modifier = strdup(modifier); @@ -243,7 +256,8 @@ static struct metric *metric__new(const struct pmu_metric *pm, } m->pctx->sctx.runtime = runtime; m->pctx->sctx.system_wide = system_wide; - m->group_events = !metric_no_group && metric__group_events(pm); + m->group_events = !metric_no_group && metric__group_events(pm, metric_no_threshold); + m->default_show_events = pm->default_show_events; m->metric_refs = NULL; m->evlist = NULL; @@ -286,7 +300,7 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids, *out_metric_events = NULL; ids_size 
= hashmap__size(ids); - metric_events = calloc(sizeof(void *), ids_size + 1); + metric_events = calloc(ids_size + 1, sizeof(void *)); if (!metric_events) return -ENOMEM; @@ -295,8 +309,8 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids, struct expr_id_data *val_ptr; /* Don't match events for the wrong hybrid PMU. */ - if (!all_pmus && ev->pmu_name && evsel__is_hybrid(ev) && - strcmp(ev->pmu_name, pmu)) + if (!all_pmus && ev->pmu && evsel__is_hybrid(ev) && + strcmp(ev->pmu->name, pmu)) continue; /* * Check for duplicate events with the same name. For @@ -350,135 +364,35 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids, return 0; } -static bool match_metric(const char *n, const char *list) +static bool match_metric_or_groups(const char *metric_or_groups, const char *sought) { int len; char *m; - if (!list) + if (!sought) return false; - if (!strcmp(list, "all")) + if (!strcmp(sought, "all")) return true; - if (!n) - return !strcasecmp(list, "No_group"); - len = strlen(list); - m = strcasestr(n, list); - if (!m) - return false; - if ((m == n || m[-1] == ';' || m[-1] == ' ') && - (m[len] == 0 || m[len] == ';')) + if (!metric_or_groups) + return !strcasecmp(sought, "No_group"); + len = strlen(sought); + if (!strncasecmp(metric_or_groups, sought, len) && + (metric_or_groups[len] == 0 || metric_or_groups[len] == ';')) return true; - return false; + m = strchr(metric_or_groups, ';'); + return m && match_metric_or_groups(m + 1, sought); } -static bool match_pm_metric(const struct pmu_metric *pm, const char *pmu, const char *metric) +static bool match_pm_metric_or_groups(const struct pmu_metric *pm, const char *pmu, + const char *metric_or_groups) { const char *pm_pmu = pm->pmu ?: "cpu"; if (strcmp(pmu, "all") && strcmp(pm_pmu, pmu)) return false; - return match_metric(pm->metric_group, metric) || - match_metric(pm->metric_name, metric); -} - -/** struct mep - RB-tree node for building printing information. */ -struct mep { - /** nd - RB-tree element. */ - struct rb_node nd; - /** @metric_group: Owned metric group name, separated others with ';'. 
*/ - char *metric_group; - const char *metric_name; - const char *metric_desc; - const char *metric_long_desc; - const char *metric_expr; - const char *metric_threshold; - const char *metric_unit; -}; - -static int mep_cmp(struct rb_node *rb_node, const void *entry) -{ - struct mep *a = container_of(rb_node, struct mep, nd); - struct mep *b = (struct mep *)entry; - int ret; - - ret = strcmp(a->metric_group, b->metric_group); - if (ret) - return ret; - - return strcmp(a->metric_name, b->metric_name); -} - -static struct rb_node *mep_new(struct rblist *rl __maybe_unused, const void *entry) -{ - struct mep *me = malloc(sizeof(struct mep)); - - if (!me) - return NULL; - - memcpy(me, entry, sizeof(struct mep)); - return &me->nd; -} - -static void mep_delete(struct rblist *rl __maybe_unused, - struct rb_node *nd) -{ - struct mep *me = container_of(nd, struct mep, nd); - - zfree(&me->metric_group); - free(me); -} - -static struct mep *mep_lookup(struct rblist *groups, const char *metric_group, - const char *metric_name) -{ - struct rb_node *nd; - struct mep me = { - .metric_group = strdup(metric_group), - .metric_name = metric_name, - }; - nd = rblist__find(groups, &me); - if (nd) { - free(me.metric_group); - return container_of(nd, struct mep, nd); - } - rblist__add_node(groups, &me); - nd = rblist__find(groups, &me); - if (nd) - return container_of(nd, struct mep, nd); - return NULL; -} - -static int metricgroup__add_to_mep_groups(const struct pmu_metric *pm, - struct rblist *groups) -{ - const char *g; - char *omg, *mg; - - mg = strdup(pm->metric_group ?: "No_group"); - if (!mg) - return -ENOMEM; - omg = mg; - while ((g = strsep(&mg, ";")) != NULL) { - struct mep *me; - - g = skip_spaces(g); - if (strlen(g)) - me = mep_lookup(groups, g, pm->metric_name); - else - me = mep_lookup(groups, "No_group", pm->metric_name); - - if (me) { - me->metric_desc = pm->desc; - me->metric_long_desc = pm->long_desc; - me->metric_expr = pm->metric_expr; - me->metric_threshold = pm->metric_threshold; - me->metric_unit = pm->unit; - } - } - free(omg); - - return 0; + return match_metric_or_groups(pm->metric_group, metric_or_groups) || + match_metric_or_groups(pm->metric_name, metric_or_groups); } struct metricgroup_iter_data { @@ -498,7 +412,7 @@ static int metricgroup__sys_event_iter(const struct pmu_metric *pm, while ((pmu = perf_pmus__scan(pmu))) { - if (!pmu->id || strcmp(pmu->id, pm->compat)) + if (!pmu->id || !pmu_uncore_identifier_match(pm->compat, pmu->id)) continue; return d->fn(pm, table, d->data); @@ -506,53 +420,30 @@ static int metricgroup__sys_event_iter(const struct pmu_metric *pm, return 0; } -static int metricgroup__add_to_mep_groups_callback(const struct pmu_metric *pm, - const struct pmu_metrics_table *table __maybe_unused, - void *vdata) +int metricgroup__for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn, + void *data) { - struct rblist *groups = vdata; + struct metricgroup_iter_data sys_data = { + .fn = fn, + .data = data, + }; + const struct pmu_metrics_table *tables[2] = { + table, + pmu_metrics_table__default(), + }; - return metricgroup__add_to_mep_groups(pm, groups); -} + for (size_t i = 0; i < ARRAY_SIZE(tables); i++) { + int ret; -void metricgroup__print(const struct print_callbacks *print_cb, void *print_state) -{ - struct rblist groups; - const struct pmu_metrics_table *table; - struct rb_node *node, *next; - - rblist__init(&groups); - groups.node_new = mep_new; - groups.node_cmp = mep_cmp; - groups.node_delete = mep_delete; - table = 
pmu_metrics_table__find(); - if (table) { - pmu_metrics_table_for_each_metric(table, - metricgroup__add_to_mep_groups_callback, - &groups); - } - { - struct metricgroup_iter_data data = { - .fn = metricgroup__add_to_mep_groups_callback, - .data = &groups, - }; - pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data); - } + if (!tables[i]) + continue; - for (node = rb_first_cached(&groups.entries); node; node = next) { - struct mep *me = container_of(node, struct mep, nd); - - print_cb->print_metric(print_state, - me->metric_group, - me->metric_name, - me->metric_desc, - me->metric_long_desc, - me->metric_expr, - me->metric_threshold, - me->metric_unit); - next = rb_next(node); - rblist__remove_node(&groups, node); + ret = pmu_metrics_table__for_each_metric(tables[i], fn, data); + if (ret) + return ret; } + + return pmu_for_each_sys_metric(metricgroup__sys_event_iter, &sys_data); } static const char *code_characters = ",-=@"; @@ -673,20 +564,20 @@ static int metricgroup__build_event_string(struct strbuf *events, struct hashmap_entry *cur; size_t bkt; bool no_group = true, has_tool_events = false; - bool tool_events[PERF_TOOL_MAX] = {false}; + bool tool_events[TOOL_PMU__EVENT_MAX] = {false}; int ret = 0; #define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0) hashmap__for_each_entry(ctx->ids, cur, bkt) { const char *sep, *rsep, *id = cur->pkey; - enum perf_tool_event ev; + enum tool_pmu_event ev; pr_debug("found event %s\n", id); /* Always move tool events outside of the group. */ - ev = perf_tool_event__from_str(id); - if (ev != PERF_TOOL_NONE) { + ev = tool_pmu__str_to_event(id); + if (ev != TOOL_PMU__EVENT_NONE) { has_tool_events = true; tool_events[ev] = true; continue; @@ -754,14 +645,14 @@ static int metricgroup__build_event_string(struct strbuf *events, if (has_tool_events) { int i; - perf_tool_event__for_each_event(i) { + tool_pmu__for_each_event(i) { if (tool_events[i]) { if (!no_group) { ret = strbuf_addch(events, ','); RETURN_IF_NON_ZERO(ret); } no_group = false; - ret = strbuf_addstr(events, perf_tool_event__to_str(i)); + ret = strbuf_addstr(events, tool_pmu__event_to_str(i)); RETURN_IF_NON_ZERO(ret); } } @@ -801,11 +692,6 @@ struct metricgroup_add_iter_data { const struct pmu_metrics_table *table; }; -static bool metricgroup__find_metric(const char *pmu, - const char *metric, - const struct pmu_metrics_table *table, - struct pmu_metric *pm); - static int add_metric(struct list_head *metric_list, const struct pmu_metric *pm, const char *modifier, @@ -817,6 +703,16 @@ static int add_metric(struct list_head *metric_list, const struct visited_metric *visited, const struct pmu_metrics_table *table); +static int metricgroup__find_metric_callback(const struct pmu_metric *pm, + const struct pmu_metrics_table *table __maybe_unused, + void *vdata) +{ + struct pmu_metric *copied_pm = vdata; + + memcpy(copied_pm, pm, sizeof(*pm)); + return 0; +} + /** * resolve_metric - Locate metrics within the root metric and recursively add * references to them. @@ -837,7 +733,7 @@ static int add_metric(struct list_head *metric_list, * architecture perf is running upon. 
*/ static int resolve_metric(struct list_head *metric_list, - const char *pmu, + struct perf_pmu *pmu, const char *modifier, bool metric_no_group, bool metric_no_threshold, @@ -867,7 +763,9 @@ static int resolve_metric(struct list_head *metric_list, hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) { struct pmu_metric pm; - if (metricgroup__find_metric(pmu, cur->pkey, table, &pm)) { + if (pmu_metrics_table__find_metric(table, pmu, cur->pkey, + metricgroup__find_metric_callback, + &pm) != PMU_METRICS__NOT_FOUND) { pending = realloc(pending, (pending_cnt + 1) * sizeof(struct to_resolve)); if (!pending) @@ -952,8 +850,8 @@ static int __add_metric(struct list_head *metric_list, * This metric is the root of a tree and may reference other * metrics that are added recursively. */ - root_metric = metric__new(pm, modifier, metric_no_group, runtime, - user_requested_cpu_list, system_wide); + root_metric = metric__new(pm, modifier, metric_no_group, metric_no_threshold, + runtime, user_requested_cpu_list, system_wide); if (!root_metric) return -ENOMEM; @@ -1018,7 +916,12 @@ static int __add_metric(struct list_head *metric_list, } if (!ret) { /* Resolve referenced metrics. */ - const char *pmu = pm->pmu ?: "cpu"; + struct perf_pmu *pmu; + + if (pm->pmu && pm->pmu[0] != '\0') + pmu = perf_pmus__find(pm->pmu); + else + pmu = perf_pmus__scan_core(/*pmu=*/ NULL); ret = resolve_metric(metric_list, pmu, modifier, metric_no_group, metric_no_threshold, user_requested_cpu_list, @@ -1035,44 +938,6 @@ static int __add_metric(struct list_head *metric_list, return ret; } -struct metricgroup__find_metric_data { - const char *pmu; - const char *metric; - struct pmu_metric *pm; -}; - -static int metricgroup__find_metric_callback(const struct pmu_metric *pm, - const struct pmu_metrics_table *table __maybe_unused, - void *vdata) -{ - struct metricgroup__find_metric_data *data = vdata; - const char *pm_pmu = pm->pmu ?: "cpu"; - - if (strcmp(data->pmu, "all") && strcmp(pm_pmu, data->pmu)) - return 0; - - if (!match_metric(pm->metric_name, data->metric)) - return 0; - - memcpy(data->pm, pm, sizeof(*pm)); - return 1; -} - -static bool metricgroup__find_metric(const char *pmu, - const char *metric, - const struct pmu_metrics_table *table, - struct pmu_metric *pm) -{ - struct metricgroup__find_metric_data data = { - .pmu = pmu, - .metric = metric, - .pm = pm, - }; - - return pmu_metrics_table_for_each_metric(table, metricgroup__find_metric_callback, &data) - ? true : false; -} - static int add_metric(struct list_head *metric_list, const struct pmu_metric *pm, const char *modifier, @@ -1111,29 +976,6 @@ static int add_metric(struct list_head *metric_list, return ret; } -static int metricgroup__add_metric_sys_event_iter(const struct pmu_metric *pm, - const struct pmu_metrics_table *table __maybe_unused, - void *data) -{ - struct metricgroup_add_iter_data *d = data; - int ret; - - if (!match_pm_metric(pm, d->pmu, d->metric_name)) - return 0; - - ret = add_metric(d->metric_list, pm, d->modifier, d->metric_no_group, - d->metric_no_threshold, d->user_requested_cpu_list, - d->system_wide, d->root_metric, d->visited, d->table); - if (ret) - goto out; - - *(d->has_match) = true; - -out: - *(d->ret) = ret; - return ret; -} - /** * metric_list_cmp - list_sort comparator that sorts metrics with more events to * the front. tool events are excluded from the count. 
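The new match_metric_or_groups(), shown earlier in this file's diff, walks the ';'-separated group string recursively and only accepts whole components, unlike the old strcasestr() scan that also matched substrings. A minimal standalone sketch of that walk, a toy for illustration only with the real function's NULL and "all" guards omitted:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>

/* Toy model of the matcher: does 'sought' equal one of the ';'-separated
 * components of 'metric_or_groups', case-insensitively? */
static bool match_groups(const char *metric_or_groups, const char *sought)
{
	size_t len = strlen(sought);
	const char *m;

	if (!strncasecmp(metric_or_groups, sought, len) &&
	    (metric_or_groups[len] == 0 || metric_or_groups[len] == ';'))
		return true;
	m = strchr(metric_or_groups, ';');
	return m && match_groups(m + 1, sought);
}

int main(void)
{
	printf("%d\n", match_groups("TopdownL1;Default", "default")); /* 1 */
	printf("%d\n", match_groups("TopdownL1;Default", "Top"));     /* 0 */
	return 0;
}

Here "default" matches the whole second component case-insensitively, while "Top" no longer matches as a mere prefix of TopdownL1.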
@@ -1147,14 +989,14 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, int i, left_count, right_count; left_count = hashmap__size(left->pctx->ids); - perf_tool_event__for_each_event(i) { - if (!expr__get_id(left->pctx, perf_tool_event__to_str(i), &data)) + tool_pmu__for_each_event(i) { + if (!expr__get_id(left->pctx, tool_pmu__event_to_str(i), &data)) left_count--; } right_count = hashmap__size(right->pctx->ids); - perf_tool_event__for_each_event(i) { - if (!expr__get_id(right->pctx, perf_tool_event__to_str(i), &data)) + tool_pmu__for_each_event(i) { + if (!expr__get_id(right->pctx, tool_pmu__event_to_str(i), &data)) right_count--; } @@ -1199,9 +1041,9 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm, struct metricgroup__add_metric_data *data = vdata; int ret = 0; - if (pm->metric_expr && match_pm_metric(pm, data->pmu, data->metric_name)) { + if (pm->metric_expr && match_pm_metric_or_groups(pm, data->pmu, data->metric_name)) { bool metric_no_group = data->metric_no_group || - match_metric(pm->metricgroup_no_group, data->metric_name); + match_metric_or_groups(pm->metricgroup_no_group, data->metric_name); data->has_match = true; ret = add_metric(data->list, pm, data->modifier, metric_no_group, @@ -1237,55 +1079,26 @@ static int metricgroup__add_metric(const char *pmu, const char *metric_name, con { LIST_HEAD(list); int ret; - bool has_match = false; - - { - struct metricgroup__add_metric_data data = { - .list = &list, - .pmu = pmu, - .metric_name = metric_name, - .modifier = modifier, - .metric_no_group = metric_no_group, - .metric_no_threshold = metric_no_threshold, - .user_requested_cpu_list = user_requested_cpu_list, - .system_wide = system_wide, - .has_match = false, - }; - /* - * Iterate over all metrics seeing if metric matches either the - * name or group. When it does add the metric to the list. - */ - ret = pmu_metrics_table_for_each_metric(table, metricgroup__add_metric_callback, - &data); - if (ret) - goto out; + struct metricgroup__add_metric_data data = { + .list = &list, + .pmu = pmu, + .metric_name = metric_name, + .modifier = modifier, + .metric_no_group = metric_no_group, + .metric_no_threshold = metric_no_threshold, + .user_requested_cpu_list = user_requested_cpu_list, + .system_wide = system_wide, + .has_match = false, + }; - has_match = data.has_match; - } - { - struct metricgroup_iter_data data = { - .fn = metricgroup__add_metric_sys_event_iter, - .data = (void *) &(struct metricgroup_add_iter_data) { - .metric_list = &list, - .pmu = pmu, - .metric_name = metric_name, - .modifier = modifier, - .metric_no_group = metric_no_group, - .user_requested_cpu_list = user_requested_cpu_list, - .system_wide = system_wide, - .has_match = &has_match, - .ret = &ret, - .table = table, - }, - }; - - pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data); - } - /* End of pmu events. */ - if (!has_match) + /* + * Iterate over all metrics seeing if metric matches either the + * name or group. When it does add the metric to the list. + */ + ret = metricgroup__for_each_metric(table, metricgroup__add_metric_callback, &data); + if (!ret && !data.has_match) ret = -EINVAL; -out: /* * add to metric_list so that they can be released * even if it's failed @@ -1374,18 +1187,18 @@ static void metricgroup__free_metrics(struct list_head *metric_list) * to true if tool event is found. 
*/ static void find_tool_events(const struct list_head *metric_list, - bool tool_events[PERF_TOOL_MAX]) + bool tool_events[TOOL_PMU__EVENT_MAX]) { struct metric *m; list_for_each_entry(m, metric_list, nd) { int i; - perf_tool_event__for_each_event(i) { + tool_pmu__for_each_event(i) { struct expr_id_data *data; if (!tool_events[i] && - !expr__get_id(m->pctx, perf_tool_event__to_str(i), &data)) + !expr__get_id(m->pctx, tool_pmu__event_to_str(i), &data)) tool_events[i] = true; } } @@ -1436,7 +1249,7 @@ err_out: * parse_ids - Build the event string for the ids and parse them creating an * evlist. The encoded metric_ids are decoded. * @metric_no_merge: is metric sharing explicitly disabled. - * @fake_pmu: used when testing metrics not supported by the current CPU. + * @fake_pmu: use a fake PMU when testing metrics not supported by the current CPU. * @ids: the event identifiers parsed from a metric. * @modifier: any modifiers added to the events. * @group_events: should events be placed in a weak group. @@ -1444,9 +1257,9 @@ err_out: * the overall list of metrics. * @out_evlist: the created list of events. */ -static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, +static int parse_ids(bool metric_no_merge, bool fake_pmu, struct expr_parse_ctx *ids, const char *modifier, - bool group_events, const bool tool_events[PERF_TOOL_MAX], + bool group_events, const bool tool_events[TOOL_PMU__EVENT_MAX], struct evlist **out_evlist) { struct parse_events_error parse_error; @@ -1471,9 +1284,9 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, * event1 if #smt_on else 0 * Add a tool event to avoid a parse error on an empty string. */ - perf_tool_event__for_each_event(i) { + tool_pmu__for_each_event(i) { if (tool_events[i]) { - char *tmp = strdup(perf_tool_event__to_str(i)); + char *tmp = strdup(tool_pmu__event_to_str(i)); if (!tmp) return -ENOMEM; @@ -1502,7 +1315,8 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, pr_debug("Parsing metric events '%s'\n", events.buf); parse_events_error__init(&parse_error); ret = __parse_events(parsed_evlist, events.buf, /*pmu_filter=*/NULL, - &parse_error, fake_pmu, /*warn_if_reordered=*/false); + &parse_error, fake_pmu, /*warn_if_reordered=*/false, + /*fake_tp=*/false); if (ret) { parse_events_error__print(&parse_error, events.buf); goto err_out; @@ -1520,6 +1334,51 @@ err_out: return ret; } +/* How many times will a given evsel be used in a set of metrics? */ +static int count_uses(struct list_head *metric_list, struct evsel *evsel) +{ + const char *metric_id = evsel__metric_id(evsel); + struct metric *m; + int uses = 0; + + list_for_each_entry(m, metric_list, nd) { + if (hashmap__find(m->pctx->ids, metric_id, NULL)) + uses++; + } + return uses; +} + +/* + * Select the evsel that stat-display will use to trigger shadow/metric + * printing. Pick the least shared non-tool evsel, encouraging metrics to be + * with a hardware counter that is specific to them. 
+ */ +static struct evsel *pick_display_evsel(struct list_head *metric_list, + struct evsel **metric_events) +{ + struct evsel *selected = metric_events[0]; + size_t selected_uses; + bool selected_is_tool; + + if (!selected) + return NULL; + + selected_uses = count_uses(metric_list, selected); + selected_is_tool = evsel__is_tool(selected); + for (int i = 1; metric_events[i]; i++) { + struct evsel *candidate = metric_events[i]; + size_t candidate_uses = count_uses(metric_list, candidate); + + if ((selected_is_tool && !evsel__is_tool(candidate)) || + (candidate_uses < selected_uses)) { + selected = candidate; + selected_uses = candidate_uses; + selected_is_tool = evsel__is_tool(selected); + } + } + return selected; +} + static int parse_groups(struct evlist *perf_evlist, const char *pmu, const char *str, bool metric_no_group, @@ -1527,19 +1386,16 @@ static int parse_groups(struct evlist *perf_evlist, bool metric_no_threshold, const char *user_requested_cpu_list, bool system_wide, - struct perf_pmu *fake_pmu, - struct rblist *metric_events_list, + bool fake_pmu, const struct pmu_metrics_table *table) { struct evlist *combined_evlist = NULL; LIST_HEAD(metric_list); struct metric *m; - bool tool_events[PERF_TOOL_MAX] = {false}; + bool tool_events[TOOL_PMU__EVENT_MAX] = {false}; bool is_default = !strcmp(str, "Default"); int ret; - if (metric_events_list->nr_entries == 0) - metricgroup__rblist_init(metric_events_list); ret = metricgroup__add_metric_list(pmu, str, metric_no_group, metric_no_threshold, user_requested_cpu_list, system_wide, &metric_list, table); @@ -1630,7 +1486,9 @@ static int parse_groups(struct evlist *perf_evlist, goto out; } - me = metricgroup__lookup(metric_events_list, metric_events[0], true); + me = metricgroup__lookup(&perf_evlist->metric_events, + pick_display_evsel(&metric_list, metric_events), + /*create=*/true); expr = malloc(sizeof(struct metric_expr)); if (!expr) { @@ -1654,9 +1512,20 @@ static int parse_groups(struct evlist *perf_evlist, if (!expr->metric_name) { ret = -ENOMEM; + free(expr); free(metric_events); goto out; } + if (m->default_show_events) { + struct evsel *pos; + + for (int i = 0; metric_events[i]; i++) + metric_events[i]->default_show_events = true; + evlist__for_each_entry(metric_evlist, pos) { + if (pos->metric_leader && pos->metric_leader->default_show_events) + pos->default_show_events = true; + } + } expr->metric_threshold = m->metric_threshold; expr->metric_unit = m->metric_unit; expr->metric_events = metric_events; @@ -1690,22 +1559,23 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, bool metric_no_threshold, const char *user_requested_cpu_list, bool system_wide, - struct rblist *metric_events) + bool hardware_aware_grouping) { const struct pmu_metrics_table *table = pmu_metrics_table__find(); if (!table) return -EINVAL; + if (hardware_aware_grouping) + pr_debug("Use hardware aware grouping instead of traditional metric grouping method\n"); return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge, metric_no_threshold, user_requested_cpu_list, system_wide, - /*fake_pmu=*/NULL, metric_events, table); + /*fake_pmu=*/false, table); } int metricgroup__parse_groups_test(struct evlist *evlist, const struct pmu_metrics_table *table, - const char *str, - struct rblist *metric_events) + const char *str) { return parse_groups(evlist, "all", str, /*metric_no_group=*/false, @@ -1713,35 +1583,41 @@ int metricgroup__parse_groups_test(struct evlist *evlist, /*metric_no_threshold=*/false, /*user_requested_cpu_list=*/NULL, 
/*system_wide=*/false, - &perf_pmu__fake, metric_events, table); + /*fake_pmu=*/true, table); } struct metricgroup__has_metric_data { const char *pmu; - const char *metric; + const char *metric_or_groups; }; -static int metricgroup__has_metric_callback(const struct pmu_metric *pm, - const struct pmu_metrics_table *table __maybe_unused, - void *vdata) +static int metricgroup__has_metric_or_groups_callback(const struct pmu_metric *pm, + const struct pmu_metrics_table *table + __maybe_unused, + void *vdata) { struct metricgroup__has_metric_data *data = vdata; - return match_pm_metric(pm, data->pmu, data->metric) ? 1 : 0; + return match_pm_metric_or_groups(pm, data->pmu, data->metric_or_groups) ? 1 : 0; } -bool metricgroup__has_metric(const char *pmu, const char *metric) +bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_groups) { - const struct pmu_metrics_table *table = pmu_metrics_table__find(); + const struct pmu_metrics_table *tables[2] = { + pmu_metrics_table__find(), + pmu_metrics_table__default(), + }; struct metricgroup__has_metric_data data = { .pmu = pmu, - .metric = metric, + .metric_or_groups = metric_or_groups, }; - if (!table) - return false; - - return pmu_metrics_table_for_each_metric(table, metricgroup__has_metric_callback, &data) - ? true : false; + for (size_t i = 0; i < ARRAY_SIZE(tables); i++) { + if (pmu_metrics_table__for_each_metric(tables[i], + metricgroup__has_metric_or_groups_callback, + &data)) + return true; + } + return false; } static int metricgroup__topdown_max_level_callback(const struct pmu_metric *pm, @@ -1770,7 +1646,7 @@ unsigned int metricgroups__topdown_max_level(void) if (!table) return false; - pmu_metrics_table_for_each_metric(table, metricgroup__topdown_max_level_callback, + pmu_metrics_table__for_each_metric(table, metricgroup__topdown_max_level_callback, &max_level); return max_level; } @@ -1795,13 +1671,14 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, evsel = evlist__find_evsel(evlist, old_me->evsel->core.idx); if (!evsel) return -EINVAL; - new_me = metricgroup__lookup(new_metric_events, evsel, true); + new_me = metricgroup__lookup(new_metric_events, evsel, /*create=*/true); if (!new_me) return -ENOMEM; pr_debug("copying metric event for cgroup '%s': %s (idx=%d)\n", cgrp ? cgrp->name : "root", evsel->name, evsel->core.idx); + new_me->is_default = old_me->is_default; list_for_each_entry(old_expr, &old_me->head, nd) { new_expr = malloc(sizeof(*new_expr)); if (!new_expr) @@ -1815,6 +1692,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, new_expr->metric_unit = old_expr->metric_unit; new_expr->runtime = old_expr->runtime; + new_expr->default_metricgroup_name = old_expr->default_metricgroup_name; if (old_expr->metric_refs) { /* calculate number of metric_events */ diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index d5325c6ec8e1..4be6bfc13c46 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -16,7 +16,7 @@ struct cgroup; /** * A node in a rblist keyed by the evsel. The global rblist of metric events - * generally exists in perf_stat_config. The evsel is looked up in the rblist + * generally exists in evlist. The evsel is looked up in the rblist * yielding a list of metric_expr. 
*/ struct metric_event { @@ -77,16 +77,17 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, bool metric_no_threshold, const char *user_requested_cpu_list, bool system_wide, - struct rblist *metric_events); + bool hardware_aware_grouping); int metricgroup__parse_groups_test(struct evlist *evlist, const struct pmu_metrics_table *table, - const char *str, - struct rblist *metric_events); + const char *str); -void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); -bool metricgroup__has_metric(const char *pmu, const char *metric); +int metricgroup__for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn, + void *data); +bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_groups); unsigned int metricgroups__topdown_max_level(void); int arch_get_runtimeparam(const struct pmu_metric *pm); +void metricgroup__rblist_init(struct rblist *metric_events); void metricgroup__rblist_exit(struct rblist *metric_events); int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 49093b21ee2d..b69f926d314b 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -7,6 +7,7 @@ */ #include <sys/mman.h> +#include <errno.h> #include <inttypes.h> #include <asm/bug.h> #include <linux/zalloc.h> @@ -230,9 +231,7 @@ void mmap__munmap(struct mmap *map) { bitmap_free(map->affinity_mask.bits); -#ifndef PYTHON_PERF zstd_fini(&map->zstd_data); -#endif perf_mmap__aio_munmap(map); if (map->data != NULL) { @@ -246,9 +245,8 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask) { int idx, nr_cpus; struct perf_cpu cpu; - const struct perf_cpu_map *cpu_map = NULL; + struct perf_cpu_map *cpu_map = cpu_map__online(); - cpu_map = cpu_map__online(); if (!cpu_map) return; @@ -258,6 +256,7 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask) if (cpu__get_node(cpu) == node) __set_bit(cpu.cpu, mask->bits); } + perf_cpu_map__put(cpu_map); } static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp) @@ -295,15 +294,12 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu map->core.flush = mp->flush; - map->comp_level = mp->comp_level; -#ifndef PYTHON_PERF - if (zstd_init(&map->zstd_data, map->comp_level)) { + if (zstd_init(&map->zstd_data, mp->comp_level)) { pr_debug2("failed to init mmap compressor, error %d\n", errno); return -1; } -#endif - if (map->comp_level && !perf_mmap__aio_enabled(map)) { + if (mp->comp_level && !perf_mmap__aio_enabled(map)) { map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if (map->data == MAP_FAILED) { @@ -361,14 +357,3 @@ int perf_mmap__push(struct mmap *md, void *to, out: return rc; } - -int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, struct mmap_cpu_mask *clone) -{ - clone->nbits = original->nbits; - clone->bits = bitmap_zalloc(original->nbits); - if (!clone->bits) - return -ENOMEM; - - memcpy(clone->bits, original->bits, MMAP_CPU_MASK_BYTES(original)); - return 0; -} diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index f944c3cd5efa..4d72c5fa5084 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -39,7 +39,6 @@ struct mmap { #endif struct mmap_cpu_mask affinity_mask; void *data; - int comp_level; struct perf_data_file *file; struct zstd_data zstd_data; }; @@ -62,7 +61,4 @@ size_t mmap__mmap_len(struct mmap *map); void mmap_cpu_mask__scnprintf(struct 
mmap_cpu_mask *mask, const char *tag); -int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, - struct mmap_cpu_mask *clone); - #endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/mutex.c b/tools/perf/util/mutex.c index bca7f0717f35..7aa1f3f55a7d 100644 --- a/tools/perf/util/mutex.c +++ b/tools/perf/util/mutex.c @@ -17,7 +17,7 @@ static void check_err(const char *fn, int err) #define CHECK_ERR(err) check_err(__func__, err) -static void __mutex_init(struct mutex *mtx, bool pshared) +static void __mutex_init(struct mutex *mtx, bool pshared, bool recursive) { pthread_mutexattr_t attr; @@ -27,21 +27,27 @@ static void __mutex_init(struct mutex *mtx, bool pshared) /* In normal builds enable error checking, such as recursive usage. */ CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK)); #endif + if (recursive) + CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)); if (pshared) CHECK_ERR(pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)); - CHECK_ERR(pthread_mutex_init(&mtx->lock, &attr)); CHECK_ERR(pthread_mutexattr_destroy(&attr)); } void mutex_init(struct mutex *mtx) { - __mutex_init(mtx, /*pshared=*/false); + __mutex_init(mtx, /*pshared=*/false, /*recursive=*/false); } void mutex_init_pshared(struct mutex *mtx) { - __mutex_init(mtx, /*pshared=*/true); + __mutex_init(mtx, /*pshared=*/true, /*recursive=*/false); +} + +void mutex_init_recursive(struct mutex *mtx) +{ + __mutex_init(mtx, /*pshared=*/false, /*recursive=*/true); } void mutex_destroy(struct mutex *mtx) diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h index 40661120cacc..70232d8d094f 100644 --- a/tools/perf/util/mutex.h +++ b/tools/perf/util/mutex.h @@ -33,10 +33,22 @@ /* Documents if a type is a lockable type. */ #define LOCKABLE __attribute__((lockable)) +/* Documents a function that expects a lock not to be held prior to entry. */ +#define LOCKS_EXCLUDED(...) __attribute__((locks_excluded(__VA_ARGS__))) + +/* Documents a function that returns a lock. */ +#define LOCK_RETURNED(x) __attribute__((lock_returned(x))) + /* Documents functions that acquire a lock in the body of a function, and do not release it. */ #define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__((exclusive_lock_function(__VA_ARGS__))) /* + * Documents functions that acquire a shared (reader) lock in the body of a + * function, and do not release it. + */ +#define SHARED_LOCK_FUNCTION(...) __attribute__((shared_lock_function(__VA_ARGS__))) + +/* * Documents functions that expect a lock to be held on entry to the function, * and release it in the body of the function. */ @@ -49,6 +61,9 @@ /* Documents a function that expects a mutex to be held prior to entry. */ #define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__((exclusive_locks_required(__VA_ARGS__))) +/* Documents a function that expects a shared (reader) lock to be held prior to entry. */ +#define SHARED_LOCKS_REQUIRED(...) __attribute__((shared_locks_required(__VA_ARGS__))) + /* Turns off thread safety checking within the body of a particular function. */ #define NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) @@ -57,10 +72,14 @@ #define GUARDED_BY(x) #define PT_GUARDED_BY(x) #define LOCKABLE +#define LOCKS_EXCLUDED(...) +#define LOCK_RETURNED(x) #define EXCLUSIVE_LOCK_FUNCTION(...) +#define SHARED_LOCK_FUNCTION(...) #define UNLOCK_FUNCTION(...) #define EXCLUSIVE_TRYLOCK_FUNCTION(...) #define EXCLUSIVE_LOCKS_REQUIRED(...) +#define SHARED_LOCKS_REQUIRED(...) 
#define NO_THREAD_SAFETY_ANALYSIS #endif @@ -85,6 +104,8 @@ void mutex_init(struct mutex *mtx); * process-private attribute. */ void mutex_init_pshared(struct mutex *mtx); +/* Initializes a mutex that may be recursively held on the same thread. */ +void mutex_init_recursive(struct mutex *mtx); void mutex_destroy(struct mutex *mtx); void mutex_lock(struct mutex *mtx) EXCLUSIVE_LOCK_FUNCTION(*mtx); diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index cb185c5659d6..01502570b32d 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -6,7 +6,6 @@ #include "namespaces.h" #include "event.h" -#include "get_current_dir_name.h" #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> @@ -266,11 +265,16 @@ pid_t nsinfo__pid(const struct nsinfo *nsi) return RC_CHK_ACCESS(nsi)->pid; } -pid_t nsinfo__in_pidns(const struct nsinfo *nsi) +bool nsinfo__in_pidns(const struct nsinfo *nsi) { return RC_CHK_ACCESS(nsi)->in_pidns; } +void nsinfo__set_in_pidns(struct nsinfo *nsi) +{ + RC_CHK_ACCESS(nsi)->in_pidns = true; +} + void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc) { @@ -288,14 +292,14 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, if (!nsi || !nsinfo__need_setns(nsi)) return; - if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX) + if (!getcwd(curpath, sizeof(curpath))) return; - oldcwd = get_current_dir_name(); + oldcwd = strdup(curpath); if (!oldcwd) return; - oldns = open(curpath, O_RDONLY); + oldns = open("/proc/self/ns/mnt", O_RDONLY); if (oldns < 0) goto errout; diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index 8c0731c6cbb7..e95c79b80e27 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -58,7 +58,8 @@ void nsinfo__clear_need_setns(struct nsinfo *nsi); pid_t nsinfo__tgid(const struct nsinfo *nsi); pid_t nsinfo__nstgid(const struct nsinfo *nsi); pid_t nsinfo__pid(const struct nsinfo *nsi); -pid_t nsinfo__in_pidns(const struct nsinfo *nsi); +bool nsinfo__in_pidns(const struct nsinfo *nsi); +void nsinfo__set_in_pidns(struct nsinfo *nsi); void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc); void nsinfo__mountns_exit(struct nscookie *nc); diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h index 2dd67c60f211..64bf763ddf50 100644 --- a/tools/perf/util/off_cpu.h +++ b/tools/perf/util/off_cpu.h @@ -13,9 +13,10 @@ struct record_opts; #define OFFCPU_SAMPLE_TYPES (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | \ PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | \ - PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_PERIOD | PERF_SAMPLE_RAW | \ PERF_SAMPLE_CGROUP) +#define OFFCPU_THRESH 500000000ULL #ifdef HAVE_BPF_SKEL int off_cpu_prepare(struct evlist *evlist, struct target *target, diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index fd67d204d720..f7f7aff3d85a 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -36,6 +36,7 @@ static const struct branch_mode branch_modes[] = { BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK), BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX), BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE), + BRANCH_OPT("counter", PERF_SAMPLE_BRANCH_COUNTERS), BRANCH_END }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index c9ec0cafb69d..17c1c36a7bf9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -7,178 +7,126 @@ 
#include <errno.h> #include <sys/ioctl.h> #include <sys/param.h> +#include "cpumap.h" #include "term.h" +#include "env.h" #include "evlist.h" #include "evsel.h" #include <subcmd/parse-options.h> #include "parse-events.h" #include "string2.h" -#include "strlist.h" -#include "bpf-loader.h" +#include "strbuf.h" #include "debug.h" -#include <api/fs/tracing_path.h> #include <perf/cpumap.h> -#include "parse-events-bison.h" -#include "parse-events-flex.h" +#include <util/parse-events-bison.h> +#include <util/parse-events-flex.h> #include "pmu.h" #include "pmus.h" +#include "tp_pmu.h" #include "asm/bug.h" +#include "ui/ui.h" #include "util/parse-branch-options.h" #include "util/evsel_config.h" #include "util/event.h" #include "util/bpf-filter.h" +#include "util/stat.h" +#include "util/tool_pmu.h" #include "util/util.h" #include "tracepoint.h" +#include <api/fs/tracing_path.h> #define MAX_NAME_LEN 100 -#ifdef PARSER_DEBUG -extern int parse_events_debug; -#endif -int parse_events_parse(void *parse_state, void *scanner); -static int get_config_terms(struct list_head *head_config, - struct list_head *head_terms __maybe_unused); - -struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = { - .symbol = "cpu-cycles", - .alias = "cycles", - }, - [PERF_COUNT_HW_INSTRUCTIONS] = { - .symbol = "instructions", - .alias = "", - }, - [PERF_COUNT_HW_CACHE_REFERENCES] = { - .symbol = "cache-references", - .alias = "", - }, - [PERF_COUNT_HW_CACHE_MISSES] = { - .symbol = "cache-misses", - .alias = "", - }, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { - .symbol = "branch-instructions", - .alias = "branches", - }, - [PERF_COUNT_HW_BRANCH_MISSES] = { - .symbol = "branch-misses", - .alias = "", - }, - [PERF_COUNT_HW_BUS_CYCLES] = { - .symbol = "bus-cycles", - .alias = "", - }, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = { - .symbol = "stalled-cycles-frontend", - .alias = "idle-cycles-frontend", - }, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = { - .symbol = "stalled-cycles-backend", - .alias = "idle-cycles-backend", - }, - [PERF_COUNT_HW_REF_CPU_CYCLES] = { - .symbol = "ref-cycles", - .alias = "", - }, -}; - -struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { - [PERF_COUNT_SW_CPU_CLOCK] = { - .symbol = "cpu-clock", - .alias = "", - }, - [PERF_COUNT_SW_TASK_CLOCK] = { - .symbol = "task-clock", - .alias = "", - }, - [PERF_COUNT_SW_PAGE_FAULTS] = { - .symbol = "page-faults", - .alias = "faults", - }, - [PERF_COUNT_SW_CONTEXT_SWITCHES] = { - .symbol = "context-switches", - .alias = "cs", - }, - [PERF_COUNT_SW_CPU_MIGRATIONS] = { - .symbol = "cpu-migrations", - .alias = "migrations", - }, - [PERF_COUNT_SW_PAGE_FAULTS_MIN] = { - .symbol = "minor-faults", - .alias = "", - }, - [PERF_COUNT_SW_PAGE_FAULTS_MAJ] = { - .symbol = "major-faults", - .alias = "", - }, - [PERF_COUNT_SW_ALIGNMENT_FAULTS] = { - .symbol = "alignment-faults", - .alias = "", - }, - [PERF_COUNT_SW_EMULATION_FAULTS] = { - .symbol = "emulation-faults", - .alias = "", - }, - [PERF_COUNT_SW_DUMMY] = { - .symbol = "dummy", - .alias = "", - }, - [PERF_COUNT_SW_BPF_OUTPUT] = { - .symbol = "bpf-output", - .alias = "", - }, - [PERF_COUNT_SW_CGROUP_SWITCHES] = { - .symbol = "cgroup-switches", - .alias = "", - }, +static int get_config_terms(const struct parse_events_terms *head_config, + struct list_head *head_terms); +static int parse_events_terms__copy(const struct parse_events_terms *src, + struct parse_events_terms *dest); +static int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct strbuf *sb); + 
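The chunk that follows swaps the switch statement in event_type() for the designated-initializer string table event_types[], bounds-checked against PERF_TYPE_MAX. One general caveat with this pattern is worth flagging: slots a designated initializer does not name default to NULL, so a lookup helper normally wants a NULL fallback in addition to the range check. A self-contained sketch of the pattern with invented names (ev_type, ev_names), not perf's:

#include <stdio.h>

enum ev_type { EV_HW, EV_SW, EV_TP, EV_MAX };

/*
 * Designated initializers keep the table aligned with the enum even if
 * enumerators are reordered; any slot left unnamed is implicitly NULL.
 */
static const char *const ev_names[EV_MAX] = {
	[EV_HW] = "hardware",
	[EV_SW] = "software",
	[EV_TP] = "tracepoint",
};

static const char *ev_name(size_t type)
{
	/* Range check first; the NULL check guards slots the initializer skipped. */
	if (type >= EV_MAX || !ev_names[type])
		return "unknown";
	return ev_names[type];
}

int main(void)
{
	printf("%s %s\n", ev_name(EV_TP), ev_name(42)); /* prints: tracepoint unknown */
	return 0;
}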
+static const char *const event_types[] = { + [PERF_TYPE_HARDWARE] = "hardware", + [PERF_TYPE_SOFTWARE] = "software", + [PERF_TYPE_TRACEPOINT] = "tracepoint", + [PERF_TYPE_HW_CACHE] = "hardware-cache", + [PERF_TYPE_RAW] = "raw", + [PERF_TYPE_BREAKPOINT] = "breakpoint", }; -const char *event_type(int type) +const char *event_type(size_t type) { - switch (type) { - case PERF_TYPE_HARDWARE: - return "hardware"; - - case PERF_TYPE_SOFTWARE: - return "software"; - - case PERF_TYPE_TRACEPOINT: - return "tracepoint"; + if (type >= PERF_TYPE_MAX) + return "unknown"; - case PERF_TYPE_HW_CACHE: - return "hardware-cache"; - - default: - break; - } - - return "unknown"; + return event_types[type]; } -static char *get_config_str(struct list_head *head_terms, int type_term) +static char *get_config_str(const struct parse_events_terms *head_terms, + enum parse_events__term_type type_term) { struct parse_events_term *term; if (!head_terms) return NULL; - list_for_each_entry(term, head_terms, list) + list_for_each_entry(term, &head_terms->terms, list) if (term->type_term == type_term) return term->val.str; return NULL; } -static char *get_config_metric_id(struct list_head *head_terms) +static char *get_config_metric_id(const struct parse_events_terms *head_terms) { return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_METRIC_ID); } -static char *get_config_name(struct list_head *head_terms) +static char *get_config_name(const struct parse_events_terms *head_terms) { return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME); } +static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head_terms, + bool fake_pmu) +{ + struct parse_events_term *term; + struct perf_cpu_map *cpus = NULL; + + if (!head_terms) + return NULL; + + list_for_each_entry(term, &head_terms->terms, list) { + struct perf_cpu_map *term_cpus; + + if (term->type_term != PARSE_EVENTS__TERM_TYPE_CPU) + continue; + + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { + term_cpus = perf_cpu_map__new_int(term->val.num); + } else { + struct perf_pmu *pmu = perf_pmus__find(term->val.str); + + if (pmu) { + term_cpus = pmu->is_core && perf_cpu_map__is_empty(pmu->cpus) + ? cpu_map__online() + : perf_cpu_map__get(pmu->cpus); + } else { + term_cpus = perf_cpu_map__new(term->val.str); + if (!term_cpus && fake_pmu) { + /* + * Assume the PMU string makes sense on a different + * machine and fake a value with all online CPUs. + */ + term_cpus = cpu_map__online(); + } + } + } + perf_cpu_map__merge(&cpus, term_cpus); + perf_cpu_map__put(term_cpus); + } + + return cpus; +} + /** * fix_raw - For each raw term see if there is an event (aka alias) in pmu that * matches the raw's string value. If the string value matches an @@ -190,43 +138,36 @@ static char *get_config_name(struct list_head *head_terms) * @config_terms: the list of terms that may contain a raw term. * @pmu: the PMU to scan for events from. 
*/ -static void fix_raw(struct list_head *config_terms, struct perf_pmu *pmu) +static void fix_raw(struct parse_events_terms *config_terms, struct perf_pmu *pmu) { struct parse_events_term *term; - list_for_each_entry(term, config_terms, list) { - struct perf_pmu_alias *alias; - bool matched = false; + list_for_each_entry(term, &config_terms->terms, list) { + u64 num; if (term->type_term != PARSE_EVENTS__TERM_TYPE_RAW) continue; - list_for_each_entry(alias, &pmu->aliases, list) { - if (!strcmp(alias->name, term->val.str)) { - free(term->config); - term->config = term->val.str; - term->type_val = PARSE_EVENTS__TERM_TYPE_NUM; - term->type_term = PARSE_EVENTS__TERM_TYPE_USER; - term->val.num = 1; - term->no_value = true; - matched = true; - break; - } - } - if (!matched) { - u64 num; - - free(term->config); - term->config = strdup("config"); - errno = 0; - num = strtoull(term->val.str + 1, NULL, 16); - assert(errno == 0); - free(term->val.str); + if (perf_pmu__have_event(pmu, term->val.str)) { + zfree(&term->config); + term->config = term->val.str; term->type_val = PARSE_EVENTS__TERM_TYPE_NUM; - term->type_term = PARSE_EVENTS__TERM_TYPE_CONFIG; - term->val.num = num; - term->no_value = false; + term->type_term = PARSE_EVENTS__TERM_TYPE_USER; + term->val.num = 1; + term->no_value = true; + continue; } + + zfree(&term->config); + term->config = strdup("config"); + errno = 0; + num = strtoull(term->val.str + 1, NULL, 16); + assert(errno == 0); + free(term->val.str); + term->type_val = PARSE_EVENTS__TERM_TYPE_NUM; + term->type_term = PARSE_EVENTS__TERM_TYPE_CONFIG; + term->val.num = num; + term->no_value = false; } } @@ -235,49 +176,104 @@ __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, bool init_attr, const char *name, const char *metric_id, struct perf_pmu *pmu, - struct list_head *config_terms, bool auto_merge_stats, - const char *cpu_list) + struct list_head *config_terms, struct evsel *first_wildcard_match, + struct perf_cpu_map *user_cpus, u64 alternate_hw_config) { struct evsel *evsel; - struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) : - cpu_list ? perf_cpu_map__new(cpu_list) : NULL; + bool is_pmu_core; + struct perf_cpu_map *cpus, *pmu_cpus; + bool has_user_cpus = !perf_cpu_map__is_empty(user_cpus); - if (pmu) - perf_pmu__warn_invalid_formats(pmu); + /* + * Ensure the first_wildcard_match's PMU matches that of the new event + * being added. Otherwise try to match with another event further down + * the evlist. 
+ */ + if (first_wildcard_match) { + struct evsel *pos = list_prev_entry(first_wildcard_match, core.node); - if (pmu && (attr->type == PERF_TYPE_RAW || attr->type >= PERF_TYPE_MAX)) { - perf_pmu__warn_invalid_config(pmu, attr->config, name, - PERF_PMU_FORMAT_VALUE_CONFIG, "config"); - perf_pmu__warn_invalid_config(pmu, attr->config1, name, - PERF_PMU_FORMAT_VALUE_CONFIG1, "config1"); - perf_pmu__warn_invalid_config(pmu, attr->config2, name, - PERF_PMU_FORMAT_VALUE_CONFIG2, "config2"); - perf_pmu__warn_invalid_config(pmu, attr->config3, name, - PERF_PMU_FORMAT_VALUE_CONFIG3, "config3"); + first_wildcard_match = NULL; + list_for_each_entry_continue(pos, list, core.node) { + if (perf_pmu__name_no_suffix_match(pos->pmu, pmu->name)) { + first_wildcard_match = pos; + break; + } + if (pos->pmu->is_core && (!pmu || pmu->is_core)) { + first_wildcard_match = pos; + break; + } + } } + + if (pmu) { + perf_pmu__warn_invalid_formats(pmu); + if (attr->type == PERF_TYPE_RAW || attr->type >= PERF_TYPE_MAX) { + perf_pmu__warn_invalid_config(pmu, attr->config, name, + PERF_PMU_FORMAT_VALUE_CONFIG, "config"); + perf_pmu__warn_invalid_config(pmu, attr->config1, name, + PERF_PMU_FORMAT_VALUE_CONFIG1, "config1"); + perf_pmu__warn_invalid_config(pmu, attr->config2, name, + PERF_PMU_FORMAT_VALUE_CONFIG2, "config2"); + perf_pmu__warn_invalid_config(pmu, attr->config3, name, + PERF_PMU_FORMAT_VALUE_CONFIG3, "config3"); + perf_pmu__warn_invalid_config(pmu, attr->config4, name, + PERF_PMU_FORMAT_VALUE_CONFIG4, "config4"); + } + } + /* + * If a PMU wasn't given, such as for legacy events, find now that + * warnings won't be generated. + */ + if (!pmu) + pmu = perf_pmus__find_by_attr(attr); + + if (pmu) { + is_pmu_core = pmu->is_core; + pmu_cpus = perf_cpu_map__get(pmu->cpus); + if (perf_cpu_map__is_empty(pmu_cpus)) { + if (perf_pmu__is_tool(pmu)) + pmu_cpus = tool_pmu__cpus(attr); + else + pmu_cpus = cpu_map__online(); + } + } else { + is_pmu_core = (attr->type == PERF_TYPE_HARDWARE || + attr->type == PERF_TYPE_HW_CACHE); + pmu_cpus = is_pmu_core ? cpu_map__online() : NULL; + } + + if (has_user_cpus) + cpus = perf_cpu_map__get(user_cpus); + else + cpus = perf_cpu_map__get(pmu_cpus); + if (init_attr) event_attr_init(attr); evsel = evsel__new_idx(attr, *idx); - if (!evsel) { - perf_cpu_map__put(cpus); - return NULL; + if (!evsel) + goto out_err; + + if (name) { + evsel->name = strdup(name); + if (!evsel->name) + goto out_err; + } + + if (metric_id) { + evsel->metric_id = strdup(metric_id); + if (!evsel->metric_id) + goto out_err; } (*idx)++; evsel->core.cpus = cpus; - evsel->core.own_cpus = perf_cpu_map__get(cpus); + evsel->core.pmu_cpus = pmu_cpus; evsel->core.requires_cpu = pmu ? pmu->is_uncore : false; - evsel->core.is_pmu_core = pmu ? pmu->is_core : false; - evsel->auto_merge_stats = auto_merge_stats; + evsel->core.is_pmu_core = is_pmu_core; evsel->pmu = pmu; - evsel->pmu_name = pmu && pmu->name ? 
strdup(pmu->name) : NULL; - - if (name) - evsel->name = strdup(name); - - if (metric_id) - evsel->metric_id = strdup(metric_id); + evsel->alternate_hw_config = alternate_hw_config; + evsel->first_wildcard_match = first_wildcard_match; if (config_terms) list_splice_init(config_terms, &evsel->config_terms); @@ -285,7 +281,17 @@ __add_event(struct list_head *list, int *idx, if (list) list_add_tail(&evsel->core.node, list); + if (has_user_cpus) + evsel__warn_user_requested_cpus(evsel, user_cpus); + return evsel; +out_err: + perf_cpu_map__put(cpus); + perf_cpu_map__put(pmu_cpus); + zfree(&evsel->name); + zfree(&evsel->metric_id); + free(evsel); + return NULL; } struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, @@ -294,41 +300,19 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, { return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name, metric_id, pmu, /*config_terms=*/NULL, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL); + /*first_wildcard_match=*/NULL, /*cpu_list=*/NULL, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX); } static int add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, const char *name, - const char *metric_id, struct list_head *config_terms) + const char *metric_id, struct list_head *config_terms, + u64 alternate_hw_config) { return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id, /*pmu=*/NULL, config_terms, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL) ? 0 : -ENOMEM; -} - -static int add_event_tool(struct list_head *list, int *idx, - enum perf_tool_event tool_event) -{ - struct evsel *evsel; - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_DUMMY, - }; - - evsel = __add_event(list, idx, &attr, /*init_attr=*/true, /*name=*/NULL, - /*metric_id=*/NULL, /*pmu=*/NULL, - /*config_terms=*/NULL, /*auto_merge_stats=*/false, - /*cpu_list=*/"0"); - if (!evsel) - return -ENOMEM; - evsel->tool_event = tool_event; - if (tool_event == PERF_TOOL_DURATION_TIME - || tool_event == PERF_TOOL_USER_TIME - || tool_event == PERF_TOOL_SYSTEM_TIME) { - free((char *)evsel->unit); - evsel->unit = strdup("ns"); - } - return 0; + /*first_wildcard_match=*/NULL, /*cpu_list=*/NULL, + alternate_hw_config) ? 
0 : -ENOMEM; } /** @@ -360,13 +344,13 @@ static int parse_aliases(const char *str, const char *const names[][EVSEL__MAX_A typedef int config_term_func_t(struct perf_event_attr *attr, struct parse_events_term *term, - struct parse_events_error *err); + struct parse_events_state *parse_state); static int config_term_common(struct perf_event_attr *attr, struct parse_events_term *term, - struct parse_events_error *err); + struct parse_events_state *parse_state); static int config_attr(struct perf_event_attr *attr, - struct list_head *head, - struct parse_events_error *err, + const struct parse_events_terms *head, + struct parse_events_state *parse_state, config_term_func_t config_term); /** @@ -446,60 +430,16 @@ bool parse_events__filter_pmu(const struct parse_events_state *parse_state, if (parse_state->pmu_filter == NULL) return false; - if (pmu->name == NULL) - return true; - return strcmp(parse_state->pmu_filter, pmu->name) != 0; } -int parse_events_add_cache(struct list_head *list, int *idx, const char *name, - struct parse_events_state *parse_state, - struct list_head *head_config) -{ - struct perf_pmu *pmu = NULL; - bool found_supported = false; - const char *config_name = get_config_name(head_config); - const char *metric_id = get_config_metric_id(head_config); - - /* Legacy cache events are only supported by core PMUs. */ - while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { - LIST_HEAD(config_terms); - struct perf_event_attr attr; - int ret; - - if (parse_events__filter_pmu(parse_state, pmu)) - continue; - - memset(&attr, 0, sizeof(attr)); - attr.type = PERF_TYPE_HW_CACHE; - - ret = parse_events__decode_legacy_cache(name, pmu->type, &attr.config); - if (ret) - return ret; - - found_supported = true; - - if (head_config) { - if (config_attr(&attr, head_config, parse_state->error, config_term_common)) - return -EINVAL; - - if (get_config_terms(head_config, &config_terms)) - return -ENOMEM; - } - - if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name, - metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL) == NULL) - return -ENOMEM; - - free_config_terms(&config_terms); - } - return found_supported ? 
0 : -EINVAL; -} +static int parse_events_add_pmu(struct parse_events_state *parse_state, + struct list_head *list, struct perf_pmu *pmu, + const struct parse_events_terms *const_parsed_terms, + struct evsel *first_wildcard_match); -#ifdef HAVE_LIBTRACEEVENT static void tracepoint_error(struct parse_events_error *e, int err, - const char *sys, const char *name) + const char *sys, const char *name, int column) { const char *str; char help[BUFSIZ]; @@ -526,18 +466,21 @@ static void tracepoint_error(struct parse_events_error *e, int err, } tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name); - parse_events_error__handle(e, 0, strdup(str), strdup(help)); + parse_events_error__handle(e, column, strdup(str), strdup(help)); } -static int add_tracepoint(struct list_head *list, int *idx, +static int add_tracepoint(struct parse_events_state *parse_state, + struct list_head *list, const char *sys_name, const char *evt_name, struct parse_events_error *err, - struct list_head *head_config) + struct parse_events_terms *head_config, void *loc_) { - struct evsel *evsel = evsel__newtp_idx(sys_name, evt_name, (*idx)++); + YYLTYPE *loc = loc_; + struct evsel *evsel = evsel__newtp_idx(sys_name, evt_name, parse_state->idx++, + !parse_state->fake_tp); if (IS_ERR(evsel)) { - tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name); + tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name, loc->first_column); return PTR_ERR(evsel); } @@ -553,358 +496,104 @@ static int add_tracepoint(struct list_head *list, int *idx, return 0; } -static int add_tracepoint_multi_event(struct list_head *list, int *idx, - const char *sys_name, const char *evt_name, - struct parse_events_error *err, - struct list_head *head_config) -{ - char *evt_path; - struct dirent *evt_ent; - DIR *evt_dir; - int ret = 0, found = 0; - - evt_path = get_events_file(sys_name); - if (!evt_path) { - tracepoint_error(err, errno, sys_name, evt_name); - return -1; - } - evt_dir = opendir(evt_path); - if (!evt_dir) { - put_events_file(evt_path); - tracepoint_error(err, errno, sys_name, evt_name); - return -1; - } - - while (!ret && (evt_ent = readdir(evt_dir))) { - if (!strcmp(evt_ent->d_name, ".") - || !strcmp(evt_ent->d_name, "..") - || !strcmp(evt_ent->d_name, "enable") - || !strcmp(evt_ent->d_name, "filter")) - continue; - - if (!strglobmatch(evt_ent->d_name, evt_name)) - continue; - - found++; - - ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name, - err, head_config); - } - - if (!found) { - tracepoint_error(err, ENOENT, sys_name, evt_name); - ret = -1; - } - - put_events_file(evt_path); - closedir(evt_dir); - return ret; -} - -static int add_tracepoint_event(struct list_head *list, int *idx, - const char *sys_name, const char *evt_name, - struct parse_events_error *err, - struct list_head *head_config) -{ - return strpbrk(evt_name, "*?") ? 
- add_tracepoint_multi_event(list, idx, sys_name, evt_name, - err, head_config) : - add_tracepoint(list, idx, sys_name, evt_name, - err, head_config); -} - -static int add_tracepoint_multi_sys(struct list_head *list, int *idx, - const char *sys_name, const char *evt_name, - struct parse_events_error *err, - struct list_head *head_config) -{ - struct dirent *events_ent; - DIR *events_dir; - int ret = 0; - - events_dir = tracing_events__opendir(); - if (!events_dir) { - tracepoint_error(err, errno, sys_name, evt_name); - return -1; - } - - while (!ret && (events_ent = readdir(events_dir))) { - if (!strcmp(events_ent->d_name, ".") - || !strcmp(events_ent->d_name, "..") - || !strcmp(events_ent->d_name, "enable") - || !strcmp(events_ent->d_name, "header_event") - || !strcmp(events_ent->d_name, "header_page")) - continue; - - if (!strglobmatch(events_ent->d_name, sys_name)) - continue; - - ret = add_tracepoint_event(list, idx, events_ent->d_name, - evt_name, err, head_config); - } - - closedir(events_dir); - return ret; -} -#endif /* HAVE_LIBTRACEEVENT */ - -#ifdef HAVE_LIBBPF_SUPPORT -struct __add_bpf_event_param { +struct add_tracepoint_multi_args { struct parse_events_state *parse_state; struct list_head *list; - struct list_head *head_config; + const char *sys_glob; + const char *evt_glob; + struct parse_events_error *err; + struct parse_events_terms *head_config; + YYLTYPE *loc; + int found; }; -static int add_bpf_event(const char *group, const char *event, int fd, struct bpf_object *obj, - void *_param) +static int add_tracepoint_multi_event_cb(void *state, const char *sys_name, const char *evt_name) { - LIST_HEAD(new_evsels); - struct __add_bpf_event_param *param = _param; - struct parse_events_state *parse_state = param->parse_state; - struct list_head *list = param->list; - struct evsel *pos; - int err; - /* - * Check if we should add the event, i.e. if it is a TP but starts with a '!', - * then don't add the tracepoint, this will be used for something else, like - * adding to a BPF_MAP_TYPE_PROG_ARRAY. 
- * - * See tools/perf/examples/bpf/augmented_raw_syscalls.c - */ - if (group[0] == '!') - return 0; - - pr_debug("add bpf event %s:%s and attach bpf program %d\n", - group, event, fd); + struct add_tracepoint_multi_args *args = state; + int ret; - err = parse_events_add_tracepoint(&new_evsels, &parse_state->idx, group, - event, parse_state->error, - param->head_config); - if (err) { - struct evsel *evsel, *tmp; + if (!strglobmatch(evt_name, args->evt_glob)) + return 0; - pr_debug("Failed to add BPF event %s:%s\n", - group, event); - list_for_each_entry_safe(evsel, tmp, &new_evsels, core.node) { - list_del_init(&evsel->core.node); - evsel__delete(evsel); - } - return err; - } - pr_debug("adding %s:%s\n", group, event); + args->found++; + ret = add_tracepoint(args->parse_state, args->list, sys_name, evt_name, + args->err, args->head_config, args->loc); - list_for_each_entry(pos, &new_evsels, core.node) { - pr_debug("adding %s:%s to %p\n", - group, event, pos); - pos->bpf_fd = fd; - pos->bpf_obj = obj; - } - list_splice(&new_evsels, list); - return 0; + return ret; } -int parse_events_load_bpf_obj(struct parse_events_state *parse_state, - struct list_head *list, - struct bpf_object *obj, - struct list_head *head_config) +static int add_tracepoint_multi_event(struct add_tracepoint_multi_args *args, const char *sys_name) { - int err; - char errbuf[BUFSIZ]; - struct __add_bpf_event_param param = {parse_state, list, head_config}; - static bool registered_unprobe_atexit = false; - - if (IS_ERR(obj) || !obj) { - snprintf(errbuf, sizeof(errbuf), - "Internal error: load bpf obj with NULL"); - err = -EINVAL; - goto errout; + if (strpbrk(args->evt_glob, "*?") == NULL) { + /* Not a glob. */ + args->found++; + return add_tracepoint(args->parse_state, args->list, sys_name, args->evt_glob, + args->err, args->head_config, args->loc); } - /* - * Register atexit handler before calling bpf__probe() so - * bpf__probe() don't need to unprobe probe points its already - * created when failure. 
- */ - if (!registered_unprobe_atexit) { - atexit(bpf__clear); - registered_unprobe_atexit = true; - } - - err = bpf__probe(obj); - if (err) { - bpf__strerror_probe(obj, err, errbuf, sizeof(errbuf)); - goto errout; - } - - err = bpf__load(obj); - if (err) { - bpf__strerror_load(obj, err, errbuf, sizeof(errbuf)); - goto errout; - } - - err = bpf__foreach_event(obj, add_bpf_event, &param); - if (err) { - snprintf(errbuf, sizeof(errbuf), - "Attach events in BPF object failed"); - goto errout; - } - - return 0; -errout: - parse_events_error__handle(parse_state->error, 0, - strdup(errbuf), strdup("(add -v to see detail)")); - return err; + return tp_pmu__for_each_tp_event(sys_name, args, add_tracepoint_multi_event_cb); } -static int -parse_events_config_bpf(struct parse_events_state *parse_state, - struct bpf_object *obj, - struct list_head *head_config) +static int add_tracepoint_multi_sys_cb(void *state, const char *sys_name) { - struct parse_events_term *term; - int error_pos; + struct add_tracepoint_multi_args *args = state; - if (!head_config || list_empty(head_config)) + if (!strglobmatch(sys_name, args->sys_glob)) return 0; - list_for_each_entry(term, head_config, list) { - int err; - - if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) { - parse_events_error__handle(parse_state->error, term->err_term, - strdup("Invalid config term for BPF object"), - NULL); - return -EINVAL; - } - - err = bpf__config_obj(obj, term, parse_state->evlist, &error_pos); - if (err) { - char errbuf[BUFSIZ]; - int idx; - - bpf__strerror_config_obj(obj, term, parse_state->evlist, - &error_pos, err, errbuf, - sizeof(errbuf)); - - if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE) - idx = term->err_val; - else - idx = term->err_term + error_pos; - - parse_events_error__handle(parse_state->error, idx, - strdup(errbuf), - strdup( -"Hint:\tValid config terms:\n" -" \tmap:[<arraymap>].value<indices>=[value]\n" -" \tmap:[<eventmap>].event<indices>=[event]\n" -"\n" -" \twhere <indices> is something like [0,3...5] or [all]\n" -" \t(add -v to see detail)")); - return err; - } - } - return 0; + return add_tracepoint_multi_event(args, sys_name); } -/* - * Split config terms: - * perf record -e bpf.c/call-graph=fp,map:array.value[0]=1/ ... - * 'call-graph=fp' is 'evt config', should be applied to each - * events in bpf.c. - * 'map:array.value[0]=1' is 'obj config', should be processed - * with parse_events_config_bpf. - * - * Move object config terms from the first list to obj_head_config. - */ -static void -split_bpf_config_terms(struct list_head *evt_head_config, - struct list_head *obj_head_config) -{ - struct parse_events_term *term, *temp; +static int add_tracepoint_multi_sys(struct parse_events_state *parse_state, + struct list_head *list, + const char *sys_glob, const char *evt_glob, + struct parse_events_error *err, + struct parse_events_terms *head_config, YYLTYPE *loc) +{ + struct add_tracepoint_multi_args args = { + .parse_state = parse_state, + .list = list, + .sys_glob = sys_glob, + .evt_glob = evt_glob, + .err = err, + .head_config = head_config, + .loc = loc, + .found = 0, + }; + int ret; - /* - * Currently, all possible user config term - * belong to bpf object. parse_events__is_hardcoded_term() - * happens to be a good flag. - * - * See parse_events_config_bpf() and - * config_term_tracepoint(). - */ - list_for_each_entry_safe(term, temp, evt_head_config, list) - if (!parse_events__is_hardcoded_term(term)) - list_move_tail(&term->list, obj_head_config); + if (strpbrk(sys_glob, "*?") == NULL) { + /* Not a glob.
*/ + ret = add_tracepoint_multi_event(&args, sys_glob); + } else { + ret = tp_pmu__for_each_tp_sys(&args, add_tracepoint_multi_sys_cb); + } + if (args.found == 0) { + tracepoint_error(err, ENOENT, sys_glob, evt_glob, loc->first_column); + return -ENOENT; + } + return ret; } -int parse_events_load_bpf(struct parse_events_state *parse_state, - struct list_head *list, - char *bpf_file_name, - bool source, - struct list_head *head_config) +size_t default_breakpoint_len(void) { - int err; - struct bpf_object *obj; - LIST_HEAD(obj_head_config); +#if defined(__i386__) + static int len; - if (head_config) - split_bpf_config_terms(head_config, &obj_head_config); + if (len == 0) { + struct perf_env env = {}; - obj = bpf__prepare_load(bpf_file_name, source); - if (IS_ERR(obj)) { - char errbuf[BUFSIZ]; - - err = PTR_ERR(obj); - - if (err == -ENOTSUP) - snprintf(errbuf, sizeof(errbuf), - "BPF support is not compiled"); - else - bpf__strerror_prepare_load(bpf_file_name, - source, - -err, errbuf, - sizeof(errbuf)); - - parse_events_error__handle(parse_state->error, 0, - strdup(errbuf), strdup("(add -v to see detail)")); - return err; + perf_env__init(&env); + len = perf_env__kernel_is_64_bit(&env) ? sizeof(u64) : sizeof(long); + perf_env__exit(&env); } - - err = parse_events_load_bpf_obj(parse_state, list, obj, head_config); - if (err) - return err; - err = parse_events_config_bpf(parse_state, obj, &obj_head_config); - - /* - * Caller doesn't know anything about obj_head_config, - * so combine them together again before returning. - */ - if (head_config) - list_splice_tail(&obj_head_config, head_config); - return err; -} -#else // HAVE_LIBBPF_SUPPORT -int parse_events_load_bpf_obj(struct parse_events_state *parse_state, - struct list_head *list __maybe_unused, - struct bpf_object *obj __maybe_unused, - struct list_head *head_config __maybe_unused) -{ - parse_events_error__handle(parse_state->error, 0, - strdup("BPF support is not compiled"), - strdup("Make sure libbpf-devel is available at build time.")); - return -ENOTSUP; -} - -int parse_events_load_bpf(struct parse_events_state *parse_state, - struct list_head *list __maybe_unused, - char *bpf_file_name __maybe_unused, - bool source __maybe_unused, - struct list_head *head_config __maybe_unused) -{ - parse_events_error__handle(parse_state->error, 0, - strdup("BPF support is not compiled"), - strdup("Make sure libbpf-devel is available at build time.")); - return -ENOTSUP; + return len; +#elif defined(__aarch64__) + return 4; +#else + return sizeof(long); +#endif } -#endif // HAVE_LIBBPF_SUPPORT static int parse_breakpoint_type(const char *type, struct perf_event_attr *attr) @@ -949,7 +638,7 @@ do { \ int parse_events_add_breakpoint(struct parse_events_state *parse_state, struct list_head *list, u64 addr, char *type, u64 len, - struct list_head *head_config __maybe_unused) + struct parse_events_terms *head_config) { struct perf_event_attr attr; LIST_HEAD(config_terms); @@ -964,7 +653,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state, /* Provide some defaults if len is not specified */ if (!len) { if (attr.bp_type == HW_BREAKPOINT_X) - len = sizeof(long); + len = default_breakpoint_len(); else len = HW_BREAKPOINT_LEN_4; } @@ -975,8 +664,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state, attr.sample_period = 1; if (head_config) { - if (config_attr(&attr, head_config, parse_state->error, - config_term_common)) + if (config_attr(&attr, head_config, parse_state, config_term_common)) return -EINVAL; if 
(get_config_terms(head_config, &config_terms)) @@ -986,12 +674,12 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state, name = get_config_name(head_config); return add_event(list, &parse_state->idx, &attr, name, /*mertic_id=*/NULL, - &config_terms); + &config_terms, /*alternate_hw_config=*/PERF_COUNT_HW_MAX); } static int check_type_val(struct parse_events_term *term, struct parse_events_error *err, - int type) + enum parse_events__term_val_type type) { if (type == term->type_val) return 0; @@ -1006,42 +694,53 @@ static int check_type_val(struct parse_events_term *term, return -EINVAL; } -/* - * Update according to parse-events.l - */ -static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { - [PARSE_EVENTS__TERM_TYPE_USER] = "<sysfs term>", - [PARSE_EVENTS__TERM_TYPE_CONFIG] = "config", - [PARSE_EVENTS__TERM_TYPE_CONFIG1] = "config1", - [PARSE_EVENTS__TERM_TYPE_CONFIG2] = "config2", - [PARSE_EVENTS__TERM_TYPE_CONFIG3] = "config3", - [PARSE_EVENTS__TERM_TYPE_NAME] = "name", - [PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD] = "period", - [PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ] = "freq", - [PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE] = "branch_type", - [PARSE_EVENTS__TERM_TYPE_TIME] = "time", - [PARSE_EVENTS__TERM_TYPE_CALLGRAPH] = "call-graph", - [PARSE_EVENTS__TERM_TYPE_STACKSIZE] = "stack-size", - [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit", - [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit", - [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack", - [PARSE_EVENTS__TERM_TYPE_MAX_EVENTS] = "nr", - [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite", - [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite", - [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config", - [PARSE_EVENTS__TERM_TYPE_PERCORE] = "percore", - [PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT] = "aux-output", - [PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size", - [PARSE_EVENTS__TERM_TYPE_METRIC_ID] = "metric-id", - [PARSE_EVENTS__TERM_TYPE_RAW] = "raw", - [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache", - [PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware", -}; - static bool config_term_shrinked; +const char *parse_events__term_type_str(enum parse_events__term_type term_type) +{ + /* + * Update according to parse-events.l + */ + static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { + [PARSE_EVENTS__TERM_TYPE_USER] = "<sysfs term>", + [PARSE_EVENTS__TERM_TYPE_CONFIG] = "config", + [PARSE_EVENTS__TERM_TYPE_CONFIG1] = "config1", + [PARSE_EVENTS__TERM_TYPE_CONFIG2] = "config2", + [PARSE_EVENTS__TERM_TYPE_CONFIG3] = "config3", + [PARSE_EVENTS__TERM_TYPE_CONFIG4] = "config4", + [PARSE_EVENTS__TERM_TYPE_NAME] = "name", + [PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD] = "period", + [PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ] = "freq", + [PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE] = "branch_type", + [PARSE_EVENTS__TERM_TYPE_TIME] = "time", + [PARSE_EVENTS__TERM_TYPE_CALLGRAPH] = "call-graph", + [PARSE_EVENTS__TERM_TYPE_STACKSIZE] = "stack-size", + [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit", + [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit", + [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack", + [PARSE_EVENTS__TERM_TYPE_MAX_EVENTS] = "nr", + [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite", + [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite", + [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config", + [PARSE_EVENTS__TERM_TYPE_PERCORE] = "percore", + [PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT] = "aux-output", + [PARSE_EVENTS__TERM_TYPE_AUX_ACTION] = "aux-action", + 
[PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size", + [PARSE_EVENTS__TERM_TYPE_METRIC_ID] = "metric-id", + [PARSE_EVENTS__TERM_TYPE_RAW] = "raw", + [PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG] = "legacy-hardware-config", + [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG] = "legacy-cache-config", + [PARSE_EVENTS__TERM_TYPE_CPU] = "cpu", + [PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV] = "ratio-to-prev", + }; + if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR) + return "unknown term"; + + return config_term_names[term_type]; +} + static bool -config_term_avail(int term_type, struct parse_events_error *err) +config_term_avail(enum parse_events__term_type term_type, struct parse_events_error *err) { char *err_str; @@ -1058,18 +757,40 @@ config_term_avail(int term_type, struct parse_events_error *err) case PARSE_EVENTS__TERM_TYPE_CONFIG1: case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_CONFIG3: + case PARSE_EVENTS__TERM_TYPE_CONFIG4: case PARSE_EVENTS__TERM_TYPE_NAME: case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: case PARSE_EVENTS__TERM_TYPE_PERCORE: + case PARSE_EVENTS__TERM_TYPE_CPU: return true; + case PARSE_EVENTS__TERM_TYPE_USER: + case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: + case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: + case PARSE_EVENTS__TERM_TYPE_TIME: + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: + case PARSE_EVENTS__TERM_TYPE_STACKSIZE: + case PARSE_EVENTS__TERM_TYPE_NOINHERIT: + case PARSE_EVENTS__TERM_TYPE_INHERIT: + case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: + case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: + case PARSE_EVENTS__TERM_TYPE_OVERWRITE: + case PARSE_EVENTS__TERM_TYPE_DRV_CFG: + case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: + case PARSE_EVENTS__TERM_TYPE_AUX_ACTION: + case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: + case PARSE_EVENTS__TERM_TYPE_RAW: + case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: default: if (!err) return false; /* term_type is validated so indexing is safe */ if (asprintf(&err_str, "'%s' is not usable in 'perf stat'", - config_term_names[term_type]) >= 0) + parse_events__term_type_str(term_type)) >= 0) parse_events_error__handle(err, -1, err_str, NULL); return false; } @@ -1082,12 +803,12 @@ void parse_events__shrink_config_terms(void) static int config_term_common(struct perf_event_attr *attr, struct parse_events_term *term, - struct parse_events_error *err) + struct parse_events_state *parse_state) { -#define CHECK_TYPE_VAL(type) \ -do { \ - if (check_type_val(term, err, PARSE_EVENTS__TERM_TYPE_ ## type)) \ - return -EINVAL; \ +#define CHECK_TYPE_VAL(type) \ +do { \ + if (check_type_val(term, parse_state->error, PARSE_EVENTS__TERM_TYPE_ ## type)) \ + return -EINVAL; \ } while (0) switch (term->type_term) { @@ -1107,6 +828,10 @@ do { \ CHECK_TYPE_VAL(NUM); attr->config3 = term->val.num; break; + case PARSE_EVENTS__TERM_TYPE_CONFIG4: + CHECK_TYPE_VAL(NUM); + attr->config4 = term->val.num; + break; case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: CHECK_TYPE_VAL(NUM); break; @@ -1118,7 +843,7 @@ do { \ if (strcmp(term->val.str, "no") && parse_branch_str(term->val.str, &attr->branch_sample_type)) { - parse_events_error__handle(err, term->err_val, + parse_events_error__handle(parse_state->error, term->err_val, strdup("invalid branch sample type"), NULL); return -EINVAL; @@ -1127,7 +852,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_TIME: CHECK_TYPE_VAL(NUM); if 
(term->val.num > 1) { - parse_events_error__handle(err, term->err_val, + parse_events_error__handle(parse_state->error, term->err_val, strdup("expected 0 or 1"), NULL); return -EINVAL; @@ -1169,7 +894,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_PERCORE: CHECK_TYPE_VAL(NUM); if ((unsigned int)term->val.num > 1) { - parse_events_error__handle(err, term->err_val, + parse_events_error__handle(parse_state->error, term->err_val, strdup("expected 0 or 1"), NULL); return -EINVAL; @@ -1178,19 +903,67 @@ do { \ case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: CHECK_TYPE_VAL(NUM); break; + case PARSE_EVENTS__TERM_TYPE_AUX_ACTION: + CHECK_TYPE_VAL(STR); + break; case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: CHECK_TYPE_VAL(NUM); if (term->val.num > UINT_MAX) { - parse_events_error__handle(err, term->err_val, + parse_events_error__handle(parse_state->error, term->err_val, strdup("too big"), NULL); return -EINVAL; } break; + case PARSE_EVENTS__TERM_TYPE_CPU: { + struct perf_cpu_map *map; + + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { + if (term->val.num >= (u64)cpu__max_present_cpu().cpu) { + parse_events_error__handle(parse_state->error, term->err_val, + strdup("too big"), + /*help=*/NULL); + return -EINVAL; + } + break; + } + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_STR); + if (perf_pmus__find(term->val.str) != NULL) + break; + + map = perf_cpu_map__new(term->val.str); + if (!map && !parse_state->fake_pmu) { + parse_events_error__handle(parse_state->error, term->err_val, + strdup("not a valid PMU or CPU number"), + /*help=*/NULL); + return -EINVAL; + } + perf_cpu_map__put(map); + break; + } + case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: + CHECK_TYPE_VAL(STR); + if (strtod(term->val.str, NULL) <= 0) { + parse_events_error__handle(parse_state->error, term->err_val, + strdup("zero or negative"), + NULL); + return -EINVAL; + } + if (errno == ERANGE) { + parse_events_error__handle(parse_state->error, term->err_val, + strdup("too big"), + NULL); + return -EINVAL; + } + break; + case PARSE_EVENTS__TERM_TYPE_DRV_CFG: + case PARSE_EVENTS__TERM_TYPE_USER: + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: default: - parse_events_error__handle(err, term->err_term, - strdup("unknown term"), - parse_events_formats_error_string(NULL)); + parse_events_error__handle(parse_state->error, term->err_term, + strdup(parse_events__term_type_str(term->type_term)), + parse_events_formats_error_string(NULL)); return -EINVAL; } @@ -1203,49 +976,72 @@ do { \ * if an invalid config term is provided for legacy events * (for example, instructions/badterm/...), which is confusing. */ - if (!config_term_avail(term->type_term, err)) + if (!config_term_avail(term->type_term, parse_state->error)) return -EINVAL; return 0; #undef CHECK_TYPE_VAL } -static int config_term_pmu(struct perf_event_attr *attr, - struct parse_events_term *term, - struct parse_events_error *err) +static bool check_pmu_is_core(__u32 type, const struct parse_events_term *term, + struct parse_events_error *err) { - if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) { - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + struct perf_pmu *pmu = NULL; - if (!pmu) { - char *err_str; + /* Avoid loading all PMUs with perf_pmus__find_by_type, just scan the core ones. 
*/ + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + if (pmu->type == type) + return true; + } + parse_events_error__handle(err, term->err_val, + strdup("needs a core PMU"), + NULL); + return false; +} - if (asprintf(&err_str, "Failed to find PMU for type %d", attr->type) >= 0) - parse_events_error__handle(err, term->err_term, - err_str, /*help=*/NULL); +static int config_term_pmu(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_state *parse_state) +{ + if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG) { + if (check_type_val(term, parse_state->error, PARSE_EVENTS__TERM_TYPE_NUM)) + return -EINVAL; + if (term->val.num >= PERF_COUNT_HW_MAX) { + parse_events_error__handle(parse_state->error, term->err_val, + strdup("too big"), + NULL); return -EINVAL; } - if (perf_pmu__supports_legacy_cache(pmu)) { - attr->type = PERF_TYPE_HW_CACHE; - return parse_events__decode_legacy_cache(term->config, pmu->type, - &attr->config); - } else - term->type_term = PARSE_EVENTS__TERM_TYPE_USER; + if (!check_pmu_is_core(attr->type, term, parse_state->error)) + return -EINVAL; + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)attr->type << PERF_PMU_TYPE_SHIFT; + attr->type = PERF_TYPE_HARDWARE; + return 0; } - if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) { - const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG) { + int cache_type, cache_op, cache_result; - if (!pmu) { - char *err_str; - - if (asprintf(&err_str, "Failed to find PMU for type %d", attr->type) >= 0) - parse_events_error__handle(err, term->err_term, - err_str, /*help=*/NULL); + if (check_type_val(term, parse_state->error, PARSE_EVENTS__TERM_TYPE_NUM)) + return -EINVAL; + cache_type = term->val.num & 0xFF; + cache_op = (term->val.num >> 8) & 0xFF; + cache_result = (term->val.num >> 16) & 0xFF; + if ((term->val.num & ~0xFFFFFF) || + cache_type >= PERF_COUNT_HW_CACHE_MAX || + cache_op >= PERF_COUNT_HW_CACHE_OP_MAX || + cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) { + parse_events_error__handle(parse_state->error, term->err_val, + strdup("too big"), + NULL); return -EINVAL; } - attr->type = PERF_TYPE_HARDWARE; + if (!check_pmu_is_core(attr->type, term, parse_state->error)) + return -EINVAL; attr->config = term->val.num; if (perf_pmus__supports_extended_type()) - attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + attr->config |= (__u64)attr->type << PERF_PMU_TYPE_SHIFT; + attr->type = PERF_TYPE_HW_CACHE; return 0; } if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER || @@ -1256,13 +1052,12 @@ static int config_term_pmu(struct perf_event_attr *attr, */ return 0; } - return config_term_common(attr, term, err); + return config_term_common(attr, term, parse_state); } -#ifdef HAVE_LIBTRACEEVENT static int config_term_tracepoint(struct perf_event_attr *attr, struct parse_events_term *term, - struct parse_events_error *err) + struct parse_events_state *parse_state) { switch (term->type_term) { case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: @@ -1274,37 +1069,55 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_OVERWRITE: case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: + case PARSE_EVENTS__TERM_TYPE_AUX_ACTION: case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: - return config_term_common(attr, term, err); + return config_term_common(attr, term, parse_state); + case 
PARSE_EVENTS__TERM_TYPE_USER: + case PARSE_EVENTS__TERM_TYPE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_CONFIG1: + case PARSE_EVENTS__TERM_TYPE_CONFIG2: + case PARSE_EVENTS__TERM_TYPE_CONFIG3: + case PARSE_EVENTS__TERM_TYPE_CONFIG4: + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_NAME: + case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: + case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: + case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: + case PARSE_EVENTS__TERM_TYPE_TIME: + case PARSE_EVENTS__TERM_TYPE_DRV_CFG: + case PARSE_EVENTS__TERM_TYPE_PERCORE: + case PARSE_EVENTS__TERM_TYPE_METRIC_ID: + case PARSE_EVENTS__TERM_TYPE_RAW: + case PARSE_EVENTS__TERM_TYPE_CPU: + case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: default: - if (err) { - parse_events_error__handle(err, term->err_term, - strdup("unknown term"), - strdup("valid terms: call-graph,stack-size\n")); - } + parse_events_error__handle(parse_state->error, term->err_term, + strdup(parse_events__term_type_str(term->type_term)), + strdup("valid terms: call-graph,stack-size\n") + ); return -EINVAL; } return 0; } -#endif static int config_attr(struct perf_event_attr *attr, - struct list_head *head, - struct parse_events_error *err, + const struct parse_events_terms *head, + struct parse_events_state *parse_state, config_term_func_t config_term) { struct parse_events_term *term; - list_for_each_entry(term, head, list) - if (config_term(attr, term, err)) + list_for_each_entry(term, &head->terms, list) + if (config_term(attr, term, parse_state)) return -EINVAL; return 0; } -static int get_config_terms(struct list_head *head_config, - struct list_head *head_terms __maybe_unused) +static int get_config_terms(const struct parse_events_terms *head_config, + struct list_head *head_terms) { #define ADD_CONFIG_TERM(__type, __weak) \ struct evsel_config_term *__t; \ @@ -1337,7 +1150,7 @@ do { \ struct parse_events_term *term; - list_for_each_entry(term, head_config, list) { + list_for_each_entry(term, &head_config->terms, list) { switch (term->type_term) { case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num, term->weak); @@ -1393,10 +1206,28 @@ do { \ ADD_CONFIG_TERM_VAL(AUX_OUTPUT, aux_output, term->val.num ? 1 : 0, term->weak); break; + case PARSE_EVENTS__TERM_TYPE_AUX_ACTION: + ADD_CONFIG_TERM_STR(AUX_ACTION, term->val.str, term->weak); + break; case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size, term->val.num, term->weak); break; + case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: + ADD_CONFIG_TERM_STR(RATIO_TO_PREV, term->val.str, term->weak); + break; + case PARSE_EVENTS__TERM_TYPE_USER: + case PARSE_EVENTS__TERM_TYPE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_CONFIG1: + case PARSE_EVENTS__TERM_TYPE_CONFIG2: + case PARSE_EVENTS__TERM_TYPE_CONFIG3: + case PARSE_EVENTS__TERM_TYPE_CONFIG4: + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_NAME: + case PARSE_EVENTS__TERM_TYPE_METRIC_ID: + case PARSE_EVENTS__TERM_TYPE_RAW: + case PARSE_EVENTS__TERM_TYPE_CPU: default: break; } @@ -1408,24 +1239,52 @@ do { \ * Add EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for * each bit of attr->config that the user has changed. 
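The config_term_pmu() rework above decodes the legacy-cache-config term itself: the 24-bit value packs cache type, op and result into successive bytes, and for core PMUs the PMU type is folded into the upper half of attr.config before attr.type is rewritten to the legacy type. A standalone sketch of the validation and encoding, with the *_MAX limits and the 32-bit PERF_PMU_TYPE_SHIFT inlined as assumptions from the perf UAPI:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed limits mirroring PERF_COUNT_HW_CACHE_{,OP_,RESULT_}MAX. */
#define CACHE_TYPE_MAX   7
#define CACHE_OP_MAX     3
#define CACHE_RESULT_MAX 2
#define PMU_TYPE_SHIFT   32     /* assumed value of PERF_PMU_TYPE_SHIFT */

static bool legacy_cache_config_valid(uint64_t config)
{
        unsigned int type   = config & 0xFF;
        unsigned int op     = (config >> 8) & 0xFF;
        unsigned int result = (config >> 16) & 0xFF;

        /* Reject bits above the three encoded bytes, then range-check each. */
        if (config & ~(uint64_t)0xFFFFFF)
                return false;
        return type < CACHE_TYPE_MAX && op < CACHE_OP_MAX &&
               result < CACHE_RESULT_MAX;
}

/* Fold a core PMU's type into the extended-type bits of the config. */
static uint64_t extend_config(uint64_t config, uint32_t pmu_type)
{
        return config | ((uint64_t)pmu_type << PMU_TYPE_SHIFT);
}

int main(void)
{
        uint64_t cfg = 0x10000; /* L1D / read / miss */

        printf("valid=%d config=%#llx\n", legacy_cache_config_valid(cfg),
               (unsigned long long)extend_config(cfg, 8));
        return 0;
}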
*/ -static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config, +static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head_config, struct list_head *head_terms) { struct parse_events_term *term; u64 bits = 0; int type; - list_for_each_entry(term, head_config, list) { + list_for_each_entry(term, &head_config->terms, list) { switch (term->type_term) { case PARSE_EVENTS__TERM_TYPE_USER: - type = perf_pmu__format_type(&pmu->format, term->config); + type = perf_pmu__format_type(pmu, term->config); if (type != PERF_PMU_FORMAT_VALUE_CONFIG) continue; - bits |= perf_pmu__format_bits(&pmu->format, term->config); + bits |= perf_pmu__format_bits(pmu, term->config); break; case PARSE_EVENTS__TERM_TYPE_CONFIG: bits = ~(u64)0; break; + case PARSE_EVENTS__TERM_TYPE_CONFIG1: + case PARSE_EVENTS__TERM_TYPE_CONFIG2: + case PARSE_EVENTS__TERM_TYPE_CONFIG3: + case PARSE_EVENTS__TERM_TYPE_CONFIG4: + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_NAME: + case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: + case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: + case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: + case PARSE_EVENTS__TERM_TYPE_TIME: + case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: + case PARSE_EVENTS__TERM_TYPE_STACKSIZE: + case PARSE_EVENTS__TERM_TYPE_NOINHERIT: + case PARSE_EVENTS__TERM_TYPE_INHERIT: + case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: + case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: + case PARSE_EVENTS__TERM_TYPE_OVERWRITE: + case PARSE_EVENTS__TERM_TYPE_DRV_CFG: + case PARSE_EVENTS__TERM_TYPE_PERCORE: + case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: + case PARSE_EVENTS__TERM_TYPE_AUX_ACTION: + case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: + case PARSE_EVENTS__TERM_TYPE_METRIC_ID: + case PARSE_EVENTS__TERM_TYPE_RAW: + case PARSE_EVENTS__TERM_TYPE_CPU: + case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: default: break; } @@ -1438,46 +1297,35 @@ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config, return 0; } -int parse_events_add_tracepoint(struct list_head *list, int *idx, +int parse_events_add_tracepoint(struct parse_events_state *parse_state, + struct list_head *list, const char *sys, const char *event, struct parse_events_error *err, - struct list_head *head_config) + struct parse_events_terms *head_config, void *loc_) { -#ifdef HAVE_LIBTRACEEVENT + YYLTYPE *loc = loc_; + if (head_config) { struct perf_event_attr attr; - if (config_attr(&attr, head_config, err, - config_term_tracepoint)) + if (config_attr(&attr, head_config, parse_state, config_term_tracepoint)) return -EINVAL; } - if (strpbrk(sys, "*?")) - return add_tracepoint_multi_sys(list, idx, sys, event, - err, head_config); - else - return add_tracepoint_event(list, idx, sys, event, - err, head_config); -#else - (void)list; - (void)idx; - (void)sys; - (void)event; - (void)head_config; - parse_events_error__handle(err, 0, strdup("unsupported tracepoint"), - strdup("libtraceevent is necessary for tracepoint support")); - return -1; -#endif + return add_tracepoint_multi_sys(parse_state, list, sys, event, + err, head_config, loc); } static int __parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, u32 type, u32 extended_type, - u64 config, struct list_head *head_config) + u64 config, const struct parse_events_terms *head_config, + struct evsel *first_wildcard_match) { struct perf_event_attr attr; 
LIST_HEAD(config_terms); const char *name, *metric_id; + struct perf_cpu_map *cpus; int ret; memset(&attr, 0, sizeof(attr)); @@ -1489,8 +1337,7 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state, } if (head_config) { - if (config_attr(&attr, head_config, parse_state->error, - config_term_common)) + if (config_attr(&attr, head_config, parse_state, config_term_common)) return -EINVAL; if (get_config_terms(head_config, &config_terms)) @@ -1499,9 +1346,11 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state, name = get_config_name(head_config); metric_id = get_config_metric_id(head_config); + cpus = get_config_cpu(head_config, parse_state->fake_pmu); ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name, - metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL) ? 0 : -ENOMEM; + metric_id, pmu, &config_terms, first_wildcard_match, + cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) ? 0 : -ENOMEM; + perf_cpu_map__put(cpus); free_config_terms(&config_terms); return ret; } @@ -1509,7 +1358,7 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state, int parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, u32 type, u64 config, - struct list_head *head_config, + const struct parse_events_terms *head_config, bool wildcard) { struct perf_pmu *pmu = NULL; @@ -1517,6 +1366,7 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, /* Wildcards on numeric values are only supported by core PMUs. */ if (wildcard && perf_pmus__supports_extended_type()) { + struct evsel *first_wildcard_match = NULL; while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { int ret; @@ -1526,22 +1376,20 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, ret = __parse_events_add_numeric(parse_state, list, pmu, type, pmu->type, - config, head_config); + config, head_config, + first_wildcard_match); if (ret) return ret; + if (first_wildcard_match == NULL) + first_wildcard_match = + container_of(list->prev, struct evsel, core.node); } if (found_supported) return 0; } return __parse_events_add_numeric(parse_state, list, perf_pmus__find_by_type(type), - type, /*extended_type=*/0, config, head_config); -} - -int parse_events_add_tool(struct parse_events_state *parse_state, - struct list_head *list, - int tool_event) -{ - return add_event_tool(list, &parse_state->idx, tool_event); + type, /*extended_type=*/0, config, head_config, + /*first_wildcard_match=*/NULL); } static bool config_term_percore(struct list_head *config_terms) @@ -1556,155 +1404,179 @@ static bool config_term_percore(struct list_head *config_terms) return false; } -int parse_events_add_pmu(struct parse_events_state *parse_state, - struct list_head *list, char *name, - struct list_head *head_config, - bool auto_merge_stats) +static int parse_events_add_pmu(struct parse_events_state *parse_state, + struct list_head *list, struct perf_pmu *pmu, + const struct parse_events_terms *const_parsed_terms, + struct evsel *first_wildcard_match) { + u64 alternate_hw_config = PERF_COUNT_HW_MAX; struct perf_event_attr attr; struct perf_pmu_info info; - struct perf_pmu *pmu; struct evsel *evsel; struct parse_events_error *err = parse_state->error; LIST_HEAD(config_terms); + struct parse_events_terms parsed_terms; + bool alias_rewrote_terms = false; + struct perf_cpu_map *term_cpu = NULL; - pmu = parse_state->fake_pmu ?: perf_pmus__find(name); - - if (verbose > 1 && !(pmu && pmu->selectable)) { - 
fprintf(stderr, "Attempting to add event pmu '%s' with '", - name); - if (head_config) { - struct parse_events_term *term; + if (verbose > 1) { + struct strbuf sb; - list_for_each_entry(term, head_config, list) { - fprintf(stderr, "%s,", term->config); - } + strbuf_init(&sb, /*hint=*/ 0); + if (pmu->selectable && const_parsed_terms && + list_empty(&const_parsed_terms->terms)) { + strbuf_addf(&sb, "%s//", pmu->name); + } else { + strbuf_addf(&sb, "%s/", pmu->name); + parse_events_terms__to_strbuf(const_parsed_terms, &sb); + strbuf_addch(&sb, '/'); } - fprintf(stderr, "' that may result in non-fatal errors\n"); + fprintf(stderr, "Attempt to add: %s\n", sb.buf); + strbuf_release(&sb); } - if (!pmu) { - char *err_str; - - if (asprintf(&err_str, - "Cannot find PMU `%s'. Missing kernel support?", - name) >= 0) - parse_events_error__handle(err, 0, err_str, NULL); - return -EINVAL; - } - if (head_config) - fix_raw(head_config, pmu); + memset(&attr, 0, sizeof(attr)); + if (pmu->perf_event_attr_init_default) + pmu->perf_event_attr_init_default(pmu, &attr); - if (pmu->default_config) { - memcpy(&attr, pmu->default_config, - sizeof(struct perf_event_attr)); - } else { - memset(&attr, 0, sizeof(attr)); - } attr.type = pmu->type; - if (!head_config) { + if (!const_parsed_terms || list_empty(&const_parsed_terms->terms)) { evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, /*name=*/NULL, /*metric_id=*/NULL, pmu, - /*config_terms=*/NULL, auto_merge_stats, - /*cpu_list=*/NULL); + /*config_terms=*/NULL, first_wildcard_match, + /*cpu_list=*/NULL, alternate_hw_config); return evsel ? 0 : -ENOMEM; } - if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, &info)) + parse_events_terms__init(&parsed_terms); + if (const_parsed_terms) { + int ret = parse_events_terms__copy(const_parsed_terms, &parsed_terms); + + if (ret) + return ret; + } + fix_raw(&parsed_terms, pmu); + + /* Configure attr/terms with a known PMU, this will set hardcoded terms. */ + if (config_attr(&attr, &parsed_terms, parse_state, config_term_pmu)) { + parse_events_terms__exit(&parsed_terms); return -EINVAL; + } + + /* Look for event names in the terms and rewrite into format based terms. */ + if (perf_pmu__check_alias(pmu, &parsed_terms, + &info, &alias_rewrote_terms, + &alternate_hw_config, err)) { + parse_events_terms__exit(&parsed_terms); + return -EINVAL; + } if (verbose > 1) { - fprintf(stderr, "After aliases, add event pmu '%s' with '", - name); - if (head_config) { - struct parse_events_term *term; + struct strbuf sb; - list_for_each_entry(term, head_config, list) { - fprintf(stderr, "%s,", term->config); - } - } - fprintf(stderr, "' that may result in non-fatal errors\n"); + strbuf_init(&sb, /*hint=*/ 0); + parse_events_terms__to_strbuf(&parsed_terms, &sb); + fprintf(stderr, "..after resolving event: %s/%s/\n", pmu->name, sb.buf); + strbuf_release(&sb); } - /* - * Configure hardcoded terms first, no need to check - * return value when called with fail == 0 ;) - */ - if (config_attr(&attr, head_config, parse_state->error, config_term_pmu)) + /* Configure attr/terms again if an alias was expanded. 
*/ + if (alias_rewrote_terms && + config_attr(&attr, &parsed_terms, parse_state, config_term_pmu)) { + parse_events_terms__exit(&parsed_terms); return -EINVAL; + } - if (get_config_terms(head_config, &config_terms)) + if (get_config_terms(&parsed_terms, &config_terms)) { + parse_events_terms__exit(&parsed_terms); return -ENOMEM; + } /* * When using default config, record which bits of attr->config were * changed by the user. */ - if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms)) + if (pmu->perf_event_attr_init_default && + get_config_chgs(pmu, &parsed_terms, &config_terms)) { + parse_events_terms__exit(&parsed_terms); return -ENOMEM; + } - if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { + /* Skip configuring hard coded terms that were applied by config_attr. */ + if (perf_pmu__config(pmu, &attr, &parsed_terms, /*apply_hardcoded=*/false, + parse_state->error)) { free_config_terms(&config_terms); + parse_events_terms__exit(&parsed_terms); return -EINVAL; } + term_cpu = get_config_cpu(&parsed_terms, parse_state->fake_pmu); evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, - get_config_name(head_config), - get_config_metric_id(head_config), pmu, - &config_terms, auto_merge_stats, /*cpu_list=*/NULL); - if (!evsel) + get_config_name(&parsed_terms), + get_config_metric_id(&parsed_terms), pmu, + &config_terms, first_wildcard_match, term_cpu, alternate_hw_config); + perf_cpu_map__put(term_cpu); + if (!evsel) { + parse_events_terms__exit(&parsed_terms); return -ENOMEM; + } if (evsel->name) evsel->use_config_name = true; evsel->percore = config_term_percore(&evsel->config_terms); - if (parse_state->fake_pmu) - return 0; - + parse_events_terms__exit(&parsed_terms); free((char *)evsel->unit); evsel->unit = strdup(info.unit); evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; + evsel->retirement_latency.mean = info.retirement_latency_mean; + evsel->retirement_latency.min = info.retirement_latency_min; + evsel->retirement_latency.max = info.retirement_latency_max; + return 0; } int parse_events_multi_pmu_add(struct parse_events_state *parse_state, - char *str, struct list_head *head, - struct list_head **listp) + const char *event_name, + const struct parse_events_terms *const_parsed_terms, + struct list_head **listp, void *loc_) { struct parse_events_term *term; struct list_head *list = NULL; - struct list_head *orig_head = NULL; struct perf_pmu *pmu = NULL; + YYLTYPE *loc = loc_; int ok = 0; - char *config; + const char *config; + struct parse_events_terms parsed_terms; + struct evsel *first_wildcard_match = NULL; *listp = NULL; - if (!head) { - head = malloc(sizeof(struct list_head)); - if (!head) - goto out_err; + parse_events_terms__init(&parsed_terms); + if (const_parsed_terms) { + int ret = parse_events_terms__copy(const_parsed_terms, &parsed_terms); - INIT_LIST_HEAD(head); + if (ret) + return ret; } - config = strdup(str); + + config = strdup(event_name); if (!config) goto out_err; if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - config, 1, false, NULL, - NULL) < 0) { - free(config); + config, /*num=*/1, /*novalue=*/true, + loc, /*loc_val=*/NULL) < 0) { + zfree(&config); goto out_err; } - list_add_tail(&term->list, head); + list_add_tail(&term->list, &parsed_terms.terms); /* Add it for all PMUs that support the alias */ list = malloc(sizeof(struct list_head)); @@ -1713,52 +1585,116 @@ int parse_events_multi_pmu_add(struct parse_events_state 
*parse_state, INIT_LIST_HEAD(list); - while ((pmu = perf_pmus__scan(pmu)) != NULL) { - struct perf_pmu_alias *alias; - bool auto_merge_stats; + while ((pmu = perf_pmus__scan_for_event(pmu, event_name)) != NULL) { if (parse_events__filter_pmu(parse_state, pmu)) continue; - auto_merge_stats = perf_pmu__auto_merge_stats(pmu); - - list_for_each_entry(alias, &pmu->aliases, list) { - if (!strcasecmp(alias->name, str)) { - parse_events_copy_term_list(head, &orig_head); - if (!parse_events_add_pmu(parse_state, list, - pmu->name, orig_head, - auto_merge_stats)) { - pr_debug("%s -> %s/%s/\n", str, - pmu->name, alias->str); - ok++; - } - parse_events_terms__delete(orig_head); - } + if (!perf_pmu__have_event(pmu, event_name)) + continue; + + if (!parse_events_add_pmu(parse_state, list, pmu, + &parsed_terms, first_wildcard_match)) { + struct strbuf sb; + + strbuf_init(&sb, /*hint=*/ 0); + parse_events_terms__to_strbuf(&parsed_terms, &sb); + pr_debug("%s -> %s/%s/\n", event_name, pmu->name, sb.buf); + strbuf_release(&sb); + ok++; } + if (first_wildcard_match == NULL) + first_wildcard_match = container_of(list->prev, struct evsel, core.node); } if (parse_state->fake_pmu) { - if (!parse_events_add_pmu(parse_state, list, str, head, - /*auto_merge_stats=*/true)) { - pr_debug("%s -> %s/%s/\n", str, "fake_pmu", str); + if (!parse_events_add_pmu(parse_state, list, perf_pmus__fake_pmu(), &parsed_terms, + first_wildcard_match)) { + struct strbuf sb; + + strbuf_init(&sb, /*hint=*/ 0); + parse_events_terms__to_strbuf(&parsed_terms, &sb); + pr_debug("%s -> fake/%s/\n", event_name, sb.buf); + strbuf_release(&sb); ok++; } } out_err: + parse_events_terms__exit(&parsed_terms); if (ok) *listp = list; else free(list); - parse_events_terms__delete(head); return ok ? 0 : -1; } -int parse_events__modifier_group(struct list_head *list, - char *event_mod) +int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state, + const char *event_or_pmu, + const struct parse_events_terms *const_parsed_terms, + struct list_head **listp, + void *loc_) { - return parse_events__modifier_event(list, event_mod, true); + YYLTYPE *loc = loc_; + struct perf_pmu *pmu; + int ok = 0; + char *help; + struct evsel *first_wildcard_match = NULL; + + *listp = malloc(sizeof(**listp)); + if (!*listp) + return -ENOMEM; + + INIT_LIST_HEAD(*listp); + + /* Attempt to add to list assuming event_or_pmu is a PMU name. */ + pmu = perf_pmus__find(event_or_pmu); + if (pmu && !parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms, + first_wildcard_match)) + return 0; + + if (parse_state->fake_pmu) { + if (!parse_events_add_pmu(parse_state, *listp, perf_pmus__fake_pmu(), + const_parsed_terms, + first_wildcard_match)) + return 0; + } + + pmu = NULL; + /* Failed to add, try wildcard expansion of event_or_pmu as a PMU name. */ + while ((pmu = perf_pmus__scan_matching_wildcard(pmu, event_or_pmu)) != NULL) { + + if (parse_events__filter_pmu(parse_state, pmu)) + continue; + + if (!parse_events_add_pmu(parse_state, *listp, pmu, + const_parsed_terms, + first_wildcard_match)) { + ok++; + parse_state->wild_card_pmus = true; + } + if (first_wildcard_match == NULL) { + first_wildcard_match = + container_of((*listp)->prev, struct evsel, core.node); + } + } + if (ok) + return 0; + + /* Failure to add, assume event_or_pmu is an event name. 
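The perf_pmus__scan_for_event() and perf_pmus__scan_matching_wildcard() loops above follow the same NULL-seeded iterator idiom as the older perf_pmus__scan(): pass NULL to get the first element, pass the previous result to get the next, and NULL marks the end. A minimal array-backed sketch of the idiom (the pmu struct here is a stand-in, not perf's):

#include <stdio.h>

struct pmu { const char *name; };

static struct pmu pmus[] = { {"cpu_core"}, {"cpu_atom"}, {"uncore_imc"} };

static struct pmu *pmu_scan(struct pmu *prev)
{
        size_t n = sizeof(pmus) / sizeof(pmus[0]);
        size_t next = prev ? (size_t)(prev - pmus) + 1 : 0;

        return next < n ? &pmus[next] : NULL;
}

int main(void)
{
        struct pmu *pmu = NULL;

        /* NULL seeds the scan; NULL return ends it. */
        while ((pmu = pmu_scan(pmu)) != NULL)
                printf("%s\n", pmu->name);
        return 0;
}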
*/ + zfree(listp); + if (!parse_events_multi_pmu_add(parse_state, event_or_pmu, + const_parsed_terms, listp, loc)) + return 0; + + if (asprintf(&help, "Unable to find PMU or event on a PMU of '%s'", event_or_pmu) < 0) + help = NULL; + parse_events_error__handle(parse_state->error, loc->first_column, + strdup("Bad event or PMU"), + help); + zfree(listp); + return -EINVAL; } void parse_events__set_leader(char *name, struct list_head *list) @@ -1772,210 +1708,151 @@ void parse_events__set_leader(char *name, struct list_head *list) leader = list_first_entry(list, struct evsel, core.node); __perf_evlist__set_leader(list, &leader->core); + zfree(&leader->group_name); leader->group_name = name; } -/* list_event is assumed to point to malloc'ed memory */ -void parse_events_update_lists(struct list_head *list_event, - struct list_head *list_all) +static int parse_events__modifier_list(struct parse_events_state *parse_state, + YYLTYPE *loc, + struct list_head *list, + struct parse_events_modifier mod, + bool group) { - /* - * Called for single event definition. Update the - * 'all event' list, and reinit the 'single event' - * list, for next event definition. - */ - list_splice_tail(list_event, list_all); - free(list_event); -} - -struct event_modifier { - int eu; - int ek; - int eh; - int eH; - int eG; - int eI; - int precise; - int precise_max; - int exclude_GH; - int sample_read; - int pinned; - int weak; - int exclusive; - int bpf_counter; -}; + struct evsel *evsel; -static int get_event_modifier(struct event_modifier *mod, char *str, - struct evsel *evsel) -{ - int eu = evsel ? evsel->core.attr.exclude_user : 0; - int ek = evsel ? evsel->core.attr.exclude_kernel : 0; - int eh = evsel ? evsel->core.attr.exclude_hv : 0; - int eH = evsel ? evsel->core.attr.exclude_host : 0; - int eG = evsel ? evsel->core.attr.exclude_guest : 0; - int eI = evsel ? evsel->core.attr.exclude_idle : 0; - int precise = evsel ? evsel->core.attr.precise_ip : 0; - int precise_max = 0; - int sample_read = 0; - int pinned = evsel ? evsel->core.attr.pinned : 0; - int exclusive = evsel ? evsel->core.attr.exclusive : 0; - - int exclude = eu | ek | eh; - int exclude_GH = evsel ? evsel->exclude_GH : 0; - int weak = 0; - int bpf_counter = 0; - - memset(mod, 0, sizeof(*mod)); - - while (*str) { - if (*str == 'u') { + if (!group && mod.weak) { + parse_events_error__handle(parse_state->error, loc->first_column, + strdup("Weak modifier is for use with groups"), NULL); + return -EINVAL; + } + + __evlist__for_each_entry(list, evsel) { + /* Translate modifiers into the equivalent evsel excludes. */ + int eu = group ? evsel->core.attr.exclude_user : 0; + int ek = group ? evsel->core.attr.exclude_kernel : 0; + int eh = group ? evsel->core.attr.exclude_hv : 0; + int eH = group ? evsel->core.attr.exclude_host : 0; + int eG = group ? 
evsel->core.attr.exclude_guest : 0; + int exclude = eu | ek | eh; + int exclude_GH = eG | eH; + + if (mod.user) { if (!exclude) exclude = eu = ek = eh = 1; - if (!exclude_GH && !perf_guest) - eG = 1; eu = 0; - } else if (*str == 'k') { + } + if (mod.kernel) { if (!exclude) exclude = eu = ek = eh = 1; ek = 0; - } else if (*str == 'h') { + } + if (mod.hypervisor) { if (!exclude) exclude = eu = ek = eh = 1; eh = 0; - } else if (*str == 'G') { + } + if (mod.guest) { if (!exclude_GH) exclude_GH = eG = eH = 1; eG = 0; - } else if (*str == 'H') { + } + if (mod.host) { if (!exclude_GH) exclude_GH = eG = eH = 1; eH = 0; - } else if (*str == 'I') { - eI = 1; - } else if (*str == 'p') { - precise++; - /* use of precise requires exclude_guest */ - if (!exclude_GH) + } + if (!exclude_GH && exclude_GH_default) { + if (perf_host) eG = 1; - } else if (*str == 'P') { - precise_max = 1; - } else if (*str == 'S') { - sample_read = 1; - } else if (*str == 'D') { - pinned = 1; - } else if (*str == 'e') { - exclusive = 1; - } else if (*str == 'W') { - weak = 1; - } else if (*str == 'b') { - bpf_counter = 1; - } else - break; - - ++str; - } - - /* - * precise ip: - * - * 0 - SAMPLE_IP can have arbitrary skid - * 1 - SAMPLE_IP must have constant skid - * 2 - SAMPLE_IP requested to have 0 skid - * 3 - SAMPLE_IP must have 0 skid - * - * See also PERF_RECORD_MISC_EXACT_IP - */ - if (precise > 3) - return -EINVAL; + else if (perf_guest) + eH = 1; + } - mod->eu = eu; - mod->ek = ek; - mod->eh = eh; - mod->eH = eH; - mod->eG = eG; - mod->eI = eI; - mod->precise = precise; - mod->precise_max = precise_max; - mod->exclude_GH = exclude_GH; - mod->sample_read = sample_read; - mod->pinned = pinned; - mod->weak = weak; - mod->bpf_counter = bpf_counter; - mod->exclusive = exclusive; + evsel->core.attr.exclude_user = eu; + evsel->core.attr.exclude_kernel = ek; + evsel->core.attr.exclude_hv = eh; + evsel->core.attr.exclude_host = eH; + evsel->core.attr.exclude_guest = eG; + evsel->exclude_GH = exclude_GH; + /* Simple modifiers copied to the evsel. */ + if (mod.precise) { + u8 precise = evsel->core.attr.precise_ip + mod.precise; + /* + * precise ip: + * + * 0 - SAMPLE_IP can have arbitrary skid + * 1 - SAMPLE_IP must have constant skid + * 2 - SAMPLE_IP requested to have 0 skid + * 3 - SAMPLE_IP must have 0 skid + * + * See also PERF_RECORD_MISC_EXACT_IP + */ + if (precise > 3) { + char *help; + + if (asprintf(&help, + "Maximum combined precise value is 3, adding precision to \"%s\"", + evsel__name(evsel)) > 0) { + parse_events_error__handle(parse_state->error, + loc->first_column, + help, NULL); + } + return -EINVAL; + } + evsel->core.attr.precise_ip = precise; + } + if (mod.precise_max) + evsel->precise_max = 1; + if (mod.non_idle) + evsel->core.attr.exclude_idle = 1; + if (mod.sample_read) + evsel->sample_read = 1; + if (mod.pinned && evsel__is_group_leader(evsel)) + evsel->core.attr.pinned = 1; + if (mod.exclusive && evsel__is_group_leader(evsel)) + evsel->core.attr.exclusive = 1; + if (mod.weak) + evsel->weak_group = true; + if (mod.bpf) + evsel->bpf_counter = true; + if (mod.retire_lat) + evsel->retire_lat = true; + if (mod.dont_regroup) + evsel->dont_regroup = true; + } return 0; } -/* - * Basic modifier sanity check to validate it contains only one - * instance of any modifier (apart from 'p') present. 
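The modifier rework above adds the parsed count of 'p' modifiers onto the evsel's existing precise_ip and rejects combined totals above 3, since precise_ip is a two-bit request level (0 = arbitrary skid up to 3 = must have zero skid). A standalone sketch of that accounting:

#include <stdio.h>

/* Returns the new precise_ip level, or -1 if the combined value overflows
 * the 0..3 range that perf_event_attr.precise_ip can express. */
static int apply_precise(unsigned int current, const char *mods)
{
        unsigned int precise = current;

        for (const char *p = mods; *p; p++)
                if (*p == 'p')
                        precise++;
        if (precise > 3)
                return -1;      /* maximum combined precision is 3 */
        return (int)precise;
}

int main(void)
{
        printf("%d\n", apply_precise(0, "pp")); /* 2 */
        printf("%d\n", apply_precise(2, "pp")); /* -1: 4 exceeds the field */
        return 0;
}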
- */ -static int check_modifier(char *str) +int parse_events__modifier_group(struct parse_events_state *parse_state, void *loc, + struct list_head *list, + struct parse_events_modifier mod) { - char *p = str; - - /* The sizeof includes 0 byte as well. */ - if (strlen(str) > (sizeof("ukhGHpppPSDIWeb") - 1)) - return -1; - - while (*p) { - if (*p != 'p' && strchr(p + 1, *p)) - return -1; - p++; - } - - return 0; + return parse_events__modifier_list(parse_state, loc, list, mod, /*group=*/true); } -int parse_events__modifier_event(struct list_head *list, char *str, bool add) +int parse_events__modifier_event(struct parse_events_state *parse_state, void *loc, + struct list_head *list, + struct parse_events_modifier mod) { - struct evsel *evsel; - struct event_modifier mod; - - if (str == NULL) - return 0; - - if (check_modifier(str)) - return -EINVAL; - - if (!add && get_event_modifier(&mod, str, NULL)) - return -EINVAL; - - __evlist__for_each_entry(list, evsel) { - if (add && get_event_modifier(&mod, str, evsel)) - return -EINVAL; - - evsel->core.attr.exclude_user = mod.eu; - evsel->core.attr.exclude_kernel = mod.ek; - evsel->core.attr.exclude_hv = mod.eh; - evsel->core.attr.precise_ip = mod.precise; - evsel->core.attr.exclude_host = mod.eH; - evsel->core.attr.exclude_guest = mod.eG; - evsel->core.attr.exclude_idle = mod.eI; - evsel->exclude_GH = mod.exclude_GH; - evsel->sample_read = mod.sample_read; - evsel->precise_max = mod.precise_max; - evsel->weak_group = mod.weak; - evsel->bpf_counter = mod.bpf_counter; - - if (evsel__is_group_leader(evsel)) { - evsel->core.attr.pinned = mod.pinned; - evsel->core.attr.exclusive = mod.exclusive; - } - } - - return 0; + return parse_events__modifier_list(parse_state, loc, list, mod, /*group=*/false); } -int parse_events_name(struct list_head *list, const char *name) +int parse_events__set_default_name(struct list_head *list, char *name) { struct evsel *evsel; + bool used_name = false; __evlist__for_each_entry(list, evsel) { - if (!evsel->name) - evsel->name = strdup(name); + if (!evsel->name) { + evsel->name = used_name ? strdup(name) : name; + used_name = true; + if (!evsel->name) + return -ENOMEM; + } } - + if (!used_name) + free(name); return 0; } @@ -2007,7 +1884,7 @@ static int parse_events__scanner(const char *str, /* * parse event config string, return a list of event terms. */ -int parse_events_terms(struct list_head *terms, const char *str) +int parse_events_terms(struct parse_events_terms *terms, const char *str) { struct parse_events_state parse_state = { .terms = NULL, @@ -2016,14 +1893,10 @@ int parse_events_terms(struct list_head *terms, const char *str) int ret; ret = parse_events__scanner(str, &parse_state); + if (!ret) + list_splice(&parse_state.terms->terms, &terms->terms); - if (!ret) { - list_splice(parse_state.terms, terms); - zfree(&parse_state.terms); - return 0; - } - - parse_events_terms__delete(parse_state.terms); + zfree(&parse_state.terms); return ret; } @@ -2089,8 +1962,8 @@ static int evsel__compute_group_pmu_name(struct evsel *evsel, } } } - /* Assign the actual name taking care that the fake PMU lacks a name. */ - evsel->group_pmu_name = strdup(group_pmu_name ?: "fake"); + /* Record computed name. */ + evsel->group_pmu_name = strdup(group_pmu_name); return evsel->group_pmu_name ? 
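parse_events__set_default_name() above transfers ownership of the caller's string: the first unnamed event takes it directly, later unnamed events receive strdup() copies, and the string is freed when no event needed it. A standalone sketch of that ownership rule, using a plain array in place of the evlist:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ev { char *name; };

/* Takes ownership of 'name': used at most once directly, copied after. */
static int set_default_name(struct ev *evs, size_t n, char *name)
{
        int used = 0;

        for (size_t i = 0; i < n; i++) {
                if (evs[i].name)
                        continue;
                evs[i].name = used ? strdup(name) : name;
                used = 1;
                if (!evs[i].name)
                        return -1;
        }
        if (!used)
                free(name);     /* nobody needed it, avoid the leak */
        return 0;
}

int main(void)
{
        struct ev evs[2] = { { NULL }, { strdup("cycles") } };

        set_default_name(evs, 2, strdup("my_event"));
        printf("%s %s\n", evs[0].name, evs[1].name);
        return 0;
}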
0 : -ENOMEM; } @@ -2109,59 +1982,82 @@ static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct li int *force_grouped_idx = _fg_idx; int lhs_sort_idx, rhs_sort_idx, ret; const char *lhs_pmu_name, *rhs_pmu_name; - bool lhs_has_group, rhs_has_group; /* - * First sort by grouping/leader. Read the leader idx only if the evsel - * is part of a group, by default ungrouped events will be sorted - * relative to grouped events based on where the first ungrouped event - * occurs. If both events don't have a group we want to fall-through to - * the arch specific sorting, that can reorder and fix things like - * Intel's topdown events. + * Get the indexes of the 2 events to sort. If the events are + * in groups then the leader's index is used otherwise the + * event's index is used. An index may be forced for events that + * must be in the same group, namely Intel topdown events. */ - if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) { - lhs_has_group = true; - lhs_sort_idx = lhs_core->leader->idx; + if (lhs->dont_regroup) { + lhs_sort_idx = lhs_core->idx; + } else if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)) { + lhs_sort_idx = *force_grouped_idx; } else { - lhs_has_group = false; - lhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs) - ? *force_grouped_idx - : lhs_core->idx; - } - if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) { - rhs_has_group = true; - rhs_sort_idx = rhs_core->leader->idx; + bool lhs_has_group = lhs_core->leader != lhs_core || lhs_core->nr_members > 1; + + lhs_sort_idx = lhs_has_group ? lhs_core->leader->idx : lhs_core->idx; + } + if (rhs->dont_regroup) { + rhs_sort_idx = rhs_core->idx; + } else if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)) { + rhs_sort_idx = *force_grouped_idx; } else { - rhs_has_group = false; - rhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs) - ? *force_grouped_idx - : rhs_core->idx; + bool rhs_has_group = rhs_core->leader != rhs_core || rhs_core->nr_members > 1; + + rhs_sort_idx = rhs_has_group ? rhs_core->leader->idx : rhs_core->idx; } + /* If the indices differ then respect the insertion order. */ if (lhs_sort_idx != rhs_sort_idx) return lhs_sort_idx - rhs_sort_idx; - /* Group by PMU if there is a group. Groups can't span PMUs. */ - if (lhs_has_group && rhs_has_group) { - lhs_pmu_name = lhs->group_pmu_name; - rhs_pmu_name = rhs->group_pmu_name; - ret = strcmp(lhs_pmu_name, rhs_pmu_name); - if (ret) - return ret; - } + /* + * Ignoring forcing, lhs_sort_idx == rhs_sort_idx so lhs and rhs should + * be in the same group. Events in the same group need to be ordered by + * their grouping PMU name as the group will be broken to ensure only + * events on the same PMU are programmed together. + * + * With forcing the lhs_sort_idx == rhs_sort_idx shows that one or both + * events are being forced to be at force_group_index. If only one event + * is being forced then the other event is the group leader of the group + * we're trying to force the event into. Ensure for the force grouped + * case that the PMU name ordering is also respected. + */ + lhs_pmu_name = lhs->group_pmu_name; + rhs_pmu_name = rhs->group_pmu_name; + ret = strcmp(lhs_pmu_name, rhs_pmu_name); + if (ret) + return ret; - /* Architecture specific sorting. */ + /* + * Architecture specific sorting, by default sort events in the same + * group with the same PMU by their insertion index. On Intel topdown + * constraints must be adhered to - slots first, etc. 
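evlist__cmp() above orders events first by the chosen insertion/leader index and then by group PMU name, so regrouping never produces a group spanning PMUs; arch_evlist__cmp() breaks the remaining ties (for instance Intel topdown's slots-first constraint). A sketch of the two-level ordering, with qsort standing in for the kernel's list_sort:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ev { int sort_idx; const char *pmu; const char *name; };

static int ev_cmp(const void *a, const void *b)
{
        const struct ev *l = a, *r = b;

        /* Primary key: insertion/leader index. */
        if (l->sort_idx != r->sort_idx)
                return l->sort_idx - r->sort_idx;
        /* Secondary key: PMU name, so groups never span PMUs. */
        return strcmp(l->pmu, r->pmu);
}

int main(void)
{
        struct ev evs[] = {
                {0, "cpu_core", "cycles"},
                {0, "cpu_atom", "cycles"},
                {1, "cpu_core", "instructions"},
        };

        qsort(evs, 3, sizeof(evs[0]), ev_cmp);
        for (int i = 0; i < 3; i++)
                printf("%d %s/%s/\n", evs[i].sort_idx, evs[i].pmu, evs[i].name);
        return 0;
}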
+ */ return arch_evlist__cmp(lhs, rhs); } +int __weak arch_evlist__add_required_events(struct list_head *list __always_unused) +{ + return 0; +} + static int parse_events__sort_events_and_fix_groups(struct list_head *list) { int idx = 0, force_grouped_idx = -1; struct evsel *pos, *cur_leader = NULL; struct perf_evsel *cur_leaders_grp = NULL; - bool idx_changed = false, cur_leader_force_grouped = false; + bool idx_changed = false; int orig_num_leaders = 0, num_leaders = 0; int ret; + struct evsel *force_grouped_leader = NULL; + bool last_event_was_forced_leader = false; + + /* On x86 topdown metrics events require a slots event. */ + ret = arch_evlist__add_required_events(list); + if (ret) + return ret; /* * Compute index to insert ungrouped events at. Place them where the @@ -2184,10 +2080,13 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list) */ pos->core.idx = idx++; - /* Remember an index to sort all forced grouped events together to. */ - if (force_grouped_idx == -1 && pos == pos_leader && pos->core.nr_members < 2 && - arch_evsel__must_be_in_group(pos)) - force_grouped_idx = pos->core.idx; + /* + * Remember an index to sort all forced grouped events + * together to. Use the group leader as some events + * must appear first within the group. + */ + if (force_grouped_idx == -1 && arch_evsel__must_be_in_group(pos)) + force_grouped_idx = pos_leader->core.idx; } /* Sort events. */ @@ -2199,10 +2098,10 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list) */ idx = 0; list_for_each_entry(pos, list, core.node) { - const struct evsel *pos_leader = evsel__leader(pos); + struct evsel *pos_leader = evsel__leader(pos); const char *pos_pmu_name = pos->group_pmu_name; const char *cur_leader_pmu_name; - bool pos_force_grouped = force_grouped_idx != -1 && + bool pos_force_grouped = force_grouped_idx != -1 && !pos->dont_regroup && arch_evsel__must_be_in_group(pos); /* Reset index and nr_members. */ @@ -2215,31 +2114,65 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list) * Set the group leader respecting the given groupings and that * groups can't span PMUs. */ - if (!cur_leader) - cur_leader = pos; - + if (!cur_leader || pos->dont_regroup) { + cur_leader = pos->dont_regroup ? pos_leader : pos; + cur_leaders_grp = &cur_leader->core; + if (pos_force_grouped) + force_grouped_leader = pos; + } cur_leader_pmu_name = cur_leader->group_pmu_name; - if ((cur_leaders_grp != pos->core.leader && - (!pos_force_grouped || !cur_leader_force_grouped)) || - strcmp(cur_leader_pmu_name, pos_pmu_name)) { - /* Event is for a different group/PMU than last. */ + if (strcmp(cur_leader_pmu_name, pos_pmu_name)) { + /* PMU changed so the group/leader must change. */ cur_leader = pos; - /* - * Remember the leader's group before it is overwritten, - * so that later events match as being in the same - * group. - */ cur_leaders_grp = pos->core.leader; + if (pos_force_grouped && force_grouped_leader == NULL) + force_grouped_leader = pos; + } else if (cur_leaders_grp != pos->core.leader) { + bool split_even_if_last_leader_was_forced = true; + /* - * Avoid forcing events into groups with events that - * don't need to be in the group. + * Event is for a different group. If the last event was + * the forced group leader then subsequent group events + * and forced events should be in the same group. 
If + * there are no other forced group events then the + * forced group leader wasn't really being forced into a + * group, it just set arch_evsel__must_be_in_group, and + * we don't want the group to split here. */ - cur_leader_force_grouped = pos_force_grouped; + if (force_grouped_idx != -1 && last_event_was_forced_leader) { + struct evsel *pos2 = pos; + /* + * Search the whole list as the group leaders + * aren't currently valid. + */ + list_for_each_entry_continue(pos2, list, core.node) { + if (pos->core.leader == pos2->core.leader && + arch_evsel__must_be_in_group(pos2)) { + split_even_if_last_leader_was_forced = false; + break; + } + } + } + if (!last_event_was_forced_leader || split_even_if_last_leader_was_forced) { + if (pos_force_grouped) { + if (force_grouped_leader) { + cur_leader = force_grouped_leader; + cur_leaders_grp = force_grouped_leader->core.leader; + } else { + cur_leader = force_grouped_leader = pos; + cur_leaders_grp = &pos->core; + } + } else { + cur_leader = pos; + cur_leaders_grp = pos->core.leader; + } + } } if (pos_leader != cur_leader) { /* The leader changed so update it. */ evsel__set_leader(pos, cur_leader); } + last_event_was_forced_leader = (force_grouped_leader == pos); } list_for_each_entry(pos, list, core.node) { struct evsel *pos_leader = evsel__leader(pos); @@ -2252,16 +2185,16 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list) } int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter, - struct parse_events_error *err, struct perf_pmu *fake_pmu, - bool warn_if_reordered) + struct parse_events_error *err, bool fake_pmu, + bool warn_if_reordered, bool fake_tp) { struct parse_events_state parse_state = { .list = LIST_HEAD_INIT(parse_state.list), .idx = evlist->core.nr_entries, .error = err, - .evlist = evlist, .stoken = PE_START_EVENTS, .fake_pmu = fake_pmu, + .fake_tp = fake_tp, .pmu_filter = pmu_filter, .match_legacy_cache_terms = true, }; @@ -2278,14 +2211,23 @@ int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filte if (ret2 < 0) return ret; - if (ret2 && warn_if_reordered && !parse_state.wild_card_pmus) - pr_warning("WARNING: events were regrouped to match PMUs\n"); - /* * Add list to the evlist even with errors to allow callers to clean up. */ evlist__splice_list_tail(evlist, &parse_state.list); + if (ret2 && warn_if_reordered && !parse_state.wild_card_pmus) { + pr_warning("WARNING: events were regrouped to match PMUs\n"); + + if (verbose > 0) { + struct strbuf sb = STRBUF_INIT; + + evlist__uniquify_evsel_names(evlist, &stat_config); + evlist__format_evsels(evlist, &sb, 2048); + pr_debug("evlist after sorting/fixing: '%s'\n", sb.buf); + strbuf_release(&sb); + } + } if (!ret) { struct evsel *last; @@ -2310,54 +2252,59 @@ int parse_event(struct evlist *evlist, const char *str) parse_events_error__init(&err); ret = parse_events(evlist, str, &err); + if (ret && verbose > 0) + parse_events_error__print(&err, str); parse_events_error__exit(&err); return ret; } +struct parse_events_error_entry { + /** @list: The list the error is part of. 
*/ + struct list_head list; + /** @idx: index in the parsed string */ + int idx; + /** @str: string to display at the index */ + char *str; + /** @help: optional help string */ + char *help; +}; + void parse_events_error__init(struct parse_events_error *err) { - bzero(err, sizeof(*err)); + INIT_LIST_HEAD(&err->list); } void parse_events_error__exit(struct parse_events_error *err) { - zfree(&err->str); - zfree(&err->help); - zfree(&err->first_str); - zfree(&err->first_help); + struct parse_events_error_entry *pos, *tmp; + + list_for_each_entry_safe(pos, tmp, &err->list, list) { + zfree(&pos->str); + zfree(&pos->help); + list_del_init(&pos->list); + free(pos); + } } void parse_events_error__handle(struct parse_events_error *err, int idx, char *str, char *help) { + struct parse_events_error_entry *entry; + if (WARN(!str || !err, "WARNING: failed to provide error string or struct\n")) goto out_free; - switch (err->num_errors) { - case 0: - err->idx = idx; - err->str = str; - err->help = help; - break; - case 1: - err->first_idx = err->idx; - err->idx = idx; - err->first_str = err->str; - err->str = str; - err->first_help = err->help; - err->help = help; - break; - default: - pr_debug("Multiple errors dropping message: %s (%s)\n", - err->str, err->help); - free(err->str); - err->str = str; - free(err->help); - err->help = help; - break; + + entry = zalloc(sizeof(*entry)); + if (!entry) { + pr_err("Failed to allocate memory for event parsing error: %s (%s)\n", + str, help ?: "<no help>"); + goto out_free; } - err->num_errors++; + entry->idx = idx; + entry->str = str; + entry->help = help; + list_add(&entry->list, &err->list); return; - out_free: free(str); free(help); @@ -2427,19 +2374,34 @@ static void __parse_events_error__print(int err_idx, const char *err_str, } } -void parse_events_error__print(struct parse_events_error *err, +void parse_events_error__print(const struct parse_events_error *err, const char *event) { - if (!err->num_errors) - return; + struct parse_events_error_entry *pos; + bool first = true; - __parse_events_error__print(err->idx, err->str, err->help, event); + list_for_each_entry(pos, &err->list, list) { + if (!first) + fputs("\n", stderr); + __parse_events_error__print(pos->idx, pos->str, pos->help, event); + first = false; + } +} + +/* + * In the list of errors err, do any of the error strings (str) contain the + * given needle string? + */ +bool parse_events_error__contains(const struct parse_events_error *err, + const char *needle) +{ + struct parse_events_error_entry *pos; - if (err->num_errors > 1) { - fputs("\nInitial error:\n", stderr); - __parse_events_error__print(err->first_idx, err->first_str, - err->first_help, event); + list_for_each_entry(pos, &err->list, list) { + if (strstr(pos->str, needle) != NULL) + return true; } + return false; } #undef MAX_WIDTH @@ -2453,7 +2415,8 @@ int parse_events_option(const struct option *opt, const char *str, parse_events_error__init(&err); ret = __parse_events(*args->evlistp, str, args->pmu_filter, &err, - /*fake_pmu=*/NULL, /*warn_if_reordered=*/true); + /*fake_pmu=*/false, /*warn_if_reordered=*/true, + /*fake_tp=*/false); if (ret) { parse_events_error__print(&err, str); @@ -2519,12 +2482,17 @@ foreach_evsel_in_last_glob(struct evlist *evlist, return 0; } +/* Will a tracepoint filter work for str or should a BPF filter be used? 
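The error handling above moves from a fixed pair of error slots to an unbounded list of parse_events_error_entry records that are walked at print time and freed on exit, so no message is dropped. A standalone sketch of the same bookkeeping, using a plain singly linked list instead of the kernel's list_head:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct err_entry {
        struct err_entry *next;
        int idx;                /* column in the parsed string */
        char *str;              /* message to display at idx */
};

static void err_add(struct err_entry **head, int idx, const char *str)
{
        struct err_entry *e = calloc(1, sizeof(*e));

        if (!e)
                return;
        e->str = strdup(str);
        if (!e->str) {
                free(e);
                return;
        }
        e->idx = idx;
        e->next = *head;        /* push to front, like list_add() */
        *head = e;
}

static void err_print_and_free(struct err_entry *head)
{
        while (head) {
                struct err_entry *next = head->next;

                fprintf(stderr, "error at %d: %s\n", head->idx, head->str);
                free(head->str);
                free(head);
                head = next;
        }
}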
*/ +static bool is_possible_tp_filter(const char *str) +{ + return strstr(str, "uid") == NULL; +} + static int set_filter(struct evsel *evsel, const void *arg) { const char *str = arg; - bool found = false; int nr_addr_filters = 0; - struct perf_pmu *pmu = NULL; + struct perf_pmu *pmu; if (evsel == NULL) { fprintf(stderr, @@ -2532,7 +2500,7 @@ static int set_filter(struct evsel *evsel, const void *arg) return -1; } - if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) { + if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT && is_possible_tp_filter(str)) { if (evsel__append_tp_filter(evsel, str) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); @@ -2542,16 +2510,11 @@ static int set_filter(struct evsel *evsel, const void *arg) return 0; } - while ((pmu = perf_pmus__scan(pmu)) != NULL) - if (pmu->type == evsel->core.attr.type) { - found = true; - break; - } - - if (found) + pmu = evsel__find_pmu(evsel); + if (pmu) { perf_pmu__scan_file(pmu, "nr_addr_filters", "%d", &nr_addr_filters); - + } if (!nr_addr_filters) return perf_bpf_filter__parse(&evsel->bpf_filters, str); @@ -2573,6 +2536,30 @@ int parse_filter(const struct option *opt, const char *str, (const void *)str); } +int parse_uid_filter(struct evlist *evlist, uid_t uid) +{ + struct option opt = { + .value = &evlist, + }; + char buf[128]; + int ret; + + snprintf(buf, sizeof(buf), "uid == %d", uid); + ret = parse_filter(&opt, buf, /*unset=*/0); + if (ret) { + if (use_browser >= 1) { + /* + * Use ui__warning so a pop up appears above the + * underlying BPF error message. + */ + ui__warning("Failed to add UID filtering that uses BPF filtering.\n"); + } else { + fprintf(stderr, "Failed to add UID filtering that uses BPF filtering.\n"); + } + } + return ret; +} + static int add_exclude_perf_filter(struct evsel *evsel, const void *arg __maybe_unused) { @@ -2641,7 +2628,8 @@ static int new_term(struct parse_events_term **_term, } int parse_events_term__num(struct parse_events_term **term, - int type_term, char *config, u64 num, + enum parse_events__term_type type_term, + const char *config, u64 num, bool no_value, void *loc_term_, void *loc_val_) { @@ -2651,17 +2639,18 @@ int parse_events_term__num(struct parse_events_term **term, struct parse_events_term temp = { .type_val = PARSE_EVENTS__TERM_TYPE_NUM, .type_term = type_term, - .config = config ? : strdup(config_term_names[type_term]), + .config = config ? : strdup(parse_events__term_type_str(type_term)), .no_value = no_value, .err_term = loc_term ? loc_term->first_column : 0, .err_val = loc_val ? loc_val->first_column : 0, }; - return new_term(term, &temp, NULL, num); + return new_term(term, &temp, /*str=*/NULL, num); } int parse_events_term__str(struct parse_events_term **term, - int type_term, char *config, char *str, + enum parse_events__term_type type_term, + char *config, char *str, void *loc_term_, void *loc_val_) { YYLTYPE *loc_term = loc_term_; @@ -2675,49 +2664,44 @@ int parse_events_term__str(struct parse_events_term **term, .err_val = loc_val ? 
loc_val->first_column : 0, }; - return new_term(term, &temp, str, 0); + return new_term(term, &temp, str, /*num=*/0); } int parse_events_term__term(struct parse_events_term **term, - int term_lhs, int term_rhs, + enum parse_events__term_type term_lhs, + enum parse_events__term_type term_rhs, void *loc_term, void *loc_val) { return parse_events_term__str(term, term_lhs, NULL, - strdup(config_term_names[term_rhs]), + strdup(parse_events__term_type_str(term_rhs)), loc_term, loc_val); } int parse_events_term__clone(struct parse_events_term **new, - struct parse_events_term *term) + const struct parse_events_term *term) { char *str; - struct parse_events_term temp = { - .type_val = term->type_val, - .type_term = term->type_term, - .config = NULL, - .err_term = term->err_term, - .err_val = term->err_val, - }; + struct parse_events_term temp = *term; + temp.used = false; if (term->config) { temp.config = strdup(term->config); if (!temp.config) return -ENOMEM; } if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) - return new_term(new, &temp, NULL, term->val.num); + return new_term(new, &temp, /*str=*/NULL, term->val.num); str = strdup(term->val.str); - if (!str) + if (!str) { + zfree(&temp.config); return -ENOMEM; - return new_term(new, &temp, str, 0); + } + return new_term(new, &temp, str, /*num=*/0); } void parse_events_term__delete(struct parse_events_term *term) { - if (term->array.nr_ranges) - zfree(&term->array.ranges); - if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) zfree(&term->val.str); @@ -2725,61 +2709,89 @@ void parse_events_term__delete(struct parse_events_term *term) free(term); } -int parse_events_copy_term_list(struct list_head *old, - struct list_head **new) +static int parse_events_terms__copy(const struct parse_events_terms *src, + struct parse_events_terms *dest) { - struct parse_events_term *term, *n; - int ret; - - if (!old) { - *new = NULL; - return 0; - } + struct parse_events_term *term; - *new = malloc(sizeof(struct list_head)); - if (!*new) - return -ENOMEM; - INIT_LIST_HEAD(*new); + list_for_each_entry (term, &src->terms, list) { + struct parse_events_term *n; + int ret; - list_for_each_entry (term, old, list) { ret = parse_events_term__clone(&n, term); if (ret) return ret; - list_add_tail(&n->list, *new); + + list_add_tail(&n->list, &dest->terms); } return 0; } -void parse_events_terms__purge(struct list_head *terms) +void parse_events_terms__init(struct parse_events_terms *terms) +{ + INIT_LIST_HEAD(&terms->terms); +} + +void parse_events_terms__exit(struct parse_events_terms *terms) { struct parse_events_term *term, *h; - list_for_each_entry_safe(term, h, terms, list) { + list_for_each_entry_safe(term, h, &terms->terms, list) { list_del_init(&term->list); parse_events_term__delete(term); } } -void parse_events_terms__delete(struct list_head *terms) +void parse_events_terms__delete(struct parse_events_terms *terms) { if (!terms) return; - parse_events_terms__purge(terms); + parse_events_terms__exit(terms); free(terms); } -void parse_events__clear_array(struct parse_events_array *a) +static int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct strbuf *sb) { - zfree(&a->ranges); -} + struct parse_events_term *term; + bool first = true; -void parse_events_evlist_error(struct parse_events_state *parse_state, - int idx, const char *str) -{ - if (!parse_state->error) - return; + if (!terms) + return 0; - parse_events_error__handle(parse_state->error, idx, strdup(str), NULL); + list_for_each_entry(term, &terms->terms, list) { + int ret; + + if 
(!first) { + ret = strbuf_addch(sb, ','); + if (ret < 0) + return ret; + } + first = false; + + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) + if (term->no_value) { + assert(term->val.num == 1); + ret = strbuf_addf(sb, "%s", term->config); + } else + ret = strbuf_addf(sb, "%s=%#"PRIx64, term->config, term->val.num); + else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) { + if (term->config) { + ret = strbuf_addf(sb, "%s=", term->config); + if (ret < 0) + return ret; + } else if ((unsigned int)term->type_term < __PARSE_EVENTS__TERM_TYPE_NR) { + ret = strbuf_addf(sb, "%s=", + parse_events__term_type_str(term->type_term)); + if (ret < 0) + return ret; + } + assert(!term->no_value); + ret = strbuf_addf(sb, "%s", term->val.str); + } + if (ret < 0) + return ret; + } + return 0; } static void config_terms_list(char *buf, size_t buf_sz) @@ -2789,7 +2801,7 @@ static void config_terms_list(char *buf, size_t buf_sz) buf[0] = '\0'; for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) { - const char *name = config_term_names[i]; + const char *name = parse_events__term_type_str(i); if (!config_term_avail(i, NULL)) continue; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index b0eb95f93e9c..3577ab213730 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -10,6 +10,7 @@ #include <linux/types.h> #include <linux/perf_event.h> #include <string.h> +#include <sys/types.h> struct evsel; struct evlist; @@ -17,8 +18,9 @@ struct parse_events_error; struct option; struct perf_pmu; +struct strbuf; -const char *event_type(int type); +const char *event_type(size_t type); /* Arguments encoded in opt->value. */ struct parse_events_option_args { @@ -29,34 +31,35 @@ int parse_events_option(const struct option *opt, const char *str, int unset); int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset); __attribute__((nonnull(1, 2, 4))) int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter, - struct parse_events_error *error, struct perf_pmu *fake_pmu, - bool warn_if_reordered); + struct parse_events_error *error, bool fake_pmu, + bool warn_if_reordered, bool fake_tp); __attribute__((nonnull(1, 2, 3))) static inline int parse_events(struct evlist *evlist, const char *str, struct parse_events_error *err) { - return __parse_events(evlist, str, /*pmu_filter=*/NULL, err, /*fake_pmu=*/NULL, - /*warn_if_reordered=*/true); + return __parse_events(evlist, str, /*pmu_filter=*/NULL, err, /*fake_pmu=*/false, + /*warn_if_reordered=*/true, /*fake_tp=*/false); } int parse_event(struct evlist *evlist, const char *str); -int parse_events_terms(struct list_head *terms, const char *str); int parse_filter(const struct option *opt, const char *str, int unset); +int parse_uid_filter(struct evlist *evlist, uid_t uid); int exclude_perf(const struct option *opt, const char *arg, int unset); -enum { +enum parse_events__term_val_type { PARSE_EVENTS__TERM_TYPE_NUM, PARSE_EVENTS__TERM_TYPE_STR, }; -enum { +enum parse_events__term_type { PARSE_EVENTS__TERM_TYPE_USER, PARSE_EVENTS__TERM_TYPE_CONFIG, PARSE_EVENTS__TERM_TYPE_CONFIG1, PARSE_EVENTS__TERM_TYPE_CONFIG2, PARSE_EVENTS__TERM_TYPE_CONFIG3, + PARSE_EVENTS__TERM_TYPE_CONFIG4, PARSE_EVENTS__TERM_TYPE_NAME, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ, @@ -73,151 +76,185 @@ enum { PARSE_EVENTS__TERM_TYPE_DRV_CFG, PARSE_EVENTS__TERM_TYPE_PERCORE, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT, + PARSE_EVENTS__TERM_TYPE_AUX_ACTION, 
PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE, PARSE_EVENTS__TERM_TYPE_METRIC_ID, PARSE_EVENTS__TERM_TYPE_RAW, - PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, - PARSE_EVENTS__TERM_TYPE_HARDWARE, - __PARSE_EVENTS__TERM_TYPE_NR, -}; - -struct parse_events_array { - size_t nr_ranges; - struct { - unsigned int start; - size_t length; - } *ranges; + PARSE_EVENTS__TERM_TYPE_CPU, + PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV, + PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG, + PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG, +#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG + 1) }; struct parse_events_term { - char *config; - struct parse_events_array array; + /** @list: The term list the term is a part of. */ + struct list_head list; + /** + * @config: The left-hand side of a term assignment, so the term + * "event=8" would have the config be "event" + */ + const char *config; + /** + * @val: The right-hand side of a term assignment that can either be a + * string or a number depending on type_val. + */ union { char *str; u64 num; } val; - int type_val; - int type_term; - struct list_head list; - bool used; - bool no_value; - - /* error string indexes for within parsed string */ + /** @type_val: The union variable in val to be used for the term. */ + enum parse_events__term_val_type type_val; + /** + * @type_term: A predefined term type or PARSE_EVENTS__TERM_TYPE_USER + * when not inbuilt. + */ + enum parse_events__term_type type_term; + /** + * @err_term: The column index of the term from parsing, used during + * error output. + */ int err_term; + /** + * @err_val: The column index of the val from parsing, used during error + * output. + */ int err_val; - - /* Coming from implicit alias */ + /** @used: Was the term used during parameterized-eval. */ + bool used; + /** + * @weak: A term from the sysfs or json encoding of an event that + * shouldn't override terms coming from the command line. + */ bool weak; + /** + * @no_value: Is there no value. If a numeric term has no value then the + * value is assumed to be 1. An event name also has no value. + */ + bool no_value; }; struct parse_events_error { - int num_errors; /* number of errors encountered */ - int idx; /* index in the parsed string */ - char *str; /* string to display at the index */ - char *help; /* optional help string */ - int first_idx;/* as above, but for the first encountered error */ - char *first_str; - char *first_help; + /** @list: The head of a list of errors. */ + struct list_head list; +}; + +/* A wrapper around a list of terms for the sake of better type safety. */ +struct parse_events_terms { + struct list_head terms; }; struct parse_events_state { + /* The list parsed events are placed on. */ struct list_head list; + /* The updated index used by entries as they are added. */ int idx; + /* Error information. */ struct parse_events_error *error; - struct evlist *evlist; - struct list_head *terms; + /* Holds returned terms for term parsing. */ + struct parse_events_terms *terms; + /* Start token. */ int stoken; - struct perf_pmu *fake_pmu; + /* Use the fake PMU marker for testing. */ + bool fake_pmu; + /* Skip actual tracepoint processing for testing. */ + bool fake_tp; /* If non-null, when wildcard matching only match the given PMU. */ const char *pmu_filter; /* Should PE_LEGACY_NAME tokens be generated for config terms? */ bool match_legacy_cache_terms; + /* Were multiple PMUs scanned to find events? 
*/ bool wild_card_pmus; }; +const char *parse_events__term_type_str(enum parse_events__term_type term_type); + bool parse_events__filter_pmu(const struct parse_events_state *parse_state, const struct perf_pmu *pmu); void parse_events__shrink_config_terms(void); int parse_events__is_hardcoded_term(struct parse_events_term *term); int parse_events_term__num(struct parse_events_term **term, - int type_term, char *config, u64 num, + enum parse_events__term_type type_term, + const char *config, u64 num, bool novalue, void *loc_term, void *loc_val); int parse_events_term__str(struct parse_events_term **term, - int type_term, char *config, char *str, + enum parse_events__term_type type_term, + char *config, char *str, void *loc_term, void *loc_val); int parse_events_term__term(struct parse_events_term **term, - int term_lhs, int term_rhs, + enum parse_events__term_type term_lhs, + enum parse_events__term_type term_rhs, void *loc_term, void *loc_val); int parse_events_term__clone(struct parse_events_term **new, - struct parse_events_term *term); + const struct parse_events_term *term); void parse_events_term__delete(struct parse_events_term *term); -void parse_events_terms__delete(struct list_head *terms); -void parse_events_terms__purge(struct list_head *terms); -void parse_events__clear_array(struct parse_events_array *a); -int parse_events__modifier_event(struct list_head *list, char *str, bool add); -int parse_events__modifier_group(struct list_head *list, char *event_mod); -int parse_events_name(struct list_head *list, const char *name); -int parse_events_add_tracepoint(struct list_head *list, int *idx, + +void parse_events_terms__delete(struct parse_events_terms *terms); +void parse_events_terms__init(struct parse_events_terms *terms); +void parse_events_terms__exit(struct parse_events_terms *terms); +int parse_events_terms(struct parse_events_terms *terms, const char *str); + +struct parse_events_modifier { + u8 precise; /* Number of repeated 'p' for precision. 
*/ + bool precise_max : 1; /* 'P' */ + bool non_idle : 1; /* 'I' */ + bool sample_read : 1; /* 'S' */ + bool pinned : 1; /* 'D' */ + bool exclusive : 1; /* 'e' */ + bool weak : 1; /* 'W' */ + bool bpf : 1; /* 'b' */ + bool user : 1; /* 'u' */ + bool kernel : 1; /* 'k' */ + bool hypervisor : 1; /* 'h' */ + bool guest : 1; /* 'G' */ + bool host : 1; /* 'H' */ + bool retire_lat : 1; /* 'R' */ + bool dont_regroup : 1; /* 'X' */ +}; + +int parse_events__modifier_event(struct parse_events_state *parse_state, void *loc, + struct list_head *list, struct parse_events_modifier mod); +int parse_events__modifier_group(struct parse_events_state *parse_state, void *loc, + struct list_head *list, struct parse_events_modifier mod); +int parse_events__set_default_name(struct list_head *list, char *name); +int parse_events_add_tracepoint(struct parse_events_state *parse_state, + struct list_head *list, const char *sys, const char *event, struct parse_events_error *error, - struct list_head *head_config); -int parse_events_load_bpf(struct parse_events_state *parse_state, - struct list_head *list, - char *bpf_file_name, - bool source, - struct list_head *head_config); -/* Provide this function for perf test */ -struct bpf_object; -int parse_events_load_bpf_obj(struct parse_events_state *parse_state, - struct list_head *list, - struct bpf_object *obj, - struct list_head *head_config); + struct parse_events_terms *head_config, void *loc); int parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, u32 type, u64 config, - struct list_head *head_config, + const struct parse_events_terms *head_config, bool wildcard); -int parse_events_add_tool(struct parse_events_state *parse_state, - struct list_head *list, - int tool_event); -int parse_events_add_cache(struct list_head *list, int *idx, const char *name, - struct parse_events_state *parse_state, - struct list_head *head_config); int parse_events__decode_legacy_cache(const char *name, int pmu_type, __u64 *config); int parse_events_add_breakpoint(struct parse_events_state *parse_state, struct list_head *list, u64 addr, char *type, u64 len, - struct list_head *head_config); -int parse_events_add_pmu(struct parse_events_state *parse_state, - struct list_head *list, char *name, - struct list_head *head_config, - bool auto_merge_stats); + struct parse_events_terms *head_config); struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, const char *name, const char *metric_id, struct perf_pmu *pmu); int parse_events_multi_pmu_add(struct parse_events_state *parse_state, - char *str, - struct list_head *head_config, - struct list_head **listp); + const char *event_name, + const struct parse_events_terms *const_parsed_terms, + struct list_head **listp, void *loc); -int parse_events_copy_term_list(struct list_head *old, - struct list_head **new); +int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state, + const char *event_or_pmu, + const struct parse_events_terms *const_parsed_terms, + struct list_head **listp, + void *loc_); void parse_events__set_leader(char *name, struct list_head *list); -void parse_events_update_lists(struct list_head *list_event, - struct list_head *list_all); -void parse_events_evlist_error(struct parse_events_state *parse_state, - int idx, const char *str); struct event_symbol { const char *symbol; const char *alias; }; -extern struct event_symbol event_symbols_hw[]; -extern struct event_symbol event_symbols_sw[]; char *parse_events_formats_error_string(char 
*additional_terms); @@ -225,9 +262,10 @@ void parse_events_error__init(struct parse_events_error *err); void parse_events_error__exit(struct parse_events_error *err); void parse_events_error__handle(struct parse_events_error *err, int idx, char *str, char *help); -void parse_events_error__print(struct parse_events_error *err, +void parse_events_error__print(const struct parse_events_error *err, const char *event); - +bool parse_events_error__contains(const struct parse_events_error *err, + const char *needle); #ifdef HAVE_LIBELF_SUPPORT /* * If the probe point starts with '%', @@ -247,4 +285,6 @@ static inline bool is_sdt_event(char *str __maybe_unused) } #endif /* HAVE_LIBELF_SUPPORT */ +size_t default_breakpoint_len(void); + #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 99335ec586ae..251ce4321878 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -5,92 +5,72 @@ %option stack %option bison-locations %option yylineno -%option reject +%option noyywrap %{ #include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> #include "parse-events.h" #include "parse-events-bison.h" -#include "evsel.h" char *parse_events_get_text(yyscan_t yyscanner); YYSTYPE *parse_events_get_lval(yyscan_t yyscanner); +int parse_events_get_column(yyscan_t yyscanner); +int parse_events_get_leng(yyscan_t yyscanner); -static int __value(YYSTYPE *yylval, char *str, int base, int token) +static int get_column(yyscan_t scanner) { - u64 num; - - errno = 0; - num = strtoull(str, NULL, base); - if (errno) - return PE_ERROR; - - yylval->num = num; - return token; + return parse_events_get_column(scanner) - parse_events_get_leng(scanner); } -static int value(yyscan_t scanner, int base) +static int value(struct parse_events_state *parse_state, yyscan_t scanner, int base) { YYSTYPE *yylval = parse_events_get_lval(scanner); char *text = parse_events_get_text(scanner); + u64 num; - return __value(yylval, text, base, PE_VALUE); -} + errno = 0; + num = strtoull(text, NULL, base); + if (errno) { + struct parse_events_error *error = parse_state->error; + char *help = NULL; -static int str(yyscan_t scanner, int token) -{ - YYSTYPE *yylval = parse_events_get_lval(scanner); - char *text = parse_events_get_text(scanner); + if (asprintf(&help, "Bad base %d number \"%s\"", base, text) > 0) + parse_events_error__handle(error, get_column(scanner), help , NULL); - if (text[0] != '\'') { - yylval->str = strdup(text); - } else { - /* - * If a text tag specified on the command line - * contains opening single quite ' then it is - * expected that the tag ends with single quote - * as well, like this: - * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\' - * quotes need to be escaped to bypass shell - * processing. - */ - yylval->str = strndup(&text[1], strlen(text) - 2); + return PE_ERROR; } - return token; + yylval->num = num; + return PE_VALUE; } -static int lc_str(yyscan_t scanner, const struct parse_events_state *state) +static int str(yyscan_t scanner, int token) { - return str(scanner, state->match_legacy_cache_terms ? 
PE_LEGACY_CACHE : PE_NAME); -} + YYSTYPE *yylval = parse_events_get_lval(scanner); + char *text = parse_events_get_text(scanner); -static bool isbpf_suffix(char *text) -{ - int len = strlen(text); - - if (len < 2) - return false; - if ((text[len - 1] == 'c' || text[len - 1] == 'o') && - text[len - 2] == '.') - return true; - if (len > 4 && !strcmp(text + len - 4, ".obj")) - return true; - return false; + yylval->str = strdup(text); + return token; } -static bool isbpf(yyscan_t scanner) +static int quoted_str(yyscan_t scanner, int token) { + YYSTYPE *yylval = parse_events_get_lval(scanner); char *text = parse_events_get_text(scanner); - struct stat st; - - if (!isbpf_suffix(text)) - return false; - return stat(text, &st) == 0; + /* + * If a text tag specified on the command line + * contains opening single quite ' then it is + * expected that the tag ends with single quote + * as well, like this: + * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\' + * quotes need to be escaped to bypass shell + * processing. + */ + yylval->str = strndup(&text[1], strlen(text) - 2); + return token; } /* @@ -113,6 +93,11 @@ static int drv_str(yyscan_t scanner, int token) return token; } +/* + * Use yyless to return all the characaters to the input. Update the column for + * location debugging. If __alloc is non-zero set yylval to the text for the + * returned token's value. + */ #define REWIND(__alloc) \ do { \ YYSTYPE *__yylval = parse_events_get_lval(yyscanner); \ @@ -125,38 +110,85 @@ do { \ yyless(0); \ } while (0) -static int sym(yyscan_t scanner, int type, int config) +static int term(yyscan_t scanner, enum parse_events__term_type type) { YYSTYPE *yylval = parse_events_get_lval(scanner); - yylval->num = (type << 16) + config; - return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW; -} - -static int tool(yyscan_t scanner, enum perf_tool_event event) -{ - YYSTYPE *yylval = parse_events_get_lval(scanner); - - yylval->num = event; - return PE_VALUE_SYM_TOOL; + yylval->term_type = type; + return PE_TERM; } -static int term(yyscan_t scanner, int type) +static void modifiers_error(struct parse_events_state *parse_state, yyscan_t scanner, + int pos, char mod_char, const char *mod_name) { - YYSTYPE *yylval = parse_events_get_lval(scanner); + struct parse_events_error *error = parse_state->error; + char *help = NULL; - yylval->num = type; - return PE_TERM; + if (asprintf(&help, "Duplicate modifier '%c' (%s)", mod_char, mod_name) > 0) + parse_events_error__handle(error, get_column(scanner) + pos, help , NULL); } -static int hw_term(yyscan_t scanner, int config) +static int modifiers(struct parse_events_state *parse_state, yyscan_t scanner) { YYSTYPE *yylval = parse_events_get_lval(scanner); char *text = parse_events_get_text(scanner); - - yylval->hardware_term.str = strdup(text); - yylval->hardware_term.num = PERF_TYPE_HARDWARE + config; - return PE_TERM_HW; + struct parse_events_modifier mod = { .precise = 0, }; + + for (size_t i = 0, n = strlen(text); i < n; i++) { +#define CASE(c, field) \ + case c: \ + if (mod.field) { \ + modifiers_error(parse_state, scanner, i, c, #field); \ + return PE_ERROR; \ + } \ + mod.field = true; \ + break + + switch (text[i]) { + CASE('u', user); + CASE('k', kernel); + CASE('h', hypervisor); + CASE('I', non_idle); + CASE('G', guest); + CASE('H', host); + case 'p': + mod.precise++; + /* + * precise ip: + * + * 0 - SAMPLE_IP can have arbitrary skid + * 1 - SAMPLE_IP must have constant skid + * 2 - SAMPLE_IP requested to have 0 skid + * 3 - SAMPLE_IP must have 0 skid + * + * 
See also PERF_RECORD_MISC_EXACT_IP + */ + if (mod.precise > 3) { + struct parse_events_error *error = parse_state->error; + char *help = strdup("Maximum precise value is 3"); + + if (help) { + parse_events_error__handle(error, get_column(scanner) + i, + help , NULL); + } + return PE_ERROR; + } + break; + CASE('P', precise_max); + CASE('S', sample_read); + CASE('D', pinned); + CASE('W', weak); + CASE('e', exclusive); + CASE('b', bpf); + CASE('R', retire_lat); + CASE('X', dont_regroup); + default: + return PE_ERROR; + } +#undef CASE + } + yylval->mod = mod; + return PE_MODIFIER_EVENT; } #define YY_USER_ACTION \ @@ -166,38 +198,36 @@ do { \ yycolumn += yyleng; \ } while (0); -#define USER_REJECT \ - yycolumn -= yyleng; \ - REJECT - %} %x mem %s config %x event -%x array group [^,{}/]*[{][^}]*[}][^,{}/]* event_pmu [^,{}/]+[/][^/]*[/][^,{}/]* event [^,{}/]+ -bpf_object [^,{}]+\.(o|bpf)[a-zA-Z0-9._]* -bpf_source [^,{}]+\.c[a-zA-Z0-9._]* num_dec [0-9]+ -num_hex 0x[a-fA-F0-9]+ -num_raw_hex [a-fA-F0-9]+ -name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]!\-]* -name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\'] -name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* +num_hex 0x[a-fA-F0-9]{1,16} +num_raw_hex [a-fA-F0-9]{1,16} +/* Regular pattern to match the token PE_NAME. */ +name_start [a-zA-Z0-9_*?\[\]] +name {name_start}[a-zA-Z0-9_*?.\[\]!\-]* +/* PE_NAME token when inside a config term list, allows ':'. */ +term_name {name_start}[a-zA-Z0-9_*?.\[\]!\-:]* +/* + * PE_NAME token when quoted, allows ':,.='. + * Matches the RHS of terms like: name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'. + */ +quoted_name [\']{name_start}[a-zA-Z0-9_*?.\[\]!\-:,\.=]*[\'] drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? /* - * If you add a modifier you need to update check_modifier(). + * If you add a modifier you need to update modifiers(). * Also, the letters in modifier_event must not be in modifier_bp. */ -modifier_event [ukhpPGHSDIWeb]+ +modifier_event [ukhpPGHSDIWebRX]{1,17} modifier_bp [rwx]{1,3} -lc_type (L1-dcache|l1-d|l1d|L1-data|L1-icache|l1-i|l1i|L1-instruction|LLC|L2|dTLB|d-tlb|Data-TLB|iTLB|i-tlb|Instruction-TLB|branch|branches|bpu|btb|bpc|node) -lc_op_result (load|loads|read|store|stores|write|prefetch|prefetches|speculative-read|speculative-load|refs|Reference|ops|access|misses|miss) digit [0-9] non_digit [^0-9] @@ -234,8 +264,6 @@ non_digit [^0-9] } {event_pmu} | -{bpf_object} | -{bpf_source} | {event} { BEGIN(INITIAL); REWIND(1); @@ -251,14 +279,6 @@ non_digit [^0-9] } } -<array>{ -"]" { BEGIN(config); return ']'; } -{num_dec} { return value(yyscanner, 10); } -{num_hex} { return value(yyscanner, 16); } -, { return ','; } -"\.\.\." { return PE_ARRAY_RANGE; } -} - <config>{ /* * Please update config_term_names when new static term is added. 
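(Editorial aside, not patch content.) The modifiers() scanner above replaces the old PE_MODIFIER_EVENT string token with a filled-in struct parse_events_modifier, rejecting duplicate letters at the offending column. A minimal standalone sketch of the same scan, assuming only the struct from parse-events.h, for a suffix such as the "ukpp" in "cycles:ukpp":

	/* Hedged sketch: mirrors the CASE() table above, minus the error plumbing. */
	struct parse_events_modifier mod = { .precise = 0, };
	const char *text = "ukpp";

	for (size_t i = 0; text[i]; i++) {
		switch (text[i]) {
		case 'u': mod.user = true; break;	/* a duplicate 'u' would be PE_ERROR */
		case 'k': mod.kernel = true; break;
		case 'p': mod.precise++; break;		/* capped at 3 in the real code */
		default: break;				/* the full table covers the rest */
		}
	}
	/* Result: mod.user == true, mod.kernel == true, mod.precise == 2. */
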
@@ -267,6 +287,7 @@ config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } config3 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG3); } +config4 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG4); } name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } freq { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); } @@ -282,28 +303,20 @@ overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); } no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } +aux-action { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); } -cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); } -stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } -stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } -instructions { return hw_term(yyscanner, PERF_COUNT_HW_INSTRUCTIONS); } -cache-references { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_REFERENCES); } -cache-misses { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_MISSES); } -branch-instructions|branches { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); } -branch-misses { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_MISSES); } -bus-cycles { return hw_term(yyscanner, PERF_COUNT_HW_BUS_CYCLES); } -ref-cycles { return hw_term(yyscanner, PERF_COUNT_HW_REF_CPU_CYCLES); } +cpu { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CPU); } +ratio-to-prev { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV); } +legacy-hardware-config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG); } +legacy-cache-config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG); } r{num_raw_hex} { return str(yyscanner, PE_RAW); } r0x{num_raw_hex} { return str(yyscanner, PE_RAW); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } -{lc_type} { return lc_str(yyscanner, _parse_state); } -{lc_type}-{lc_op_result} { return lc_str(yyscanner, _parse_state); } -{lc_type}-{lc_op_result}-{lc_op_result} { return lc_str(yyscanner, _parse_state); } -{name_minus} { return str(yyscanner, PE_NAME); } -\[all\] { return PE_ARRAY_ALL; } -"[" { BEGIN(array); return '['; } +{num_dec} { return value(_parse_state, yyscanner, 10); } +{num_hex} { return value(_parse_state, yyscanner, 16); } +{term_name} { return str(yyscanner, PE_NAME); } @{drv_cfg_term} { return drv_str(yyscanner, PE_DRV_CFG_TERM); } } @@ -323,8 +336,8 @@ r0x{num_raw_hex} { return str(yyscanner, PE_RAW); } */ "/"/{digit} { return PE_BP_SLASH; } "/"/{non_digit} { BEGIN(config); return '/'; } -{num_dec} { return value(yyscanner, 10); } -{num_hex} { return value(yyscanner, 16); } +{num_dec} { return value(_parse_state, yyscanner, 10); } +{num_hex} { return value(_parse_state, yyscanner, 16); } /* * We need to separate 'mem:' scanner part, in order to get specific * modifier bits parsed out. 
Otherwise we would need to handle PE_NAME @@ -339,45 +352,14 @@ r0x{num_raw_hex} { return str(yyscanner, PE_RAW); } <<EOF>> { BEGIN(INITIAL); } } -cpu-cycles|cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); } -stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } -stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } -instructions { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); } -cache-references { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); } -cache-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); } -branch-instructions|branches { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); } -branch-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); } -bus-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); } -ref-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); } -cpu-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); } -task-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); } -page-faults|faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); } -minor-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); } -major-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); } -context-switches|cs { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); } -cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); } -alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } -emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } -dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } -duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); } -user_time { return tool(yyscanner, PERF_TOOL_USER_TIME); } -system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); } -bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } -cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); } - -{lc_type} { return str(yyscanner, PE_LEGACY_CACHE); } -{lc_type}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); } -{lc_type}-{lc_op_result}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); } mem: { BEGIN(mem); return PE_PREFIX_MEM; } r{num_raw_hex} { return str(yyscanner, PE_RAW); } -{num_dec} { return value(yyscanner, 10); } -{num_hex} { return value(yyscanner, 16); } +{num_dec} { return value(_parse_state, yyscanner, 10); } +{num_hex} { return value(_parse_state, yyscanner, 16); } -{modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } -{bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); } -{bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); } +{modifier_event} { return modifiers(_parse_state, yyscanner); } {name} { return str(yyscanner, PE_NAME); } -{name_tag} { return str(yyscanner, PE_NAME); } +{quoted_name} { return quoted_str(yyscanner, PE_NAME); } "/" { BEGIN(config); return '/'; } , { BEGIN(event); return ','; } : { return ':'; } @@ -388,8 +370,3 @@ r{num_raw_hex} { return 
str(yyscanner, PE_RAW); } . { } %% - -int parse_events_wrap(void *scanner __maybe_unused) -{ - return 1; -} diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 9f28d4b5502f..c194de5ec1ec 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -6,26 +6,27 @@ %{ +#ifndef NDEBUG #define YYDEBUG 1 +#endif #include <errno.h> -#include <fnmatch.h> -#include <stdio.h> #include <linux/compiler.h> #include <linux/types.h> -#include <linux/zalloc.h> #include "pmu.h" #include "pmus.h" #include "evsel.h" #include "parse-events.h" #include "parse-events-bison.h" +int parse_events_lex(YYSTYPE * yylval_param, YYLTYPE * yylloc_param , void *yyscanner); void parse_events_error(YYLTYPE *loc, void *parse_state, void *scanner, char const *msg); -#define ABORT_ON(val) \ +#define PE_ABORT(val) \ do { \ - if (val) \ - YYABORT; \ + if (val == -ENOMEM) \ + YYNOMEM; \ + YYABORT; \ } while (0) static struct list_head* alloc_list(void) @@ -54,36 +55,22 @@ static void free_list_evsel(struct list_head* list_evsel) %} %token PE_START_EVENTS PE_START_TERMS -%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_TERM -%token PE_VALUE_SYM_TOOL +%token PE_VALUE PE_TERM %token PE_EVENT_NAME %token PE_RAW PE_NAME -%token PE_BPF_OBJECT PE_BPF_SOURCE %token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH -%token PE_LEGACY_CACHE -%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP +%token PE_PREFIX_MEM %token PE_ERROR -%token PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE -%token PE_ARRAY_ALL PE_ARRAY_RANGE %token PE_DRV_CFG_TERM -%token PE_TERM_HW %type <num> PE_VALUE -%type <num> PE_VALUE_SYM_HW -%type <num> PE_VALUE_SYM_SW -%type <num> PE_VALUE_SYM_TOOL -%type <num> PE_TERM -%type <num> value_sym +%type <mod> PE_MODIFIER_EVENT +%type <term_type> PE_TERM %type <str> PE_RAW %type <str> PE_NAME -%type <str> PE_BPF_OBJECT -%type <str> PE_BPF_SOURCE -%type <str> PE_LEGACY_CACHE -%type <str> PE_MODIFIER_EVENT %type <str> PE_MODIFIER_BP %type <str> PE_EVENT_NAME -%type <str> PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %type <str> PE_DRV_CFG_TERM -%type <str> name_or_raw name_or_legacy +%type <str> name_or_raw %destructor { free ($$); } <str> %type <term> event_term %destructor { parse_events_term__delete ($$); } <term> @@ -92,13 +79,10 @@ static void free_list_evsel(struct list_head* list_evsel) %type <list_terms> opt_pmu_config %destructor { parse_events_terms__delete ($$); } <list_terms> %type <list_evsel> event_pmu -%type <list_evsel> event_legacy_symbol -%type <list_evsel> event_legacy_cache %type <list_evsel> event_legacy_mem %type <list_evsel> event_legacy_tracepoint %type <list_evsel> event_legacy_numeric %type <list_evsel> event_legacy_raw -%type <list_evsel> event_bpf_file %type <list_evsel> event_def %type <list_evsel> event_mod %type <list_evsel> event_name @@ -110,32 +94,27 @@ static void free_list_evsel(struct list_head* list_evsel) %destructor { free_list_evsel ($$); } <list_evsel> %type <tracepoint_name> tracepoint_name %destructor { free ($$.sys); free ($$.event); } <tracepoint_name> -%type <array> array -%type <array> array_term -%type <array> array_terms -%destructor { free ($$.ranges); } <array> -%type <hardware_term> PE_TERM_HW -%destructor { free ($$.str); } <hardware_term> %union { char *str; u64 num; + struct parse_events_modifier mod; + enum parse_events__term_type term_type; struct list_head *list_evsel; - struct list_head *list_terms; + struct parse_events_terms *list_terms; struct parse_events_term *term; struct tracepoint_name { char *sys; char *event; } 
tracepoint_name; - struct parse_events_array array; - struct hardware_term { - char *str; - u64 num; - } hardware_term; } %% + /* + * Entry points. We are either parsing events or terminals. Just terminal + * parsing is used for parsing events in sysfs. + */ start: PE_START_EVENTS start_events | @@ -143,31 +122,36 @@ PE_START_TERMS start_terms start_events: groups { + /* Take the parsed events, groups.. and place into parse_state. */ + struct list_head *groups = $1; struct parse_events_state *parse_state = _parse_state; - /* frees $1 */ - parse_events_update_lists($1, &parse_state->list); + list_splice_tail(groups, &parse_state->list); + free(groups); } -groups: +groups: /* A list of groups or events. */ groups ',' group { - struct list_head *list = $1; - struct list_head *group = $3; + /* Merge group into the list of events/groups. */ + struct list_head *groups = $1; + struct list_head *group = $3; - /* frees $3 */ - parse_events_update_lists(group, list); - $$ = list; + list_splice_tail(group, groups); + free(group); + $$ = groups; } | groups ',' event { - struct list_head *list = $1; + /* Merge event into the list of events/groups. */ + struct list_head *groups = $1; struct list_head *event = $3; - /* frees $3 */ - parse_events_update_lists(event, list); - $$ = list; + + list_splice_tail(event, groups); + free(event); + $$ = groups; } | group @@ -177,20 +161,13 @@ event group: group_def ':' PE_MODIFIER_EVENT { + /* Apply the modifier to the events in the group_def. */ struct list_head *list = $1; int err; - err = parse_events__modifier_group(list, $3); - free($3); - if (err) { - struct parse_events_state *parse_state = _parse_state; - struct parse_events_error *error = parse_state->error; - - parse_events_error__handle(error, @3.first_column, - strdup("Bad modifier"), NULL); - free_list_evsel(list); + err = parse_events__modifier_group(_parse_state, &@3, list, $3); + if (err) YYABORT; - } $$ = list; } | @@ -201,7 +178,10 @@ PE_NAME '{' events '}' { struct list_head *list = $3; - /* Takes ownership of $1. */ + /* + * Set the first entry of list to be the leader. Set the group name on + * the leader to $1 taking ownership. + */ parse_events__set_leader($1, list); $$ = list; } @@ -210,6 +190,7 @@ PE_NAME '{' events '}' { struct list_head *list = $2; + /* Set the first entry of list to be the leader clearing the group name. */ parse_events__set_leader(NULL, list); $$ = list; } @@ -217,12 +198,12 @@ PE_NAME '{' events '}' events: events ',' event { + struct list_head *events = $1; struct list_head *event = $3; - struct list_head *list = $1; - /* frees $3 */ - parse_events_update_lists(event, list); - $$ = list; + list_splice_tail(event, events); + free(event); + $$ = events; } | event @@ -240,17 +221,9 @@ event_name PE_MODIFIER_EVENT * (there could be more events added for multiple tracepoint * definitions via '*?'. */ - err = parse_events__modifier_event(list, $2, false); - free($2); - if (err) { - struct parse_events_state *parse_state = _parse_state; - struct parse_events_error *error = parse_state->error; - - parse_events_error__handle(error, @2.first_column, - strdup("Bad modifier"), NULL); - free_list_evsel(list); + err = parse_events__modifier_event(_parse_state, &@2, list, $2); + if (err) YYABORT; - } $$ = list; } | @@ -259,13 +232,17 @@ event_name event_name: PE_EVENT_NAME event_def { - int err; + /* + * When an event is parsed the text is rewound and the entire text of + * the event is set to the str of PE_EVENT_NAME token matched here. 
If + * no name was on an event via a term, set the name to the entire text + * taking ownership of the allocation. + */ + int err = parse_events__set_default_name($2, $1); - err = parse_events_name($2, $1); - free($1); if (err) { free_list_evsel($2); - YYABORT; + YYNOMEM; } $$ = $2; } @@ -273,100 +250,22 @@ PE_EVENT_NAME event_def event_def event_def: event_pmu | - event_legacy_symbol | - event_legacy_cache sep_dc | event_legacy_mem sep_dc | event_legacy_tracepoint sep_dc | event_legacy_numeric sep_dc | - event_legacy_raw sep_dc | - event_bpf_file + event_legacy_raw sep_dc event_pmu: PE_NAME opt_pmu_config { - struct parse_events_state *parse_state = _parse_state; - struct parse_events_error *error = parse_state->error; - struct list_head *list = NULL, *orig_terms = NULL, *terms= NULL; - char *pattern = NULL; - -#define CLEANUP_YYABORT \ - do { \ - parse_events_terms__delete($2); \ - parse_events_terms__delete(orig_terms); \ - free(list); \ - free($1); \ - free(pattern); \ - YYABORT; \ - } while(0) - - if (parse_events_copy_term_list($2, &orig_terms)) - CLEANUP_YYABORT; + /* List of created evsels. */ + struct list_head *list = NULL; + int err = parse_events_multi_pmu_add_or_add_pmu(_parse_state, $1, $2, &list, &@1); - if (error) - error->idx = @1.first_column; - - list = alloc_list(); - if (!list) - CLEANUP_YYABORT; - /* Attempt to add to list assuming $1 is a PMU name. */ - if (parse_events_add_pmu(parse_state, list, $1, $2, /*auto_merge_stats=*/false)) { - struct perf_pmu *pmu = NULL; - int ok = 0; - - /* Failure to add, try wildcard expansion of $1 as a PMU name. */ - if (asprintf(&pattern, "%s*", $1) < 0) - CLEANUP_YYABORT; - - while ((pmu = perf_pmus__scan(pmu)) != NULL) { - char *name = pmu->name; - - if (parse_events__filter_pmu(parse_state, pmu)) - continue; - - if (!strncmp(name, "uncore_", 7) && - strncmp($1, "uncore_", 7)) - name += 7; - if (!perf_pmu__match(pattern, name, $1) || - !perf_pmu__match(pattern, pmu->alias_name, $1)) { - bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu); - - if (parse_events_copy_term_list(orig_terms, &terms)) - CLEANUP_YYABORT; - if (!parse_events_add_pmu(parse_state, list, pmu->name, terms, - auto_merge_stats)) { - ok++; - parse_state->wild_card_pmus = true; - } - parse_events_terms__delete(terms); - } - } - - if (!ok) { - /* Failure to add, assume $1 is an event name. 
*/ - zfree(&list); - ok = !parse_events_multi_pmu_add(parse_state, $1, $2, &list); - $2 = NULL; - } - if (!ok) - CLEANUP_YYABORT; - } parse_events_terms__delete($2); - parse_events_terms__delete(orig_terms); - free(pattern); - free($1); - $$ = list; -#undef CLEANUP_YYABORT -} -| -PE_KERNEL_PMU_EVENT sep_dc -{ - struct list_head *list; - int err; - - err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list); free($1); - if (err < 0) - YYABORT; + if (err) + PE_ABORT(err); $$ = list; } | @@ -375,130 +274,19 @@ PE_NAME sep_dc struct list_head *list; int err; - err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list); - free($1); - if (err < 0) - YYABORT; - $$ = list; -} -| -PE_KERNEL_PMU_EVENT opt_pmu_config -{ - struct list_head *list; - int err; - - /* frees $2 */ - err = parse_events_multi_pmu_add(_parse_state, $1, $2, &list); - free($1); - if (err < 0) - YYABORT; - $$ = list; -} -| -PE_PMU_EVENT_FAKE sep_dc -{ - struct list_head *list; - int err; - - list = alloc_list(); - if (!list) - YYABORT; - - err = parse_events_add_pmu(_parse_state, list, $1, /*head_config=*/NULL, - /*auto_merge_stats=*/false); - free($1); + err = parse_events_multi_pmu_add(_parse_state, $1, /*const_parsed_terms*/NULL, &list, &@1); if (err < 0) { - free(list); - YYABORT; - } - $$ = list; -} -| -PE_PMU_EVENT_FAKE opt_pmu_config -{ - struct list_head *list; - int err; - - list = alloc_list(); - if (!list) - YYABORT; - - err = parse_events_add_pmu(_parse_state, list, $1, $2, /*auto_merge_stats=*/false); - free($1); - parse_events_terms__delete($2); - if (err < 0) { - free(list); - YYABORT; - } - $$ = list; -} - -value_sym: -PE_VALUE_SYM_HW -| -PE_VALUE_SYM_SW - -event_legacy_symbol: -value_sym '/' event_config '/' -{ - struct list_head *list; - int type = $1 >> 16; - int config = $1 & 255; - int err; - bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE); + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; + char *help; - list = alloc_list(); - ABORT_ON(!list); - err = parse_events_add_numeric(_parse_state, list, type, config, $3, wildcard); - parse_events_terms__delete($3); - if (err) { - free_list_evsel(list); - YYABORT; + if (asprintf(&help, "Unable to find event on a PMU of '%s'", $1) < 0) + help = NULL; + parse_events_error__handle(error, @1.first_column, strdup("Bad event name"), help); + free($1); + PE_ABORT(err); } - $$ = list; -} -| -value_sym sep_slash_slash_dc -{ - struct list_head *list; - int type = $1 >> 16; - int config = $1 & 255; - bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE); - - list = alloc_list(); - ABORT_ON(!list); - ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, - /*head_config=*/NULL, wildcard)); - $$ = list; -} -| -PE_VALUE_SYM_TOOL sep_slash_slash_dc -{ - struct list_head *list; - - list = alloc_list(); - ABORT_ON(!list); - ABORT_ON(parse_events_add_tool(_parse_state, list, $1)); - $$ = list; -} - -event_legacy_cache: -PE_LEGACY_CACHE opt_event_config -{ - struct parse_events_state *parse_state = _parse_state; - struct list_head *list; - int err; - - list = alloc_list(); - ABORT_ON(!list); - err = parse_events_add_cache(list, &parse_state->idx, $1, parse_state, $2); - - parse_events_terms__delete($2); free($1); - if (err) { - free_list_evsel(list); - YYABORT; - } $$ = list; } @@ -509,14 +297,16 @@ PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event int err; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; 
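/*
 * Editorial aside (not patch content): the four breakpoint rules in this
 * hunk accept, for example:
 *
 *	mem:0x1000/8:rw		addr, len and access type
 *	mem:0x1000/8		addr and len only
 *	mem:0x1000:w		addr and access type, no len
 *	mem:0x1000		addr only
 *
 * parse_events_add_breakpoint() receives NULL/0 for the omitted parts;
 * default_breakpoint_len(), declared in the header above, presumably
 * supplies the length when none is given.
 */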
+ err = parse_events_add_breakpoint(_parse_state, list, $2, $6, $4, $7); parse_events_terms__delete($7); free($6); if (err) { free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -527,13 +317,15 @@ PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE opt_event_config int err; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; + err = parse_events_add_breakpoint(_parse_state, list, $2, NULL, $4, $5); parse_events_terms__delete($5); if (err) { free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -544,14 +336,16 @@ PE_PREFIX_MEM PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config int err; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; + err = parse_events_add_breakpoint(_parse_state, list, $2, $4, 0, $5); parse_events_terms__delete($5); free($4); if (err) { free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -562,13 +356,14 @@ PE_PREFIX_MEM PE_VALUE opt_event_config int err; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; err = parse_events_add_breakpoint(_parse_state, list, $2, NULL, 0, $3); parse_events_terms__delete($3); if (err) { free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -582,19 +377,18 @@ tracepoint_name opt_event_config int err; list = alloc_list(); - ABORT_ON(!list); - if (error) - error->idx = @1.first_column; + if (!list) + YYNOMEM; - err = parse_events_add_tracepoint(list, &parse_state->idx, $1.sys, $1.event, - error, $2); + err = parse_events_add_tracepoint(parse_state, list, $1.sys, $1.event, + error, $2, &@1); parse_events_terms__delete($2); free($1.sys); free($1.event); if (err) { free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -614,13 +408,14 @@ PE_VALUE ':' PE_VALUE opt_event_config int err; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; err = parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4, /*wildcard=*/false); parse_events_terms__delete($4); if (err) { free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -633,52 +428,20 @@ PE_RAW opt_event_config u64 num; list = alloc_list(); - ABORT_ON(!list); + if (!list) + YYNOMEM; errno = 0; num = strtoull($1 + 1, NULL, 16); - ABORT_ON(errno); + /* Given the lexer will only give [a-fA-F0-9]+ a failure here should be impossible. 
*/ + if (errno) + YYABORT; free($1); err = parse_events_add_numeric(_parse_state, list, PERF_TYPE_RAW, num, $2, /*wildcard=*/false); parse_events_terms__delete($2); if (err) { free(list); - YYABORT; - } - $$ = list; -} - -event_bpf_file: -PE_BPF_OBJECT opt_event_config -{ - struct parse_events_state *parse_state = _parse_state; - struct list_head *list; - int err; - - list = alloc_list(); - ABORT_ON(!list); - err = parse_events_load_bpf(parse_state, list, $1, false, $2); - parse_events_terms__delete($2); - free($1); - if (err) { - free(list); - YYABORT; - } - $$ = list; -} -| -PE_BPF_SOURCE opt_event_config -{ - struct list_head *list; - int err; - - list = alloc_list(); - ABORT_ON(!list); - err = parse_events_load_bpf(_parse_state, list, $1, true, $2); - parse_events_terms__delete($2); - if (err) { - free(list); - YYABORT; + PE_ABORT(err); } $$ = list; } @@ -722,54 +485,54 @@ start_terms: event_config event_config: event_config ',' event_term { - struct list_head *head = $1; + struct parse_events_terms *head = $1; struct parse_events_term *term = $3; if (!head) { parse_events_term__delete(term); YYABORT; } - list_add_tail(&term->list, head); + list_add_tail(&term->list, &head->terms); $$ = $1; } | event_term { - struct list_head *head = malloc(sizeof(*head)); + struct parse_events_terms *head = malloc(sizeof(*head)); struct parse_events_term *term = $1; - ABORT_ON(!head); - INIT_LIST_HEAD(head); - list_add_tail(&term->list, head); + if (!head) + YYNOMEM; + parse_events_terms__init(head); + list_add_tail(&term->list, &head->terms); $$ = head; } -name_or_raw: PE_RAW | PE_NAME | PE_LEGACY_CACHE - -name_or_legacy: PE_NAME | PE_LEGACY_CACHE +name_or_raw: PE_RAW | PE_NAME event_term: PE_RAW { struct parse_events_term *term; + int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_RAW, + strdup("raw"), $1, &@1, &@1); - if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_RAW, - strdup("raw"), $1, &@1, &@1)) { + if (err) { free($1); - YYABORT; + PE_ABORT(err); } $$ = term; } | -name_or_raw '=' name_or_legacy +name_or_raw '=' name_or_raw { struct parse_events_term *term; + int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $3, &@1, &@3); - if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, $3, &@1, &@3)) { + if (err) { free($1); free($3); - YYABORT; + PE_ABORT(err); } $$ = term; } @@ -777,36 +540,12 @@ name_or_raw '=' name_or_legacy name_or_raw '=' PE_VALUE { struct parse_events_term *term; + int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, + $1, $3, /*novalue=*/false, &@1, &@3); - if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, $3, false, &@1, &@3)) { - free($1); - YYABORT; - } - $$ = term; -} -| -name_or_raw '=' PE_TERM_HW -{ - struct parse_events_term *term; - - if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, $3.str, &@1, &@3)) { - free($1); - free($3.str); - YYABORT; - } - $$ = term; -} -| -PE_LEGACY_CACHE -{ - struct parse_events_term *term; - - if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, - $1, 1, true, &@1, NULL)) { + if (err) { free($1); - YYABORT; + PE_ABORT(err); } $$ = term; } @@ -814,45 +553,24 @@ PE_LEGACY_CACHE PE_NAME { struct parse_events_term *term; + int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, + $1, /*num=*/1, /*novalue=*/true, &@1, /*loc_val=*/NULL); - if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, 1, true, &@1, NULL)) { + if (err) { free($1); - YYABORT; - } - $$ = term; -} -| 
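(Worked illustration, not patch content.) A bare name term such as the "inv" in "cpu/event=0x3c,inv/" takes the PE_NAME rule nearby and becomes a numeric user term with an implied value of 1; roughly, using the parse_events_term__num() signature from parse-events.h above:

	struct parse_events_term *term;
	int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
					 strdup("inv"), /*num=*/1, /*novalue=*/true,
					 /*loc_term=*/NULL, /*loc_val=*/NULL);

	/* On success: term->config is "inv", term->val.num is 1, term->no_value is true. */
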
-PE_TERM_HW -{ - struct parse_events_term *term; - - if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE, - $1.str, $1.num & 255, false, &@1, NULL)) { - free($1.str); - YYABORT; + PE_ABORT(err); } $$ = term; } | -PE_TERM '=' name_or_legacy +PE_TERM '=' name_or_raw { struct parse_events_term *term; + int err = parse_events_term__str(&term, $1, /*config=*/NULL, $3, &@1, &@3); - if (parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3)) { + if (err) { free($3); - YYABORT; - } - $$ = term; -} -| -PE_TERM '=' PE_TERM_HW -{ - struct parse_events_term *term; - - if (parse_events_term__str(&term, (int)$1, NULL, $3.str, &@1, &@3)) { - free($3.str); - YYABORT; + PE_ABORT(err); } $$ = term; } @@ -860,53 +578,37 @@ PE_TERM '=' PE_TERM_HW PE_TERM '=' PE_TERM { struct parse_events_term *term; + int err = parse_events_term__term(&term, $1, $3, &@1, &@3); + + if (err) + PE_ABORT(err); - ABORT_ON(parse_events_term__term(&term, (int)$1, (int)$3, &@1, &@3)); $$ = term; } | PE_TERM '=' PE_VALUE { struct parse_events_term *term; + int err = parse_events_term__num(&term, $1, + /*config=*/NULL, $3, /*novalue=*/false, + &@1, &@3); + + if (err) + PE_ABORT(err); - ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3)); $$ = term; } | PE_TERM { struct parse_events_term *term; + int err = parse_events_term__num(&term, $1, + /*config=*/NULL, /*num=*/1, /*novalue=*/true, + &@1, /*loc_val=*/NULL); - ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL)); - $$ = term; -} -| -name_or_raw array '=' name_or_legacy -{ - struct parse_events_term *term; + if (err) + PE_ABORT(err); - if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, $4, &@1, &@4)) { - free($1); - free($4); - free($2.ranges); - YYABORT; - } - term->array = $2; - $$ = term; -} -| -name_or_raw array '=' PE_VALUE -{ - struct parse_events_term *term; - - if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, $4, false, &@1, &@4)) { - free($1); - free($2.ranges); - YYABORT; - } - term->array = $2; $$ = term; } | @@ -914,82 +616,32 @@ PE_DRV_CFG_TERM { struct parse_events_term *term; char *config = strdup($1); + int err; - ABORT_ON(!config); - if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG, - config, $1, &@1, NULL)) { + if (!config) + YYNOMEM; + err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG, config, $1, &@1, NULL); + if (err) { free($1); free(config); - YYABORT; + PE_ABORT(err); } $$ = term; } -array: -'[' array_terms ']' -{ - $$ = $2; -} -| -PE_ARRAY_ALL -{ - $$.nr_ranges = 0; - $$.ranges = NULL; -} - -array_terms: -array_terms ',' array_term -{ - struct parse_events_array new_array; - - new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges; - new_array.ranges = realloc($1.ranges, - sizeof(new_array.ranges[0]) * - new_array.nr_ranges); - ABORT_ON(!new_array.ranges); - memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges, - $3.nr_ranges * sizeof(new_array.ranges[0])); - free($3.ranges); - $$ = new_array; -} -| -array_term - -array_term: -PE_VALUE -{ - struct parse_events_array array; - - array.nr_ranges = 1; - array.ranges = malloc(sizeof(array.ranges[0])); - ABORT_ON(!array.ranges); - array.ranges[0].start = $1; - array.ranges[0].length = 1; - $$ = array; -} -| -PE_VALUE PE_ARRAY_RANGE PE_VALUE -{ - struct parse_events_array array; - - ABORT_ON($3 < $1); - array.nr_ranges = 1; - array.ranges = malloc(sizeof(array.ranges[0])); - ABORT_ON(!array.ranges); - array.ranges[0].start = $1; - array.ranges[0].length = $3 - $1 + 1; - $$ = array; -} - 
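(Editorial note, not patch content.) The deleted array rules existed to configure BPF map slots, e.g. terms of the form map:<name>.value[0...5]=<val> on a PE_BPF_OBJECT event; with the BPF object/source tokens removed earlier in this patch, nothing in the lexer produces '[' any more, so struct parse_events_array and parse_events__clear_array() go with them.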
sep_dc: ':' | -sep_slash_slash_dc: '/' '/' | ':' | - %% -void parse_events_error(YYLTYPE *loc, void *parse_state, +void parse_events_error(YYLTYPE *loc, void *_parse_state, void *scanner __maybe_unused, char const *msg __maybe_unused) { - parse_events_evlist_error(parse_state, loc->last_column, "parser error"); + struct parse_events_state *parse_state = _parse_state; + + if (!parse_state->error || !list_empty(&parse_state->error->list)) + return; + + parse_events_error__handle(parse_state->error, loc->last_column, + strdup("Unrecognized input"), NULL); } diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c index a4a100425b3a..cda1c620968e 100644 --- a/tools/perf/util/parse-regs-options.c +++ b/tools/perf/util/parse-regs-options.c @@ -46,22 +46,18 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr) if (!strcmp(s, "?")) { fprintf(stderr, "available registers: "); -#ifdef HAVE_PERF_REGS_SUPPORT - for (r = sample_reg_masks; r->name; r++) { + for (r = arch__sample_reg_masks(); r->name; r++) { if (r->mask & mask) fprintf(stderr, "%s ", r->name); } -#endif fputc('\n', stderr); /* just printing available regs */ goto error; } -#ifdef HAVE_PERF_REGS_SUPPORT - for (r = sample_reg_masks; r->name; r++) { + for (r = arch__sample_reg_masks(); r->name; r++) { if ((r->mask & mask) && !strcasecmp(s, r->name)) break; } -#endif if (!r || !r->name) { ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n", s, intr ? "-I" : "--user-regs="); diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 00adf872bf00..2e62f272fda8 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -68,14 +68,12 @@ bool is_directory(const char *base_path, const struct dirent *dent) return S_ISDIR(st.st_mode); } -bool is_executable_file(const char *base_path, const struct dirent *dent) +bool is_directory_at(int dir_fd, const char *path) { - char path[PATH_MAX]; struct stat st; - snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name); - if (stat(path, &st)) + if (fstatat(dir_fd, path, &st, /*flags=*/0)) return false; - return !S_ISDIR(st.st_mode) && (st.st_mode & S_IXUSR); + return S_ISDIR(st.st_mode); } diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h index d94902c22222..fb850fb55c60 100644 --- a/tools/perf/util/path.h +++ b/tools/perf/util/path.h @@ -12,6 +12,6 @@ int path__join3(char *bf, size_t size, const char *path1, const char *path2, con bool is_regular_file(const char *file); bool is_directory(const char *base_path, const struct dirent *dent); -bool is_executable_file(const char *base_path, const struct dirent *dent); +bool is_directory_at(int dir_fd, const char *path); #endif /* _PERF_PATH_H */ diff --git a/tools/perf/util/perf-regs-arch/Build b/tools/perf/util/perf-regs-arch/Build new file mode 100644 index 000000000000..be95402aa540 --- /dev/null +++ b/tools/perf/util/perf-regs-arch/Build @@ -0,0 +1,9 @@ +perf-util-y += perf_regs_aarch64.o +perf-util-y += perf_regs_arm.o +perf-util-y += perf_regs_csky.o +perf-util-y += perf_regs_loongarch.o +perf-util-y += perf_regs_mips.o +perf-util-y += perf_regs_powerpc.o +perf-util-y += perf_regs_riscv.o +perf-util-y += perf_regs_s390.o +perf-util-y += perf_regs_x86.o diff --git a/tools/perf/util/perf-regs-arch/perf_regs_aarch64.c b/tools/perf/util/perf-regs-arch/perf_regs_aarch64.c new file mode 100644 index 000000000000..9dcda80d310f --- /dev/null +++ b/tools/perf/util/perf-regs-arch/perf_regs_aarch64.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: 
GPL-2.0 + +#include "../perf_regs.h" +#include "../../../arch/arm64/include/uapi/asm/perf_regs.h" + +const char *__perf_reg_name_arm64(int id) +{ + switch (id) { + case PERF_REG_ARM64_X0: + return "x0"; + case PERF_REG_ARM64_X1: + return "x1"; + case PERF_REG_ARM64_X2: + return "x2"; + case PERF_REG_ARM64_X3: + return "x3"; + case PERF_REG_ARM64_X4: + return "x4"; + case PERF_REG_ARM64_X5: + return "x5"; + case PERF_REG_ARM64_X6: + return "x6"; + case PERF_REG_ARM64_X7: + return "x7"; + case PERF_REG_ARM64_X8: + return "x8"; + case PERF_REG_ARM64_X9: + return "x9"; + case PERF_REG_ARM64_X10: + return "x10"; + case PERF_REG_ARM64_X11: + return "x11"; + case PERF_REG_ARM64_X12: + return "x12"; + case PERF_REG_ARM64_X13: + return "x13"; + case PERF_REG_ARM64_X14: + return "x14"; + case PERF_REG_ARM64_X15: + return "x15"; + case PERF_REG_ARM64_X16: + return "x16"; + case PERF_REG_ARM64_X17: + return "x17"; + case PERF_REG_ARM64_X18: + return "x18"; + case PERF_REG_ARM64_X19: + return "x19"; + case PERF_REG_ARM64_X20: + return "x20"; + case PERF_REG_ARM64_X21: + return "x21"; + case PERF_REG_ARM64_X22: + return "x22"; + case PERF_REG_ARM64_X23: + return "x23"; + case PERF_REG_ARM64_X24: + return "x24"; + case PERF_REG_ARM64_X25: + return "x25"; + case PERF_REG_ARM64_X26: + return "x26"; + case PERF_REG_ARM64_X27: + return "x27"; + case PERF_REG_ARM64_X28: + return "x28"; + case PERF_REG_ARM64_X29: + return "x29"; + case PERF_REG_ARM64_SP: + return "sp"; + case PERF_REG_ARM64_LR: + return "lr"; + case PERF_REG_ARM64_PC: + return "pc"; + case PERF_REG_ARM64_VG: + return "vg"; + default: + return NULL; + } + + return NULL; +} + +uint64_t __perf_reg_ip_arm64(void) +{ + return PERF_REG_ARM64_PC; +} + +uint64_t __perf_reg_sp_arm64(void) +{ + return PERF_REG_ARM64_SP; +} diff --git a/tools/perf/util/perf-regs-arch/perf_regs_arm.c b/tools/perf/util/perf-regs-arch/perf_regs_arm.c new file mode 100644 index 000000000000..e29d130a587a --- /dev/null +++ b/tools/perf/util/perf-regs-arch/perf_regs_arm.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../perf_regs.h" +#include "../../../arch/arm/include/uapi/asm/perf_regs.h" + +const char *__perf_reg_name_arm(int id) +{ + switch (id) { + case PERF_REG_ARM_R0: + return "r0"; + case PERF_REG_ARM_R1: + return "r1"; + case PERF_REG_ARM_R2: + return "r2"; + case PERF_REG_ARM_R3: + return "r3"; + case PERF_REG_ARM_R4: + return "r4"; + case PERF_REG_ARM_R5: + return "r5"; + case PERF_REG_ARM_R6: + return "r6"; + case PERF_REG_ARM_R7: + return "r7"; + case PERF_REG_ARM_R8: + return "r8"; + case PERF_REG_ARM_R9: + return "r9"; + case PERF_REG_ARM_R10: + return "r10"; + case PERF_REG_ARM_FP: + return "fp"; + case PERF_REG_ARM_IP: + return "ip"; + case PERF_REG_ARM_SP: + return "sp"; + case PERF_REG_ARM_LR: + return "lr"; + case PERF_REG_ARM_PC: + return "pc"; + default: + return NULL; + } + + return NULL; +} + +uint64_t __perf_reg_ip_arm(void) +{ + return PERF_REG_ARM_PC; +} + +uint64_t __perf_reg_sp_arm(void) +{ + return PERF_REG_ARM_SP; +} diff --git a/tools/perf/util/perf-regs-arch/perf_regs_csky.c b/tools/perf/util/perf-regs-arch/perf_regs_csky.c new file mode 100644 index 000000000000..75b461ef2eba --- /dev/null +++ b/tools/perf/util/perf-regs-arch/perf_regs_csky.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../perf_regs.h" +#include "../../arch/csky/include/uapi/asm/perf_regs.h" + +const char *__perf_reg_name_csky(int id) +{ + switch (id) { + case PERF_REG_CSKY_A0: + return "a0"; + case PERF_REG_CSKY_A1: + return "a1"; + 
case PERF_REG_CSKY_A2: + return "a2"; + case PERF_REG_CSKY_A3: + return "a3"; + case PERF_REG_CSKY_REGS0: + return "regs0"; + case PERF_REG_CSKY_REGS1: + return "regs1"; + case PERF_REG_CSKY_REGS2: + return "regs2"; + case PERF_REG_CSKY_REGS3: + return "regs3"; + case PERF_REG_CSKY_REGS4: + return "regs4"; + case PERF_REG_CSKY_REGS5: + return "regs5"; + case PERF_REG_CSKY_REGS6: + return "regs6"; + case PERF_REG_CSKY_REGS7: + return "regs7"; + case PERF_REG_CSKY_REGS8: + return "regs8"; + case PERF_REG_CSKY_REGS9: + return "regs9"; + case PERF_REG_CSKY_SP: + return "sp"; + case PERF_REG_CSKY_LR: + return "lr"; + case PERF_REG_CSKY_PC: + return "pc"; +#if defined(__CSKYABIV2__) + case PERF_REG_CSKY_EXREGS0: + return "exregs0"; + case PERF_REG_CSKY_EXREGS1: + return "exregs1"; + case PERF_REG_CSKY_EXREGS2: + return "exregs2"; + case PERF_REG_CSKY_EXREGS3: + return "exregs3"; + case PERF_REG_CSKY_EXREGS4: + return "exregs4"; + case PERF_REG_CSKY_EXREGS5: + return "exregs5"; + case PERF_REG_CSKY_EXREGS6: + return "exregs6"; + case PERF_REG_CSKY_EXREGS7: + return "exregs7"; + case PERF_REG_CSKY_EXREGS8: + return "exregs8"; + case PERF_REG_CSKY_EXREGS9: + return "exregs9"; + case PERF_REG_CSKY_EXREGS10: + return "exregs10"; + case PERF_REG_CSKY_EXREGS11: + return "exregs11"; + case PERF_REG_CSKY_EXREGS12: + return "exregs12"; + case PERF_REG_CSKY_EXREGS13: + return "exregs13"; + case PERF_REG_CSKY_EXREGS14: + return "exregs14"; + case PERF_REG_CSKY_TLS: + return "tls"; + case PERF_REG_CSKY_HI: + return "hi"; + case PERF_REG_CSKY_LO: + return "lo"; +#endif + default: + return NULL; + } + + return NULL; +} + +uint64_t __perf_reg_ip_csky(void) +{ + return PERF_REG_CSKY_PC; +} + +uint64_t __perf_reg_sp_csky(void) +{ + return PERF_REG_CSKY_SP; +} diff --git a/tools/perf/util/perf-regs-arch/perf_regs_loongarch.c b/tools/perf/util/perf-regs-arch/perf_regs_loongarch.c new file mode 100644 index 000000000000..043f97f4e3ac --- /dev/null +++ b/tools/perf/util/perf-regs-arch/perf_regs_loongarch.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../perf_regs.h" +#include "../../../arch/loongarch/include/uapi/asm/perf_regs.h" + +const char *__perf_reg_name_loongarch(int id) +{ + switch (id) { + case PERF_REG_LOONGARCH_PC: + return "PC"; + case PERF_REG_LOONGARCH_R1: + return "%r1"; + case PERF_REG_LOONGARCH_R2: + return "%r2"; + case PERF_REG_LOONGARCH_R3: + return "%r3"; + case PERF_REG_LOONGARCH_R4: + return "%r4"; + case PERF_REG_LOONGARCH_R5: + return "%r5"; + case PERF_REG_LOONGARCH_R6: + return "%r6"; + case PERF_REG_LOONGARCH_R7: + return "%r7"; + case PERF_REG_LOONGARCH_R8: + return "%r8"; + case PERF_REG_LOONGARCH_R9: + return "%r9"; + case PERF_REG_LOONGARCH_R10: + return "%r10"; + case PERF_REG_LOONGARCH_R11: + return "%r11"; + case PERF_REG_LOONGARCH_R12: + return "%r12"; + case PERF_REG_LOONGARCH_R13: + return "%r13"; + case PERF_REG_LOONGARCH_R14: + return "%r14"; + case PERF_REG_LOONGARCH_R15: + return "%r15"; + case PERF_REG_LOONGARCH_R16: + return "%r16"; + case PERF_REG_LOONGARCH_R17: + return "%r17"; + case PERF_REG_LOONGARCH_R18: + return "%r18"; + case PERF_REG_LOONGARCH_R19: + return "%r19"; + case PERF_REG_LOONGARCH_R20: + return "%r20"; + case PERF_REG_LOONGARCH_R21: + return "%r21"; + case PERF_REG_LOONGARCH_R22: + return "%r22"; + case PERF_REG_LOONGARCH_R23: + return "%r23"; + case PERF_REG_LOONGARCH_R24: + return "%r24"; + case PERF_REG_LOONGARCH_R25: + return "%r25"; + case PERF_REG_LOONGARCH_R26: + return "%r26"; + case PERF_REG_LOONGARCH_R27: + return "%r27"; + 
case PERF_REG_LOONGARCH_R28: + return "%r28"; + case PERF_REG_LOONGARCH_R29: + return "%r29"; + case PERF_REG_LOONGARCH_R30: + return "%r30"; + case PERF_REG_LOONGARCH_R31: + return "%r31"; + default: + break; + } + return NULL; +} + +uint64_t __perf_reg_ip_loongarch(void) +{ + return PERF_REG_LOONGARCH_PC; +} + +uint64_t __perf_reg_sp_loongarch(void) +{ + return PERF_REG_LOONGARCH_R3; +} diff --git a/tools/perf/util/perf-regs-arch/perf_regs_mips.c b/tools/perf/util/perf-regs-arch/perf_regs_mips.c new file mode 100644 index 000000000000..793178fc3c78 --- /dev/null +++ b/tools/perf/util/perf-regs-arch/perf_regs_mips.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../perf_regs.h" +#include "../../../arch/mips/include/uapi/asm/perf_regs.h" + +const char *__perf_reg_name_mips(int id) +{ + switch (id) { + case PERF_REG_MIPS_PC: + return "PC"; + case PERF_REG_MIPS_R1: + return "$1"; + case PERF_REG_MIPS_R2: + return "$2"; + case PERF_REG_MIPS_R3: + return "$3"; + case PERF_REG_MIPS_R4: + return "$4"; + case PERF_REG_MIPS_R5: + return "$5"; + case PERF_REG_MIPS_R6: + return "$6"; + case PERF_REG_MIPS_R7: + return "$7"; + case PERF_REG_MIPS_R8: + return "$8"; + case PERF_REG_MIPS_R9: + return "$9"; + case PERF_REG_MIPS_R10: + return "$10"; + case PERF_REG_MIPS_R11: + return "$11"; + case PERF_REG_MIPS_R12: + return "$12"; + case PERF_REG_MIPS_R13: + return "$13"; + case PERF_REG_MIPS_R14: + return "$14"; + case PERF_REG_MIPS_R15: + return "$15"; + case PERF_REG_MIPS_R16: + return "$16"; + case PERF_REG_MIPS_R17: + return "$17"; + case PERF_REG_MIPS_R18: + return "$18"; + case PERF_REG_MIPS_R19: + return "$19"; + case PERF_REG_MIPS_R20: + return "$20"; + case PERF_REG_MIPS_R21: + return "$21"; + case PERF_REG_MIPS_R22: + return "$22"; + case PERF_REG_MIPS_R23: + return "$23"; + case PERF_REG_MIPS_R24: + return "$24"; + case PERF_REG_MIPS_R25: + return "$25"; + case PERF_REG_MIPS_R28: + return "$28"; + case PERF_REG_MIPS_R29: + return "$29"; + case PERF_REG_MIPS_R30: + return "$30"; + case PERF_REG_MIPS_R31: + return "$31"; + default: + break; + } + return NULL; +} + +uint64_t __perf_reg_ip_mips(void) +{ + return PERF_REG_MIPS_PC; +} + +uint64_t __perf_reg_sp_mips(void) +{ + return PERF_REG_MIPS_R29; +} diff --git a/tools/perf/util/perf-regs-arch/perf_regs_powerpc.c b/tools/perf/util/perf-regs-arch/perf_regs_powerpc.c new file mode 100644 index 000000000000..08636bb09a3a --- /dev/null +++ b/tools/perf/util/perf-regs-arch/perf_regs_powerpc.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../perf_regs.h" +#include "../../../arch/powerpc/include/uapi/asm/perf_regs.h" + +const char *__perf_reg_name_powerpc(int id) +{ + switch (id) { + case PERF_REG_POWERPC_R0: + return "r0"; + case PERF_REG_POWERPC_R1: + return "r1"; + case PERF_REG_POWERPC_R2: + return "r2"; + case PERF_REG_POWERPC_R3: + return "r3"; + case PERF_REG_POWERPC_R4: + return "r4"; + case PERF_REG_POWERPC_R5: + return "r5"; + case PERF_REG_POWERPC_R6: + return "r6"; + case PERF_REG_POWERPC_R7: + return "r7"; + case PERF_REG_POWERPC_R8: + return "r8"; + case PERF_REG_POWERPC_R9: + return "r9"; + case PERF_REG_POWERPC_R10: + return "r10"; + case PERF_REG_POWERPC_R11: + return "r11"; + case PERF_REG_POWERPC_R12: + return "r12"; + case PERF_REG_POWERPC_R13: + return "r13"; + case PERF_REG_POWERPC_R14: + return "r14"; + case PERF_REG_POWERPC_R15: + return "r15"; + case PERF_REG_POWERPC_R16: + return "r16"; + case PERF_REG_POWERPC_R17: + return "r17"; + case PERF_REG_POWERPC_R18: + return "r18"; + case 
PERF_REG_POWERPC_R19: + return "r19"; + case PERF_REG_POWERPC_R20: + return "r20"; + case PERF_REG_POWERPC_R21: + return "r21"; + case PERF_REG_POWERPC_R22: + return "r22"; + case PERF_REG_POWERPC_R23: + return "r23"; + case PERF_REG_POWERPC_R24: + return "r24"; + case PERF_REG_POWERPC_R25: + return "r25"; + case PERF_REG_POWERPC_R26: + return "r26"; + case PERF_REG_POWERPC_R27: + return "r27"; + case PERF_REG_POWERPC_R28: + return "r28"; + case PERF_REG_POWERPC_R29: + return "r29"; + case PERF_REG_POWERPC_R30: + return "r30"; + case PERF_REG_POWERPC_R31: + return "r31"; + case PERF_REG_POWERPC_NIP: + return "nip"; + case PERF_REG_POWERPC_MSR: + return "msr"; + case PERF_REG_POWERPC_ORIG_R3: + return "orig_r3"; + case PERF_REG_POWERPC_CTR: + return "ctr"; + case PERF_REG_POWERPC_LINK: + return "link"; + case PERF_REG_POWERPC_XER: + return "xer"; + case PERF_REG_POWERPC_CCR: + return "ccr"; + case PERF_REG_POWERPC_SOFTE: + return "softe"; + case PERF_REG_POWERPC_TRAP: + return "trap"; + case PERF_REG_POWERPC_DAR: + return "dar"; + case PERF_REG_POWERPC_DSISR: + return "dsisr"; + case PERF_REG_POWERPC_SIER: + return "sier"; + case PERF_REG_POWERPC_MMCRA: + return "mmcra"; + case PERF_REG_POWERPC_MMCR0: + return "mmcr0"; + case PERF_REG_POWERPC_MMCR1: + return "mmcr1"; + case PERF_REG_POWERPC_MMCR2: + return "mmcr2"; + case PERF_REG_POWERPC_MMCR3: + return "mmcr3"; + case PERF_REG_POWERPC_SIER2: + return "sier2"; + case PERF_REG_POWERPC_SIER3: + return "sier3"; + case PERF_REG_POWERPC_PMC1: + return "pmc1"; + case PERF_REG_POWERPC_PMC2: + return "pmc2"; + case PERF_REG_POWERPC_PMC3: + return "pmc3"; + case PERF_REG_POWERPC_PMC4: + return "pmc4"; + case PERF_REG_POWERPC_PMC5: + return "pmc5"; + case PERF_REG_POWERPC_PMC6: + return "pmc6"; + case PERF_REG_POWERPC_SDAR: + return "sdar"; + case PERF_REG_POWERPC_SIAR: + return "siar"; + default: + break; + } + return NULL; +} + +uint64_t __perf_reg_ip_powerpc(void) +{ + return PERF_REG_POWERPC_NIP; +} + +uint64_t __perf_reg_sp_powerpc(void) +{ + return PERF_REG_POWERPC_R1; +} diff --git a/tools/perf/util/perf-regs-arch/perf_regs_riscv.c b/tools/perf/util/perf-regs-arch/perf_regs_riscv.c new file mode 100644 index 000000000000..337b687c655d --- /dev/null +++ b/tools/perf/util/perf-regs-arch/perf_regs_riscv.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../perf_regs.h" +#include "../../../arch/riscv/include/uapi/asm/perf_regs.h" + +const char *__perf_reg_name_riscv(int id) +{ + switch (id) { + case PERF_REG_RISCV_PC: + return "pc"; + case PERF_REG_RISCV_RA: + return "ra"; + case PERF_REG_RISCV_SP: + return "sp"; + case PERF_REG_RISCV_GP: + return "gp"; + case PERF_REG_RISCV_TP: + return "tp"; + case PERF_REG_RISCV_T0: + return "t0"; + case PERF_REG_RISCV_T1: + return "t1"; + case PERF_REG_RISCV_T2: + return "t2"; + case PERF_REG_RISCV_S0: + return "s0"; + case PERF_REG_RISCV_S1: + return "s1"; + case PERF_REG_RISCV_A0: + return "a0"; + case PERF_REG_RISCV_A1: + return "a1"; + case PERF_REG_RISCV_A2: + return "a2"; + case PERF_REG_RISCV_A3: + return "a3"; + case PERF_REG_RISCV_A4: + return "a4"; + case PERF_REG_RISCV_A5: + return "a5"; + case PERF_REG_RISCV_A6: + return "a6"; + case PERF_REG_RISCV_A7: + return "a7"; + case PERF_REG_RISCV_S2: + return "s2"; + case PERF_REG_RISCV_S3: + return "s3"; + case PERF_REG_RISCV_S4: + return "s4"; + case PERF_REG_RISCV_S5: + return "s5"; + case PERF_REG_RISCV_S6: + return "s6"; + case PERF_REG_RISCV_S7: + return "s7"; + case PERF_REG_RISCV_S8: + return "s8"; + case PERF_REG_RISCV_S9: + 
return "s9"; + case PERF_REG_RISCV_S10: + return "s10"; + case PERF_REG_RISCV_S11: + return "s11"; + case PERF_REG_RISCV_T3: + return "t3"; + case PERF_REG_RISCV_T4: + return "t4"; + case PERF_REG_RISCV_T5: + return "t5"; + case PERF_REG_RISCV_T6: + return "t6"; + default: + return NULL; + } + + return NULL; +} + +uint64_t __perf_reg_ip_riscv(void) +{ + return PERF_REG_RISCV_PC; +} + +uint64_t __perf_reg_sp_riscv(void) +{ + return PERF_REG_RISCV_SP; +} diff --git a/tools/perf/util/perf-regs-arch/perf_regs_s390.c b/tools/perf/util/perf-regs-arch/perf_regs_s390.c new file mode 100644 index 000000000000..d69bba881080 --- /dev/null +++ b/tools/perf/util/perf-regs-arch/perf_regs_s390.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../perf_regs.h" +#include "../../../arch/s390/include/uapi/asm/perf_regs.h" + +const char *__perf_reg_name_s390(int id) +{ + switch (id) { + case PERF_REG_S390_R0: + return "R0"; + case PERF_REG_S390_R1: + return "R1"; + case PERF_REG_S390_R2: + return "R2"; + case PERF_REG_S390_R3: + return "R3"; + case PERF_REG_S390_R4: + return "R4"; + case PERF_REG_S390_R5: + return "R5"; + case PERF_REG_S390_R6: + return "R6"; + case PERF_REG_S390_R7: + return "R7"; + case PERF_REG_S390_R8: + return "R8"; + case PERF_REG_S390_R9: + return "R9"; + case PERF_REG_S390_R10: + return "R10"; + case PERF_REG_S390_R11: + return "R11"; + case PERF_REG_S390_R12: + return "R12"; + case PERF_REG_S390_R13: + return "R13"; + case PERF_REG_S390_R14: + return "R14"; + case PERF_REG_S390_R15: + return "R15"; + case PERF_REG_S390_FP0: + return "FP0"; + case PERF_REG_S390_FP1: + return "FP1"; + case PERF_REG_S390_FP2: + return "FP2"; + case PERF_REG_S390_FP3: + return "FP3"; + case PERF_REG_S390_FP4: + return "FP4"; + case PERF_REG_S390_FP5: + return "FP5"; + case PERF_REG_S390_FP6: + return "FP6"; + case PERF_REG_S390_FP7: + return "FP7"; + case PERF_REG_S390_FP8: + return "FP8"; + case PERF_REG_S390_FP9: + return "FP9"; + case PERF_REG_S390_FP10: + return "FP10"; + case PERF_REG_S390_FP11: + return "FP11"; + case PERF_REG_S390_FP12: + return "FP12"; + case PERF_REG_S390_FP13: + return "FP13"; + case PERF_REG_S390_FP14: + return "FP14"; + case PERF_REG_S390_FP15: + return "FP15"; + case PERF_REG_S390_MASK: + return "MASK"; + case PERF_REG_S390_PC: + return "PC"; + default: + return NULL; + } + + return NULL; +} + +uint64_t __perf_reg_ip_s390(void) +{ + return PERF_REG_S390_PC; +} + +uint64_t __perf_reg_sp_s390(void) +{ + return PERF_REG_S390_R15; +} diff --git a/tools/perf/util/perf-regs-arch/perf_regs_x86.c b/tools/perf/util/perf-regs-arch/perf_regs_x86.c new file mode 100644 index 000000000000..708954a9d35d --- /dev/null +++ b/tools/perf/util/perf-regs-arch/perf_regs_x86.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../perf_regs.h" +#include "../../../arch/x86/include/uapi/asm/perf_regs.h" + +const char *__perf_reg_name_x86(int id) +{ + switch (id) { + case PERF_REG_X86_AX: + return "AX"; + case PERF_REG_X86_BX: + return "BX"; + case PERF_REG_X86_CX: + return "CX"; + case PERF_REG_X86_DX: + return "DX"; + case PERF_REG_X86_SI: + return "SI"; + case PERF_REG_X86_DI: + return "DI"; + case PERF_REG_X86_BP: + return "BP"; + case PERF_REG_X86_SP: + return "SP"; + case PERF_REG_X86_IP: + return "IP"; + case PERF_REG_X86_FLAGS: + return "FLAGS"; + case PERF_REG_X86_CS: + return "CS"; + case PERF_REG_X86_SS: + return "SS"; + case PERF_REG_X86_DS: + return "DS"; + case PERF_REG_X86_ES: + return "ES"; + case PERF_REG_X86_FS: + return "FS"; + case 
PERF_REG_X86_GS: + return "GS"; + case PERF_REG_X86_R8: + return "R8"; + case PERF_REG_X86_R9: + return "R9"; + case PERF_REG_X86_R10: + return "R10"; + case PERF_REG_X86_R11: + return "R11"; + case PERF_REG_X86_R12: + return "R12"; + case PERF_REG_X86_R13: + return "R13"; + case PERF_REG_X86_R14: + return "R14"; + case PERF_REG_X86_R15: + return "R15"; + +#define XMM(x) \ + case PERF_REG_X86_XMM ## x: \ + case PERF_REG_X86_XMM ## x + 1: \ + return "XMM" #x; + XMM(0) + XMM(1) + XMM(2) + XMM(3) + XMM(4) + XMM(5) + XMM(6) + XMM(7) + XMM(8) + XMM(9) + XMM(10) + XMM(11) + XMM(12) + XMM(13) + XMM(14) + XMM(15) +#undef XMM + default: + return NULL; + } + + return NULL; +} + +uint64_t __perf_reg_ip_x86(void) +{ + return PERF_REG_X86_IP; +} + +uint64_t __perf_reg_sp_x86(void) +{ + return PERF_REG_X86_SP; +} diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index e1e2d701599c..6ecf38314f01 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -59,23 +59,34 @@ out_delete: static bool perf_probe_api(setup_probe_fn_t fn) { - const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL}; + struct perf_pmu *pmu; struct perf_cpu_map *cpus; struct perf_cpu cpu; - int ret, i = 0; + int ret = 0; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return false; cpu = perf_cpu_map__cpu(cpus, 0); perf_cpu_map__put(cpus); - do { - ret = perf_do_probe_api(fn, cpu, try[i++]); - if (!ret) - return true; - } while (ret == -EAGAIN && try[i]); - + ret = perf_do_probe_api(fn, cpu, "software/cpu-clock/u"); + if (!ret) + return true; + + pmu = perf_pmus__scan_core(/*pmu=*/NULL); + if (pmu) { + const char *try[] = {"cycles", "instructions", NULL}; + char buf[256]; + int i = 0; + + while (ret == -EAGAIN && try[i]) { + snprintf(buf, sizeof(buf), "%s/%s/u", pmu->name, try[i++]); + ret = perf_do_probe_api(fn, cpu, buf); + if (!ret) + return true; + } + } return false; } @@ -140,7 +151,7 @@ bool perf_can_record_cpu_wide(void) struct perf_cpu cpu; int fd; - cpus = perf_cpu_map__new(NULL); + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return false; diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 2247991451f3..741c3d657a8b 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -7,6 +7,8 @@ #include <linux/types.h> #include <linux/perf_event.h> #include "util/evsel_fprintf.h" +#include "util/pmu.h" +#include "util/pmus.h" #include "trace-event.h" struct bit_names { @@ -55,6 +57,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value) bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE), + bit_name(COUNTERS), { .name = NULL, } }; #undef bit_name @@ -74,23 +77,24 @@ static void __p_read_format(char *buf, size_t size, u64 value) } #define ENUM_ID_TO_STR_CASE(x) case x: return (#x); -static const char *stringify_perf_type_id(u64 value) +static const char *stringify_perf_type_id(struct perf_pmu *pmu, u32 type) { - switch (value) { + switch (type) { ENUM_ID_TO_STR_CASE(PERF_TYPE_HARDWARE) ENUM_ID_TO_STR_CASE(PERF_TYPE_SOFTWARE) ENUM_ID_TO_STR_CASE(PERF_TYPE_TRACEPOINT) ENUM_ID_TO_STR_CASE(PERF_TYPE_HW_CACHE) - ENUM_ID_TO_STR_CASE(PERF_TYPE_RAW) ENUM_ID_TO_STR_CASE(PERF_TYPE_BREAKPOINT) + case PERF_TYPE_RAW: + return pmu ? 
pmu->name : "PERF_TYPE_RAW"; default: - return NULL; + return pmu ? pmu->name : NULL; } } static const char *stringify_perf_hw_id(u64 value) { - switch (value) { + switch (value & PERF_HW_EVENT_MASK) { ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CPU_CYCLES) ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_INSTRUCTIONS) ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_REFERENCES) @@ -163,77 +167,100 @@ static const char *stringify_perf_sw_id(u64 value) } #undef ENUM_ID_TO_STR_CASE -#define PRINT_ID(_s, _f) \ -do { \ - const char *__s = _s; \ - if (__s == NULL) \ - snprintf(buf, size, _f, value); \ - else \ - snprintf(buf, size, _f" (%s)", value, __s); \ -} while (0) -#define print_id_unsigned(_s) PRINT_ID(_s, "%"PRIu64) -#define print_id_hex(_s) PRINT_ID(_s, "%#"PRIx64) +static void print_id_unsigned(char *buf, size_t size, u64 value, const char *s) +{ + if (s == NULL) + snprintf(buf, size, "%"PRIu64, value); + else + snprintf(buf, size, "%"PRIu64" (%s)", value, s); +} -static void __p_type_id(char *buf, size_t size, u64 value) +static void print_id_hex(char *buf, size_t size, u64 value, const char *s) { - print_id_unsigned(stringify_perf_type_id(value)); + if (s == NULL) + snprintf(buf, size, "%#"PRIx64, value); + else + snprintf(buf, size, "%#"PRIx64" (%s)", value, s); } -static void __p_config_hw_id(char *buf, size_t size, u64 value) +static void __p_type_id(char *buf, size_t size, struct perf_pmu *pmu, u32 type) { - print_id_hex(stringify_perf_hw_id(value)); + print_id_unsigned(buf, size, type, stringify_perf_type_id(pmu, type)); } -static void __p_config_sw_id(char *buf, size_t size, u64 value) +static void __p_config_hw_id(char *buf, size_t size, struct perf_pmu *pmu, u64 config) { - print_id_hex(stringify_perf_sw_id(value)); + const char *name = stringify_perf_hw_id(config); + + if (name == NULL) { + if (pmu == NULL) { + snprintf(buf, size, "%#"PRIx64, config); + } else { + snprintf(buf, size, "%#"PRIx64" (%s/config=%#"PRIx64"/)", config, pmu->name, + config); + } + } else { + if (pmu == NULL) + snprintf(buf, size, "%#"PRIx64" (%s)", config, name); + else + snprintf(buf, size, "%#"PRIx64" (%s/%s/)", config, pmu->name, name); + } +} + +static void __p_config_sw_id(char *buf, size_t size, u64 id) +{ + print_id_hex(buf, size, id, stringify_perf_sw_id(id)); } -static void __p_config_hw_cache_id(char *buf, size_t size, u64 value) +static void __p_config_hw_cache_id(char *buf, size_t size, struct perf_pmu *pmu, u64 config) { - const char *hw_cache_str = stringify_perf_hw_cache_id(value & 0xff); + const char *hw_cache_str = stringify_perf_hw_cache_id(config & 0xff); const char *hw_cache_op_str = - stringify_perf_hw_cache_op_id((value & 0xff00) >> 8); + stringify_perf_hw_cache_op_id((config & 0xff00) >> 8); const char *hw_cache_op_result_str = - stringify_perf_hw_cache_op_result_id((value & 0xff0000) >> 16); + stringify_perf_hw_cache_op_result_id((config & 0xff0000) >> 16); - if (hw_cache_str == NULL || hw_cache_op_str == NULL || - hw_cache_op_result_str == NULL) { - snprintf(buf, size, "%#"PRIx64, value); + if (hw_cache_str == NULL || hw_cache_op_str == NULL || hw_cache_op_result_str == NULL) { + if (pmu == NULL) { + snprintf(buf, size, "%#"PRIx64, config); + } else { + snprintf(buf, size, "%#"PRIx64" (%s/config=%#"PRIx64"/)", config, pmu->name, + config); + } } else { - snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", value, - hw_cache_op_result_str, hw_cache_op_str, hw_cache_str); + if (pmu == NULL) { + snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", config, + hw_cache_op_result_str, hw_cache_op_str, hw_cache_str); + } 
else { + snprintf(buf, size, "%#"PRIx64" (%s/%s | %s | %s/)", config, pmu->name, + hw_cache_op_result_str, hw_cache_op_str, hw_cache_str); + } } } -#ifdef HAVE_LIBTRACEEVENT -static void __p_config_tracepoint_id(char *buf, size_t size, u64 value) +static void __p_config_tracepoint_id(char *buf, size_t size, u64 id) { - char *str = tracepoint_id_to_name(value); + char *str = tracepoint_id_to_name(id); - print_id_hex(str); + print_id_hex(buf, size, id, str); free(str); } -#endif -static void __p_config_id(char *buf, size_t size, u32 type, u64 value) +static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 config) { switch (type) { case PERF_TYPE_HARDWARE: - return __p_config_hw_id(buf, size, value); + return __p_config_hw_id(buf, size, pmu, config); case PERF_TYPE_SOFTWARE: - return __p_config_sw_id(buf, size, value); + return __p_config_sw_id(buf, size, config); case PERF_TYPE_HW_CACHE: - return __p_config_hw_cache_id(buf, size, value); + return __p_config_hw_cache_id(buf, size, pmu, config); case PERF_TYPE_TRACEPOINT: -#ifdef HAVE_LIBTRACEEVENT - return __p_config_tracepoint_id(buf, size, value); -#endif + return __p_config_tracepoint_id(buf, size, config); case PERF_TYPE_RAW: case PERF_TYPE_BREAKPOINT: default: - snprintf(buf, size, "%#"PRIx64, value); - return; + return print_id_hex(buf, size, config, perf_pmu__name_from_config(pmu, config)); } } @@ -245,8 +272,8 @@ static void __p_config_id(char *buf, size_t size, u32 type, u64 value) #define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) #define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val) #define p_read_format(val) __p_read_format(buf, BUF_SIZE, val) -#define p_type_id(val) __p_type_id(buf, BUF_SIZE, val) -#define p_config_id(val) __p_config_id(buf, BUF_SIZE, attr->type, val) +#define p_type_id(val) __p_type_id(buf, BUF_SIZE, pmu, val) +#define p_config_id(val) __p_config_id(pmu, buf, BUF_SIZE, attr->type, val) #define PRINT_ATTRn(_n, _f, _p, _a) \ do { \ @@ -261,9 +288,17 @@ do { \ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv) { + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); char buf[BUF_SIZE]; int ret = 0; + if (!pmu && (attr->type == PERF_TYPE_HARDWARE || attr->type == PERF_TYPE_HW_CACHE)) { + u32 extended_type = attr->config >> PERF_PMU_TYPE_SHIFT; + + if (extended_type) + pmu = perf_pmus__find_by_type(extended_type); + } + PRINT_ATTRn("type", type, p_type_id, true); PRINT_ATTRf(size, p_unsigned); PRINT_ATTRn("config", config, p_config_id, true); @@ -308,6 +343,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(inherit_thread, p_unsigned); PRINT_ATTRf(remove_on_exec, p_unsigned); PRINT_ATTRf(sigtrap, p_unsigned); + PRINT_ATTRf(defer_callchain, p_unsigned); + PRINT_ATTRf(defer_output, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned, false); PRINT_ATTRf(bp_type, p_unsigned); @@ -322,6 +359,9 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(sample_max_stack, p_unsigned); PRINT_ATTRf(aux_sample_size, p_unsigned); PRINT_ATTRf(sig_data, p_unsigned); + PRINT_ATTRf(aux_start_paused, p_unsigned); + PRINT_ATTRf(aux_pause, p_unsigned); + PRINT_ATTRf(aux_resume, p_unsigned); return ret; } diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 9bdbaa37f813..44b90bbf2d07 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -3,6 +3,7 @@ #include 
<string.h> #include "perf_regs.h" #include "util/sample.h" +#include "debug.h" int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused, char **new_op __maybe_unused) @@ -12,730 +13,21 @@ int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused, uint64_t __weak arch__intr_reg_mask(void) { - return PERF_REGS_MASK; + return 0; } uint64_t __weak arch__user_reg_mask(void) { - return PERF_REGS_MASK; -} - -#ifdef HAVE_PERF_REGS_SUPPORT - -#define perf_event_arm_regs perf_event_arm64_regs -#include "../../arch/arm64/include/uapi/asm/perf_regs.h" -#undef perf_event_arm_regs - -#include "../../arch/arm/include/uapi/asm/perf_regs.h" -#include "../../arch/csky/include/uapi/asm/perf_regs.h" -#include "../../arch/loongarch/include/uapi/asm/perf_regs.h" -#include "../../arch/mips/include/uapi/asm/perf_regs.h" -#include "../../arch/powerpc/include/uapi/asm/perf_regs.h" -#include "../../arch/riscv/include/uapi/asm/perf_regs.h" -#include "../../arch/s390/include/uapi/asm/perf_regs.h" -#include "../../arch/x86/include/uapi/asm/perf_regs.h" - -static const char *__perf_reg_name_arm64(int id) -{ - switch (id) { - case PERF_REG_ARM64_X0: - return "x0"; - case PERF_REG_ARM64_X1: - return "x1"; - case PERF_REG_ARM64_X2: - return "x2"; - case PERF_REG_ARM64_X3: - return "x3"; - case PERF_REG_ARM64_X4: - return "x4"; - case PERF_REG_ARM64_X5: - return "x5"; - case PERF_REG_ARM64_X6: - return "x6"; - case PERF_REG_ARM64_X7: - return "x7"; - case PERF_REG_ARM64_X8: - return "x8"; - case PERF_REG_ARM64_X9: - return "x9"; - case PERF_REG_ARM64_X10: - return "x10"; - case PERF_REG_ARM64_X11: - return "x11"; - case PERF_REG_ARM64_X12: - return "x12"; - case PERF_REG_ARM64_X13: - return "x13"; - case PERF_REG_ARM64_X14: - return "x14"; - case PERF_REG_ARM64_X15: - return "x15"; - case PERF_REG_ARM64_X16: - return "x16"; - case PERF_REG_ARM64_X17: - return "x17"; - case PERF_REG_ARM64_X18: - return "x18"; - case PERF_REG_ARM64_X19: - return "x19"; - case PERF_REG_ARM64_X20: - return "x20"; - case PERF_REG_ARM64_X21: - return "x21"; - case PERF_REG_ARM64_X22: - return "x22"; - case PERF_REG_ARM64_X23: - return "x23"; - case PERF_REG_ARM64_X24: - return "x24"; - case PERF_REG_ARM64_X25: - return "x25"; - case PERF_REG_ARM64_X26: - return "x26"; - case PERF_REG_ARM64_X27: - return "x27"; - case PERF_REG_ARM64_X28: - return "x28"; - case PERF_REG_ARM64_X29: - return "x29"; - case PERF_REG_ARM64_SP: - return "sp"; - case PERF_REG_ARM64_LR: - return "lr"; - case PERF_REG_ARM64_PC: - return "pc"; - case PERF_REG_ARM64_VG: - return "vg"; - default: - return NULL; - } - - return NULL; -} - -static const char *__perf_reg_name_arm(int id) -{ - switch (id) { - case PERF_REG_ARM_R0: - return "r0"; - case PERF_REG_ARM_R1: - return "r1"; - case PERF_REG_ARM_R2: - return "r2"; - case PERF_REG_ARM_R3: - return "r3"; - case PERF_REG_ARM_R4: - return "r4"; - case PERF_REG_ARM_R5: - return "r5"; - case PERF_REG_ARM_R6: - return "r6"; - case PERF_REG_ARM_R7: - return "r7"; - case PERF_REG_ARM_R8: - return "r8"; - case PERF_REG_ARM_R9: - return "r9"; - case PERF_REG_ARM_R10: - return "r10"; - case PERF_REG_ARM_FP: - return "fp"; - case PERF_REG_ARM_IP: - return "ip"; - case PERF_REG_ARM_SP: - return "sp"; - case PERF_REG_ARM_LR: - return "lr"; - case PERF_REG_ARM_PC: - return "pc"; - default: - return NULL; - } - - return NULL; -} - -static const char *__perf_reg_name_csky(int id) -{ - switch (id) { - case PERF_REG_CSKY_A0: - return "a0"; - case PERF_REG_CSKY_A1: - return "a1"; - case PERF_REG_CSKY_A2: - return "a2"; - case 
PERF_REG_CSKY_A3: - return "a3"; - case PERF_REG_CSKY_REGS0: - return "regs0"; - case PERF_REG_CSKY_REGS1: - return "regs1"; - case PERF_REG_CSKY_REGS2: - return "regs2"; - case PERF_REG_CSKY_REGS3: - return "regs3"; - case PERF_REG_CSKY_REGS4: - return "regs4"; - case PERF_REG_CSKY_REGS5: - return "regs5"; - case PERF_REG_CSKY_REGS6: - return "regs6"; - case PERF_REG_CSKY_REGS7: - return "regs7"; - case PERF_REG_CSKY_REGS8: - return "regs8"; - case PERF_REG_CSKY_REGS9: - return "regs9"; - case PERF_REG_CSKY_SP: - return "sp"; - case PERF_REG_CSKY_LR: - return "lr"; - case PERF_REG_CSKY_PC: - return "pc"; -#if defined(__CSKYABIV2__) - case PERF_REG_CSKY_EXREGS0: - return "exregs0"; - case PERF_REG_CSKY_EXREGS1: - return "exregs1"; - case PERF_REG_CSKY_EXREGS2: - return "exregs2"; - case PERF_REG_CSKY_EXREGS3: - return "exregs3"; - case PERF_REG_CSKY_EXREGS4: - return "exregs4"; - case PERF_REG_CSKY_EXREGS5: - return "exregs5"; - case PERF_REG_CSKY_EXREGS6: - return "exregs6"; - case PERF_REG_CSKY_EXREGS7: - return "exregs7"; - case PERF_REG_CSKY_EXREGS8: - return "exregs8"; - case PERF_REG_CSKY_EXREGS9: - return "exregs9"; - case PERF_REG_CSKY_EXREGS10: - return "exregs10"; - case PERF_REG_CSKY_EXREGS11: - return "exregs11"; - case PERF_REG_CSKY_EXREGS12: - return "exregs12"; - case PERF_REG_CSKY_EXREGS13: - return "exregs13"; - case PERF_REG_CSKY_EXREGS14: - return "exregs14"; - case PERF_REG_CSKY_TLS: - return "tls"; - case PERF_REG_CSKY_HI: - return "hi"; - case PERF_REG_CSKY_LO: - return "lo"; -#endif - default: - return NULL; - } - - return NULL; -} - -static inline const char *__perf_reg_name_loongarch(int id) -{ - switch (id) { - case PERF_REG_LOONGARCH_PC: - return "PC"; - case PERF_REG_LOONGARCH_R1: - return "%r1"; - case PERF_REG_LOONGARCH_R2: - return "%r2"; - case PERF_REG_LOONGARCH_R3: - return "%r3"; - case PERF_REG_LOONGARCH_R4: - return "%r4"; - case PERF_REG_LOONGARCH_R5: - return "%r5"; - case PERF_REG_LOONGARCH_R6: - return "%r6"; - case PERF_REG_LOONGARCH_R7: - return "%r7"; - case PERF_REG_LOONGARCH_R8: - return "%r8"; - case PERF_REG_LOONGARCH_R9: - return "%r9"; - case PERF_REG_LOONGARCH_R10: - return "%r10"; - case PERF_REG_LOONGARCH_R11: - return "%r11"; - case PERF_REG_LOONGARCH_R12: - return "%r12"; - case PERF_REG_LOONGARCH_R13: - return "%r13"; - case PERF_REG_LOONGARCH_R14: - return "%r14"; - case PERF_REG_LOONGARCH_R15: - return "%r15"; - case PERF_REG_LOONGARCH_R16: - return "%r16"; - case PERF_REG_LOONGARCH_R17: - return "%r17"; - case PERF_REG_LOONGARCH_R18: - return "%r18"; - case PERF_REG_LOONGARCH_R19: - return "%r19"; - case PERF_REG_LOONGARCH_R20: - return "%r20"; - case PERF_REG_LOONGARCH_R21: - return "%r21"; - case PERF_REG_LOONGARCH_R22: - return "%r22"; - case PERF_REG_LOONGARCH_R23: - return "%r23"; - case PERF_REG_LOONGARCH_R24: - return "%r24"; - case PERF_REG_LOONGARCH_R25: - return "%r25"; - case PERF_REG_LOONGARCH_R26: - return "%r26"; - case PERF_REG_LOONGARCH_R27: - return "%r27"; - case PERF_REG_LOONGARCH_R28: - return "%r28"; - case PERF_REG_LOONGARCH_R29: - return "%r29"; - case PERF_REG_LOONGARCH_R30: - return "%r30"; - case PERF_REG_LOONGARCH_R31: - return "%r31"; - default: - break; - } - return NULL; -} - -static const char *__perf_reg_name_mips(int id) -{ - switch (id) { - case PERF_REG_MIPS_PC: - return "PC"; - case PERF_REG_MIPS_R1: - return "$1"; - case PERF_REG_MIPS_R2: - return "$2"; - case PERF_REG_MIPS_R3: - return "$3"; - case PERF_REG_MIPS_R4: - return "$4"; - case PERF_REG_MIPS_R5: - return "$5"; - case PERF_REG_MIPS_R6: 
- return "$6"; - case PERF_REG_MIPS_R7: - return "$7"; - case PERF_REG_MIPS_R8: - return "$8"; - case PERF_REG_MIPS_R9: - return "$9"; - case PERF_REG_MIPS_R10: - return "$10"; - case PERF_REG_MIPS_R11: - return "$11"; - case PERF_REG_MIPS_R12: - return "$12"; - case PERF_REG_MIPS_R13: - return "$13"; - case PERF_REG_MIPS_R14: - return "$14"; - case PERF_REG_MIPS_R15: - return "$15"; - case PERF_REG_MIPS_R16: - return "$16"; - case PERF_REG_MIPS_R17: - return "$17"; - case PERF_REG_MIPS_R18: - return "$18"; - case PERF_REG_MIPS_R19: - return "$19"; - case PERF_REG_MIPS_R20: - return "$20"; - case PERF_REG_MIPS_R21: - return "$21"; - case PERF_REG_MIPS_R22: - return "$22"; - case PERF_REG_MIPS_R23: - return "$23"; - case PERF_REG_MIPS_R24: - return "$24"; - case PERF_REG_MIPS_R25: - return "$25"; - case PERF_REG_MIPS_R28: - return "$28"; - case PERF_REG_MIPS_R29: - return "$29"; - case PERF_REG_MIPS_R30: - return "$30"; - case PERF_REG_MIPS_R31: - return "$31"; - default: - break; - } - return NULL; -} - -static const char *__perf_reg_name_powerpc(int id) -{ - switch (id) { - case PERF_REG_POWERPC_R0: - return "r0"; - case PERF_REG_POWERPC_R1: - return "r1"; - case PERF_REG_POWERPC_R2: - return "r2"; - case PERF_REG_POWERPC_R3: - return "r3"; - case PERF_REG_POWERPC_R4: - return "r4"; - case PERF_REG_POWERPC_R5: - return "r5"; - case PERF_REG_POWERPC_R6: - return "r6"; - case PERF_REG_POWERPC_R7: - return "r7"; - case PERF_REG_POWERPC_R8: - return "r8"; - case PERF_REG_POWERPC_R9: - return "r9"; - case PERF_REG_POWERPC_R10: - return "r10"; - case PERF_REG_POWERPC_R11: - return "r11"; - case PERF_REG_POWERPC_R12: - return "r12"; - case PERF_REG_POWERPC_R13: - return "r13"; - case PERF_REG_POWERPC_R14: - return "r14"; - case PERF_REG_POWERPC_R15: - return "r15"; - case PERF_REG_POWERPC_R16: - return "r16"; - case PERF_REG_POWERPC_R17: - return "r17"; - case PERF_REG_POWERPC_R18: - return "r18"; - case PERF_REG_POWERPC_R19: - return "r19"; - case PERF_REG_POWERPC_R20: - return "r20"; - case PERF_REG_POWERPC_R21: - return "r21"; - case PERF_REG_POWERPC_R22: - return "r22"; - case PERF_REG_POWERPC_R23: - return "r23"; - case PERF_REG_POWERPC_R24: - return "r24"; - case PERF_REG_POWERPC_R25: - return "r25"; - case PERF_REG_POWERPC_R26: - return "r26"; - case PERF_REG_POWERPC_R27: - return "r27"; - case PERF_REG_POWERPC_R28: - return "r28"; - case PERF_REG_POWERPC_R29: - return "r29"; - case PERF_REG_POWERPC_R30: - return "r30"; - case PERF_REG_POWERPC_R31: - return "r31"; - case PERF_REG_POWERPC_NIP: - return "nip"; - case PERF_REG_POWERPC_MSR: - return "msr"; - case PERF_REG_POWERPC_ORIG_R3: - return "orig_r3"; - case PERF_REG_POWERPC_CTR: - return "ctr"; - case PERF_REG_POWERPC_LINK: - return "link"; - case PERF_REG_POWERPC_XER: - return "xer"; - case PERF_REG_POWERPC_CCR: - return "ccr"; - case PERF_REG_POWERPC_SOFTE: - return "softe"; - case PERF_REG_POWERPC_TRAP: - return "trap"; - case PERF_REG_POWERPC_DAR: - return "dar"; - case PERF_REG_POWERPC_DSISR: - return "dsisr"; - case PERF_REG_POWERPC_SIER: - return "sier"; - case PERF_REG_POWERPC_MMCRA: - return "mmcra"; - case PERF_REG_POWERPC_MMCR0: - return "mmcr0"; - case PERF_REG_POWERPC_MMCR1: - return "mmcr1"; - case PERF_REG_POWERPC_MMCR2: - return "mmcr2"; - case PERF_REG_POWERPC_MMCR3: - return "mmcr3"; - case PERF_REG_POWERPC_SIER2: - return "sier2"; - case PERF_REG_POWERPC_SIER3: - return "sier3"; - case PERF_REG_POWERPC_PMC1: - return "pmc1"; - case PERF_REG_POWERPC_PMC2: - return "pmc2"; - case PERF_REG_POWERPC_PMC3: - return 
"pmc3"; - case PERF_REG_POWERPC_PMC4: - return "pmc4"; - case PERF_REG_POWERPC_PMC5: - return "pmc5"; - case PERF_REG_POWERPC_PMC6: - return "pmc6"; - case PERF_REG_POWERPC_SDAR: - return "sdar"; - case PERF_REG_POWERPC_SIAR: - return "siar"; - default: - break; - } - return NULL; -} - -static const char *__perf_reg_name_riscv(int id) -{ - switch (id) { - case PERF_REG_RISCV_PC: - return "pc"; - case PERF_REG_RISCV_RA: - return "ra"; - case PERF_REG_RISCV_SP: - return "sp"; - case PERF_REG_RISCV_GP: - return "gp"; - case PERF_REG_RISCV_TP: - return "tp"; - case PERF_REG_RISCV_T0: - return "t0"; - case PERF_REG_RISCV_T1: - return "t1"; - case PERF_REG_RISCV_T2: - return "t2"; - case PERF_REG_RISCV_S0: - return "s0"; - case PERF_REG_RISCV_S1: - return "s1"; - case PERF_REG_RISCV_A0: - return "a0"; - case PERF_REG_RISCV_A1: - return "a1"; - case PERF_REG_RISCV_A2: - return "a2"; - case PERF_REG_RISCV_A3: - return "a3"; - case PERF_REG_RISCV_A4: - return "a4"; - case PERF_REG_RISCV_A5: - return "a5"; - case PERF_REG_RISCV_A6: - return "a6"; - case PERF_REG_RISCV_A7: - return "a7"; - case PERF_REG_RISCV_S2: - return "s2"; - case PERF_REG_RISCV_S3: - return "s3"; - case PERF_REG_RISCV_S4: - return "s4"; - case PERF_REG_RISCV_S5: - return "s5"; - case PERF_REG_RISCV_S6: - return "s6"; - case PERF_REG_RISCV_S7: - return "s7"; - case PERF_REG_RISCV_S8: - return "s8"; - case PERF_REG_RISCV_S9: - return "s9"; - case PERF_REG_RISCV_S10: - return "s10"; - case PERF_REG_RISCV_S11: - return "s11"; - case PERF_REG_RISCV_T3: - return "t3"; - case PERF_REG_RISCV_T4: - return "t4"; - case PERF_REG_RISCV_T5: - return "t5"; - case PERF_REG_RISCV_T6: - return "t6"; - default: - return NULL; - } - - return NULL; + return 0; } -static const char *__perf_reg_name_s390(int id) -{ - switch (id) { - case PERF_REG_S390_R0: - return "R0"; - case PERF_REG_S390_R1: - return "R1"; - case PERF_REG_S390_R2: - return "R2"; - case PERF_REG_S390_R3: - return "R3"; - case PERF_REG_S390_R4: - return "R4"; - case PERF_REG_S390_R5: - return "R5"; - case PERF_REG_S390_R6: - return "R6"; - case PERF_REG_S390_R7: - return "R7"; - case PERF_REG_S390_R8: - return "R8"; - case PERF_REG_S390_R9: - return "R9"; - case PERF_REG_S390_R10: - return "R10"; - case PERF_REG_S390_R11: - return "R11"; - case PERF_REG_S390_R12: - return "R12"; - case PERF_REG_S390_R13: - return "R13"; - case PERF_REG_S390_R14: - return "R14"; - case PERF_REG_S390_R15: - return "R15"; - case PERF_REG_S390_FP0: - return "FP0"; - case PERF_REG_S390_FP1: - return "FP1"; - case PERF_REG_S390_FP2: - return "FP2"; - case PERF_REG_S390_FP3: - return "FP3"; - case PERF_REG_S390_FP4: - return "FP4"; - case PERF_REG_S390_FP5: - return "FP5"; - case PERF_REG_S390_FP6: - return "FP6"; - case PERF_REG_S390_FP7: - return "FP7"; - case PERF_REG_S390_FP8: - return "FP8"; - case PERF_REG_S390_FP9: - return "FP9"; - case PERF_REG_S390_FP10: - return "FP10"; - case PERF_REG_S390_FP11: - return "FP11"; - case PERF_REG_S390_FP12: - return "FP12"; - case PERF_REG_S390_FP13: - return "FP13"; - case PERF_REG_S390_FP14: - return "FP14"; - case PERF_REG_S390_FP15: - return "FP15"; - case PERF_REG_S390_MASK: - return "MASK"; - case PERF_REG_S390_PC: - return "PC"; - default: - return NULL; - } +static const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; - return NULL; -} - -static const char *__perf_reg_name_x86(int id) +const struct sample_reg * __weak arch__sample_reg_masks(void) { - switch (id) { - case PERF_REG_X86_AX: - return "AX"; - case PERF_REG_X86_BX: - return "BX"; - 
case PERF_REG_X86_CX: - return "CX"; - case PERF_REG_X86_DX: - return "DX"; - case PERF_REG_X86_SI: - return "SI"; - case PERF_REG_X86_DI: - return "DI"; - case PERF_REG_X86_BP: - return "BP"; - case PERF_REG_X86_SP: - return "SP"; - case PERF_REG_X86_IP: - return "IP"; - case PERF_REG_X86_FLAGS: - return "FLAGS"; - case PERF_REG_X86_CS: - return "CS"; - case PERF_REG_X86_SS: - return "SS"; - case PERF_REG_X86_DS: - return "DS"; - case PERF_REG_X86_ES: - return "ES"; - case PERF_REG_X86_FS: - return "FS"; - case PERF_REG_X86_GS: - return "GS"; - case PERF_REG_X86_R8: - return "R8"; - case PERF_REG_X86_R9: - return "R9"; - case PERF_REG_X86_R10: - return "R10"; - case PERF_REG_X86_R11: - return "R11"; - case PERF_REG_X86_R12: - return "R12"; - case PERF_REG_X86_R13: - return "R13"; - case PERF_REG_X86_R14: - return "R14"; - case PERF_REG_X86_R15: - return "R15"; - -#define XMM(x) \ - case PERF_REG_X86_XMM ## x: \ - case PERF_REG_X86_XMM ## x + 1: \ - return "XMM" #x; - XMM(0) - XMM(1) - XMM(2) - XMM(3) - XMM(4) - XMM(5) - XMM(6) - XMM(7) - XMM(8) - XMM(9) - XMM(10) - XMM(11) - XMM(12) - XMM(13) - XMM(14) - XMM(15) -#undef XMM - default: - return NULL; - } - - return NULL; + return sample_reg_masks; } const char *perf_reg_name(int id, const char *arch) @@ -790,4 +82,53 @@ out: *valp = regs->cache_regs[id]; return 0; } -#endif + +uint64_t perf_arch_reg_ip(const char *arch) +{ + if (!strcmp(arch, "arm")) + return __perf_reg_ip_arm(); + else if (!strcmp(arch, "arm64")) + return __perf_reg_ip_arm64(); + else if (!strcmp(arch, "csky")) + return __perf_reg_ip_csky(); + else if (!strcmp(arch, "loongarch")) + return __perf_reg_ip_loongarch(); + else if (!strcmp(arch, "mips")) + return __perf_reg_ip_mips(); + else if (!strcmp(arch, "powerpc")) + return __perf_reg_ip_powerpc(); + else if (!strcmp(arch, "riscv")) + return __perf_reg_ip_riscv(); + else if (!strcmp(arch, "s390")) + return __perf_reg_ip_s390(); + else if (!strcmp(arch, "x86")) + return __perf_reg_ip_x86(); + + pr_err("Failed to find the IP register for arch %s, returning 0\n", arch); + return 0; +} + +uint64_t perf_arch_reg_sp(const char *arch) +{ + if (!strcmp(arch, "arm")) + return __perf_reg_sp_arm(); + else if (!strcmp(arch, "arm64")) + return __perf_reg_sp_arm64(); + else if (!strcmp(arch, "csky")) + return __perf_reg_sp_csky(); + else if (!strcmp(arch, "loongarch")) + return __perf_reg_sp_loongarch(); + else if (!strcmp(arch, "mips")) + return __perf_reg_sp_mips(); + else if (!strcmp(arch, "powerpc")) + return __perf_reg_sp_powerpc(); + else if (!strcmp(arch, "riscv")) + return __perf_reg_sp_riscv(); + else if (!strcmp(arch, "s390")) + return __perf_reg_sp_s390(); + else if (!strcmp(arch, "x86")) + return __perf_reg_sp_x86(); + + pr_err("Failed to find the SP register for arch %s, returning 0\n", arch); + return 0; +} diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index ce1127af05e4..f2d0736d65cc 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -26,33 +26,43 @@ enum { int arch_sdt_arg_parse_op(char *old_op, char **new_op); uint64_t arch__intr_reg_mask(void); uint64_t arch__user_reg_mask(void); - -#ifdef HAVE_PERF_REGS_SUPPORT -extern const struct sample_reg sample_reg_masks[]; - -#include <perf_regs.h> - -#define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP)) +const struct sample_reg *arch__sample_reg_masks(void); const char *perf_reg_name(int id, const char *arch); int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); +uint64_t perf_arch_reg_ip(const char *arch); 
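With the strcmp() dispatch above, the IP/SP indices are resolved from the recorded architecture string at run time rather than from the build host's compile-time PERF_REG_IP/PERF_REG_SP, so a perf.data file from another arch can still be unwound. A hedged example, not part of the patch, of computing the minimal DWARF unwind mask for an x86 data file:

uint64_t mask = (1ULL << perf_arch_reg_ip("x86")) |
		(1ULL << perf_arch_reg_sp("x86"));
/* Equivalent to (1ULL << PERF_REG_X86_IP) | (1ULL << PERF_REG_X86_SP);
 * the DWARF_MINIMAL_REGS(arch) helper introduced below expands to
 * exactly this pair for the given arch string. */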
+uint64_t perf_arch_reg_sp(const char *arch); +const char *__perf_reg_name_arm64(int id); +uint64_t __perf_reg_ip_arm64(void); +uint64_t __perf_reg_sp_arm64(void); +const char *__perf_reg_name_arm(int id); +uint64_t __perf_reg_ip_arm(void); +uint64_t __perf_reg_sp_arm(void); +const char *__perf_reg_name_csky(int id); +uint64_t __perf_reg_ip_csky(void); +uint64_t __perf_reg_sp_csky(void); +const char *__perf_reg_name_loongarch(int id); +uint64_t __perf_reg_ip_loongarch(void); +uint64_t __perf_reg_sp_loongarch(void); +const char *__perf_reg_name_mips(int id); +uint64_t __perf_reg_ip_mips(void); +uint64_t __perf_reg_sp_mips(void); +const char *__perf_reg_name_powerpc(int id); +uint64_t __perf_reg_ip_powerpc(void); +uint64_t __perf_reg_sp_powerpc(void); +const char *__perf_reg_name_riscv(int id); +uint64_t __perf_reg_ip_riscv(void); +uint64_t __perf_reg_sp_riscv(void); +const char *__perf_reg_name_s390(int id); +uint64_t __perf_reg_ip_s390(void); +uint64_t __perf_reg_sp_s390(void); +const char *__perf_reg_name_x86(int id); +uint64_t __perf_reg_ip_x86(void); +uint64_t __perf_reg_sp_x86(void); -#else -#define PERF_REGS_MASK 0 -#define PERF_REGS_MAX 0 - -#define DWARF_MINIMAL_REGS PERF_REGS_MASK - -static inline const char *perf_reg_name(int id __maybe_unused, const char *arch __maybe_unused) +static inline uint64_t DWARF_MINIMAL_REGS(const char *arch) { - return "unknown"; + return (1ULL << perf_arch_reg_ip(arch)) | (1ULL << perf_arch_reg_sp(arch)); } -static inline int perf_reg_value(u64 *valp __maybe_unused, - struct regs_dump *regs __maybe_unused, - int id __maybe_unused) -{ - return 0; -} -#endif /* HAVE_PERF_REGS_SUPPORT */ #endif /* __PERF_REGS_H */ diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index 862e4a689868..d9043f4afbe7 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -15,6 +15,7 @@ #include "util/strbuf.h" #include "util/thread_map.h" +#include <errno.h> #include <string.h> #include <linux/kernel.h> #include <perfmon/pfmlib_perf_event.h> @@ -47,10 +48,6 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, p_orig = p = strdup(str); if (!p) return -1; - /* - * force loading of the PMU list - */ - perf_pmus__scan(NULL); for (q = p; strsep(&p, ",{}"); q = p) { sep = p ? str + (p - p_orig - 1) : ""; @@ -145,7 +142,20 @@ static bool is_libpfm_event_supported(const char *name, struct perf_cpu_map *cpu evsel->is_libpfm_event = true; - if (evsel__open(evsel, cpus, threads) < 0) + ret = evsel__open(evsel, cpus, threads); + if (ret == -EACCES) { + /* + * This happens if the paranoid value + * /proc/sys/kernel/perf_event_paranoid is set to 2 + * Re-run with exclude_kernel set; we don't do that + * by default as some ARM machines do not support it. 
+ * + */ + evsel->core.attr.exclude_kernel = 1; + ret = evsel__open(evsel, cpus, threads); + + } + if (ret < 0) result = false; evsel__close(evsel); @@ -220,7 +230,8 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, } if (is_libpfm_event_supported(name, cpus, threads)) { - print_cb->print_event(print_state, pinfo->name, topic, + print_cb->print_event(print_state, topic, pinfo->name, + /*pmu_type=*/PERF_TYPE_RAW, name, info->equiv, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", @@ -254,8 +265,9 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, continue; print_cb->print_event(print_state, - pinfo->name, topic, + pinfo->name, + /*pmu_type=*/PERF_TYPE_RAW, name, /*alias=*/NULL, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 28380e7aa8d0..956ea273c2c7 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -12,24 +12,111 @@ #include <stdbool.h> #include <dirent.h> #include <api/fs/fs.h> +#include <api/io.h> +#include <api/io_dir.h> #include <locale.h> #include <fnmatch.h> #include <math.h> #include "debug.h" #include "evsel.h" #include "pmu.h" +#include "drm_pmu.h" +#include "hwmon_pmu.h" #include "pmus.h" -#include "pmu-bison.h" -#include "pmu-flex.h" +#include "tool_pmu.h" +#include "tp_pmu.h" +#include <util/pmu-bison.h> +#include <util/pmu-flex.h> #include "parse-events.h" #include "print-events.h" +#include "hashmap.h" #include "header.h" #include "string2.h" #include "strbuf.h" #include "fncache.h" #include "util/evsel_config.h" +#include <regex.h> -struct perf_pmu perf_pmu__fake; +#define UNIT_MAX_LEN 31 /* max length for event unit name */ + +enum event_source { + /* An event loaded from /sys/bus/event_source/devices/<pmu>/events. */ + EVENT_SRC_SYSFS, + /* An event loaded from a CPUID matched json file. */ + EVENT_SRC_CPU_JSON, + /* + * An event loaded from a /sys/bus/event_source/devices/<pmu>/identifier matched json + * file. + */ + EVENT_SRC_SYS_JSON, +}; + +/** + * struct perf_pmu_alias - An event either read from sysfs or built into + * pmu-events.c, created by parsing the pmu-events json files. + */ +struct perf_pmu_alias { + /** @name: Name of the event like "mem-loads". */ + char *name; + /** @desc: Optional short description of the event. */ + char *desc; + /** @long_desc: Optional long description. */ + char *long_desc; + /** + * @topic: Optional topic such as cache or pipeline, particularly for + * json events. + */ + char *topic; + /** @terms: Owned copy of the event terms. */ + char *terms; + /** + * @legacy_terms: If the event aliases a legacy event, holds a copy + * of the legacy event string. + */ + char *legacy_terms; + /** + * @pmu_name: The name copied from the json struct pmu_event. This can + * differ from the PMU name as it won't have suffixes. + */ + char *pmu_name; + /** @unit: Units for the event, such as bytes or cache lines. */ + char unit[UNIT_MAX_LEN+1]; + /** @scale: Value to scale read counter values by. */ + double scale; + /** @retirement_latency_mean: Value to be given for unsampled retirement latency mean. */ + double retirement_latency_mean; + /** @retirement_latency_min: Value to be given for unsampled retirement latency min. */ + double retirement_latency_min; + /** @retirement_latency_max: Value to be given for unsampled retirement latency max. 
*/ + double retirement_latency_max; + /** + * @per_pkg: Does the file + * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.per-pkg or + * equivalent json value exist and have the value 1. + */ + bool per_pkg; + /** + * @snapshot: Does the file + * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.snapshot + * exist and have the value 1. + */ + bool snapshot; + /** + * @deprecated: Is the event hidden and so not shown in perf list by + * default. + */ + bool deprecated; + /** + * @legacy_deprecated_checked: Legacy events may not be supported by the + * PMU and need to be checked. If they aren't supported, they are marked + * deprecated. + */ + bool legacy_deprecated_checked; + /** @from_sysfs: Was the alias from sysfs or a json event? */ + bool from_sysfs; + /** @info_loaded: Have the scale, unit and other values been read from disk? */ + bool info_loaded; +}; /** * struct perf_pmu_format - Values from a format file read from @@ -40,6 +127,10 @@ struct perf_pmu perf_pmu__fake; * value=PERF_PMU_FORMAT_VALUE_CONFIG and bits 0 to 7 will be set. */ struct perf_pmu_format { + /** @list: Element on list within struct perf_pmu. */ + struct list_head list; + /** @bits: Which config bits are set by this format value. */ + DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS); /** @name: The modifier/file name. */ char *name; /** @@ -47,61 +138,120 @@ struct perf_pmu_format { * are from PERF_PMU_FORMAT_VALUE_CONFIG to * PERF_PMU_FORMAT_VALUE_CONFIG_END. */ - int value; - /** @bits: Which config bits are set by this format value. */ - DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS); - /** @list: Element on list within struct perf_pmu. */ - struct list_head list; + u16 value; + /** @loaded: Have the contents been loaded/parsed. */ + bool loaded; }; +static int pmu_aliases_parse(struct perf_pmu *pmu); + +static struct perf_pmu_format *perf_pmu__new_format(struct list_head *list, char *name) +{ + struct perf_pmu_format *format; + + format = zalloc(sizeof(*format)); + if (!format) + return NULL; + + format->name = strdup(name); + if (!format->name) { + free(format); + return NULL; + } + list_add_tail(&format->list, list); + return format; +} + +/* Called at the end of parsing a format. */ +void perf_pmu_format__set_value(void *vformat, int config, unsigned long *bits) +{ + struct perf_pmu_format *format = vformat; + + format->value = config; + memcpy(format->bits, bits, sizeof(format->bits)); +} + +static void __perf_pmu_format__load(struct perf_pmu_format *format, FILE *file) +{ + void *scanner; + int ret; + + ret = perf_pmu_lex_init(&scanner); + if (ret) + return; + + perf_pmu_set_in(file, scanner); + ret = perf_pmu_parse(format, scanner); + perf_pmu_lex_destroy(scanner); + format->loaded = true; +} + +static void perf_pmu_format__load(const struct perf_pmu *pmu, struct perf_pmu_format *format) +{ + char path[PATH_MAX]; + FILE *file = NULL; + + if (format->loaded) + return; + + if (!perf_pmu__pathname_scnprintf(path, sizeof(path), pmu->name, "format")) + return; + + assert(strlen(path) + strlen(format->name) + 2 < sizeof(path)); + strcat(path, "/"); + strcat(path, format->name); + + file = fopen(path, "r"); + if (!file) + return; + __perf_pmu_format__load(format, file); + fclose(file); +} + /* * Parse & process all the sysfs attributes located under * the directory specified in 'dir' parameter. 
*/ -int perf_pmu__format_parse(int dirfd, struct list_head *head) +static int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load) { - struct dirent *evt_ent; - DIR *format_dir; + struct io_dirent64 *evt_ent; + struct io_dir format_dir; int ret = 0; - format_dir = fdopendir(dirfd); - if (!format_dir) - return -EINVAL; + io_dir__init(&format_dir, dirfd); - while (!ret && (evt_ent = readdir(format_dir))) { + while ((evt_ent = io_dir__readdir(&format_dir)) != NULL) { + struct perf_pmu_format *format; char *name = evt_ent->d_name; - int fd; - void *scanner; - FILE *file; - if (!strcmp(name, ".") || !strcmp(name, "..")) + if (io_dir__is_dir(&format_dir, evt_ent)) continue; - - ret = -EINVAL; - fd = openat(dirfd, name, O_RDONLY); - if (fd < 0) - break; - - file = fdopen(fd, "r"); - if (!file) { - close(fd); + format = perf_pmu__new_format(&pmu->format, name); + if (!format) { + ret = -ENOMEM; break; } - ret = perf_pmu_lex_init(&scanner); - if (ret) { + if (eager_load) { + FILE *file; + int fd = openat(dirfd, name, O_RDONLY); + + if (fd < 0) { + ret = -errno; + break; + } + file = fdopen(fd, "r"); + if (!file) { + close(fd); + break; + } + __perf_pmu_format__load(format, file); fclose(file); - break; } - - perf_pmu_set_in(file, scanner); - ret = perf_pmu_parse(head, name, scanner); - perf_pmu_lex_destroy(scanner); - fclose(file); } - closedir(format_dir); + close(format_dir.dirfd); return ret; } @@ -110,7 +260,7 @@ int perf_pmu__format_parse(int dirfd, struct list_head *head) * located at: * /sys/bus/event_source/devices/<dev>/format as sysfs group attributes. */ -static int pmu_format(int dirfd, const char *name, struct list_head *format) +static int pmu_format(struct perf_pmu *pmu, int dirfd, const char *name, bool eager_load) { int fd; @@ -119,13 +269,13 @@ static int pmu_format(int dirfd, const char *name, struct list_head *format) return 0; /* it'll close the fd */ - if (perf_pmu__format_parse(fd, format)) + if (perf_pmu__format_parse(pmu, fd, eager_load)) return -1; return 0; } -int perf_pmu__convert_scale(const char *scale, char **end, double *sval) +static int parse_double(const char *scale, char **end, double *sval) { char *lc; int ret = 0; @@ -162,17 +312,26 @@ out: return ret; } -static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, int dirfd, char *name) +int perf_pmu__convert_scale(const char *scale, char **end, double *sval) +{ + return parse_double(scale, end, sval); +} + +static int perf_pmu__parse_scale(struct perf_pmu *pmu, struct perf_pmu_alias *alias) { struct stat st; ssize_t sret; + size_t len; char scale[128]; int fd, ret = -1; char path[PATH_MAX]; - scnprintf(path, PATH_MAX, "%s.scale", name); + len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); + if (!len) + return 0; + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.scale", pmu->name, alias->name); - fd = openat(dirfd, path, O_RDONLY); + fd = open(path, O_RDONLY); if (fd == -1) return -1; @@ -194,15 +353,20 @@ error: return ret; } -static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, int dirfd, char *name) +static int perf_pmu__parse_unit(struct perf_pmu *pmu, struct perf_pmu_alias *alias) { char path[PATH_MAX]; + size_t len; ssize_t sret; int fd; - scnprintf(path, PATH_MAX, "%s.unit", name); - fd = openat(dirfd, path, O_RDONLY); + len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); + if (!len) + return 0; + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.unit", pmu->name, alias->name); + + fd = open(path, O_RDONLY); if (fd == -1) 
return -1; @@ -224,223 +388,322 @@ error: return -1; } -static int -perf_pmu__parse_per_pkg(struct perf_pmu_alias *alias, int dirfd, char *name) +static bool perf_pmu__parse_event_source_bool(const char *pmu_name, const char *event_name, + const char *suffix) { char path[PATH_MAX]; + size_t len; int fd; - scnprintf(path, PATH_MAX, "%s.per-pkg", name); + len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); + if (!len) + return false; + + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.%s", pmu_name, event_name, suffix); - fd = openat(dirfd, path, O_RDONLY); + fd = open(path, O_RDONLY); if (fd == -1) - return -1; + return false; - close(fd); +#ifndef NDEBUG + { + char buf[8]; - alias->per_pkg = true; - return 0; + len = read(fd, buf, sizeof(buf)); + assert(len == 1 || len == 2); + assert(buf[0] == '1'); + } +#endif + + close(fd); + return true; } -static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, - int dirfd, char *name) +static void perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias) { - char path[PATH_MAX]; - int fd; + alias->per_pkg = perf_pmu__parse_event_source_bool(pmu->name, alias->name, "per-pkg"); +} - scnprintf(path, PATH_MAX, "%s.snapshot", name); +static void perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias *alias) +{ + alias->snapshot = perf_pmu__parse_event_source_bool(pmu->name, alias->name, "snapshot"); +} - fd = openat(dirfd, path, O_RDONLY); - if (fd == -1) - return -1; +/* Delete an alias entry. */ +static void perf_pmu_free_alias(struct perf_pmu_alias *alias) +{ + if (!alias) + return; - alias->snapshot = true; - close(fd); - return 0; + zfree(&alias->name); + zfree(&alias->desc); + zfree(&alias->long_desc); + zfree(&alias->topic); + zfree(&alias->pmu_name); + zfree(&alias->terms); + zfree(&alias->legacy_terms); + free(alias); } -static void perf_pmu_assign_str(char *name, const char *field, char **old_str, - char **new_str) +static void perf_pmu__del_aliases(struct perf_pmu *pmu) { - if (!*old_str) - goto set_new; + struct hashmap_entry *entry; + size_t bkt; - if (*new_str) { /* Have new string, check with old */ - if (strcasecmp(*old_str, *new_str)) - pr_debug("alias %s differs in field '%s'\n", - name, field); - zfree(old_str); - } else /* Nothing new --> keep old string */ + if (!pmu->aliases) return; -set_new: - *old_str = *new_str; - *new_str = NULL; + + hashmap__for_each_entry(pmu->aliases, entry, bkt) + perf_pmu_free_alias(entry->pvalue); + + hashmap__free(pmu->aliases); + pmu->aliases = NULL; } -static void perf_pmu_update_alias(struct perf_pmu_alias *old, - struct perf_pmu_alias *newalias) +static struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu, + const char *name, + bool load) { - perf_pmu_assign_str(old->name, "desc", &old->desc, &newalias->desc); - perf_pmu_assign_str(old->name, "long_desc", &old->long_desc, - &newalias->long_desc); - perf_pmu_assign_str(old->name, "topic", &old->topic, &newalias->topic); - perf_pmu_assign_str(old->name, "value", &old->str, &newalias->str); - old->scale = newalias->scale; - old->per_pkg = newalias->per_pkg; - old->snapshot = newalias->snapshot; - memcpy(old->unit, newalias->unit, sizeof(old->unit)); + struct perf_pmu_alias *alias; + bool has_sysfs_event; + char event_file_name[NAME_MAX + 8]; + + if (hashmap__find(pmu->aliases, name, &alias)) + return alias; + + if (!load || pmu->sysfs_aliases_loaded) + return NULL; + + /* + * Test if alias/event 'name' exists in the PMU's sysfs/events + * directory. 
If not skip parsing the sysfs aliases. Sysfs event + * name must be all lower or all upper case. + */ + scnprintf(event_file_name, sizeof(event_file_name), "events/%s", name); + for (size_t i = 7, n = 7 + strlen(name); i < n; i++) + event_file_name[i] = tolower(event_file_name[i]); + + has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); + if (!has_sysfs_event) { + for (size_t i = 7, n = 7 + strlen(name); i < n; i++) + event_file_name[i] = toupper(event_file_name[i]); + + has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); + } + if (has_sysfs_event) { + pmu_aliases_parse(pmu); + if (hashmap__find(pmu->aliases, name, &alias)) + return alias; + } + + return NULL; } -/* Delete an alias entry. */ -void perf_pmu_free_alias(struct perf_pmu_alias *newalias) +static bool assign_str(const char *name, const char *field, char **old_str, + const char *new_str) { - zfree(&newalias->name); - zfree(&newalias->desc); - zfree(&newalias->long_desc); - zfree(&newalias->topic); - zfree(&newalias->str); - zfree(&newalias->pmu_name); - parse_events_terms__purge(&newalias->terms); - free(newalias); + if (!*old_str && new_str) { + *old_str = strdup(new_str); + return true; + } + + if (!new_str || !strcasecmp(*old_str, new_str)) + return false; /* Nothing to update. */ + + pr_debug("alias %s differs in field '%s' ('%s' != '%s')\n", + name, field, *old_str, new_str); + zfree(old_str); + *old_str = strdup(new_str); + return true; } -static void perf_pmu__del_aliases(struct perf_pmu *pmu) +static void read_alias_info(struct perf_pmu *pmu, struct perf_pmu_alias *alias) { - struct perf_pmu_alias *alias, *tmp; + if (!alias->from_sysfs || alias->info_loaded) + return; - list_for_each_entry_safe(alias, tmp, &pmu->aliases, list) { - list_del(&alias->list); - perf_pmu_free_alias(alias); - } + /* + * load unit name and scale if available + */ + perf_pmu__parse_unit(pmu, alias); + perf_pmu__parse_scale(pmu, alias); + perf_pmu__parse_per_pkg(pmu, alias); + perf_pmu__parse_snapshot(pmu, alias); } -/* Merge an alias, search in alias list. If this name is already - * present merge both of them to combine all information. 
- */ -static bool perf_pmu_merge_alias(struct perf_pmu_alias *newalias, - struct list_head *alist) +struct update_alias_data { + struct perf_pmu *pmu; + struct perf_pmu_alias *alias; + bool legacy; +}; + +static int update_alias(const struct pmu_event *pe, + const struct pmu_events_table *table __maybe_unused, + void *vdata) { - struct perf_pmu_alias *a; + struct update_alias_data *data = vdata; + int ret = 0; - list_for_each_entry(a, alist, list) { - if (!strcasecmp(newalias->name, a->name)) { - if (newalias->pmu_name && a->pmu_name && - !strcasecmp(newalias->pmu_name, a->pmu_name)) { - continue; - } - perf_pmu_update_alias(a, newalias); - perf_pmu_free_alias(newalias); - return true; + read_alias_info(data->pmu, data->alias); + assign_str(pe->name, "desc", &data->alias->desc, pe->desc); + assign_str(pe->name, "long_desc", &data->alias->long_desc, pe->long_desc); + assign_str(pe->name, "topic", &data->alias->topic, pe->topic); + data->alias->per_pkg = pe->perpkg; + if (pe->event) { + if (data->legacy) { + zfree(&data->alias->legacy_terms); + data->alias->legacy_terms = strdup(pe->event); + } else { + zfree(&data->alias->terms); + data->alias->terms = strdup(pe->event); } } - return false; + if (!ret && pe->unit) { + char *unit; + + ret = perf_pmu__convert_scale(pe->unit, &unit, &data->alias->scale); + if (!ret) + snprintf(data->alias->unit, sizeof(data->alias->unit), "%s", unit); + } + if (!ret && pe->retirement_latency_mean) { + ret = parse_double(pe->retirement_latency_mean, NULL, + &data->alias->retirement_latency_mean); + } + if (!ret && pe->retirement_latency_min) { + ret = parse_double(pe->retirement_latency_min, NULL, + &data->alias->retirement_latency_min); + } + if (!ret && pe->retirement_latency_max) { + ret = parse_double(pe->retirement_latency_max, NULL, + &data->alias->retirement_latency_max); + } + return ret; } -static int __perf_pmu__new_alias(struct list_head *list, int dirfd, char *name, - char *desc, char *val, const struct pmu_event *pe) +static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, + const char *desc, const char *val, int val_fd, + const struct pmu_event *pe, enum event_source src) { - struct parse_events_term *term; - struct perf_pmu_alias *alias; - int ret; - char newval[256]; + struct perf_pmu_alias *alias, *old_alias; + int ret = 0; const char *long_desc = NULL, *topic = NULL, *unit = NULL, *pmu_name = NULL; bool deprecated = false, perpkg = false; + if (perf_pmu__find_alias(pmu, name, /*load=*/ false)) { + /* Alias was already created/loaded. 
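update_alias() above parses the optional retirement-latency strings one by one, letting the first failure short-circuit through ret. A minimal sketch of that pattern; the parse_double() stand-in below is built on strtod() and only mirrors the signature seen in the calls above, so its exact behavior in perf is an assumption:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Parse str into *val; 0 on success, -EINVAL if nothing or junk remains. */
static int parse_double(const char *str, char **endp, double *val)
{
	char *end;

	errno = 0;
	*val = strtod(str, &end);
	if (errno || end == str || (!endp && *end != '\0'))
		return -EINVAL;
	if (endp)
		*endp = end;
	return 0;
}

int main(void)
{
	/* Hypothetical JSON fields; any of them may be absent (NULL). */
	const char *mean = "8.3", *min = "2", *max = NULL;
	double lat_mean = 0, lat_min = 0, lat_max = 0;
	int ret = 0;

	if (!ret && mean)
		ret = parse_double(mean, NULL, &lat_mean);
	if (!ret && min)
		ret = parse_double(min, NULL, &lat_min);
	if (!ret && max)
		ret = parse_double(max, NULL, &lat_max);
	printf("ret=%d mean=%g min=%g max=%g\n", ret, lat_mean, lat_min, lat_max);
	return 0;
}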
*/ + return 0; + } + if (pe) { long_desc = pe->long_desc; topic = pe->topic; unit = pe->unit; perpkg = pe->perpkg; deprecated = pe->deprecated; - pmu_name = pe->pmu; + if (pe->pmu && strcmp(pe->pmu, "default_core")) + pmu_name = pe->pmu; } - alias = malloc(sizeof(*alias)); + alias = zalloc(sizeof(*alias)); if (!alias) return -ENOMEM; - INIT_LIST_HEAD(&alias->terms); alias->scale = 1.0; alias->unit[0] = '\0'; alias->per_pkg = perpkg; alias->snapshot = false; alias->deprecated = deprecated; + alias->retirement_latency_mean = 0.0; + alias->retirement_latency_min = 0.0; + alias->retirement_latency_max = 0.0; - ret = parse_events_terms(&alias->terms, val); - if (ret) { - pr_err("Cannot parse alias %s: %d\n", val, ret); - free(alias); - return ret; + if (!ret && pe && pe->retirement_latency_mean) { + ret = parse_double(pe->retirement_latency_mean, NULL, + &alias->retirement_latency_mean); } - - /* Scan event and remove leading zeroes, spaces, newlines, some - * platforms have terms specified as - * event=0x0091 (read from files ../<PMU>/events/<FILE> - * and terms specified as event=0x91 (read from JSON files). - * - * Rebuild string to make alias->str member comparable. - */ - memset(newval, 0, sizeof(newval)); - ret = 0; - list_for_each_entry(term, &alias->terms, list) { - if (ret) - ret += scnprintf(newval + ret, sizeof(newval) - ret, - ","); - if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) - ret += scnprintf(newval + ret, sizeof(newval) - ret, - "%s=%#x", term->config, term->val.num); - else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) - ret += scnprintf(newval + ret, sizeof(newval) - ret, - "%s=%s", term->config, term->val.str); + if (!ret && pe && pe->retirement_latency_min) { + ret = parse_double(pe->retirement_latency_min, NULL, + &alias->retirement_latency_min); } - - alias->name = strdup(name); - if (dirfd >= 0) { - /* - * load unit name and scale if available - */ - perf_pmu__parse_unit(alias, dirfd, name); - perf_pmu__parse_scale(alias, dirfd, name); - perf_pmu__parse_per_pkg(alias, dirfd, name); - perf_pmu__parse_snapshot(alias, dirfd, name); + if (!ret && pe && pe->retirement_latency_max) { + ret = parse_double(pe->retirement_latency_max, NULL, + &alias->retirement_latency_max); } + if (ret) + return ret; + if (val_fd < 0) { + alias->terms = strdup(val); + } else { + char buf[256]; + struct io io; + size_t line_len; + + io__init(&io, val_fd, buf, sizeof(buf)); + ret = io__getline(&io, &alias->terms, &line_len) < 0 ? -errno : 0; + if (ret) { + pr_err("Failed to read alias %s\n", name); + return ret; + } + if (line_len >= 1 && alias->terms[line_len - 1] == '\n') + alias->terms[line_len - 1] = '\0'; + } + alias->name = strdup(name); alias->desc = desc ? strdup(desc) : NULL; - alias->long_desc = long_desc ? strdup(long_desc) : - desc ? strdup(desc) : NULL; + alias->long_desc = long_desc ? strdup(long_desc) : NULL; alias->topic = topic ? strdup(topic) : NULL; + alias->pmu_name = pmu_name ? strdup(pmu_name) : NULL; if (unit) { - if (perf_pmu__convert_scale(unit, (char **)&unit, &alias->scale) < 0) + if (perf_pmu__convert_scale(unit, (char **)&unit, &alias->scale) < 0) { + perf_pmu_free_alias(alias); return -1; + } snprintf(alias->unit, sizeof(alias->unit), "%s", unit); } - alias->str = strdup(newval); - alias->pmu_name = pmu_name ? 
strdup(pmu_name) : NULL; - - if (!perf_pmu_merge_alias(alias, list)) - list_add_tail(&alias->list, list); + switch (src) { + default: + case EVENT_SRC_SYSFS: + alias->from_sysfs = true; + if (pmu->events_table || pmu->is_core) { + /* Update an event from sysfs with json data. */ + struct update_alias_data data = { + .pmu = pmu, + .alias = alias, + .legacy = false, + }; + if ((pmu_events_table__find_event(pmu->events_table, pmu, name, + update_alias, &data) == 0)) { + /* + * Override sysfs encodings with json encodings + * specific to the cpuid. + */ + pmu->cpu_common_json_aliases++; + } + if (pmu->is_core) { + /* Add in legacy encodings. */ + data.legacy = true; + if (pmu_events_table__find_event( + perf_pmu__default_core_events_table(), + pmu, name, update_alias, &data) == 0) + pmu->cpu_common_json_aliases++; + } + } + pmu->sysfs_aliases++; + break; + case EVENT_SRC_CPU_JSON: + pmu->cpu_json_aliases++; + break; + case EVENT_SRC_SYS_JSON: + pmu->sys_json_aliases++; + break; + } + hashmap__set(pmu->aliases, alias->name, alias, /*old_key=*/ NULL, &old_alias); + perf_pmu_free_alias(old_alias); return 0; } -static int perf_pmu__new_alias(struct list_head *list, int dirfd, char *name, FILE *file) -{ - char buf[256]; - int ret; - - ret = fread(buf, 1, sizeof(buf), file); - if (ret == 0) - return -EINVAL; - - buf[ret] = 0; - - /* Remove trailing newline from sysfs file */ - strim(buf); - - return __perf_pmu__new_alias(list, dirfd, name, NULL, buf, NULL); -} - -static inline bool pmu_alias_info_file(char *name) +static inline bool pmu_alias_info_file(const char *name) { size_t len; @@ -458,22 +721,19 @@ static inline bool pmu_alias_info_file(char *name) } /* - * Process all the sysfs attributes located under the directory - * specified in 'dir' parameter. + * Reading the pmu event aliases definition, which should be located at: + * /sys/bus/event_source/devices/<dev>/events as sysfs group attributes. */ -static int pmu_aliases_parse(int dirfd, struct list_head *head) +static int __pmu_aliases_parse(struct perf_pmu *pmu, int events_dir_fd) { - struct dirent *evt_ent; - DIR *event_dir; - int fd; + struct io_dirent64 *evt_ent; + struct io_dir event_dir; - event_dir = fdopendir(dirfd); - if (!event_dir) - return -EINVAL; + io_dir__init(&event_dir, events_dir_fd); - while ((evt_ent = readdir(event_dir))) { + while ((evt_ent = io_dir__readdir(&event_dir))) { char *name = evt_ent->d_name; - FILE *file; + int fd; if (!strcmp(name, ".") || !strcmp(name, "..")) continue; @@ -484,66 +744,86 @@ static int pmu_aliases_parse(int dirfd, struct list_head *head) if (pmu_alias_info_file(name)) continue; - fd = openat(dirfd, name, O_RDONLY); + fd = openat(events_dir_fd, name, O_RDONLY); if (fd == -1) { pr_debug("Cannot open %s\n", name); continue; } - file = fdopen(fd, "r"); - if (!file) { - close(fd); - continue; - } - if (perf_pmu__new_alias(head, dirfd, name, file) < 0) + if (perf_pmu__new_alias(pmu, name, /*desc=*/ NULL, + /*val=*/ NULL, fd, /*pe=*/ NULL, + EVENT_SRC_SYSFS) < 0) pr_debug("Cannot set up %s\n", name); - fclose(file); + close(fd); } - closedir(event_dir); + pmu->sysfs_aliases_loaded = true; return 0; } -/* - * Reading the pmu event aliases definition, which should be located at: - * /sys/bus/event_source/devices/<dev>/events as sysfs group attributes. 
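__pmu_aliases_parse() above walks the events directory, skipping "." / ".." and the <event>.unit, .scale, .per-pkg and .snapshot companion files that pmu_alias_info_file() recognizes. A portable sketch of that filtering with plain opendir()/readdir() in place of the perf-internal io_dir helpers:

#include <dirent.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* True for <event>.unit, .scale, .per-pkg and .snapshot companion files. */
static bool pmu_alias_info_file(const char *name)
{
	static const char *const suffixes[] = {
		".unit", ".scale", ".per-pkg", ".snapshot",
	};
	size_t len = strlen(name);

	for (size_t i = 0; i < sizeof(suffixes) / sizeof(suffixes[0]); i++) {
		size_t slen = strlen(suffixes[i]);

		if (len > slen && !strcmp(name + len - slen, suffixes[i]))
			return true;
	}
	return false;
}

int main(void)
{
	DIR *dir = opendir("/sys/bus/event_source/devices/cpu/events");
	struct dirent *ent;

	if (!dir)
		return 0;
	while ((ent = readdir(dir))) {
		if (ent->d_name[0] == '.' || pmu_alias_info_file(ent->d_name))
			continue;
		printf("event: %s\n", ent->d_name);
	}
	closedir(dir);
	return 0;
}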
- */ -static int pmu_aliases(int dirfd, const char *name, struct list_head *head) +static int pmu_aliases_parse(struct perf_pmu *pmu) { - int fd; + char path[PATH_MAX]; + size_t len; + int events_dir_fd, ret; - fd = perf_pmu__pathname_fd(dirfd, name, "events", O_DIRECTORY); - if (fd < 0) + if (pmu->sysfs_aliases_loaded) return 0; - /* it'll close the fd */ - if (pmu_aliases_parse(fd, head)) - return -1; + len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); + if (!len) + return 0; + scnprintf(path + len, sizeof(path) - len, "%s/events", pmu->name); - return 0; + events_dir_fd = open(path, O_DIRECTORY); + if (events_dir_fd == -1) { + pmu->sysfs_aliases_loaded = true; + return 0; + } + ret = __pmu_aliases_parse(pmu, events_dir_fd); + close(events_dir_fd); + return ret; +} + +static int pmu_aliases_parse_eager(struct perf_pmu *pmu, int sysfs_fd) +{ + char path[NAME_MAX + 8]; + int ret, events_dir_fd; + + scnprintf(path, sizeof(path), "%s/events", pmu->name); + events_dir_fd = openat(sysfs_fd, path, O_DIRECTORY, 0); + if (events_dir_fd == -1) { + pmu->sysfs_aliases_loaded = true; + return 0; + } + ret = __pmu_aliases_parse(pmu, events_dir_fd); + close(events_dir_fd); + return ret; } -static int pmu_alias_terms(struct perf_pmu_alias *alias, - struct list_head *terms) +static int pmu_alias_terms(struct perf_pmu_alias *alias, struct list_head *terms) { - struct parse_events_term *term, *cloned; - LIST_HEAD(list); + struct parse_events_terms alias_terms; + struct parse_events_term *term; int ret; - list_for_each_entry(term, &alias->terms, list) { - ret = parse_events_term__clone(&cloned, term); - if (ret) { - parse_events_terms__purge(&list); - return ret; - } + parse_events_terms__init(&alias_terms); + ret = parse_events_terms(&alias_terms, alias->terms); + if (ret) { + pr_err("Cannot parse '%s' terms '%s': %d\n", + alias->name, alias->terms, ret); + parse_events_terms__exit(&alias_terms); + return ret; + } + list_for_each_entry(term, &alias_terms.terms, list) { /* * Weak terms don't override command line options, * which we don't want for implicit terms in aliases. */ - cloned->weak = true; - list_add_tail(&cloned->list, &list); + term->weak = true; } - list_splice(&list, terms); + list_splice_init(&alias_terms.terms, terms); + parse_events_terms__exit(&alias_terms); return 0; } @@ -551,32 +831,41 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64) * may have a "cpus" file. 
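pmu_cpumask(), defined next, now reads the first line of the "cpumask" or "cpus" file itself rather than handing a FILE to perf_cpu_map__read(). A sketch of that first-line read using getline() in place of io__getline(); the uncore path is only an example:

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

/* Read the first line of path, stripping the trailing newline; NULL on error. */
static char *read_first_line(const char *path)
{
	FILE *f = fopen(path, "r");
	char *line = NULL;
	size_t cap = 0;
	ssize_t len;

	if (!f)
		return NULL;
	len = getline(&line, &cap, f);
	fclose(f);
	if (len < 0) {
		free(line);
		return NULL;
	}
	if (len > 0 && line[len - 1] == '\n')
		line[len - 1] = '\0';
	return line;
}

int main(void)
{
	char *mask = read_first_line("/sys/bus/event_source/devices/uncore_imc_0/cpumask");

	if (mask) {
		printf("cpumask: %s\n", mask); /* e.g. "0" or "0,36" */
		free(mask);
	}
	return 0;
}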
*/ -static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *name, bool is_core) +static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *pmu_name, bool is_core) { - struct perf_cpu_map *cpus; const char *templates[] = { "cpumask", "cpus", NULL }; const char **template; - char pmu_name[PATH_MAX]; - struct perf_pmu pmu = {.name = pmu_name}; - FILE *file; - strlcpy(pmu_name, name, sizeof(pmu_name)); for (template = templates; *template; template++) { - file = perf_pmu__open_file_at(&pmu, dirfd, *template); - if (!file) + struct io io; + char buf[128]; + char *cpumask = NULL; + size_t cpumask_len; + ssize_t ret; + struct perf_cpu_map *cpus; + + io.fd = perf_pmu__pathname_fd(dirfd, pmu_name, *template, O_RDONLY); + if (io.fd < 0) continue; - cpus = perf_cpu_map__read(file); - fclose(file); + + io__init(&io, io.fd, buf, sizeof(buf)); + ret = io__getline(&io, &cpumask, &cpumask_len); + close(io.fd); + if (ret < 0) + continue; + + cpus = perf_cpu_map__new(cpumask); + free(cpumask); if (cpus) return cpus; } /* Nothing found, for core PMUs assume this means all CPUs. */ - return is_core ? perf_cpu_map__get(cpu_map__online()) : NULL; + return is_core ? cpu_map__online() : NULL; } static bool pmu_is_uncore(int dirfd, const char *name) @@ -622,147 +911,176 @@ static int is_sysfs_pmu_core(const char *name) return file_available(path); } -char *perf_pmu__getcpuid(struct perf_pmu *pmu) -{ - char *cpuid; - static bool printed; - - cpuid = getenv("PERF_CPUID"); - if (cpuid) - cpuid = strdup(cpuid); - if (!cpuid) - cpuid = get_cpuid_str(pmu); - if (!cpuid) - return NULL; - - if (!printed) { - pr_debug("Using CPUID %s\n", cpuid); - printed = true; - } - return cpuid; -} - -__weak const struct pmu_events_table *pmu_events_table__find(void) -{ - return perf_pmu__find_events_table(NULL); -} - -__weak const struct pmu_metrics_table *pmu_metrics_table__find(void) +/** + * Return the length of the PMU name not including the suffix for uncore PMUs. + * + * We want to deduplicate many similar uncore PMUs by stripping their suffixes, + * but there are never going to be too many core PMUs and the suffixes might be + * interesting. "arm_cortex_a53" vs "arm_cortex_a57" or "cpum_cf" for example. + * + * @skip_duplicate_pmus: False in verbose mode so all uncore PMUs are visible + */ +static size_t pmu_deduped_name_len(const struct perf_pmu *pmu, const char *name, + bool skip_duplicate_pmus) { - return perf_pmu__find_metrics_table(NULL); + return skip_duplicate_pmus && !pmu->is_core + ? pmu_name_len_no_suffix(name) + : strlen(name); } /** - * perf_pmu__match_ignoring_suffix - Does the pmu_name match tok ignoring any - * trailing suffix? The Suffix must be in form - * tok_{digits}, or tok{digits}. + * perf_pmu__match_wildcard - Does the pmu_name start with tok and is then only + * followed by nothing or a suffix? tok may contain + * part of a suffix. * @pmu_name: The pmu_name with possible suffix. - * @tok: The possible match to pmu_name without suffix. + * @tok: The wildcard argument to match. */ -static bool perf_pmu__match_ignoring_suffix(const char *pmu_name, const char *tok) +static bool perf_pmu__match_wildcard(const char *pmu_name, const char *tok) { - const char *p; + const char *p, *suffix; + bool has_hex = false; + size_t tok_len = strlen(tok); - if (strncmp(pmu_name, tok, strlen(tok))) + /* Check start of pmu_name for equality. 
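A condensed, standalone version of the matcher defined here, with a couple of worked cases: it accepts an exact match or an optional '_' plus a digit suffix, and, as the body below spells out, only accepts a hex-looking suffix when it is longer than two characters:

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Does pmu_name equal tok, or tok plus an optional '_' and numeric suffix? */
static bool match_wildcard(const char *pmu_name, const char *tok)
{
	size_t tok_len = strlen(tok);
	const char *p, *suffix;
	bool has_hex = false;

	if (strncmp(pmu_name, tok, tok_len))
		return false;
	suffix = p = pmu_name + tok_len;
	if (*p == 0)
		return true;
	if (*p == '_') {
		++p;
		++suffix;
	}
	while (*p) {
		if (!isxdigit((unsigned char)*p))
			return false;
		if (!has_hex)
			has_hex = !isdigit((unsigned char)*p);
		++p;
	}
	/* Short hex-looking suffixes ("ab") are too ambiguous to accept. */
	return !has_hex || (p - suffix) > 2;
}

int main(void)
{
	printf("%d\n", match_wildcard("uncore_cbox_0", "uncore_cbox")); /* 1 */
	printf("%d\n", match_wildcard("uncore_cbox_0", "uncore_cb"));   /* 0 */
	printf("%d\n", match_wildcard("arm_dsu_3", "arm_dsu"));         /* 1 */
	return 0;
}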
*/ + if (strncmp(pmu_name, tok, tok_len)) return false; - p = pmu_name + strlen(tok); + suffix = p = pmu_name + tok_len; if (*p == 0) return true; - if (*p == '_') + if (*p == '_') { ++p; + ++suffix; + } /* Ensure we end in a number */ while (1) { - if (!isdigit(*p)) + if (!isxdigit(*p)) return false; + if (!has_hex) + has_hex = !isdigit(*p); if (*(++p) == 0) break; } + if (has_hex) + return (p - suffix) > 2; + return true; } /** - * pmu_uncore_alias_match - does name match the PMU name? - * @pmu_name: the json struct pmu_event name. This may lack a suffix (which + * perf_pmu__match_ignoring_suffix_uncore - Does the pmu_name match tok ignoring + * any trailing suffix on pmu_name and + * tok? The Suffix must be in form + * tok_{digits}, or tok{digits}. + * @pmu_name: The pmu_name with possible suffix. + * @tok: The possible match to pmu_name. + */ +static bool perf_pmu__match_ignoring_suffix_uncore(const char *pmu_name, const char *tok) +{ + size_t pmu_name_len, tok_len; + + /* For robustness, check for NULL. */ + if (pmu_name == NULL) + return tok == NULL; + + /* uncore_ prefixes are ignored. */ + if (!strncmp(pmu_name, "uncore_", 7)) + pmu_name += 7; + if (!strncmp(tok, "uncore_", 7)) + tok += 7; + + pmu_name_len = pmu_name_len_no_suffix(pmu_name); + tok_len = pmu_name_len_no_suffix(tok); + if (pmu_name_len != tok_len) + return false; + + return strncmp(pmu_name, tok, pmu_name_len) == 0; +} + + +/** + * perf_pmu__match_wildcard_uncore - does to_match match the PMU's name? + * @pmu_name: The pmu->name or pmu->alias to match against. + * @to_match: the json struct pmu_event name. This may lack a suffix (which * matches) or be of the form "socket,pmuname" which will match * "socketX_pmunameY". - * @name: a real full PMU name as from sysfs. */ -static bool pmu_uncore_alias_match(const char *pmu_name, const char *name) +static bool perf_pmu__match_wildcard_uncore(const char *pmu_name, const char *to_match) { - char *tmp = NULL, *tok, *str; - bool res; + char *mutable_to_match, *tok, *tmp; - if (strchr(pmu_name, ',') == NULL) - return perf_pmu__match_ignoring_suffix(name, pmu_name); + if (!pmu_name) + return false; + + /* uncore_ prefixes are ignored. */ + if (!strncmp(pmu_name, "uncore_", 7)) + pmu_name += 7; + if (!strncmp(to_match, "uncore_", 7)) + to_match += 7; + + if (strchr(to_match, ',') == NULL) + return perf_pmu__match_wildcard(pmu_name, to_match); - str = strdup(pmu_name); - if (!str) + /* Process comma separated list of PMU name components. */ + mutable_to_match = strdup(to_match); + if (!mutable_to_match) return false; - /* - * uncore alias may be from different PMU with common prefix - */ - tok = strtok_r(str, ",", &tmp); - if (strncmp(pmu_name, tok, strlen(tok))) { - res = false; - goto out; + tok = strtok_r(mutable_to_match, ",", &tmp); + while (tok) { + size_t tok_len = strlen(tok); + + if (strncmp(pmu_name, tok, tok_len)) { + /* Mismatch between part of pmu_name and tok. */ + free(mutable_to_match); + return false; + } + /* Move pmu_name forward over tok and suffix. */ + pmu_name += tok_len; + while (*pmu_name != '\0' && isdigit(*pmu_name)) + pmu_name++; + if (*pmu_name == '_') + pmu_name++; + + tok = strtok_r(NULL, ",", &tmp); } + free(mutable_to_match); + return *pmu_name == '\0'; +} - /* - * Match more complex aliases where the alias name is a comma-delimited - * list of tokens, orderly contained in the matching PMU name. - * - * Example: For alias "socket,pmuname" and PMU "socketX_pmunameY", we - * match "socket" in "socketX_pmunameY" and then "pmuname" in - * "pmunameY". 
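perf_pmu__match_ignoring_suffix_uncore() above reduces both names to their suffix-free lengths before comparing. A sketch of the same comparison; the name_len_no_suffix() helper below mirrors what pmu_name_len_no_suffix() is assumed to do (strip a trailing "_<digits>" run, otherwise keep the full length):

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Length of name without a trailing "_<digits>" suffix (an assumption
 * about the perf helper's behavior). */
static size_t name_len_no_suffix(const char *name)
{
	size_t orig_len = strlen(name), len = orig_len;

	while (len > 0 && isdigit((unsigned char)name[len - 1]))
		len--;
	if (len > 0 && len != orig_len && name[len - 1] == '_')
		return len - 1;
	return orig_len;
}

static bool match_ignoring_suffix_uncore(const char *pmu_name, const char *tok)
{
	size_t a, b;

	/* uncore_ prefixes are ignored. */
	if (!strncmp(pmu_name, "uncore_", 7))
		pmu_name += 7;
	if (!strncmp(tok, "uncore_", 7))
		tok += 7;
	a = name_len_no_suffix(pmu_name);
	b = name_len_no_suffix(tok);
	return a == b && !strncmp(pmu_name, tok, a);
}

int main(void)
{
	printf("%d\n", match_ignoring_suffix_uncore("uncore_cha_2", "cha"));          /* 1 */
	printf("%d\n", match_ignoring_suffix_uncore("uncore_cha_2", "uncore_cha_5")); /* 1 */
	printf("%d\n", match_ignoring_suffix_uncore("uncore_cha_2", "chacha"));       /* 0 */
	return 0;
}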
- */ - while (1) { - char *next_tok = strtok_r(NULL, ",", &tmp); +bool pmu_uncore_identifier_match(const char *compat, const char *id) +{ + regex_t re; + regmatch_t pmatch[1]; + int match; - name = strstr(name, tok); - if (!name || - (!next_tok && !perf_pmu__match_ignoring_suffix(name, tok))) { - res = false; - goto out; - } - if (!next_tok) - break; - tok = next_tok; - name += strlen(tok); + if (regcomp(&re, compat, REG_EXTENDED) != 0) { + /* Warn unable to generate match particular string. */ + pr_info("Invalid regular expression %s\n", compat); + return false; } - res = true; -out: - free(str); - return res; -} + match = !regexec(&re, id, 1, pmatch, 0); + if (match) { + /* Ensure a full match. */ + match = pmatch[0].rm_so == 0 && (size_t)pmatch[0].rm_eo == strlen(id); + } + regfree(&re); -struct pmu_add_cpu_aliases_map_data { - /* List being added to. */ - struct list_head *head; - /* If a pmu_event lacks a given PMU the default used. */ - char *default_pmu_name; - /* The PMU that we're searching for events for. */ - struct perf_pmu *pmu; -}; + return match; +} static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe, const struct pmu_events_table *table __maybe_unused, void *vdata) { - struct pmu_add_cpu_aliases_map_data *data = vdata; - const char *pname = pe->pmu ?: data->default_pmu_name; + struct perf_pmu *pmu = vdata; - if (!strcmp(pname, data->pmu->name) || - (data->pmu->is_uncore && pmu_uncore_alias_match(pname, data->pmu->name))) { - /* need type casts to override 'const' */ - __perf_pmu__new_alias(data->head, -1, (char *)pe->name, (char *)pe->desc, - (char *)pe->event, pe); - } + perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, /*val_fd=*/ -1, + pe, EVENT_SRC_CPU_JSON); return 0; } @@ -770,86 +1088,85 @@ static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe, * From the pmu_events_table, find the events that correspond to the given * PMU and add them to the list 'head'. */ -void pmu_add_cpu_aliases_table(struct list_head *head, struct perf_pmu *pmu, - const struct pmu_events_table *table) +void pmu_add_cpu_aliases_table(struct perf_pmu *pmu, const struct pmu_events_table *table) { - struct pmu_add_cpu_aliases_map_data data = { - .head = head, - .default_pmu_name = perf_pmus__default_pmu_name(), - .pmu = pmu, - }; - - pmu_events_table_for_each_event(table, pmu_add_cpu_aliases_map_callback, &data); - free(data.default_pmu_name); + pmu_events_table__for_each_event(table, pmu, pmu_add_cpu_aliases_map_callback, pmu); } -static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) +static void pmu_add_cpu_aliases(struct perf_pmu *pmu) { - const struct pmu_events_table *table; + if (!pmu->events_table && !pmu->is_core) + return; - table = perf_pmu__find_events_table(pmu); - if (!table) + if (pmu->cpu_aliases_added) return; - pmu_add_cpu_aliases_table(head, pmu, table); -} + pmu_add_cpu_aliases_table(pmu, pmu->events_table); + if (pmu->is_core) + pmu_add_cpu_aliases_table(pmu, perf_pmu__default_core_events_table()); -struct pmu_sys_event_iter_data { - struct list_head *head; - struct perf_pmu *pmu; -}; + pmu->cpu_aliases_added = true; +} static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe, const struct pmu_events_table *table __maybe_unused, - void *data) + void *vdata) { - struct pmu_sys_event_iter_data *idata = data; - struct perf_pmu *pmu = idata->pmu; + struct perf_pmu *pmu = vdata; - if (!pe->compat || !pe->pmu) + if (!pe->compat || !pe->pmu) { + /* No data to match. 
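pmu_uncore_identifier_match() above compiles the JSON "compat" string as an extended regular expression and additionally insists that the match covers the whole identifier, not a substring. The same full-match idiom in miniature, with hypothetical identifiers:

#include <regex.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* True if the whole of id (not a substring) matches the ERE in compat. */
static bool identifier_match(const char *compat, const char *id)
{
	regmatch_t pmatch[1];
	regex_t re;
	bool match;

	if (regcomp(&re, compat, REG_EXTENDED) != 0)
		return false;
	match = regexec(&re, id, 1, pmatch, 0) == 0 &&
		pmatch[0].rm_so == 0 &&
		(size_t)pmatch[0].rm_eo == strlen(id);
	regfree(&re);
	return match;
}

int main(void)
{
	printf("%d\n", identifier_match("hisi_sccl,ddrc", "hisi_sccl,ddrc"));   /* 1 */
	printf("%d\n", identifier_match("snbep_unc_pcu.", "snbep_unc_pcu1"));   /* 1: '.' matches */
	printf("%d\n", identifier_match("ddrc", "hisi_ddrc"));                  /* 0: partial only */
	return 0;
}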
*/ return 0; + } - if (!strcmp(pmu->id, pe->compat) && - pmu_uncore_alias_match(pe->pmu, pmu->name)) { - __perf_pmu__new_alias(idata->head, -1, - (char *)pe->name, - (char *)pe->desc, - (char *)pe->event, - pe); + if (!perf_pmu__match_wildcard_uncore(pmu->name, pe->pmu) && + !perf_pmu__match_wildcard_uncore(pmu->alias_name, pe->pmu)) { + /* PMU name/alias_name don't match. */ + return 0; } + if (pmu_uncore_identifier_match(pe->compat, pmu->id)) { + /* Id matched. */ + perf_pmu__new_alias(pmu, + pe->name, + pe->desc, + pe->event, + /*val_fd=*/ -1, + pe, + EVENT_SRC_SYS_JSON); + } return 0; } -void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu) +void pmu_add_sys_aliases(struct perf_pmu *pmu) { - struct pmu_sys_event_iter_data idata = { - .head = head, - .pmu = pmu, - }; - if (!pmu->id) return; - pmu_for_each_sys_event(pmu_add_sys_aliases_iter_fn, &idata); + pmu_for_each_sys_event(pmu_add_sys_aliases_iter_fn, pmu); } -struct perf_event_attr * __weak -perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) +static char *pmu_find_alias_name(struct perf_pmu *pmu, int dirfd) { - return NULL; -} + FILE *file = perf_pmu__open_file_at(pmu, dirfd, "alias"); + char *line = NULL; + size_t line_len = 0; + ssize_t ret; -char * __weak -pmu_find_real_name(const char *name) -{ - return (char *)name; -} + if (!file) + return NULL; -char * __weak -pmu_find_alias_name(const char *name __maybe_unused) -{ - return NULL; + ret = getline(&line, &line_len, file); + if (ret < 0) { + fclose(file); + return NULL; + } + /* Remove trailing newline. */ + if (ret > 0 && line[ret - 1] == '\n') + line[--ret] = '\0'; + + fclose(file); + return line; } static int pmu_max_precise(int dirfd, struct perf_pmu *pmu) @@ -860,72 +1177,135 @@ static int pmu_max_precise(int dirfd, struct perf_pmu *pmu) return max_precise; } -struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name) +void __weak +perf_pmu__arch_init(struct perf_pmu *pmu) +{ + if (pmu->is_core) + pmu->mem_events = perf_mem_events; +} + +/* Variant of str_hash that does tolower on each character. 
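The case-folding hash introduced here must agree with the equality callback: any two keys that compare equal under strcasecmp() have to hash identically, which folding every character through tolower() guarantees. A quick standalone check of that invariant:

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>

/* Case-insensitive variant of a multiplicative string hash. */
static size_t ci_hash(const char *s)
{
	size_t h = 0;

	while (*s)
		h = h * 31 + tolower((unsigned char)*s++);
	return h;
}

int main(void)
{
	const char *a = "INST_RETIRED.ANY", *b = "inst_retired.any";

	printf("equal=%d same_hash=%d\n",
	       strcasecmp(a, b) == 0, ci_hash(a) == ci_hash(b));
	return 0;
}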
*/ +static size_t aliases__hash(long key, void *ctx __maybe_unused) +{ + const char *s = (const char *)key; + size_t h = 0; + + while (*s) { + h = h * 31 + tolower(*s); + s++; + } + return h; +} + +static bool aliases__equal(long key1, long key2, void *ctx __maybe_unused) +{ + return strcasecmp((const char *)key1, (const char *)key2) == 0; +} + +int perf_pmu__init(struct perf_pmu *pmu, __u32 type, const char *name) +{ + pmu->type = type; + INIT_LIST_HEAD(&pmu->format); + INIT_LIST_HEAD(&pmu->caps); + + pmu->name = strdup(name); + if (!pmu->name) + return -ENOMEM; + + pmu->aliases = hashmap__new(aliases__hash, aliases__equal, /*ctx=*/ NULL); + if (!pmu->aliases) + return -ENOMEM; + + return 0; +} + +static __u32 wellknown_pmu_type(const char *pmu_name) +{ + struct { + const char *pmu_name; + __u32 type; + } wellknown_pmus[] = { + { + "software", + PERF_TYPE_SOFTWARE + }, + { + "tracepoint", + PERF_TYPE_TRACEPOINT + }, + { + "breakpoint", + PERF_TYPE_BREAKPOINT + }, + }; + for (size_t i = 0; i < ARRAY_SIZE(wellknown_pmus); i++) { + if (!strcmp(wellknown_pmus[i].pmu_name, pmu_name)) + return wellknown_pmus[i].type; + } + return PERF_TYPE_MAX; +} + +struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *name, + bool eager_load) { struct perf_pmu *pmu; - LIST_HEAD(format); - LIST_HEAD(aliases); - __u32 type; - char *name = pmu_find_real_name(lookup_name); - char *alias_name; - /* - * The pmu data we store & need consists of the pmu - * type value and format definitions. Load both right - * now. - */ - if (pmu_format(dirfd, name, &format)) + pmu = zalloc(sizeof(*pmu)); + if (!pmu) return NULL; + if (perf_pmu__init(pmu, PERF_PMU_TYPE_FAKE, name) != 0) { + perf_pmu__delete(pmu); + return NULL; + } + /* - * Check the aliases first to avoid unnecessary work. + * Read type early to fail fast if a lookup name isn't a PMU. Ensure + * that type value is successfully assigned (return 1). */ - if (pmu_aliases(dirfd, name, &aliases)) - return NULL; + if (perf_pmu__scan_file_at(pmu, dirfd, "type", "%u", &pmu->type) != 1) { + /* Double check the PMU's name isn't wellknown. */ + pmu->type = wellknown_pmu_type(name); + if (pmu->type == PERF_TYPE_MAX) { + perf_pmu__delete(pmu); + return NULL; + } + } - pmu = zalloc(sizeof(*pmu)); - if (!pmu) + /* + * The pmu data we store & need consists of the pmu + * type value and format definitions. Load both right + * now. 
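Reading the PMU's type, as done early in perf_pmu__lookup() above, is a one-integer sysfs read; only when it fails does the well-known-name fallback apply. A sketch of the read with plain stdio standing in for perf_pmu__scan_file_at(); the path is an example:

#include <inttypes.h>
#include <stdio.h>

/* Read <sysfs>/bus/event_source/devices/<pmu>/type; -1 if unreadable. */
static int read_pmu_type(const char *pmu, uint32_t *type)
{
	char path[256];
	FILE *f;
	int n;

	snprintf(path, sizeof(path),
		 "/sys/bus/event_source/devices/%s/type", pmu);
	f = fopen(path, "r");
	if (!f)
		return -1;
	n = fscanf(f, "%" SCNu32, type);
	fclose(f);
	return n == 1 ? 0 : -1;
}

int main(void)
{
	uint32_t type;

	if (read_pmu_type("cpu", &type) == 0)
		printf("cpu PMU type: %u\n", type);
	return 0;
}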
+ */ + if (pmu_format(pmu, dirfd, name, eager_load)) { + perf_pmu__delete(pmu); return NULL; + } pmu->is_core = is_pmu_core(name); pmu->cpus = pmu_cpumask(dirfd, name, pmu->is_core); - pmu->name = strdup(name); - if (!pmu->name) - goto err; - - /* Read type, and ensure that type value is successfully assigned (return 1) */ - if (perf_pmu__scan_file_at(pmu, dirfd, "type", "%u", &type) != 1) - goto err; - - alias_name = pmu_find_alias_name(name); - if (alias_name) { - pmu->alias_name = strdup(alias_name); - if (!pmu->alias_name) - goto err; - } - pmu->type = type; pmu->is_uncore = pmu_is_uncore(dirfd, name); if (pmu->is_uncore) pmu->id = pmu_id(name); pmu->max_precise = pmu_max_precise(dirfd, pmu); - pmu_add_cpu_aliases(&aliases, pmu); - pmu_add_sys_aliases(&aliases, pmu); - - INIT_LIST_HEAD(&pmu->format); - INIT_LIST_HEAD(&pmu->aliases); - INIT_LIST_HEAD(&pmu->caps); - list_splice(&format, &pmu->format); - list_splice(&aliases, &pmu->aliases); + pmu->alias_name = pmu_find_alias_name(pmu, dirfd); + pmu->events_table = perf_pmu__find_events_table(pmu); + /* + * Load the sys json events/aliases when loading the PMU as each event + * may have a different compat regular expression. We therefore can't + * know the number of sys json events/aliases without computing the + * regular expressions for them all. + */ + pmu_add_sys_aliases(pmu); list_add_tail(&pmu->list, pmus); - pmu->default_config = perf_pmu__get_default_config(pmu); + perf_pmu__arch_init(pmu); + + if (eager_load) + pmu_aliases_parse_eager(pmu, dirfd); return pmu; -err: - zfree(&pmu->name); - free(pmu); - return NULL; } /* Creates the PMU when sysfs scanning fails. */ @@ -947,12 +1327,17 @@ struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pm pmu->cpus = cpu_map__online(); INIT_LIST_HEAD(&pmu->format); - INIT_LIST_HEAD(&pmu->aliases); + pmu->aliases = hashmap__new(aliases__hash, aliases__equal, /*ctx=*/ NULL); INIT_LIST_HEAD(&pmu->caps); list_add_tail(&pmu->list, core_pmus); return pmu; } +bool perf_pmu__is_fake(const struct perf_pmu *pmu) +{ + return pmu->type == PERF_PMU_TYPE_FAKE; +} + void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu) { struct perf_pmu_format *format; @@ -963,22 +1348,28 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu) pmu->formats_checked = true; /* fake pmu doesn't have format list */ - if (pmu == &perf_pmu__fake) + if (perf_pmu__is_fake(pmu)) return; - list_for_each_entry(format, &pmu->format, list) + list_for_each_entry(format, &pmu->format, list) { + perf_pmu_format__load(pmu, format); if (format->value >= PERF_PMU_FORMAT_VALUE_CONFIG_END) { pr_warning("WARNING: '%s' format '%s' requires 'perf_event_attr::config%d'" "which is not supported by this version of perf!\n", pmu->name, format->name, format->value); return; } + } } bool evsel__is_aux_event(const struct evsel *evsel) { - struct perf_pmu *pmu = evsel__find_pmu(evsel); + struct perf_pmu *pmu; + + if (evsel->needs_auxtrace_mmap) + return true; + pmu = evsel__find_pmu(evsel); return pmu && pmu->auxtrace; } @@ -1000,7 +1391,7 @@ void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel, if (term) user_bits = term->val.cfg_chg; - bits = perf_pmu__format_bits(&pmu->format, config_name); + bits = perf_pmu__format_bits(pmu, config_name); /* Do nothing if the user changed the value */ if (bits & user_bits) @@ -1012,7 +1403,7 @@ void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel, } static struct perf_pmu_format * -pmu_find_format(struct list_head *formats, const char *name) 
+pmu_find_format(const struct list_head *formats, const char *name) { struct perf_pmu_format *format; @@ -1023,9 +1414,9 @@ pmu_find_format(struct list_head *formats, const char *name) return NULL; } -__u64 perf_pmu__format_bits(struct list_head *formats, const char *name) +__u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name) { - struct perf_pmu_format *format = pmu_find_format(formats, name); + struct perf_pmu_format *format = pmu_find_format(&pmu->format, name); __u64 bits = 0; int fbit; @@ -1038,13 +1429,14 @@ __u64 perf_pmu__format_bits(struct list_head *formats, const char *name) return bits; } -int perf_pmu__format_type(struct list_head *formats, const char *name) +int perf_pmu__format_type(struct perf_pmu *pmu, const char *name) { - struct perf_pmu_format *format = pmu_find_format(formats, name); + struct perf_pmu_format *format = pmu_find_format(&pmu->format, name); if (!format) return -1; + perf_pmu_format__load(pmu, format); return format->value; } @@ -1089,12 +1481,12 @@ static __u64 pmu_format_max_value(const unsigned long *format) * in a config string) later on in the term list. */ static int pmu_resolve_param_term(struct parse_events_term *term, - struct list_head *head_terms, + struct parse_events_terms *head_terms, __u64 *value) { struct parse_events_term *t; - list_for_each_entry(t, head_terms, list) { + list_for_each_entry(t, &head_terms->terms, list) { if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM && t->config && !strcmp(t->config, term->config)) { t->used = true; @@ -1109,7 +1501,7 @@ static int pmu_resolve_param_term(struct parse_events_term *term, return -1; } -static char *pmu_formats_string(struct list_head *formats) +static char *pmu_formats_string(const struct list_head *formats) { struct perf_pmu_format *format; char *str = NULL; @@ -1135,12 +1527,12 @@ error: * Setup one of config[12] attr members based on the * user input data - term parameter. */ -static int pmu_config_term(const char *pmu_name, - struct list_head *formats, +static int pmu_config_term(const struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_term *term, - struct list_head *head_terms, - bool zero, struct parse_events_error *err) + struct parse_events_terms *head_terms, + bool zero, bool apply_hardcoded, + struct parse_events_error *err) { struct perf_pmu_format *format; __u64 *vp; @@ -1154,21 +1546,88 @@ static int pmu_config_term(const char *pmu_name, return 0; /* - * Hardcoded terms should be already in, so nothing - * to be done for them. + * Hardcoded terms are generally handled in event parsing, which + * traditionally have had to handle not having a PMU. An alias may + * have hard coded config values, optionally apply them below. */ - if (parse_events__is_hardcoded_term(term)) + if (parse_events__is_hardcoded_term(term)) { + /* Config terms set all bits in the config. 
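The hard-coded cases below reuse pmu_format_value() with a fully-set bitmap, so the term's value lands in the config verbatim; for a sysfs format such as "event:0-7,umask:8-15" the same helper scatters the value into just the named bits. A simplified sketch of that scatter (the real helper works on a bitmap rather than a from/to pair):

#include <inttypes.h>
#include <stdio.h>

/* Scatter the low bits of value into config at bit positions [from, to]. */
static void format_value(uint64_t *config, uint64_t value, int from, int to)
{
	for (int bit = from; bit <= to; bit++, value >>= 1)
		if (value & 1)
			*config |= 1ULL << bit;
}

int main(void)
{
	/* Hypothetical sysfs format: event:0-7, umask:8-15. */
	uint64_t config = 0;

	format_value(&config, 0xd1, 0, 7);  /* event=0xd1 */
	format_value(&config, 0x08, 8, 15); /* umask=0x08 */
	printf("config=%#" PRIx64 "\n", config); /* 0x8d1 */
	return 0;
}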
*/ + DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS); + + if (!apply_hardcoded) + return 0; + + bitmap_fill(bits, PERF_PMU_FORMAT_BITS); + + switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_CONFIG: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG1: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config1, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG2: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config2, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG3: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config3, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG4: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config4, zero); + break; + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + assert(term->val.num < PERF_COUNT_HW_MAX); + assert(pmu->is_core); + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + attr->type = PERF_TYPE_HARDWARE; + break; + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: { +#ifndef NDEBUG + int cache_type = term->val.num & 0xFF; + int cache_op = (term->val.num >> 8) & 0xFF; + int cache_result = (term->val.num >> 16) & 0xFF; + + assert(cache_type < PERF_COUNT_HW_CACHE_MAX); + assert(cache_op < PERF_COUNT_HW_CACHE_OP_MAX); + assert(cache_result < PERF_COUNT_HW_CACHE_RESULT_MAX); +#endif + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + assert((term->val.num & ~0xFFFFFF) == 0); + assert(pmu->is_core); + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + attr->type = PERF_TYPE_HW_CACHE; + break; + } + case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */ + return -EINVAL; + case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: + /* Skip non-config terms. */ + break; + default: + break; + } return 0; + } - format = pmu_find_format(formats, term->config); + format = pmu_find_format(&pmu->format, term->config); if (!format) { - char *pmu_term = pmu_formats_string(formats); + char *pmu_term = pmu_formats_string(&pmu->format); char *unknown_term; char *help_msg; if (asprintf(&unknown_term, "unknown term '%s' for pmu '%s'", - term->config, pmu_name) < 0) + term->config, pmu->name) < 0) unknown_term = NULL; help_msg = parse_events_formats_error_string(pmu_term); if (err) { @@ -1182,7 +1641,7 @@ static int pmu_config_term(const char *pmu_name, free(pmu_term); return -EINVAL; } - + perf_pmu_format__load(pmu, format); switch (format->value) { case PERF_PMU_FORMAT_VALUE_CONFIG: vp = &attr->config; @@ -1196,6 +1655,9 @@ static int pmu_config_term(const char *pmu_name, case PERF_PMU_FORMAT_VALUE_CONFIG3: vp = &attr->config3; break; + case PERF_PMU_FORMAT_VALUE_CONFIG4: + vp = &attr->config4; + break; default: return -EINVAL; } @@ -1240,13 +1702,12 @@ static int pmu_config_term(const char *pmu_name, if (err) { char *err_str; - parse_events_error__handle(err, term->err_val, - asprintf(&err_str, - "value too big for format, maximum is %llu", - (unsigned long long)max_val) < 0 - ? 
strdup("value too big for format") - : err_str, - NULL); + if (asprintf(&err_str, + "value too big for format (%s), maximum is %llu", + format->name, (unsigned long long)max_val) < 0) { + err_str = strdup("value too big for format"); + } + parse_events_error__handle(err, term->err_val, err_str, /*help=*/NULL); return -EINVAL; } /* @@ -1259,16 +1720,21 @@ static int pmu_config_term(const char *pmu_name, return 0; } -int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats, +int perf_pmu__config_terms(const struct perf_pmu *pmu, struct perf_event_attr *attr, - struct list_head *head_terms, - bool zero, struct parse_events_error *err) + struct parse_events_terms *terms, + bool zero, bool apply_hardcoded, + struct parse_events_error *err) { struct parse_events_term *term; - list_for_each_entry(term, head_terms, list) { - if (pmu_config_term(pmu_name, formats, attr, term, head_terms, - zero, err)) + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__config_terms(pmu, attr, terms, err); + if (perf_pmu__is_drm(pmu)) + return drm_pmu__config_terms(pmu, attr, terms, err); + + list_for_each_entry(term, &terms->terms, list) { + if (pmu_config_term(pmu, attr, term, terms, zero, apply_hardcoded, err)) return -EINVAL; } @@ -1281,30 +1747,35 @@ int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats, * 2) pmu format definitions - specified by pmu parameter */ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, - struct list_head *head_terms, + struct parse_events_terms *head_terms, + bool apply_hardcoded, struct parse_events_error *err) { - bool zero = !!pmu->default_config; + bool zero = !!pmu->perf_event_attr_init_default; + + /* Fake PMU doesn't have proper terms so nothing to configure in attr. */ + if (perf_pmu__is_fake(pmu)) + return 0; - return perf_pmu__config_terms(pmu->name, &pmu->format, attr, - head_terms, zero, err); + return perf_pmu__config_terms(pmu, attr, head_terms, zero, apply_hardcoded, err); } static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, struct parse_events_term *term) { struct perf_pmu_alias *alias; - char *name; + const char *name; if (parse_events__is_hardcoded_term(term)) return NULL; if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { - if (term->val.num != 1) + if (!term->no_value) return NULL; if (pmu_find_format(&pmu->format, term->config)) return NULL; name = term->config; + } else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) { if (strcasecmp(term->config, "event")) return NULL; @@ -1313,26 +1784,55 @@ static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, return NULL; } - list_for_each_entry(alias, &pmu->aliases, list) { - if (!strcasecmp(alias->name, name)) - return alias; + alias = perf_pmu__find_alias(pmu, name, /*load=*/ true); + if (alias || pmu->cpu_aliases_added) + return alias; + + /* Alias doesn't exist, try to get it from the json events. 
*/ + if ((pmu_events_table__find_event(pmu->events_table, pmu, name, + pmu_add_cpu_aliases_map_callback, + pmu) == 0) || + (pmu->is_core && + pmu_events_table__find_event(perf_pmu__default_core_events_table(), + pmu, name, + pmu_add_cpu_aliases_map_callback, + pmu) == 0)) { + alias = perf_pmu__find_alias(pmu, name, /*load=*/ false); } - return NULL; + return alias; } -static int check_info_data(struct perf_pmu_alias *alias, - struct perf_pmu_info *info) +static int check_info_data(struct perf_pmu *pmu, + struct perf_pmu_alias *alias, + struct perf_pmu_info *info, + struct parse_events_error *err, + int column) { + read_alias_info(pmu, alias); /* * Only one term in event definition can * define unit, scale and snapshot, fail * if there's more than one. */ - if ((info->unit && alias->unit[0]) || - (info->scale && alias->scale) || - (info->snapshot && alias->snapshot)) + if (info->unit && alias->unit[0]) { + parse_events_error__handle(err, column, + strdup("Attempt to set event's unit twice"), + NULL); + return -EINVAL; + } + if (info->scale && alias->scale) { + parse_events_error__handle(err, column, + strdup("Attempt to set event's scale twice"), + NULL); + return -EINVAL; + } + if (info->snapshot && alias->snapshot) { + parse_events_error__handle(err, column, + strdup("Attempt to set event snapshot twice"), + NULL); return -EINVAL; + } if (alias->unit[0]) info->unit = alias->unit; @@ -1346,17 +1846,37 @@ static int check_info_data(struct perf_pmu_alias *alias, return 0; } +static int perf_pmu__parse_terms_to_attr(struct perf_pmu *pmu, const char *terms_str, + struct perf_event_attr *attr) +{ + struct parse_events_terms terms; + int ret; + + parse_events_terms__init(&terms); + ret = parse_events_terms(&terms, terms_str); + if (ret) { + pr_debug("Failed to parse terms '%s': %d\n", terms_str, ret); + parse_events_terms__exit(&terms); + return ret; + } + ret = perf_pmu__config(pmu, attr, &terms, /*apply_hardcoded=*/true, /*err=*/NULL); + parse_events_terms__exit(&terms); + return ret; +} + /* * Find alias in the terms list and replace it with the terms * defined for the alias */ -int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, - struct perf_pmu_info *info) +int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, + struct perf_pmu_info *info, bool *rewrote_terms, + u64 *alternate_hw_config, struct parse_events_error *err) { struct parse_events_term *term, *h; struct perf_pmu_alias *alias; int ret; + *rewrote_terms = false; info->per_pkg = false; /* @@ -1366,26 +1886,65 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, info->unit = NULL; info->scale = 0.0; info->snapshot = false; + info->retirement_latency_mean = 0.0; + info->retirement_latency_min = 0.0; + info->retirement_latency_max = 0.0; + + if (perf_pmu__is_hwmon(pmu)) { + ret = hwmon_pmu__check_alias(head_terms, info, err); + goto out; + } + if (perf_pmu__is_drm(pmu)) { + ret = drm_pmu__check_alias(pmu, head_terms, info, err); + goto out; + } - list_for_each_entry_safe(term, h, head_terms, list) { + /* Fake PMU doesn't rewrite terms. 
*/ + if (perf_pmu__is_fake(pmu)) + goto out; + + list_for_each_entry_safe(term, h, &head_terms->terms, list) { alias = pmu_find_alias(pmu, term); if (!alias) continue; ret = pmu_alias_terms(alias, &term->list); - if (ret) + if (ret) { + parse_events_error__handle(err, term->err_term, + strdup("Failed to parse terms"), + NULL); return ret; + } - ret = check_info_data(alias, info); + *rewrote_terms = true; + ret = check_info_data(pmu, alias, info, err, term->err_term); if (ret) return ret; + if (alias->legacy_terms) { + struct perf_event_attr attr = {.config = 0,}; + + ret = perf_pmu__parse_terms_to_attr(pmu, alias->legacy_terms, &attr); + if (ret) { + parse_events_error__handle(err, term->err_term, + strdup("Error evaluating legacy terms"), + NULL); + return ret; + } + if (attr.type == PERF_TYPE_HARDWARE) + *alternate_hw_config = attr.config & PERF_HW_EVENT_MASK; + } + if (alias->per_pkg) info->per_pkg = true; + info->retirement_latency_mean = alias->retirement_latency_mean; + info->retirement_latency_min = alias->retirement_latency_min; + info->retirement_latency_max = alias->retirement_latency_max; + list_del_init(&term->list); parse_events_term__delete(term); } - +out: /* * if no unit or scale found in aliases, then * set defaults as for evsel @@ -1400,36 +1959,36 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, return 0; } -int perf_pmu__new_format(struct list_head *list, char *name, - int config, unsigned long *bits) -{ - struct perf_pmu_format *format; +struct find_event_args { + const char *event; + void *state; + pmu_event_callback cb; +}; - format = zalloc(sizeof(*format)); - if (!format) - return -ENOMEM; +static int find_event_callback(void *state, struct pmu_event_info *info) +{ + struct find_event_args *args = state; - format->name = strdup(name); - format->value = config; - memcpy(format->bits, bits, sizeof(format->bits)); + if (!strcmp(args->event, info->name)) + return args->cb(args->state, info); - list_add_tail(&format->list, list); return 0; } -void perf_pmu__set_format(unsigned long *bits, long from, long to) +int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb) { - long b; - - if (!to) - to = from; + struct find_event_args args = { + .event = event, + .state = state, + .cb = cb, + }; - memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS)); - for (b = from; b <= to; b++) - __set_bit(b, bits); + /* Sub-optimal, but function is only used by tests. 
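perf_pmu__find_event() above layers a find on top of the for-each iterator by carrying the target name and callback in a small state struct. The shape of that pattern, reduced to a standalone example with a stand-in iterator:

#include <stdio.h>
#include <string.h>

typedef int (*item_cb)(void *state, const char *name);

struct find_args {
	const char *target;
	int found;
};

/* Stand-in iterator; the real code walks the PMU's alias hashmap. */
static int for_each_item(void *state, item_cb cb)
{
	static const char *const items[] = { "cycles", "instructions", "branches" };

	for (size_t i = 0; i < sizeof(items) / sizeof(items[0]); i++) {
		int ret = cb(state, items[i]);

		if (ret)
			return ret;
	}
	return 0;
}

static int find_cb(void *state, const char *name)
{
	struct find_args *args = state;

	if (!strcmp(args->target, name)) {
		args->found = 1;
		return 1; /* Non-zero stops the iteration. */
	}
	return 0;
}

int main(void)
{
	struct find_args args = { .target = "instructions", .found = 0 };

	for_each_item(&args, find_cb);
	printf("found=%d\n", args.found);
	return 0;
}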
*/ + return perf_pmu__for_each_event(pmu, /*skip_duplicate_pmus=*/ false, + &args, find_event_callback); } -void perf_pmu__del_formats(struct list_head *formats) +static void perf_pmu__del_formats(struct list_head *formats) { struct perf_pmu_format *fmt, *tmp; @@ -1451,6 +2010,68 @@ bool perf_pmu__has_format(const struct perf_pmu *pmu, const char *name) return false; } +int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_callback cb) +{ + static const char *const terms[] = { + "config=0..0xffffffffffffffff", + "config1=0..0xffffffffffffffff", + "config2=0..0xffffffffffffffff", + "config3=0..0xffffffffffffffff", + "config4=0..0xffffffffffffffff", + "legacy-hardware-config=0..9,", + "legacy-cache-config=0..0xffffff,", + "name=string", + "period=number", + "freq=number", + "branch_type=(u|k|hv|any|...)", + "time", + "call-graph=(fp|dwarf|lbr)", + "stack-size=number", + "max-stack=number", + "nr=number", + "inherit", + "no-inherit", + "overwrite", + "no-overwrite", + "percore", + "aux-output", + "aux-action=(pause|resume|start-paused)", + "aux-sample-size=number", + "cpu=number", + "ratio-to-prev=string", + }; + struct perf_pmu_format *format; + int ret; + + /* + * max-events and driver-config are missing above as are the internal + * types user, metric-id, and raw. Assert against the enum + * parse_events__term_type so they are kept in sync. + */ + _Static_assert(ARRAY_SIZE(terms) == __PARSE_EVENTS__TERM_TYPE_NR - 4, + "perf_pmu__for_each_format()'s terms must be kept in sync with enum parse_events__term_type"); + list_for_each_entry(format, &pmu->format, list) { + perf_pmu_format__load(pmu, format); + ret = cb(state, format->name, (int)format->value, format->bits); + if (ret) + return ret; + } + if (!pmu->is_core) + return 0; + + for (size_t i = 0; i < ARRAY_SIZE(terms); i++) { + int config = PERF_PMU_FORMAT_VALUE_CONFIG; + + if (i < PERF_PMU_FORMAT_VALUE_CONFIG_END) + config = i; + + ret = cb(state, terms[i], config, /*bits=*/NULL); + if (ret) + return ret; + } + return 0; +} + bool is_pmu_core(const char *name) { return !strcmp(name, "cpu") || !strcmp(name, "cpum_cf") || is_sysfs_pmu_core(name); @@ -1466,19 +2087,320 @@ bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu) return !pmu->is_core || perf_pmus__num_core_pmus() == 1; } -bool perf_pmu__have_event(const struct perf_pmu *pmu, const char *name) +bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name) { - struct perf_pmu_alias *alias; + if (!name) + return false; + if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(name)) + return false; + if (perf_pmu__is_tracepoint(pmu)) + return tp_pmu__have_event(pmu, name); + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__have_event(pmu, name); + if (perf_pmu__is_drm(pmu)) + return drm_pmu__have_event(pmu, name); + if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL) + return true; + if (pmu->cpu_aliases_added || (!pmu->events_table && !pmu->is_core)) + return false; + if (pmu_events_table__find_event(pmu->events_table, pmu, name, NULL, NULL) == 0) + return true; + return pmu->is_core && + pmu_events_table__find_event(perf_pmu__default_core_events_table(), + pmu, name, NULL, NULL) == 0; +} + +size_t perf_pmu__num_events(struct perf_pmu *pmu) +{ + size_t nr; + + if (perf_pmu__is_tracepoint(pmu)) + return tp_pmu__num_events(pmu); + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__num_events(pmu); + if (perf_pmu__is_drm(pmu)) + return drm_pmu__num_events(pmu); + + pmu_aliases_parse(pmu); + nr = pmu->sysfs_aliases + pmu->sys_json_aliases; + + if 
(pmu->cpu_aliases_added) { + nr += pmu->cpu_json_aliases; + } else if (pmu->events_table || pmu->is_core) { + nr += pmu_events_table__num_events(pmu->events_table, pmu); + if (pmu->is_core) { + nr += pmu_events_table__num_events( + perf_pmu__default_core_events_table(), pmu); + } + nr -= pmu->cpu_common_json_aliases; + } else { + assert(pmu->cpu_json_aliases == 0 && pmu->cpu_common_json_aliases == 0); + } + + if (perf_pmu__is_tool(pmu)) + nr -= tool_pmu__num_skip_events(); + + return pmu->selectable ? nr + 1 : nr; +} + +static int sub_non_neg(int a, int b) +{ + if (b > a) + return 0; + return a - b; +} - list_for_each_entry(alias, &pmu->aliases, list) { - if (!strcmp(alias->name, name)) +static char *format_alias(char *buf, int len, const struct perf_pmu *pmu, + const struct perf_pmu_alias *alias, bool skip_duplicate_pmus) +{ + struct parse_events_terms terms; + struct parse_events_term *term; + int ret, used; + size_t pmu_name_len = pmu_deduped_name_len(pmu, pmu->name, + skip_duplicate_pmus); + + /* Paramemterized events have the parameters shown. */ + if (strstr(alias->terms, "=?")) { + /* No parameters. */ + snprintf(buf, len, "%.*s/%s/", (int)pmu_name_len, pmu->name, alias->name); + return buf; + } + + parse_events_terms__init(&terms); + ret = parse_events_terms(&terms, alias->terms); + if (ret) { + pr_err("Failure to parse '%s' terms '%s': %d\n", + alias->name, alias->terms, ret); + parse_events_terms__exit(&terms); + snprintf(buf, len, "%.*s/%s/", (int)pmu_name_len, pmu->name, alias->name); + return buf; + } + used = snprintf(buf, len, "%.*s/%s", (int)pmu_name_len, pmu->name, alias->name); + + list_for_each_entry(term, &terms.terms, list) { + if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) + used += snprintf(buf + used, sub_non_neg(len, used), + ",%s=%s", term->config, + term->val.str); + } + parse_events_terms__exit(&terms); + if (sub_non_neg(len, used) > 0) { + buf[used] = '/'; + used++; + } + if (sub_non_neg(len, used) > 0) { + buf[used] = '\0'; + used++; + } else + buf[len - 1] = '\0'; + + return buf; +} + +static bool perf_pmu_alias__check_deprecated(struct perf_pmu *pmu, struct perf_pmu_alias *alias) +{ + struct perf_event_attr attr = {.config = 0,}; + const char *check_terms; + bool has_legacy_config; + + if (alias->legacy_deprecated_checked) + return alias->deprecated; + + alias->legacy_deprecated_checked = true; + if (alias->deprecated) + return true; + + check_terms = alias->terms; + has_legacy_config = + strstr(check_terms, "legacy-hardware-config=") != NULL || + strstr(check_terms, "legacy-cache-config=") != NULL; + if (!has_legacy_config && alias->legacy_terms) { + check_terms = alias->legacy_terms; + has_legacy_config = + strstr(check_terms, "legacy-hardware-config=") != NULL || + strstr(check_terms, "legacy-cache-config=") != NULL; + } + if (!has_legacy_config) + return false; + + if (perf_pmu__parse_terms_to_attr(pmu, check_terms, &attr) != 0) { + /* Parsing failed, set as deprecated. */ + alias->deprecated = true; + } else if (attr.type < PERF_TYPE_MAX) { + /* Flag unsupported legacy events as deprecated. 
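is_event_supported(), used just below, ultimately has to ask the kernel whether the attribute opens. A minimal sketch of such a probe with a raw perf_event_open() call; this only illustrates the idea, as the real helper is assumed to also handle details like exclude_kernel fallbacks and CPU selection:

#include <linux/perf_event.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Can the running kernel open this type/config as a counting event? */
static bool event_supported(__u32 type, __u64 config)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = config;
	attr.disabled = 1;

	fd = syscall(SYS_perf_event_open, &attr, 0 /* self */, -1 /* any cpu */,
		     -1 /* no group */, 0);
	if (fd < 0)
		return false;
	close(fd);
	return true;
}

int main(void)
{
	printf("hw cycles supported: %d\n",
	       event_supported(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CYCLES));
	return 0;
}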
*/ + alias->deprecated = !is_event_supported(attr.type, attr.config); + } + return alias->deprecated; +} + +int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, + void *state, pmu_event_callback cb) +{ + char buf[1024]; + struct pmu_event_info info = { + .pmu = pmu, + .event_type_desc = "Kernel PMU event", + }; + int ret = 0; + struct hashmap_entry *entry; + size_t bkt; + + if (perf_pmu__is_tracepoint(pmu)) + return tp_pmu__for_each_event(pmu, state, cb); + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__for_each_event(pmu, state, cb); + if (perf_pmu__is_drm(pmu)) + return drm_pmu__for_each_event(pmu, state, cb); + + pmu_aliases_parse(pmu); + pmu_add_cpu_aliases(pmu); + hashmap__for_each_entry(pmu->aliases, entry, bkt) { + struct perf_pmu_alias *event = entry->pvalue; + size_t buf_used, pmu_name_len; + + if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(event->name)) + continue; + + info.pmu_name = event->pmu_name ?: pmu->name; + pmu_name_len = pmu_deduped_name_len(pmu, info.pmu_name, + skip_duplicate_pmus); + info.alias = NULL; + if (event->desc) { + info.name = event->name; + buf_used = 0; + } else { + info.name = format_alias(buf, sizeof(buf), pmu, event, + skip_duplicate_pmus); + if (pmu->is_core) { + info.alias = info.name; + info.name = event->name; + } + buf_used = strlen(buf) + 1; + } + info.scale_unit = NULL; + if (strlen(event->unit) || event->scale != 1.0) { + info.scale_unit = buf + buf_used; + buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, + "%G%s", event->scale, event->unit) + 1; + } + info.desc = event->desc; + info.long_desc = event->long_desc; + info.encoding_desc = buf + buf_used; + buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, + "%.*s/%s/", (int)pmu_name_len, info.pmu_name, event->terms) + 1; + info.str = event->terms; + info.topic = event->topic; + info.deprecated = perf_pmu_alias__check_deprecated(pmu, event); + ret = cb(state, &info); + if (ret) + goto out; + } + if (pmu->selectable) { + info.name = buf; + snprintf(buf, sizeof(buf), "%s//", pmu->name); + info.alias = NULL; + info.scale_unit = NULL; + info.desc = NULL; + info.long_desc = NULL; + info.encoding_desc = NULL; + info.topic = NULL; + info.pmu_name = pmu->name; + info.deprecated = false; + ret = cb(state, &info); + } +out: + return ret; +} + +static bool perf_pmu___name_match(const struct perf_pmu *pmu, const char *to_match, bool wildcard) +{ + const char *names[2] = { + pmu->name, + pmu->alias_name, + }; + if (pmu->is_core) { + for (size_t i = 0; i < ARRAY_SIZE(names); i++) { + const char *name = names[i]; + + if (!name) + continue; + + if (!strcmp(name, to_match)) { + /* Exact name match. */ + return true; + } + } + if (!strcmp(to_match, "default_core")) { + /* + * jevents and tests use default_core as a marker for any core + * PMU as the PMU name varies across architectures. + */ + return true; + } + return false; + } + if (!pmu->is_uncore) { + /* + * PMU isn't core or uncore, some kind of broken CPU mask + * situation. Only match exact name. + */ + for (size_t i = 0; i < ARRAY_SIZE(names); i++) { + const char *name = names[i]; + + if (!name) + continue; + + if (!strcmp(name, to_match)) { + /* Exact name match. 
*/ + return true; + } + } + return false; + } + for (size_t i = 0; i < ARRAY_SIZE(names); i++) { + const char *name = names[i]; + + if (!name) + continue; + + if (wildcard && perf_pmu__match_wildcard_uncore(name, to_match)) + return true; + if (!wildcard && perf_pmu__match_ignoring_suffix_uncore(name, to_match)) return true; } return false; } +/** + * perf_pmu__name_wildcard_match - Called by the jevents generated code to see + * if pmu matches the json to_match string. + * @pmu: The pmu whose name/alias to match. + * @to_match: The possible match to pmu_name. + */ +bool perf_pmu__name_wildcard_match(const struct perf_pmu *pmu, const char *to_match) +{ + return perf_pmu___name_match(pmu, to_match, /*wildcard=*/true); +} + +/** + * perf_pmu__name_no_suffix_match - Does pmu's name match to_match ignoring any + * trailing suffix on the pmu_name and/or tok? + * @pmu: The pmu whose name/alias to match. + * @to_match: The possible match to pmu_name. + */ +bool perf_pmu__name_no_suffix_match(const struct perf_pmu *pmu, const char *to_match) +{ + return perf_pmu___name_match(pmu, to_match, /*wildcard=*/false); +} + bool perf_pmu__is_software(const struct perf_pmu *pmu) { + const char *known_sw_pmus[] = { + "kprobe", + "msr", + "uprobe", + }; + if (pmu->is_core || pmu->is_uncore || pmu->auxtrace) return false; switch (pmu->type) { @@ -1488,12 +2410,17 @@ bool perf_pmu__is_software(const struct perf_pmu *pmu) case PERF_TYPE_HW_CACHE: return false; case PERF_TYPE_RAW: return false; case PERF_TYPE_BREAKPOINT: return true; + case PERF_PMU_TYPE_TOOL: return true; default: break; } - return !strcmp(pmu->name, "kprobe") || !strcmp(pmu->name, "uprobe"); + for (size_t i = 0; i < ARRAY_SIZE(known_sw_pmus); i++) { + if (!strcmp(pmu->name, known_sw_pmus[i])) + return true; + } + return false; } -FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name) +FILE *perf_pmu__open_file(const struct perf_pmu *pmu, const char *name) { char path[PATH_MAX]; @@ -1504,7 +2431,7 @@ FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name) return fopen(path, "r"); } -FILE *perf_pmu__open_file_at(struct perf_pmu *pmu, int dirfd, const char *name) +FILE *perf_pmu__open_file_at(const struct perf_pmu *pmu, int dirfd, const char *name) { int fd; @@ -1515,7 +2442,7 @@ FILE *perf_pmu__open_file_at(struct perf_pmu *pmu, int dirfd, const char *name) return fdopen(fd, "r"); } -int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, +int perf_pmu__scan_file(const struct perf_pmu *pmu, const char *name, const char *fmt, ...) { va_list args; @@ -1532,7 +2459,7 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, return ret; } -int perf_pmu__scan_file_at(struct perf_pmu *pmu, int dirfd, const char *name, +int perf_pmu__scan_file_at(const struct perf_pmu *pmu, int dirfd, const char *name, const char *fmt, ...) 
{ va_list args; @@ -1549,7 +2476,7 @@ int perf_pmu__scan_file_at(struct perf_pmu *pmu, int dirfd, const char *name, return ret; } -bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name) +bool perf_pmu__file_exists(const struct perf_pmu *pmu, const char *name) { char path[PATH_MAX]; @@ -1595,6 +2522,17 @@ static void perf_pmu__del_caps(struct perf_pmu *pmu) } } +struct perf_pmu_caps *perf_pmu__get_cap(struct perf_pmu *pmu, const char *name) +{ + struct perf_pmu_caps *caps; + + list_for_each_entry(caps, &pmu->caps, list) { + if (!strcmp(caps->name, name)) + return caps; + } + return NULL; +} + /* * Reading/parsing the given pmu capabilities, which should be located at: * /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes. @@ -1602,10 +2540,9 @@ static void perf_pmu__del_caps(struct perf_pmu *pmu) */ int perf_pmu__caps_parse(struct perf_pmu *pmu) { - struct stat st; char caps_path[PATH_MAX]; - DIR *caps_dir; - struct dirent *evt_ent; + struct io_dir caps_dir; + struct io_dirent64 *evt_ent; int caps_fd; if (pmu->caps_initialized) @@ -1616,24 +2553,21 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) if (!perf_pmu__pathname_scnprintf(caps_path, sizeof(caps_path), pmu->name, "caps")) return -1; - if (stat(caps_path, &st) < 0) { + caps_fd = open(caps_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY); + if (caps_fd == -1) { pmu->caps_initialized = true; return 0; /* no error if caps does not exist */ } - caps_dir = opendir(caps_path); - if (!caps_dir) - return -EINVAL; - - caps_fd = dirfd(caps_dir); + io_dir__init(&caps_dir, caps_fd); - while ((evt_ent = readdir(caps_dir)) != NULL) { + while ((evt_ent = io_dir__readdir(&caps_dir)) != NULL) { char *name = evt_ent->d_name; char value[128]; FILE *file; int fd; - if (!strcmp(name, ".") || !strcmp(name, "..")) + if (io_dir__is_dir(&caps_dir, evt_ent)) continue; fd = openat(caps_fd, name, O_RDONLY); @@ -1655,7 +2589,7 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) fclose(file); } - closedir(caps_dir); + close(caps_fd); pmu->caps_initialized = true; return pmu->nr_caps; @@ -1710,23 +2644,31 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, name ?: "N/A", buf, config_name, config); } -int perf_pmu__match(char *pattern, char *name, char *tok) +bool perf_pmu__wildcard_match(const struct perf_pmu *pmu, const char *wildcard_to_match) { - if (!name) - return -1; + const char *names[2] = { + pmu->name, + pmu->alias_name, + }; + bool need_fnmatch = strisglob(wildcard_to_match); - if (fnmatch(pattern, name, 0)) - return -1; + if (!strncmp(wildcard_to_match, "uncore_", 7)) + wildcard_to_match += 7; - if (tok && !perf_pmu__match_ignoring_suffix(name, tok)) - return -1; + for (size_t i = 0; i < ARRAY_SIZE(names); i++) { + const char *pmu_name = names[i]; - return 0; -} + if (!pmu_name) + continue; -double __weak perf_pmu__cpu_slots_per_cycle(void) -{ - return NAN; + if (!strncmp(pmu_name, "uncore_", 7)) + pmu_name += 7; + + if (perf_pmu__match_wildcard(pmu_name, wildcard_to_match) || + (need_fnmatch && !fnmatch(wildcard_to_match, pmu_name, 0))) + return true; + } + return false; } int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size) @@ -1756,17 +2698,19 @@ int perf_pmu__event_source_devices_fd(void) * then pathname will be filled with * "/sys/bus/event_source/devices/cs_etm/format" * - * Return 0 if the sysfs mountpoint couldn't be found or if no - * characters were written. + * Return 0 if the sysfs mountpoint couldn't be found, if no characters were + * written or if the buffer size is exceeded. 
*/ int perf_pmu__pathname_scnprintf(char *buf, size_t size, const char *pmu_name, const char *filename) { - char base_path[PATH_MAX]; + size_t len; - if (!perf_pmu__event_source_devices_scnprintf(base_path, sizeof(base_path))) + len = perf_pmu__event_source_devices_scnprintf(buf, size); + if (!len || (len + strlen(pmu_name) + strlen(filename) + 1) >= size) return 0; - return scnprintf(buf, size, "%s%s/%s", base_path, pmu_name, filename); + + return scnprintf(buf + len, size - len, "%s/%s", pmu_name, filename); } int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, int flags) @@ -1779,14 +2723,43 @@ int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, void perf_pmu__delete(struct perf_pmu *pmu) { + if (!pmu) + return; + + if (perf_pmu__is_hwmon(pmu)) + hwmon_pmu__exit(pmu); + else if (perf_pmu__is_drm(pmu)) + drm_pmu__exit(pmu); + perf_pmu__del_formats(&pmu->format); perf_pmu__del_aliases(pmu); perf_pmu__del_caps(pmu); perf_cpu_map__put(pmu->cpus); - zfree(&pmu->default_config); zfree(&pmu->name); zfree(&pmu->alias_name); + zfree(&pmu->id); free(pmu); } + +const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config) +{ + struct hashmap_entry *entry; + size_t bkt; + + if (!pmu) + return NULL; + + pmu_aliases_parse(pmu); + pmu_add_cpu_aliases(pmu); + hashmap__for_each_entry(pmu->aliases, entry, bkt) { + struct perf_pmu_alias *event = entry->pvalue; + struct perf_event_attr attr = {.config = 0,}; + int ret = perf_pmu__parse_terms_to_attr(pmu, event->terms, &attr); + + if (ret == 0 && config == attr.config) + return event->name; + } + return NULL; +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 6b414cecbad2..8f11bfe8ed6d 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -10,8 +10,11 @@ #include <stdio.h> #include "parse-events.h" #include "pmu-events/pmu-events.h" +#include "map_symbol.h" +#include "mem-events.h" struct evsel_config_term; +struct hashmap; struct perf_cpu_map; struct print_callbacks; @@ -20,6 +23,7 @@ enum { PERF_PMU_FORMAT_VALUE_CONFIG1, PERF_PMU_FORMAT_VALUE_CONFIG2, PERF_PMU_FORMAT_VALUE_CONFIG3, + PERF_PMU_FORMAT_VALUE_CONFIG4, PERF_PMU_FORMAT_VALUE_CONFIG_END, }; @@ -34,12 +38,36 @@ struct perf_pmu_caps { struct list_head list; }; +enum pmu_kind { + /* A perf event syscall PMU. */ + PERF_PMU_KIND_PE, + /* A perf tool provided DRM PMU. */ + PERF_PMU_KIND_DRM, + /* A perf tool provided HWMON PMU. */ + PERF_PMU_KIND_HWMON, + /* Perf tool provided PMU for tool events like time. */ + PERF_PMU_KIND_TOOL, + /* A testing PMU kind. */ + PERF_PMU_KIND_FAKE +}; + +enum { + PERF_PMU_TYPE_PE_START = 0, + PERF_PMU_TYPE_PE_END = 0xFFFDFFFF, + PERF_PMU_TYPE_DRM_START = 0xFFFE0000, + PERF_PMU_TYPE_DRM_END = 0xFFFEFFFF, + PERF_PMU_TYPE_HWMON_START = 0xFFFF0000, + PERF_PMU_TYPE_HWMON_END = 0xFFFFFFFD, + PERF_PMU_TYPE_TOOL = 0xFFFFFFFE, + PERF_PMU_TYPE_FAKE = 0xFFFFFFFF, +}; + /** * struct perf_pmu */ struct perf_pmu { /** @name: The name of the PMU such as "cpu". */ - char *name; + const char *name; /** * @alias_name: Optional alternate name for the PMU determined in * architecture specific code. @@ -49,7 +77,7 @@ struct perf_pmu { * @id: Optional PMU identifier read from * <sysfs>/bus/event_source/devices/<name>/identifier. */ - char *id; + const char *id; /** * @type: Perf event attributed type value, read from * <sysfs>/bus/event_source/devices/<name>/type. 
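The PERF_PMU_TYPE_* values above carve the 32-bit perf event type space into ranges so tool-created PMUs (DRM, hwmon, tool, fake) can coexist with kernel perf-event PMUs. A minimal standalone sketch of that partitioning, with the range constants hard-coded from the enum above; it mirrors the perf_pmu__kind() helper added later in this header and is illustrative only, not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* Map a PMU type number to its kind using the ranges from the enum above. */
static const char *pmu_kind_str(uint32_t type)
{
	if (type <= 0xFFFDFFFFu)	/* PERF_PMU_TYPE_PE_END */
		return "perf event PMU";
	if (type <= 0xFFFEFFFFu)	/* PERF_PMU_TYPE_DRM_END */
		return "DRM";
	if (type <= 0xFFFFFFFDu)	/* PERF_PMU_TYPE_HWMON_END */
		return "hwmon";
	if (type == 0xFFFFFFFEu)	/* PERF_PMU_TYPE_TOOL */
		return "tool";
	return "fake";			/* PERF_PMU_TYPE_FAKE */
}

int main(void)
{
	printf("%s\n", pmu_kind_str(0x0));         /* sysfs perf event PMU */
	printf("%s\n", pmu_kind_str(0xFFFF0005u)); /* hwmon */
	printf("%s\n", pmu_kind_str(0xFFFFFFFEu)); /* tool */
	return 0;
}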
@@ -92,10 +120,11 @@ struct perf_pmu { */ int max_precise; /** - * @default_config: Optional default perf_event_attr determined in - * architecture specific code. + * @perf_event_attr_init_default: Optional function to default + * initialize PMU specific parts of the perf_event_attr. */ - struct perf_event_attr *default_config; + void (*perf_event_attr_init_default)(const struct perf_pmu *pmu, + struct perf_event_attr *attr); /** * @cpus: Empty or the contents of either of: * <sysfs>/bus/event_source/devices/<name>/cpumask. @@ -113,7 +142,29 @@ struct perf_pmu { * event read from <sysfs>/bus/event_source/devices/<name>/events/ or * from json events in pmu-events.c. */ - struct list_head aliases; + struct hashmap *aliases; + /** + * @events_table: The events table for json events in pmu-events.c. + */ + const struct pmu_events_table *events_table; + /** @sysfs_aliases: Number of sysfs aliases loaded. */ + uint32_t sysfs_aliases; + /** @cpu_json_aliases: Number of json event aliases loaded specific to the CPUID. */ + uint32_t cpu_json_aliases; + /** @sys_json_aliases: Number of json event aliases loaded matching the PMU's identifier. */ + uint32_t sys_json_aliases; + /** + * @cpu_common_json_aliases: Number of json events that overlapped with sysfs when + * loading all sysfs events. + */ + uint32_t cpu_common_json_aliases; + /** @sysfs_aliases_loaded: Are sysfs aliases loaded from disk? */ + bool sysfs_aliases_loaded; + /** + * @cpu_aliases_added: Have all json events table entries for the PMU + * been added? + */ + bool cpu_aliases_added; /** @caps_initialized: Has the list caps been initialized? */ bool caps_initialized; /** @nr_caps: The length of the list caps. */ @@ -145,129 +196,106 @@ struct perf_pmu { * exclude_host. */ bool exclude_guest; + /** + * @checked: Are the missing features checked? + */ + bool checked; } missing_features; -}; -/** @perf_pmu__fake: A special global PMU used for testing. */ -extern struct perf_pmu perf_pmu__fake; + /** + * @mem_events: List of the supported mem events + */ + struct perf_mem_event *mem_events; +}; struct perf_pmu_info { const char *unit; double scale; + double retirement_latency_mean; + double retirement_latency_min; + double retirement_latency_max; bool per_pkg; bool snapshot; }; -#define UNIT_MAX_LEN 31 /* max length for event unit name */ - -/** - * struct perf_pmu_alias - An event either read from sysfs or builtin in - * pmu-events.c, created by parsing the pmu-events json files. - */ -struct perf_pmu_alias { - /** @name: Name of the event like "mem-loads". */ - char *name; - /** @desc: Optional short description of the event. */ - char *desc; - /** @long_desc: Optional long description. */ - char *long_desc; - /** - * @topic: Optional topic such as cache or pipeline, particularly for - * json events. - */ - char *topic; - /** - * @str: Comma separated parameter list like - * "event=0xcd,umask=0x1,ldlat=0x3". - */ - char *str; - /** @terms: Owned list of the original parsed parameters. */ - struct list_head terms; - /** @list: List element of struct perf_pmu aliases. */ - struct list_head list; - /** @unit: Units for the event, such as bytes or cache lines. */ - char unit[UNIT_MAX_LEN+1]; - /** @scale: Value to scale read counter values by. */ - double scale; - /** - * @per_pkg: Does the file - * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.per-pkg or - * equivalent json value exist and have the value 1. 
- */ - bool per_pkg; - /** - * @snapshot: Does the file - * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.snapshot - * exist and have the value 1. - */ - bool snapshot; - /** - * @deprecated: Is the event hidden and so not shown in perf list by - * default. - */ +struct pmu_event_info { + const struct perf_pmu *pmu; + const char *name; + const char* alias; + const char *scale_unit; + const char *desc; + const char *long_desc; + const char *encoding_desc; + const char *topic; + const char *pmu_name; + const char *event_type_desc; + const char *str; bool deprecated; - /** - * @pmu_name: The name copied from the json struct pmu_event. This can - * differ from the PMU name as it won't have suffixes. - */ - char *pmu_name; }; -void pmu_add_sys_aliases(struct list_head *head, struct perf_pmu *pmu); +typedef int (*pmu_event_callback)(void *state, struct pmu_event_info *info); +typedef int (*pmu_format_callback)(void *state, const char *name, int config, + const unsigned long *bits); + +void pmu_add_sys_aliases(struct perf_pmu *pmu); int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, - struct list_head *head_terms, + struct parse_events_terms *head_terms, + bool apply_hardcoded, struct parse_events_error *error); -int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats, +int perf_pmu__config_terms(const struct perf_pmu *pmu, struct perf_event_attr *attr, - struct list_head *head_terms, - bool zero, struct parse_events_error *error); -__u64 perf_pmu__format_bits(struct list_head *formats, const char *name); -int perf_pmu__format_type(struct list_head *formats, const char *name); -int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, - struct perf_pmu_info *info); -struct list_head *perf_pmu__alias(struct perf_pmu *pmu, - struct list_head *head_terms); -void perf_pmu_error(struct list_head *list, char *name, void *scanner, char const *msg); - -int perf_pmu__new_format(struct list_head *list, char *name, - int config, unsigned long *bits); -void perf_pmu__set_format(unsigned long *bits, long from, long to); -int perf_pmu__format_parse(int dirfd, struct list_head *head); -void perf_pmu__del_formats(struct list_head *formats); + struct parse_events_terms *terms, + bool zero, bool apply_hardcoded, + struct parse_events_error *error); +__u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name); +int perf_pmu__format_type(struct perf_pmu *pmu, const char *name); +int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, + struct perf_pmu_info *info, bool *rewrote_terms, + u64 *alternate_hw_config, struct parse_events_error *err); +int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb); + +void perf_pmu_format__set_value(void *format, int config, unsigned long *bits); bool perf_pmu__has_format(const struct perf_pmu *pmu, const char *name); +int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_callback cb); bool is_pmu_core(const char *name); bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu); bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu); -bool perf_pmu__have_event(const struct perf_pmu *pmu, const char *name); +bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name); +size_t perf_pmu__num_events(struct perf_pmu *pmu); +int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, + void *state, pmu_event_callback cb); +bool perf_pmu__name_wildcard_match(const struct 
perf_pmu *pmu, const char *to_match); +bool perf_pmu__name_no_suffix_match(const struct perf_pmu *pmu, const char *to_match); + /** * perf_pmu_is_software - is the PMU a software PMU as in it uses the * perf_sw_context in the kernel? */ bool perf_pmu__is_software(const struct perf_pmu *pmu); -FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name); -FILE *perf_pmu__open_file_at(struct perf_pmu *pmu, int dirfd, const char *name); +FILE *perf_pmu__open_file(const struct perf_pmu *pmu, const char *name); +FILE *perf_pmu__open_file_at(const struct perf_pmu *pmu, int dirfd, const char *name); -int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4); -int perf_pmu__scan_file_at(struct perf_pmu *pmu, int dirfd, const char *name, +int perf_pmu__scan_file(const struct perf_pmu *pmu, const char *name, const char *fmt, ...) + __scanf(3, 4); +int perf_pmu__scan_file_at(const struct perf_pmu *pmu, int dirfd, const char *name, const char *fmt, ...) __scanf(4, 5); -bool perf_pmu__file_exists(struct perf_pmu *pmu, const char *name); +bool perf_pmu__file_exists(const struct perf_pmu *pmu, const char *name); int perf_pmu__test(void); -struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); -void pmu_add_cpu_aliases_table(struct list_head *head, struct perf_pmu *pmu, +void perf_pmu__arch_init(struct perf_pmu *pmu); +void pmu_add_cpu_aliases_table(struct perf_pmu *pmu, const struct pmu_events_table *table); -char *perf_pmu__getcpuid(struct perf_pmu *pmu); -const struct pmu_events_table *pmu_events_table__find(void); -const struct pmu_metrics_table *pmu_metrics_table__find(void); -void perf_pmu_free_alias(struct perf_pmu_alias *alias); +bool pmu_uncore_identifier_match(const char *compat, const char *id); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); +struct perf_pmu_caps *perf_pmu__get_cap(struct perf_pmu *pmu, const char *name); + int perf_pmu__caps_parse(struct perf_pmu *pmu); void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, @@ -275,19 +303,40 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, const char *config_name); void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu); -int perf_pmu__match(char *pattern, char *name, char *tok); +bool perf_pmu__wildcard_match(const struct perf_pmu *pmu, const char *wildcard_to_match); -char *pmu_find_real_name(const char *name); -char *pmu_find_alias_name(const char *name); -double perf_pmu__cpu_slots_per_cycle(void); int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size); int perf_pmu__pathname_scnprintf(char *buf, size_t size, const char *pmu_name, const char *filename); int perf_pmu__event_source_devices_fd(void); int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, int flags); -struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name); +int perf_pmu__init(struct perf_pmu *pmu, __u32 type, const char *name); +struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name, + bool eager_load); struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus); void perf_pmu__delete(struct perf_pmu *pmu); +const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config); +bool perf_pmu__is_fake(const struct perf_pmu *pmu); + +static inline enum pmu_kind perf_pmu__kind(const struct perf_pmu *pmu) +{ + __u32 type; + + if (!pmu) + return PERF_PMU_KIND_PE; + + type = pmu->type; + if (type <= 
PERF_PMU_TYPE_PE_END) + return PERF_PMU_KIND_PE; + if (type <= PERF_PMU_TYPE_DRM_END) + return PERF_PMU_KIND_DRM; + if (type <= PERF_PMU_TYPE_HWMON_END) + return PERF_PMU_KIND_HWMON; + if (type == PERF_PMU_TYPE_TOOL) + return PERF_PMU_KIND_TOOL; + return PERF_PMU_KIND_FAKE; +} + #endif /* __PMU_H */ diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y index dff4e892ac4d..198907a8a48a 100644 --- a/tools/perf/util/pmu.y +++ b/tools/perf/util/pmu.y @@ -1,16 +1,22 @@ %define api.pure full -%parse-param {struct list_head *format} -%parse-param {char *name} +%parse-param {void *format} %parse-param {void *scanner} %lex-param {void* scanner} %{ +#ifndef NDEBUG +#define YYDEBUG 1 +#endif + #include <linux/compiler.h> #include <linux/list.h> #include <linux/bitmap.h> #include <string.h> #include "pmu.h" +#include "pmu-bison.h" + +int perf_pmu_lex(YYSTYPE * yylval_param , void *yyscanner); #define ABORT_ON(val) \ do { \ @@ -18,6 +24,20 @@ do { \ YYABORT; \ } while (0) +static void perf_pmu_error(void *format, void *scanner, const char *msg); + +static void perf_pmu__set_format(unsigned long *bits, long from, long to) +{ + long b; + + if (!to) + to = from; + + memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS)); + for (b = from; b <= to; b++) + __set_bit(b, bits); +} + %} %token PP_CONFIG @@ -42,16 +62,12 @@ format_term format_term: PP_CONFIG ':' bits { - ABORT_ON(perf_pmu__new_format(format, name, - PERF_PMU_FORMAT_VALUE_CONFIG, - $3)); + perf_pmu_format__set_value(format, PERF_PMU_FORMAT_VALUE_CONFIG, $3); } | PP_CONFIG PP_VALUE ':' bits { - ABORT_ON(perf_pmu__new_format(format, name, - $2, - $4)); + perf_pmu_format__set_value(format, $2, $4); } bits: @@ -78,9 +94,8 @@ PP_VALUE %% -void perf_pmu_error(struct list_head *list __maybe_unused, - char *name __maybe_unused, - void *scanner __maybe_unused, - char const *msg __maybe_unused) +static void perf_pmu_error(void *format __maybe_unused, + void *scanner __maybe_unused, + const char *msg __maybe_unused) { } diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index c58ba9fb6a36..98be2eb8f1f0 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -1,17 +1,26 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/list.h> +#include <linux/list_sort.h> +#include <linux/string.h> #include <linux/zalloc.h> +#include <api/io_dir.h> #include <subcmd/pager.h> #include <sys/types.h> -#include <dirent.h> +#include <ctype.h> #include <pthread.h> #include <string.h> #include <unistd.h> +#include "cpumap.h" #include "debug.h" +#include "drm_pmu.h" #include "evsel.h" #include "pmus.h" #include "pmu.h" +#include "hwmon_pmu.h" +#include "tool_pmu.h" #include "print-events.h" +#include "strbuf.h" +#include "string2.h" /* * core_pmus: A PMU belongs to core_pmus if it's name is "cpu" or it's sysfs @@ -30,8 +39,74 @@ */ static LIST_HEAD(core_pmus); static LIST_HEAD(other_pmus); -static bool read_sysfs_core_pmus; -static bool read_sysfs_all_pmus; +enum perf_tool_pmu_type { + PERF_TOOL_PMU_TYPE_PE_CORE, + PERF_TOOL_PMU_TYPE_PE_OTHER, + PERF_TOOL_PMU_TYPE_TOOL, + PERF_TOOL_PMU_TYPE_HWMON, + PERF_TOOL_PMU_TYPE_DRM, + +#define PERF_TOOL_PMU_TYPE_PE_CORE_MASK (1 << PERF_TOOL_PMU_TYPE_PE_CORE) +#define PERF_TOOL_PMU_TYPE_PE_OTHER_MASK (1 << PERF_TOOL_PMU_TYPE_PE_OTHER) +#define PERF_TOOL_PMU_TYPE_TOOL_MASK (1 << PERF_TOOL_PMU_TYPE_TOOL) +#define PERF_TOOL_PMU_TYPE_HWMON_MASK (1 << PERF_TOOL_PMU_TYPE_HWMON) +#define PERF_TOOL_PMU_TYPE_DRM_MASK (1 << PERF_TOOL_PMU_TYPE_DRM) + +#define PERF_TOOL_PMU_TYPE_ALL_MASK (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | \ + 
PERF_TOOL_PMU_TYPE_PE_OTHER_MASK | \ + PERF_TOOL_PMU_TYPE_TOOL_MASK | \ + PERF_TOOL_PMU_TYPE_HWMON_MASK | \ + PERF_TOOL_PMU_TYPE_DRM_MASK) +}; +static unsigned int read_pmu_types; + +static void pmu_read_sysfs(unsigned int to_read_pmus); + +size_t pmu_name_len_no_suffix(const char *str) +{ + int orig_len, len; + bool has_hex_digits = false; + + orig_len = len = strlen(str); + + /* Count trailing digits. */ + while (len > 0 && isxdigit(str[len - 1])) { + if (!isdigit(str[len - 1])) + has_hex_digits = true; + len--; + } + + if (len > 0 && len != orig_len && str[len - 1] == '_') { + /* + * There is a '_{num}' suffix. For decimal suffixes any length + * will do, for hexadecimal ensure more than 2 hex digits so + * that S390's cpum_cf PMU doesn't match. + */ + if (!has_hex_digits || (orig_len - len) > 2) + return len - 1; + } + /* Use the full length. */ + return orig_len; +} + +int pmu_name_cmp(const char *lhs_pmu_name, const char *rhs_pmu_name) +{ + unsigned long long lhs_num = 0, rhs_num = 0; + size_t lhs_pmu_name_len = pmu_name_len_no_suffix(lhs_pmu_name); + size_t rhs_pmu_name_len = pmu_name_len_no_suffix(rhs_pmu_name); + int ret = strncmp(lhs_pmu_name, rhs_pmu_name, + lhs_pmu_name_len < rhs_pmu_name_len ? lhs_pmu_name_len : rhs_pmu_name_len); + + if (lhs_pmu_name_len != rhs_pmu_name_len || ret != 0 || lhs_pmu_name_len == 0) + return ret; + + if (lhs_pmu_name_len + 1 < strlen(lhs_pmu_name)) + lhs_num = strtoull(&lhs_pmu_name[lhs_pmu_name_len + 1], NULL, 16); + if (rhs_pmu_name_len + 1 < strlen(rhs_pmu_name)) + rhs_num = strtoull(&rhs_pmu_name[rhs_pmu_name_len + 1], NULL, 16); + + return lhs_num < rhs_num ? -1 : (lhs_num > rhs_num ? 1 : 0); +} void perf_pmus__destroy(void) { @@ -47,8 +122,7 @@ void perf_pmus__destroy(void) perf_pmu__delete(pmu); } - read_sysfs_core_pmus = false; - read_sysfs_all_pmus = false; + read_pmu_types = 0; } static struct perf_pmu *pmu_find(const char *name) @@ -74,6 +148,7 @@ struct perf_pmu *perf_pmus__find(const char *name) struct perf_pmu *pmu; int dirfd; bool core_pmu; + unsigned int to_read_pmus = 0; /* * Once PMU is loaded it stays in the list, @@ -84,18 +159,41 @@ struct perf_pmu *perf_pmus__find(const char *name) if (pmu) return pmu; - if (read_sysfs_all_pmus) + if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK) return NULL; core_pmu = is_pmu_core(name); - if (core_pmu && read_sysfs_core_pmus) + if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK)) return NULL; dirfd = perf_pmu__event_source_devices_fd(); - pmu = perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name); + pmu = perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name, + /*eager_load=*/false); close(dirfd); - return pmu; + if (pmu) + return pmu; + + /* Looking up an individual perf event PMU failed, check if a tool PMU should be read. */ + if (!strncmp(name, "hwmon_", 6)) + to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK; + else if (!strncmp(name, "drm_", 4)) + to_read_pmus |= PERF_TOOL_PMU_TYPE_DRM_MASK; + else if (!strcmp(name, "tool")) + to_read_pmus |= PERF_TOOL_PMU_TYPE_TOOL_MASK; + + if (to_read_pmus) { + pmu_read_sysfs(to_read_pmus); + pmu = pmu_find(name); + if (pmu) + return pmu; + } + /* Read all necessary PMUs from sysfs and see if the PMU is found. 
*/ + to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK; + if (!core_pmu) + to_read_pmus |= PERF_TOOL_PMU_TYPE_PE_OTHER_MASK; + pmu_read_sysfs(to_read_pmus); + return pmu_find(name); } static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name) @@ -112,55 +210,82 @@ static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name) if (pmu) return pmu; - if (read_sysfs_all_pmus) + if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK) return NULL; core_pmu = is_pmu_core(name); - if (core_pmu && read_sysfs_core_pmus) + if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK)) return NULL; - return perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name); + return perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name, + /*eager_load=*/false); } -/* Add all pmus in sysfs to pmu list: */ -static void pmu_read_sysfs(bool core_only) +static int pmus_cmp(void *priv __maybe_unused, + const struct list_head *lhs, const struct list_head *rhs) { - int fd; - DIR *dir; - struct dirent *dent; + struct perf_pmu *lhs_pmu = container_of(lhs, struct perf_pmu, list); + struct perf_pmu *rhs_pmu = container_of(rhs, struct perf_pmu, list); - if (read_sysfs_all_pmus || (core_only && read_sysfs_core_pmus)) - return; + return pmu_name_cmp(lhs_pmu->name ?: "", rhs_pmu->name ?: ""); +} - fd = perf_pmu__event_source_devices_fd(); - if (fd < 0) - return; +/* Add all pmus in sysfs to pmu list: */ +static void pmu_read_sysfs(unsigned int to_read_types) +{ + struct perf_pmu *tool_pmu; - dir = fdopendir(fd); - if (!dir) { - close(fd); + if ((read_pmu_types & to_read_types) == to_read_types) { + /* All requested PMU types have been read. */ return; } - while ((dent = readdir(dir))) { - if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) - continue; - if (core_only && !is_pmu_core(dent->d_name)) - continue; - /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */ - perf_pmu__find2(fd, dent->d_name); - } + if (to_read_types & (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | PERF_TOOL_PMU_TYPE_PE_OTHER_MASK)) { + int fd = perf_pmu__event_source_devices_fd(); + struct io_dir dir; + struct io_dirent64 *dent; + bool core_only = (to_read_types & PERF_TOOL_PMU_TYPE_PE_OTHER_MASK) == 0; + + if (fd < 0) + goto skip_pe_pmus; + + io_dir__init(&dir, fd); - closedir(dir); - if (list_empty(&core_pmus)) { + while ((dent = io_dir__readdir(&dir)) != NULL) { + if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) + continue; + if (core_only && !is_pmu_core(dent->d_name)) + continue; + /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */ + perf_pmu__find2(fd, dent->d_name); + } + + close(fd); + } +skip_pe_pmus: + if ((to_read_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK) && list_empty(&core_pmus)) { if (!perf_pmu__create_placeholder_core_pmu(&core_pmus)) pr_err("Failure to set up any core PMUs\n"); } - if (!list_empty(&core_pmus)) { - read_sysfs_core_pmus = true; - if (!core_only) - read_sysfs_all_pmus = true; + list_sort(NULL, &core_pmus, pmus_cmp); + + if ((to_read_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) != 0 && + (read_pmu_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) == 0) { + tool_pmu = tool_pmu__new(); + if (tool_pmu) + list_add_tail(&tool_pmu->list, &other_pmus); } + if ((to_read_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) != 0 && + (read_pmu_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) == 0) + perf_pmus__read_hwmon_pmus(&other_pmus); + + if ((to_read_types & PERF_TOOL_PMU_TYPE_DRM_MASK) != 0 && + (read_pmu_types & PERF_TOOL_PMU_TYPE_DRM_MASK) == 0) + perf_pmus__read_drm_pmus(&other_pmus); + + 
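+ /* + * Tool-provided PMUs (tool, hwmon, DRM) all land on other_pmus. Sorting + * both lists with pmus_cmp (the suffix-aware pmu_name_cmp) keeps names + * like uncore_cha_0, uncore_cha_1, ... adjacent and in numeric order, + * which the duplicate-skipping scan relies on. + */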
list_sort(NULL, &other_pmus, pmus_cmp); + + read_pmu_types |= to_read_types; } static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type) @@ -181,12 +306,23 @@ static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type) struct perf_pmu *perf_pmus__find_by_type(unsigned int type) { + unsigned int to_read_pmus; struct perf_pmu *pmu = __perf_pmus__find_by_type(type); - if (pmu || read_sysfs_all_pmus) + if (pmu || (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK)) return pmu; - pmu_read_sysfs(/*core_only=*/false); + if (type >= PERF_PMU_TYPE_PE_START && type <= PERF_PMU_TYPE_PE_END) { + to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK | + PERF_TOOL_PMU_TYPE_PE_OTHER_MASK; + } else if (type >= PERF_PMU_TYPE_DRM_START && type <= PERF_PMU_TYPE_DRM_END) { + to_read_pmus = PERF_TOOL_PMU_TYPE_DRM_MASK; + } else if (type >= PERF_PMU_TYPE_HWMON_START && type <= PERF_PMU_TYPE_HWMON_END) { + to_read_pmus = PERF_TOOL_PMU_TYPE_HWMON_MASK; + } else { + to_read_pmus = PERF_TOOL_PMU_TYPE_TOOL_MASK; + } + pmu_read_sysfs(to_read_pmus); pmu = __perf_pmus__find_by_type(type); return pmu; } @@ -200,7 +336,7 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu) bool use_core_pmus = !pmu || pmu->is_core; if (!pmu) { - pmu_read_sysfs(/*core_only=*/false); + pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK); pmu = list_prepare_entry(pmu, &core_pmus, list); } if (use_core_pmus) { @@ -218,8 +354,8 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu) struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu) { if (!pmu) { - pmu_read_sysfs(/*core_only=*/true); - pmu = list_prepare_entry(pmu, &core_pmus, list); + pmu_read_sysfs(PERF_TOOL_PMU_TYPE_PE_CORE_MASK); + return list_first_entry_or_null(&core_pmus, typeof(*pmu), list); } list_for_each_entry_continue(pmu, &core_pmus, list) return pmu; @@ -227,6 +363,129 @@ struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu) return NULL; } +struct perf_pmu *perf_pmus__scan_for_event(struct perf_pmu *pmu, const char *event) +{ + bool use_core_pmus = !pmu || pmu->is_core; + + if (!pmu) { + /* Hwmon filename values that aren't used. */ + enum hwmon_type type; + int number; + /* + * Core PMUs, other sysfs PMUs and tool PMU can take all event + * types or aren't worth optimizing for. + */ + unsigned int to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK | + PERF_TOOL_PMU_TYPE_PE_OTHER_MASK | + PERF_TOOL_PMU_TYPE_TOOL_MASK; + + /* Could the event be a hwmon event? */ + if (parse_hwmon_filename(event, &type, &number, /*item=*/NULL, /*alarm=*/NULL)) + to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK; + + /* Could the event be a DRM event? */ + if (strlen(event) > 4 && strncmp("drm-", event, 4) == 0) + to_read_pmus |= PERF_TOOL_PMU_TYPE_DRM_MASK; + + pmu_read_sysfs(to_read_pmus); + pmu = list_prepare_entry(pmu, &core_pmus, list); + } + if (use_core_pmus) { + list_for_each_entry_continue(pmu, &core_pmus, list) + return pmu; + + pmu = NULL; + pmu = list_prepare_entry(pmu, &other_pmus, list); + } + list_for_each_entry_continue(pmu, &other_pmus, list) + return pmu; + return NULL; +} + +struct perf_pmu *perf_pmus__scan_matching_wildcard(struct perf_pmu *pmu, const char *wildcard) +{ + bool use_core_pmus = !pmu || pmu->is_core; + + if (!pmu) { + /* + * Core PMUs, other sysfs PMUs and tool PMU can have any name or + * aren't worth optimizing for. + */ + unsigned int to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK | + PERF_TOOL_PMU_TYPE_PE_OTHER_MASK | + PERF_TOOL_PMU_TYPE_TOOL_MASK; + + /* + * Hwmon PMUs have an alias from a sysfs name like hwmon0, + * hwmon1, etc.
or have a name of hwmon_<name>. They therefore + * can only have a wildcard match if the wildcard begins with + * "hwmon". Similarly drm PMUs must start with "drm_"; avoid reading + * such events unless the PMU could match. + */ + if (strisglob(wildcard)) { + to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK | + PERF_TOOL_PMU_TYPE_DRM_MASK; + } else if (strlen(wildcard) >= 4 && strncmp("drm_", wildcard, 4) == 0) { + to_read_pmus |= PERF_TOOL_PMU_TYPE_DRM_MASK; + } else if (strlen(wildcard) >= 5 && strncmp("hwmon", wildcard, 5) == 0) { + to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK; + } + + pmu_read_sysfs(to_read_pmus); + pmu = list_prepare_entry(pmu, &core_pmus, list); + } + if (use_core_pmus) { + list_for_each_entry_continue(pmu, &core_pmus, list) { + if (perf_pmu__wildcard_match(pmu, wildcard)) + return pmu; + } + pmu = NULL; + pmu = list_prepare_entry(pmu, &other_pmus, list); + } + list_for_each_entry_continue(pmu, &other_pmus, list) { + if (perf_pmu__wildcard_match(pmu, wildcard)) + return pmu; + } + return NULL; +} + +static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu) +{ + bool use_core_pmus = !pmu || pmu->is_core; + int last_pmu_name_len = 0; + const char *last_pmu_name = (pmu && pmu->name) ? pmu->name : ""; + + if (!pmu) { + pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK); + pmu = list_prepare_entry(pmu, &core_pmus, list); + } else + last_pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: ""); + + if (use_core_pmus) { + list_for_each_entry_continue(pmu, &core_pmus, list) { + int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: ""); + + if (last_pmu_name_len == pmu_name_len && + !strncmp(last_pmu_name, pmu->name ?: "", pmu_name_len)) + continue; + + return pmu; + } + pmu = NULL; + pmu = list_prepare_entry(pmu, &other_pmus, list); + } + list_for_each_entry_continue(pmu, &other_pmus, list) { + int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: ""); + + if (last_pmu_name_len == pmu_name_len && + !strncmp(last_pmu_name, pmu->name ?: "", pmu_name_len)) + continue; + + return pmu; + } + return NULL; +} + const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str) { struct perf_pmu *pmu = NULL; @@ -248,229 +507,163 @@ const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str) return NULL; } -int __weak perf_pmus__num_mem_pmus(void) -{ - /* All core PMUs are for mem events. */ - return perf_pmus__num_core_pmus(); -} - /** Struct for ordering events as output in perf list. */ struct sevent { /** PMU for event. */ const struct perf_pmu *pmu; - /** - * Optional event for name, desc, etc. If not present then this is a - * selectable PMU and the event name is shown as "//". - */ - const struct perf_pmu_alias *event; - /** Is the PMU for the CPU?
*/ - bool is_cpu; + const char *name; + const char* alias; + const char *scale_unit; + const char *desc; + const char *long_desc; + const char *encoding_desc; + const char *topic; + const char *pmu_name; + const char *event_type_desc; + bool deprecated; }; static int cmp_sevent(const void *a, const void *b) { const struct sevent *as = a; const struct sevent *bs = b; - const char *a_pmu_name = NULL, *b_pmu_name = NULL; - const char *a_name = "//", *a_desc = NULL, *a_topic = ""; - const char *b_name = "//", *b_desc = NULL, *b_topic = ""; + bool a_iscpu, b_iscpu; int ret; - if (as->event) { - a_name = as->event->name; - a_desc = as->event->desc; - a_topic = as->event->topic ?: ""; - a_pmu_name = as->event->pmu_name; - } - if (bs->event) { - b_name = bs->event->name; - b_desc = bs->event->desc; - b_topic = bs->event->topic ?: ""; - b_pmu_name = bs->event->pmu_name; - } /* Put extra events last. */ - if (!!a_desc != !!b_desc) - return !!a_desc - !!b_desc; + if (!!as->desc != !!bs->desc) + return !!as->desc - !!bs->desc; /* Order by topics. */ - ret = strcmp(a_topic, b_topic); + ret = strcmp(as->topic ?: "", bs->topic ?: ""); if (ret) return ret; /* Order CPU core events to be first */ - if (as->is_cpu != bs->is_cpu) - return as->is_cpu ? -1 : 1; + a_iscpu = as->pmu ? as->pmu->is_core : true; + b_iscpu = bs->pmu ? bs->pmu->is_core : true; + if (a_iscpu != b_iscpu) + return a_iscpu ? -1 : 1; /* Order by PMU name. */ if (as->pmu != bs->pmu) { - a_pmu_name = a_pmu_name ?: (as->pmu->name ?: ""); - b_pmu_name = b_pmu_name ?: (bs->pmu->name ?: ""); - ret = strcmp(a_pmu_name, b_pmu_name); + ret = strcmp(as->pmu_name ?: "", bs->pmu_name ?: ""); if (ret) return ret; } /* Order by event name. */ - return strcmp(a_name, b_name); + return strcmp(as->name, bs->name); } -static bool pmu_alias_is_duplicate(struct sevent *alias_a, - struct sevent *alias_b) +static bool pmu_alias_is_duplicate(struct sevent *a, struct sevent *b) { - const char *a_pmu_name = NULL, *b_pmu_name = NULL; - const char *a_name = "//", *b_name = "//"; - - - if (alias_a->event) { - a_name = alias_a->event->name; - a_pmu_name = alias_a->event->pmu_name; - } - if (alias_b->event) { - b_name = alias_b->event->name; - b_pmu_name = alias_b->event->pmu_name; - } - /* Different names -> never duplicates */ - if (strcmp(a_name, b_name)) + if (strcmp(a->name ?: "//", b->name ?: "//")) return false; /* Don't remove duplicates for different PMUs */ - a_pmu_name = a_pmu_name ?: (alias_a->pmu->name ?: ""); - b_pmu_name = b_pmu_name ?: (alias_b->pmu->name ?: ""); - return strcmp(a_pmu_name, b_pmu_name) == 0; + return strcmp(a->pmu_name, b->pmu_name) == 0; } -static int sub_non_neg(int a, int b) -{ - if (b > a) - return 0; - return a - b; -} +struct events_callback_state { + struct sevent *aliases; + size_t aliases_len; + size_t index; +}; -static char *format_alias(char *buf, int len, const struct perf_pmu *pmu, - const struct perf_pmu_alias *alias) +static int perf_pmus__print_pmu_events__callback(void *vstate, + struct pmu_event_info *info) { - struct parse_events_term *term; - int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name); + struct events_callback_state *state = vstate; + struct sevent *s; - list_for_each_entry(term, &alias->terms, list) { - if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) - used += snprintf(buf + used, sub_non_neg(len, used), - ",%s=%s", term->config, - term->val.str); + if (state->index >= state->aliases_len) { + pr_err("Unexpected event %s/%s/\n", info->pmu->name, info->name); + return 1; } - - if 
(sub_non_neg(len, used) > 0) { - buf[used] = '/'; - used++; - } - if (sub_non_neg(len, used) > 0) { - buf[used] = '\0'; - used++; - } else - buf[len - 1] = '\0'; - - return buf; + assert(info->pmu != NULL || info->name != NULL); + s = &state->aliases[state->index]; + s->pmu = info->pmu; +#define COPY_STR(str) s->str = info->str ? strdup(info->str) : NULL + COPY_STR(name); + COPY_STR(alias); + COPY_STR(scale_unit); + COPY_STR(desc); + COPY_STR(long_desc); + COPY_STR(encoding_desc); + COPY_STR(topic); + COPY_STR(pmu_name); + COPY_STR(event_type_desc); +#undef COPY_STR + s->deprecated = info->deprecated; + state->index++; + return 0; } void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state) { struct perf_pmu *pmu; - struct perf_pmu_alias *event; - char buf[1024]; int printed = 0; - int len, j; + int len; struct sevent *aliases; + struct events_callback_state state; + bool skip_duplicate_pmus = print_cb->skip_duplicate_pmus(print_state); + struct perf_pmu *(*scan_fn)(struct perf_pmu *); + + if (skip_duplicate_pmus) + scan_fn = perf_pmus__scan_skip_duplicates; + else + scan_fn = perf_pmus__scan; pmu = NULL; len = 0; - while ((pmu = perf_pmus__scan(pmu)) != NULL) { - list_for_each_entry(event, &pmu->aliases, list) - len++; - if (pmu->selectable) - len++; - } + while ((pmu = scan_fn(pmu)) != NULL) + len += perf_pmu__num_events(pmu); + aliases = zalloc(sizeof(struct sevent) * len); if (!aliases) { pr_err("FATAL: not enough memory to print PMU events\n"); return; } pmu = NULL; - j = 0; - while ((pmu = perf_pmus__scan(pmu)) != NULL) { - bool is_cpu = pmu->is_core; - - list_for_each_entry(event, &pmu->aliases, list) { - aliases[j].event = event; - aliases[j].pmu = pmu; - aliases[j].is_cpu = is_cpu; - j++; - } - if (pmu->selectable) { - aliases[j].event = NULL; - aliases[j].pmu = pmu; - aliases[j].is_cpu = is_cpu; - j++; - } + state = (struct events_callback_state) { + .aliases = aliases, + .aliases_len = len, + .index = 0, + }; + while ((pmu = scan_fn(pmu)) != NULL) { + perf_pmu__for_each_event(pmu, skip_duplicate_pmus, &state, + perf_pmus__print_pmu_events__callback); } - len = j; qsort(aliases, len, sizeof(struct sevent), cmp_sevent); - for (j = 0; j < len; j++) { - const char *name, *alias = NULL, *scale_unit = NULL, - *desc = NULL, *long_desc = NULL, - *encoding_desc = NULL, *topic = NULL, - *pmu_name = NULL; - bool deprecated = false; - size_t buf_used; - + for (int j = 0; j < len; j++) { /* Skip duplicates */ - if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1])) - continue; + if (j < len - 1 && pmu_alias_is_duplicate(&aliases[j], &aliases[j + 1])) + goto free; - if (!aliases[j].event) { - /* A selectable event. 
*/ - pmu_name = aliases[j].pmu->name; - buf_used = snprintf(buf, sizeof(buf), "%s//", pmu_name) + 1; - name = buf; - } else { - if (aliases[j].event->desc) { - name = aliases[j].event->name; - buf_used = 0; - } else { - name = format_alias(buf, sizeof(buf), aliases[j].pmu, - aliases[j].event); - if (aliases[j].is_cpu) { - alias = name; - name = aliases[j].event->name; - } - buf_used = strlen(buf) + 1; - } - pmu_name = aliases[j].event->pmu_name ?: (aliases[j].pmu->name ?: ""); - if (strlen(aliases[j].event->unit) || aliases[j].event->scale != 1.0) { - scale_unit = buf + buf_used; - buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, - "%G%s", aliases[j].event->scale, - aliases[j].event->unit) + 1; - } - desc = aliases[j].event->desc; - long_desc = aliases[j].event->long_desc; - topic = aliases[j].event->topic; - encoding_desc = buf + buf_used; - buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, - "%s/%s/", pmu_name, aliases[j].event->str) + 1; - deprecated = aliases[j].event->deprecated; - } print_cb->print_event(print_state, - pmu_name, - topic, - name, - alias, - scale_unit, - deprecated, - "Kernel PMU event", - desc, - long_desc, - encoding_desc); + aliases[j].topic, + aliases[j].pmu_name, + aliases[j].pmu->type, + aliases[j].name, + aliases[j].alias, + aliases[j].scale_unit, + aliases[j].deprecated, + aliases[j].event_type_desc, + aliases[j].desc, + aliases[j].long_desc, + aliases[j].encoding_desc); +free: + zfree(&aliases[j].name); + zfree(&aliases[j].alias); + zfree(&aliases[j].scale_unit); + zfree(&aliases[j].desc); + zfree(&aliases[j].long_desc); + zfree(&aliases[j].encoding_desc); + zfree(&aliases[j].topic); + zfree(&aliases[j].pmu_name); + zfree(&aliases[j].event_type_desc); } if (printed && pager_in_use()) printf("\n"); @@ -478,6 +671,100 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p zfree(&aliases); } +struct build_format_string_args { + struct strbuf short_string; + struct strbuf long_string; + int num_formats; +}; + +static int build_format_string(void *state, const char *name, int config, + const unsigned long *bits) +{ + struct build_format_string_args *args = state; + unsigned int num_bits; + int ret1, ret2 = 0; + + (void)config; + args->num_formats++; + if (args->num_formats > 1) { + strbuf_addch(&args->long_string, ','); + if (args->num_formats < 4) + strbuf_addch(&args->short_string, ','); + } + num_bits = bits ? bitmap_weight(bits, PERF_PMU_FORMAT_BITS) : 0; + if (num_bits <= 1) { + ret1 = strbuf_addf(&args->long_string, "%s", name); + if (args->num_formats < 4) + ret2 = strbuf_addf(&args->short_string, "%s", name); + } else if (num_bits > 8) { + ret1 = strbuf_addf(&args->long_string, "%s=0..0x%llx", name, + ULLONG_MAX >> (64 - num_bits)); + if (args->num_formats < 4) { + ret2 = strbuf_addf(&args->short_string, "%s=0..0x%llx", name, + ULLONG_MAX >> (64 - num_bits)); + } + } else { + ret1 = strbuf_addf(&args->long_string, "%s=0..%llu", name, + ULLONG_MAX >> (64 - num_bits)); + if (args->num_formats < 4) { + ret2 = strbuf_addf(&args->short_string, "%s=0..%llu", name, + ULLONG_MAX >> (64 - num_bits)); + } + } + return ret1 < 0 ? ret1 : (ret2 < 0 ? 
ret2 : 0); +} + +void perf_pmus__print_raw_pmu_events(const struct print_callbacks *print_cb, void *print_state) +{ + bool skip_duplicate_pmus = print_cb->skip_duplicate_pmus(print_state); + struct perf_pmu *(*scan_fn)(struct perf_pmu *); + struct perf_pmu *pmu = NULL; + + if (skip_duplicate_pmus) + scan_fn = perf_pmus__scan_skip_duplicates; + else + scan_fn = perf_pmus__scan; + + while ((pmu = scan_fn(pmu)) != NULL) { + struct build_format_string_args format_args = { + .short_string = STRBUF_INIT, + .long_string = STRBUF_INIT, + .num_formats = 0, + }; + int len = pmu_name_len_no_suffix(pmu->name); + const char *desc = "(see 'man perf-list' or 'man perf-record' on how to encode it)"; + + if (!pmu->is_core) + desc = NULL; + + strbuf_addf(&format_args.short_string, "%.*s/", len, pmu->name); + strbuf_addf(&format_args.long_string, "%.*s/", len, pmu->name); + perf_pmu__for_each_format(pmu, &format_args, build_format_string); + + if (format_args.num_formats > 3) + strbuf_addf(&format_args.short_string, ",.../modifier"); + else + strbuf_addf(&format_args.short_string, "/modifier"); + + strbuf_addf(&format_args.long_string, "/modifier"); + print_cb->print_event(print_state, + /*topic=*/NULL, + /*pmu_name=*/NULL, + pmu->type, + format_args.short_string.buf, + /*event_alias=*/NULL, + /*scale_unit=*/NULL, + /*deprecated=*/false, + "Raw event descriptor", + desc, + /*long_desc=*/NULL, + format_args.long_string.buf); + + strbuf_release(&format_args.short_string); + strbuf_release(&format_args.long_string); + } +} + bool perf_pmus__have_event(const char *pname, const char *name) { struct perf_pmu *pmu = perf_pmus__find(pname); @@ -529,46 +816,72 @@ bool perf_pmus__supports_extended_type(void) return perf_pmus__do_support_extended_type; } -char *perf_pmus__default_pmu_name(void) +struct perf_pmu *perf_pmus__find_by_attr(const struct perf_event_attr *attr) { - int fd; - DIR *dir; - struct dirent *dent; - char *result = NULL; + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + u32 type = attr->type; + bool legacy_core_type = type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE; - if (!list_empty(&core_pmus)) - return strdup(list_first_entry(&core_pmus, struct perf_pmu, list)->name); + if (!pmu && legacy_core_type && perf_pmus__supports_extended_type()) { + type = attr->config >> PERF_PMU_TYPE_SHIFT; - fd = perf_pmu__event_source_devices_fd(); - if (fd < 0) - return strdup("cpu"); - - dir = fdopendir(fd); - if (!dir) { - close(fd); - return strdup("cpu"); + pmu = perf_pmus__find_by_type(type); } - - while ((dent = readdir(dir))) { - if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) - continue; - if (is_pmu_core(dent->d_name)) { - result = strdup(dent->d_name); - break; - } + if (!pmu && (legacy_core_type || type == PERF_TYPE_RAW)) { + /* + * For legacy events, if there was no extended type info then + * assume the PMU is the first core PMU. + * + * On architectures like ARM there is no sysfs PMU with type + * PERF_TYPE_RAW, assume the RAW events are going to be handled + * by the first core PMU. 
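+ * + * For example (illustrative values): with extended types, a legacy + * PERF_TYPE_HARDWARE cycles event on the PMU with type 8 is encoded as + * attr.config = ((__u64)8 << PERF_PMU_TYPE_SHIFT) | PERF_COUNT_HW_CPU_CYCLES, + * so the shift above recovers PMU type 8 from attr.config.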
+ */ + pmu = perf_pmus__find_core_pmu(); } - - closedir(dir); - return result ?: strdup("cpu"); + return pmu; } struct perf_pmu *evsel__find_pmu(const struct evsel *evsel) { struct perf_pmu *pmu = evsel->pmu; - if (!pmu) { - pmu = perf_pmus__find_by_type(evsel->core.attr.type); - ((struct evsel *)evsel)->pmu = pmu; - } + if (pmu) + return pmu; + + pmu = perf_pmus__find_by_attr(&evsel->core.attr); + ((struct evsel *)evsel)->pmu = pmu; return pmu; } + +struct perf_pmu *perf_pmus__find_core_pmu(void) +{ + return perf_pmus__scan_core(NULL); +} + +struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name) +{ + /* + * Some PMU functions read from the sysfs mount point, so care is + * needed, hence passing the eager_load flag to load things like the + * format files. + */ + return perf_pmu__lookup(&other_pmus, test_sysfs_dirfd, name, /*eager_load=*/true); +} + +struct perf_pmu *perf_pmus__add_test_hwmon_pmu(const char *hwmon_dir, + const char *sysfs_name, + const char *name) +{ + return hwmon_pmu__new(&other_pmus, hwmon_dir, sysfs_name, name); +} + +struct perf_pmu *perf_pmus__fake_pmu(void) +{ + static struct perf_pmu fake = { + .name = "fake", + .type = PERF_PMU_TYPE_FAKE, + .format = LIST_HEAD_INIT(fake.format), + }; + + return &fake; +} diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h index a21464432d0f..7cb36863711a 100644 --- a/tools/perf/util/pmus.h +++ b/tools/perf/util/pmus.h @@ -2,24 +2,41 @@ #ifndef __PMUS_H #define __PMUS_H +#include <stdbool.h> +#include <stddef.h> + +struct perf_event_attr; struct perf_pmu; struct print_callbacks; +size_t pmu_name_len_no_suffix(const char *str); +/* Exposed for testing only. */ +int pmu_name_cmp(const char *lhs_pmu_name, const char *rhs_pmu_name); + void perf_pmus__destroy(void); struct perf_pmu *perf_pmus__find(const char *name); struct perf_pmu *perf_pmus__find_by_type(unsigned int type); +struct perf_pmu *perf_pmus__find_by_attr(const struct perf_event_attr *attr); struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu); struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu); +struct perf_pmu *perf_pmus__scan_for_event(struct perf_pmu *pmu, const char *event); +struct perf_pmu *perf_pmus__scan_matching_wildcard(struct perf_pmu *pmu, const char *wildcard); const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str); -int perf_pmus__num_mem_pmus(void); void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *print_state); +void perf_pmus__print_raw_pmu_events(const struct print_callbacks *print_cb, void *print_state); bool perf_pmus__have_event(const char *pname, const char *name); int perf_pmus__num_core_pmus(void); bool perf_pmus__supports_extended_type(void); -char *perf_pmus__default_pmu_name(void); + +struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name); +struct perf_pmu *perf_pmus__add_test_hwmon_pmu(const char *hwmon_dir, + const char *sysfs_name, + const char *name); +struct perf_pmu *perf_pmus__fake_pmu(void); +struct perf_pmu *perf_pmus__find_core_pmu(void); #endif /* __PMUS_H */ diff --git a/tools/perf/util/powerpc-vpadtl.c b/tools/perf/util/powerpc-vpadtl.c new file mode 100644 index 000000000000..d1c3396f182f --- /dev/null +++ b/tools/perf/util/powerpc-vpadtl.c @@ -0,0 +1,733 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * VPA DTL PMU support + */ + +#include <linux/string.h> +#include <errno.h> +#include <inttypes.h> +#include "color.h" +#include "evlist.h" +#include "session.h" +#include "auxtrace.h" +#include "data.h" +#include 
"machine.h" +#include "debug.h" +#include "powerpc-vpadtl.h" +#include "sample.h" +#include "tool.h" + +/* + * Structure to save the auxtrace queue + */ +struct powerpc_vpadtl { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + u32 auxtrace_type; + struct perf_session *session; + struct machine *machine; + u32 pmu_type; + u64 sample_id; +}; + +struct boottb_freq { + u64 boot_tb; + u64 tb_freq; + u64 timebase; + u64 padded[3]; +}; + +struct powerpc_vpadtl_queue { + struct powerpc_vpadtl *vpa; + unsigned int queue_nr; + struct auxtrace_buffer *buffer; + struct thread *thread; + bool on_heap; + struct powerpc_vpadtl_entry *dtl; + u64 timestamp; + unsigned long pkt_len; + unsigned long buf_len; + u64 boot_tb; + u64 tb_freq; + unsigned int tb_buffer; + unsigned int size; + bool done; + pid_t pid; + pid_t tid; + int cpu; +}; + +const char *dispatch_reasons[11] = { + "external_interrupt", + "firmware_internal_event", + "H_PROD", + "decrementer_interrupt", + "system_reset", + "firmware_internal_event", + "conferred_cycles", + "time_slice", + "virtual_memory_page_fault", + "expropriated_adjunct", + "priv_doorbell"}; + +const char *preempt_reasons[10] = { + "unused", + "firmware_internal_event", + "H_CEDE", + "H_CONFER", + "time_slice", + "migration_hibernation_page_fault", + "virtual_memory_page_fault", + "H_CONFER_ADJUNCT", + "hcall_adjunct", + "HDEC_adjunct"}; + +#define dtl_entry_size sizeof(struct powerpc_vpadtl_entry) + +/* + * Function to dump the dispatch trace data when perf report + * is invoked with -D + */ +static void powerpc_vpadtl_dump(struct powerpc_vpadtl *vpa __maybe_unused, + unsigned char *buf, size_t len) +{ + struct powerpc_vpadtl_entry *dtl; + int pkt_len, pos = 0; + const char *color = PERF_COLOR_BLUE; + + color_fprintf(stdout, color, + ". ... 
VPA DTL PMU data: size %zu bytes, entries: %zu\n", + len, len/dtl_entry_size); + + if (len % dtl_entry_size) + len = len - (len % dtl_entry_size); + + while (len) { + pkt_len = dtl_entry_size; + printf("."); + color_fprintf(stdout, color, " %08x: ", pos); + dtl = (struct powerpc_vpadtl_entry *)buf; + if (dtl->timebase != 0) { + printf("dispatch_reason:%s, preempt_reason:%s, " + "enqueue_to_dispatch_time:%d, ready_to_enqueue_time:%d, " + "waiting_to_ready_time:%d\n", + dispatch_reasons[dtl->dispatch_reason], + preempt_reasons[dtl->preempt_reason], + be32_to_cpu(dtl->enqueue_to_dispatch_time), + be32_to_cpu(dtl->ready_to_enqueue_time), + be32_to_cpu(dtl->waiting_to_ready_time)); + } else { + struct boottb_freq *boot_tb = (struct boottb_freq *)buf; + + printf("boot_tb: %" PRIu64 ", tb_freq: %" PRIu64 "\n", + boot_tb->boot_tb, boot_tb->tb_freq); + } + + pos += pkt_len; + buf += pkt_len; + len -= pkt_len; + } +} + +static unsigned long long powerpc_vpadtl_timestamp(struct powerpc_vpadtl_queue *vpaq) +{ + struct powerpc_vpadtl_entry *record = vpaq->dtl; + unsigned long long timestamp = 0; + unsigned long long boot_tb; + unsigned long long diff; + double result, div; + double boot_freq; + /* + * Formula used to get a timestamp that can be correlated with + * other perf events: + * ((timebase from DTL entry - boot time) / frequency) * 1000000000 + */ + if (record->timebase) { + boot_tb = vpaq->boot_tb; + boot_freq = vpaq->tb_freq; + diff = be64_to_cpu(record->timebase) - boot_tb; + div = diff / boot_freq; + result = div; + result = result * 1000000000; + timestamp = result; + } + + return timestamp; +} + +static struct powerpc_vpadtl *session_to_vpa(struct perf_session *session) +{ + return container_of(session->auxtrace, struct powerpc_vpadtl, auxtrace); +} + +static void powerpc_vpadtl_dump_event(struct powerpc_vpadtl *vpa, unsigned char *buf, + size_t len) +{ + printf(".\n"); + powerpc_vpadtl_dump(vpa, buf, len); +} + +/* + * Generate perf sample for each entry in the dispatch trace log. + * - sample ip is picked from srr0 field of powerpc_vpadtl_entry + * - sample cpu is logical cpu. + * - cpumode is set to PERF_RECORD_MISC_KERNEL + * - Additionally save the details in raw_data of sample.
This + * is to print the relevant fields in perf_sample__fprintf_synth() + * when called from builtin-script + */ +static int powerpc_vpadtl_sample(struct powerpc_vpadtl_entry *record, + struct powerpc_vpadtl *vpa, u64 save, int cpu) +{ + struct perf_sample sample; + union perf_event event; + + sample.ip = be64_to_cpu(record->srr0); + sample.period = 1; + sample.cpu = cpu; + sample.id = vpa->sample_id; + sample.callchain = NULL; + sample.branch_stack = NULL; + memset(&event, 0, sizeof(event)); + sample.cpumode = PERF_RECORD_MISC_KERNEL; + sample.time = save; + sample.raw_data = record; + sample.raw_size = sizeof(*record); + event.sample.header.type = PERF_RECORD_SAMPLE; + event.sample.header.misc = sample.cpumode; + event.sample.header.size = sizeof(struct perf_event_header); + + if (perf_session__deliver_synth_event(vpa->session, &event, &sample)) { + pr_debug("Failed to create sample for dtl entry\n"); + return -1; + } + + return 0; +} + +static int powerpc_vpadtl_get_buffer(struct powerpc_vpadtl_queue *vpaq) +{ + struct auxtrace_buffer *buffer = vpaq->buffer; + struct auxtrace_queues *queues = &vpaq->vpa->queues; + struct auxtrace_queue *queue; + + queue = &queues->queue_array[vpaq->queue_nr]; + buffer = auxtrace_buffer__next(queue, buffer); + + if (!buffer) + return 0; + + vpaq->buffer = buffer; + vpaq->size = buffer->size; + + /* If the aux_buffer doesn't have data associated, try to load it */ + if (!buffer->data) { + /* get the file desc associated with the perf data file */ + int fd = perf_data__fd(vpaq->vpa->session->data); + + buffer->data = auxtrace_buffer__get_data(buffer, fd); + if (!buffer->data) + return -ENOMEM; + } + + vpaq->buf_len = buffer->size; + + if (buffer->size % dtl_entry_size) + vpaq->buf_len = buffer->size - (buffer->size % dtl_entry_size); + + if (vpaq->tb_buffer != buffer->buffer_nr) { + vpaq->pkt_len = 0; + vpaq->tb_buffer = 0; + } + + return 1; +} + +/* + * The first entry in the queue for the VPA DTL PMU carries the boot + * timebase and frequency details needed to compute timestamps that can + * be correlated with other events. Save the boot_tb and tb_freq as part + * of powerpc_vpadtl_queue. The very next entry is the actual trace data + * to be returned.
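+ * + * For example (illustrative numbers): with boot_tb = 512000000 and + * tb_freq = 512000000 (a 512 MHz timebase), a DTL entry whose timebase + * field is 1536000000 converts to + * ((1536000000 - 512000000) / 512000000) * 1000000000 = 2000000000 ns, + * i.e. two seconds after boot.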
+ */ +static int powerpc_vpadtl_decode(struct powerpc_vpadtl_queue *vpaq) +{ + int ret; + char *buf; + struct boottb_freq *boottb; + + ret = powerpc_vpadtl_get_buffer(vpaq); + if (ret <= 0) + return ret; + + boottb = (struct boottb_freq *)vpaq->buffer->data; + if (boottb->timebase == 0) { + vpaq->boot_tb = boottb->boot_tb; + vpaq->tb_freq = boottb->tb_freq; + vpaq->pkt_len += dtl_entry_size; + } + + buf = vpaq->buffer->data; + buf += vpaq->pkt_len; + vpaq->dtl = (struct powerpc_vpadtl_entry *)buf; + + vpaq->tb_buffer = vpaq->buffer->buffer_nr; + vpaq->buffer = NULL; + vpaq->buf_len = 0; + + return 1; +} + +static int powerpc_vpadtl_decode_all(struct powerpc_vpadtl_queue *vpaq) +{ + int ret; + unsigned char *buf; + + if (!vpaq->buf_len || vpaq->pkt_len == vpaq->size) { + ret = powerpc_vpadtl_get_buffer(vpaq); + if (ret <= 0) + return ret; + } + + if (vpaq->buffer) { + buf = vpaq->buffer->data; + buf += vpaq->pkt_len; + vpaq->dtl = (struct powerpc_vpadtl_entry *)buf; + if ((long long)be64_to_cpu(vpaq->dtl->timebase) <= 0) { + if (vpaq->pkt_len != dtl_entry_size && vpaq->buf_len) { + vpaq->pkt_len += dtl_entry_size; + vpaq->buf_len -= dtl_entry_size; + } + return -1; + } + vpaq->pkt_len += dtl_entry_size; + vpaq->buf_len -= dtl_entry_size; + } else { + return 0; + } + + return 1; +} + +static int powerpc_vpadtl_run_decoder(struct powerpc_vpadtl_queue *vpaq, u64 *timestamp) +{ + struct powerpc_vpadtl *vpa = vpaq->vpa; + struct powerpc_vpadtl_entry *record; + int ret; + unsigned long long vpaq_timestamp; + + while (1) { + ret = powerpc_vpadtl_decode_all(vpaq); + if (!ret) { + pr_debug("All data in the queue has been processed.\n"); + return 1; + } + + /* + * An error was detected when decoding the VPA PMU trace. + * Continue to the next trace data to find more DTL entries. + */ + if (ret < 0) + continue; + + record = vpaq->dtl; + + vpaq_timestamp = powerpc_vpadtl_timestamp(vpaq); + + /* Update timestamp for the last record */ + if (vpaq_timestamp > vpaq->timestamp) + vpaq->timestamp = vpaq_timestamp; + + /* + * If the timestamp of the queue is later than the timestamp of + * the coming perf event, bail out so the perf event can be + * processed first. + */ + if (vpaq->timestamp >= *timestamp) { + *timestamp = vpaq->timestamp; + vpaq->pkt_len -= dtl_entry_size; + vpaq->buf_len += dtl_entry_size; + return 0; + } + + ret = powerpc_vpadtl_sample(record, vpa, vpaq_timestamp, vpaq->cpu); + if (ret) + continue; + } + return 0; +} + +/* + * For each PERF_RECORD_XX record, compare the timestamp of the perf + * record with the timestamp of the top element in the auxtrace heap. + * Process the auxtrace queue if the timestamp of the element from the + * heap is lower than the timestamp of the entry in the perf record. + * + * Update the timestamp of the auxtrace heap with the timestamp + * of the last processed entry from the auxtrace buffer.
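+ * + * For example (illustrative): if the heap top (queue 0) has ordinal + * 1000, queue 1 has 1500, and a perf record arrives with timestamp + * 1200, only queue 0 is run (its decoder stops once it reaches 1200) + * before the record is delivered; queue 1 stays untouched on the heap.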
+static int powerpc_vpadtl_process_queues(struct powerpc_vpadtl *vpa, u64 timestamp) +{ + unsigned int queue_nr; + u64 ts; + int ret; + + while (1) { + struct auxtrace_queue *queue; + struct powerpc_vpadtl_queue *vpaq; + + if (!vpa->heap.heap_cnt) + return 0; + + if (vpa->heap.heap_array[0].ordinal >= timestamp) + return 0; + + queue_nr = vpa->heap.heap_array[0].queue_nr; + queue = &vpa->queues.queue_array[queue_nr]; + vpaq = queue->priv; + + auxtrace_heap__pop(&vpa->heap); + + if (vpa->heap.heap_cnt) { + ts = vpa->heap.heap_array[0].ordinal + 1; + if (ts > timestamp) + ts = timestamp; + } else { + ts = timestamp; + } + + ret = powerpc_vpadtl_run_decoder(vpaq, &ts); + if (ret < 0) { + auxtrace_heap__add(&vpa->heap, queue_nr, ts); + return ret; + } + + if (!ret) { + ret = auxtrace_heap__add(&vpa->heap, queue_nr, ts); + if (ret < 0) + return ret; + } else { + vpaq->on_heap = false; + } + } + return 0; +} + +static struct powerpc_vpadtl_queue *powerpc_vpadtl__alloc_queue(struct powerpc_vpadtl *vpa, + unsigned int queue_nr) +{ + struct powerpc_vpadtl_queue *vpaq; + + vpaq = zalloc(sizeof(*vpaq)); + if (!vpaq) + return NULL; + + vpaq->vpa = vpa; + vpaq->queue_nr = queue_nr; + + return vpaq; +} + +/* + * When Dispatch Trace Log data is collected along with other events, + * such as sched tracepoint events, it needs to be correlated with and + * presented interleaved with those events. Perf events can be collected + * in parallel across CPUs. + * + * An auxtrace_queue is created for each CPU. Data within each queue is in + * increasing order of timestamp. Allocate and set up auxtrace queues here. + * All auxtrace queues are maintained in an auxtrace heap, in increasing order + * of timestamp, so the lowest timestamp (the entries to be processed first) + * is always on top of the heap. + * + * To add to the auxtrace heap, fetch the timestamp from the first DTL entry + * of each queue. + */
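The heap ordering described above is what yields a global time order when draining per-CPU queues. A toy version of the merge with invented timestamps, where "pop the smallest head" plays the role of vpa->heap.heap_array[0]:

#include <stdio.h>
#include <stdint.h>

#define NQ	3	/* per-CPU queues */
#define QLEN	3

int main(void)
{
	uint64_t q[NQ][QLEN] = {	/* each queue already sorted */
		{10, 40, 70}, {20, 30, 90}, {15, 50, 60},
	};
	size_t pos[NQ] = {0};

	for (;;) {
		int best = -1;

		for (int i = 0; i < NQ; i++)
			if (pos[i] < QLEN &&
			    (best < 0 || q[i][pos[i]] < q[best][pos[best]]))
				best = i;
		if (best < 0)
			break;			/* all queues drained */
		printf("cpu%d ts=%llu\n", best,
		       (unsigned long long)q[best][pos[best]++]);
	}
	return 0;
}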
+static int powerpc_vpadtl__setup_queue(struct powerpc_vpadtl *vpa, + struct auxtrace_queue *queue, + unsigned int queue_nr) +{ + struct powerpc_vpadtl_queue *vpaq = queue->priv; + + if (list_empty(&queue->head) || vpaq) + return 0; + + vpaq = powerpc_vpadtl__alloc_queue(vpa, queue_nr); + if (!vpaq) + return -ENOMEM; + + queue->priv = vpaq; + + if (queue->cpu != -1) + vpaq->cpu = queue->cpu; + + if (!vpaq->on_heap) { + int ret; +retry: + ret = powerpc_vpadtl_decode(vpaq); + if (!ret) + return 0; + + if (ret < 0) + goto retry; + + vpaq->timestamp = powerpc_vpadtl_timestamp(vpaq); + + ret = auxtrace_heap__add(&vpa->heap, queue_nr, vpaq->timestamp); + if (ret) + return ret; + vpaq->on_heap = true; + } + + return 0; +} + +static int powerpc_vpadtl__setup_queues(struct powerpc_vpadtl *vpa) +{ + unsigned int i; + int ret; + + for (i = 0; i < vpa->queues.nr_queues; i++) { + ret = powerpc_vpadtl__setup_queue(vpa, &vpa->queues.queue_array[i], i); + if (ret) + return ret; + } + + return 0; +} + +static int powerpc_vpadtl__update_queues(struct powerpc_vpadtl *vpa) +{ + if (vpa->queues.new_data) { + vpa->queues.new_data = false; + return powerpc_vpadtl__setup_queues(vpa); + } + + return 0; +} + +static int powerpc_vpadtl_process_event(struct perf_session *session, + union perf_event *event __maybe_unused, + struct perf_sample *sample, + const struct perf_tool *tool) +{ + struct powerpc_vpadtl *vpa = session_to_vpa(session); + int err = 0; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) { + pr_err("VPA requires ordered events\n"); + return -EINVAL; + } + + if (sample->time) { + err = powerpc_vpadtl__update_queues(vpa); + if (err) + return err; + + err = powerpc_vpadtl_process_queues(vpa, sample->time); + } + + return err; +} + +/* + * Process PERF_RECORD_AUXTRACE records + */ +static int powerpc_vpadtl_process_auxtrace_event(struct perf_session *session, + union perf_event *event, + const struct perf_tool *tool __maybe_unused) +{ + struct powerpc_vpadtl *vpa = session_to_vpa(session); + struct auxtrace_buffer *buffer; + int fd = perf_data__fd(session->data); + off_t data_offset; + int err; + + if (!dump_trace) + return 0; + + if (perf_data__is_pipe(session->data)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&vpa->queues, session, event, + data_offset, &buffer); + + if (err) + return err; + + /* Dump here now we have copied a piped trace out of the pipe */ + if (auxtrace_buffer__get_data(buffer, fd)) { + powerpc_vpadtl_dump_event(vpa, buffer->data, buffer->size); + auxtrace_buffer__put_data(buffer); + } + + return 0; +} + +static int powerpc_vpadtl_flush(struct perf_session *session __maybe_unused, + const struct perf_tool *tool __maybe_unused) +{ + return 0; +} + +static void powerpc_vpadtl_free_events(struct perf_session *session) +{ + struct powerpc_vpadtl *vpa = session_to_vpa(session); + struct auxtrace_queues *queues = &vpa->queues; + + for (unsigned int i = 0; i < queues->nr_queues; i++) + zfree(&queues->queue_array[i].priv); + + auxtrace_queues__free(queues); +} + +static void powerpc_vpadtl_free(struct perf_session *session) +{ + struct powerpc_vpadtl *vpa = session_to_vpa(session); + + auxtrace_heap__free(&vpa->heap); + powerpc_vpadtl_free_events(session); + session->auxtrace = NULL; + free(vpa); +} + +static const char * const powerpc_vpadtl_info_fmts[] = { + [POWERPC_VPADTL_TYPE] = " PMU Type %"PRId64"\n", +}; + +static void powerpc_vpadtl_print_info(__u64 *arr)
+{ + if (!dump_trace) + return; + + fprintf(stdout, powerpc_vpadtl_info_fmts[POWERPC_VPADTL_TYPE], arr[POWERPC_VPADTL_TYPE]); +} + +static void set_event_name(struct evlist *evlist, u64 id, + const char *name) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.id && evsel->core.id[0] == id) { + if (evsel->name) + zfree(&evsel->name); + evsel->name = strdup(name); + break; + } + } +} + +static int +powerpc_vpadtl_synth_events(struct powerpc_vpadtl *vpa, struct perf_session *session) +{ + struct evlist *evlist = session->evlist; + struct evsel *evsel; + struct perf_event_attr attr; + bool found = false; + u64 id; + int err; + + evlist__for_each_entry(evlist, evsel) { + if (strstarts(evsel->name, "vpa_dtl")) { + found = true; + break; + } + } + + if (!found) { + pr_debug("No selected events with VPA trace data\n"); + return 0; + } + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.size = sizeof(struct perf_event_attr); + attr.sample_type = evsel->core.attr.sample_type; + attr.sample_id_all = evsel->core.attr.sample_id_all; + attr.type = PERF_TYPE_SYNTH; + attr.config = PERF_SYNTH_POWERPC_VPA_DTL; + + /* create new id val to be a fixed offset from evsel id */ + id = auxtrace_synth_id_range_start(evsel); + + err = perf_session__deliver_synth_attr_event(session, &attr, id); + if (err) + return err; + + vpa->sample_id = id; + set_event_name(evlist, id, "vpa-dtl"); + + return 0; +} + +/* + * Process the PERF_RECORD_AUXTRACE_INFO records and setup + * the infrastructure to process auxtrace events. PERF_RECORD_AUXTRACE_INFO + * is processed first since it is of type perf_user_event_type. + * Initialise the aux buffer queues using auxtrace_queues__init(). + * auxtrace_queue is created for each CPU. + */ +int powerpc_vpadtl_process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; + size_t min_sz = sizeof(u64) * POWERPC_VPADTL_TYPE; + struct powerpc_vpadtl *vpa; + int err; + + if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + + min_sz) + return -EINVAL; + + vpa = zalloc(sizeof(struct powerpc_vpadtl)); + if (!vpa) + return -ENOMEM; + + err = auxtrace_queues__init(&vpa->queues); + if (err) + goto err_free; + + vpa->session = session; + vpa->machine = &session->machines.host; /* No kvm support */ + vpa->auxtrace_type = auxtrace_info->type; + vpa->pmu_type = auxtrace_info->priv[POWERPC_VPADTL_TYPE]; + + vpa->auxtrace.process_event = powerpc_vpadtl_process_event; + vpa->auxtrace.process_auxtrace_event = powerpc_vpadtl_process_auxtrace_event; + vpa->auxtrace.flush_events = powerpc_vpadtl_flush; + vpa->auxtrace.free_events = powerpc_vpadtl_free_events; + vpa->auxtrace.free = powerpc_vpadtl_free; + session->auxtrace = &vpa->auxtrace; + + powerpc_vpadtl_print_info(&auxtrace_info->priv[0]); + + if (dump_trace) + return 0; + + err = powerpc_vpadtl_synth_events(vpa, session); + if (err) + goto err_free_queues; + + err = auxtrace_queues__process_index(&vpa->queues, session); + if (err) + goto err_free_queues; + + return 0; + +err_free_queues: + auxtrace_queues__free(&vpa->queues); + session->auxtrace = NULL; + +err_free: + free(vpa); + return err; +} diff --git a/tools/perf/util/powerpc-vpadtl.h b/tools/perf/util/powerpc-vpadtl.h new file mode 100644 index 000000000000..ca809660b9bb --- /dev/null +++ b/tools/perf/util/powerpc-vpadtl.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * VPA DTL PMU Support + */ + +#ifndef 
INCLUDE__PERF_POWERPC_VPADTL_H__ +#define INCLUDE__PERF_POWERPC_VPADTL_H__ + +enum { + POWERPC_VPADTL_TYPE, + VPADTL_AUXTRACE_PRIV_MAX, +}; + +#define VPADTL_AUXTRACE_PRIV_SIZE (VPADTL_AUXTRACE_PRIV_MAX * sizeof(u64)) + +union perf_event; +struct perf_session; +struct perf_pmu; + +int powerpc_vpadtl_process_auxtrace_info(union perf_event *event, + struct perf_session *session); + +#endif diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index a7566edc86a3..8f3ed83853a9 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -9,6 +9,7 @@ #include <unistd.h> #include <api/fs/tracing_path.h> +#include <api/io.h> #include <linux/stddef.h> #include <linux/perf_event.h> #include <linux/zalloc.h> @@ -28,6 +29,8 @@ #include "tracepoint.h" #include "pfm.h" #include "thread_map.h" +#include "tool_pmu.h" +#include "util.h" #define MAX_NAME_LEN 100 @@ -37,107 +40,10 @@ static const char * const event_type_descriptors[] = { "Software event", "Tracepoint event", "Hardware cache event", - "Raw hardware event descriptor", + "Raw event descriptor", "Hardware breakpoint", }; -static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = { - [PERF_TOOL_DURATION_TIME] = { - .symbol = "duration_time", - .alias = "", - }, - [PERF_TOOL_USER_TIME] = { - .symbol = "user_time", - .alias = "", - }, - [PERF_TOOL_SYSTEM_TIME] = { - .symbol = "system_time", - .alias = "", - }, -}; - -/* - * Print the events from <debugfs_mount_point>/tracing/events - */ -void print_tracepoint_events(const struct print_callbacks *print_cb __maybe_unused, void *print_state __maybe_unused) -{ - char *events_path = get_tracing_file("events"); - int events_fd = open(events_path, O_PATH); - - put_tracing_file(events_path); - if (events_fd < 0) { - printf("Error: failed to open tracing events directory\n"); - return; - } - -#ifdef HAVE_SCANDIRAT_SUPPORT -{ - struct dirent **sys_namelist = NULL; - int sys_items = tracing_events__scandir_alphasort(&sys_namelist); - - for (int i = 0; i < sys_items; i++) { - struct dirent *sys_dirent = sys_namelist[i]; - struct dirent **evt_namelist = NULL; - int dir_fd; - int evt_items; - - if (sys_dirent->d_type != DT_DIR || - !strcmp(sys_dirent->d_name, ".") || - !strcmp(sys_dirent->d_name, "..")) - goto next_sys; - - dir_fd = openat(events_fd, sys_dirent->d_name, O_PATH); - if (dir_fd < 0) - goto next_sys; - - evt_items = scandirat(events_fd, sys_dirent->d_name, &evt_namelist, NULL, alphasort); - for (int j = 0; j < evt_items; j++) { - struct dirent *evt_dirent = evt_namelist[j]; - char evt_path[MAXPATHLEN]; - int evt_fd; - - if (evt_dirent->d_type != DT_DIR || - !strcmp(evt_dirent->d_name, ".") || - !strcmp(evt_dirent->d_name, "..")) - goto next_evt; - - snprintf(evt_path, sizeof(evt_path), "%s/id", evt_dirent->d_name); - evt_fd = openat(dir_fd, evt_path, O_RDONLY); - if (evt_fd < 0) - goto next_evt; - close(evt_fd); - - snprintf(evt_path, MAXPATHLEN, "%s:%s", - sys_dirent->d_name, evt_dirent->d_name); - print_cb->print_event(print_state, - /*topic=*/NULL, - /*pmu_name=*/NULL, - evt_path, - /*event_alias=*/NULL, - /*scale_unit=*/NULL, - /*deprecated=*/false, - "Tracepoint event", - /*desc=*/NULL, - /*long_desc=*/NULL, - /*encoding_desc=*/NULL); -next_evt: - free(evt_namelist[j]); - } - close(dir_fd); - free(evt_namelist); -next_sys: - free(sys_namelist[i]); - } - - free(sys_namelist); -} -#else - printf("\nWARNING: Your libc doesn't have the scandirat function, please ask its maintainers to implement it.\n" - " As a rough fallback, please do 'ls 
%s' to see the available tracepoint events.\n", events_path); -#endif - close(events_fd); -} - void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) { struct strlist *bidlist, *sdtlist; @@ -215,6 +121,7 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) print_cb->print_event(print_state, /*topic=*/NULL, /*pmu_name=*/NULL, + PERF_TYPE_TRACEPOINT, evt_name ?: sdt_name->s, /*event_alias=*/NULL, /*deprecated=*/false, @@ -232,7 +139,6 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) bool is_event_supported(u8 type, u64 config) { bool ret = true; - int open_return; struct evsel *evsel; struct perf_event_attr attr = { .type = type, @@ -246,20 +152,33 @@ bool is_event_supported(u8 type, u64 config) evsel = evsel__new(&attr); if (evsel) { - open_return = evsel__open(evsel, NULL, tmap); - ret = open_return >= 0; + ret = evsel__open(evsel, NULL, tmap) >= 0; - if (open_return == -EACCES) { + if (!ret) { /* - * This happens if the paranoid value + * The event may fail to open if the paranoid value * /proc/sys/kernel/perf_event_paranoid is set to 2 - * Re-run with exclude_kernel set; we don't do that - * by default as some ARM machines do not support it. - * + * Re-run with exclude_kernel set; we don't do that by + * default as some ARM machines do not support it. */ evsel->core.attr.exclude_kernel = 1; ret = evsel__open(evsel, NULL, tmap) >= 0; } + + if (!ret) { + /* + * The event may fail to open if the PMU requires + * exclude_guest to be set (e.g. as the Apple M1 PMU + * requires). + * Re-run with exclude_guest set; we don't do that by + * default as it's equally legitimate for another PMU + * driver to require that exclude_guest is clear. + */ + evsel->core.attr.exclude_guest = 1; + ret = evsel__open(evsel, NULL, tmap) >= 0; + } + + evsel__close(evsel); evsel__delete(evsel); } @@ -267,127 +186,137 @@ return ret; } -int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state) +/** struct mep - RB-tree node for building printing information. */ +struct mep { + /** @nd: RB-tree element. */ + struct rb_node nd; + /** @metric_group: Owned metric group name(s), separated by ';'. */ + char *metric_group; + const char *metric_name; + const char *metric_desc; + const char *metric_long_desc; + const char *metric_expr; + const char *metric_threshold; + const char *metric_unit; + const char *pmu_name; +}; + +static int mep_cmp(struct rb_node *rb_node, const void *entry) { - struct perf_pmu *pmu = NULL; - const char *event_type_descriptor = event_type_descriptors[PERF_TYPE_HW_CACHE]; + struct mep *a = container_of(rb_node, struct mep, nd); + struct mep *b = (struct mep *)entry; + int ret; - /* - * Only print core PMUs, skipping uncore for performance and - * PERF_TYPE_SOFTWARE that can succeed in opening legacy cache evenst.
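The is_event_supported() rewrite above probes support by progressively restricting the attr and retrying. A self-contained illustration of the same fallback chain using the raw syscall on the current process (perf proper goes through evsel and a thread_map; this is only a sketch):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int try_open(struct perf_event_attr *attr)
{
	int fd = syscall(__NR_perf_event_open, attr, 0, -1, -1, 0);

	if (fd >= 0)
		close(fd);
	return fd;
}

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;

	if (try_open(&attr) < 0) {
		attr.exclude_kernel = 1;	/* perf_event_paranoid == 2 */
		if (try_open(&attr) < 0) {
			attr.exclude_guest = 1;	/* e.g. Apple M1 PMU */
			if (try_open(&attr) < 0) {
				puts("event not supported");
				return 1;
			}
		}
	}
	puts("event supported");
	return 0;
}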
- */ - while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { - if (pmu->is_uncore || pmu->type == PERF_TYPE_SOFTWARE) - continue; + ret = strcmp(a->metric_group, b->metric_group); + if (ret) + return ret; - for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { - for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { - /* skip invalid cache type */ - if (!evsel__is_cache_op_valid(type, op)) - continue; - - for (int res = 0; res < PERF_COUNT_HW_CACHE_RESULT_MAX; res++) { - char name[64]; - char alias_name[128]; - __u64 config; - int ret; - - __evsel__hw_cache_type_op_res_name(type, op, res, - name, sizeof(name)); - - ret = parse_events__decode_legacy_cache(name, pmu->type, - &config); - if (ret || !is_event_supported(PERF_TYPE_HW_CACHE, config)) - continue; - snprintf(alias_name, sizeof(alias_name), "%s/%s/", - pmu->name, name); - print_cb->print_event(print_state, - "cache", - pmu->name, - name, - alias_name, - /*scale_unit=*/NULL, - /*deprecated=*/false, - event_type_descriptor, - /*desc=*/NULL, - /*long_desc=*/NULL, - /*encoding_desc=*/NULL); - } - } - } - } - return 0; + return strcmp(a->metric_name, b->metric_name); } -void print_tool_events(const struct print_callbacks *print_cb, void *print_state) +static struct rb_node *mep_new(struct rblist *rl __maybe_unused, const void *entry) { - // Start at 1 because the first enum entry means no tool event. - for (int i = 1; i < PERF_TOOL_MAX; ++i) { - print_cb->print_event(print_state, - "tool", - /*pmu_name=*/NULL, - event_symbols_tool[i].symbol, - event_symbols_tool[i].alias, - /*scale_unit=*/NULL, - /*deprecated=*/false, - "Tool event", - /*desc=*/NULL, - /*long_desc=*/NULL, - /*encoding_desc=*/NULL); - } + struct mep *me = malloc(sizeof(struct mep)); + + if (!me) + return NULL; + + memcpy(me, entry, sizeof(struct mep)); + return &me->nd; } -void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, - unsigned int type, const struct event_symbol *syms, - unsigned int max) +static void mep_delete(struct rblist *rl __maybe_unused, + struct rb_node *nd) { - struct strlist *evt_name_list = strlist__new(NULL, NULL); - struct str_node *nd; - - if (!evt_name_list) { - pr_debug("Failed to allocate new strlist for symbol events\n"); - return; - } - for (unsigned int i = 0; i < max; i++) { - /* - * New attr.config still not supported here, the latest - * example was PERF_COUNT_SW_CGROUP_SWITCHES - */ - if (syms[i].symbol == NULL) - continue; + struct mep *me = container_of(nd, struct mep, nd); - if (!is_event_supported(type, i)) - continue; + zfree(&me->metric_group); + free(me); +} - if (strlen(syms[i].alias)) { - char name[MAX_NAME_LEN]; +static struct mep *mep_lookup(struct rblist *groups, const char *metric_group, + const char *metric_name) +{ + struct rb_node *nd; + struct mep me = { + .metric_group = strdup(metric_group), + .metric_name = metric_name, + }; + nd = rblist__find(groups, &me); + if (nd) { + free(me.metric_group); + return container_of(nd, struct mep, nd); + } + rblist__add_node(groups, &me); + nd = rblist__find(groups, &me); + if (nd) + return container_of(nd, struct mep, nd); + return NULL; +} - snprintf(name, MAX_NAME_LEN, "%s OR %s", syms[i].symbol, syms[i].alias); - strlist__add(evt_name_list, name); - } else - strlist__add(evt_name_list, syms[i].symbol); +static int metricgroup__add_to_mep_groups_callback(const struct pmu_metric *pm, + const struct pmu_metrics_table *table __maybe_unused, + void *vdata) +{ + struct rblist *groups = vdata; + const char *g; + char *omg, *mg; + + mg = 
strdup(pm->metric_group ?: pm->metric_name); + if (!mg) + return -ENOMEM; + omg = mg; + while ((g = strsep(&mg, ";")) != NULL) { + struct mep *me; + + g = skip_spaces(g); + if (strlen(g)) + me = mep_lookup(groups, g, pm->metric_name); + else + me = mep_lookup(groups, pm->metric_name, pm->metric_name); + + if (me) { + me->metric_desc = pm->desc; + me->metric_long_desc = pm->long_desc; + me->metric_expr = pm->metric_expr; + me->metric_threshold = pm->metric_threshold; + me->metric_unit = pm->unit; + me->pmu_name = pm->pmu; + } } + free(omg); - strlist__for_each_entry(nd, evt_name_list) { - char *alias = strstr(nd->s, " OR "); + return 0; +} - if (alias) { - *alias = '\0'; - alias += 4; - } - print_cb->print_event(print_state, - /*topic=*/NULL, - /*pmu_name=*/NULL, - nd->s, - alias, - /*scale_unit=*/NULL, - /*deprecated=*/false, - event_type_descriptors[type], - /*desc=*/NULL, - /*long_desc=*/NULL, - /*encoding_desc=*/NULL); +void metricgroup__print(const struct print_callbacks *print_cb, void *print_state) +{ + struct rblist groups; + struct rb_node *node, *next; + const struct pmu_metrics_table *table = pmu_metrics_table__find(); + + rblist__init(&groups); + groups.node_new = mep_new; + groups.node_cmp = mep_cmp; + groups.node_delete = mep_delete; + + metricgroup__for_each_metric(table, metricgroup__add_to_mep_groups_callback, &groups); + + for (node = rb_first_cached(&groups.entries); node; node = next) { + struct mep *me = container_of(node, struct mep, nd); + + print_cb->print_metric(print_state, + me->metric_group, + me->metric_name, + me->metric_desc, + me->metric_long_desc, + me->metric_expr, + me->metric_threshold, + me->metric_unit, + me->pmu_name); + next = rb_next(node); + rblist__remove_node(&groups, node); } - strlist__delete(evt_name_list); } /* @@ -395,20 +324,12 @@ void print_symbol_events(const struct print_callbacks *print_cb, void *print_sta */ void print_events(const struct print_callbacks *print_cb, void *print_state) { - print_symbol_events(print_cb, print_state, PERF_TYPE_HARDWARE, - event_symbols_hw, PERF_COUNT_HW_MAX); - print_symbol_events(print_cb, print_state, PERF_TYPE_SOFTWARE, - event_symbols_sw, PERF_COUNT_SW_MAX); - - print_tool_events(print_cb, print_state); - - print_hwcache_events(print_cb, print_state); - perf_pmus__print_pmu_events(print_cb, print_state); print_cb->print_event(print_state, /*topic=*/NULL, /*pmu_name=*/NULL, + PERF_TYPE_RAW, "rNNN", /*event_alias=*/NULL, /*scale_unit=*/NULL, @@ -418,21 +339,12 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) /*long_desc=*/NULL, /*encoding_desc=*/NULL); - print_cb->print_event(print_state, - /*topic=*/NULL, - /*pmu_name=*/NULL, - "cpu/t1=v1[,t2=v2,t3 ...]/modifier", - /*event_alias=*/NULL, - /*scale_unit=*/NULL, - /*deprecated=*/false, - event_type_descriptors[PERF_TYPE_RAW], - "(see 'man perf-list' on how to encode it)", - /*long_desc=*/NULL, - /*encoding_desc=*/NULL); + perf_pmus__print_raw_pmu_events(print_cb, print_state); print_cb->print_event(print_state, /*topic=*/NULL, /*pmu_name=*/NULL, + PERF_TYPE_BREAKPOINT, "mem:<addr>[/len][:access]", /*scale_unit=*/NULL, /*event_alias=*/NULL, @@ -442,8 +354,6 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) /*long_desc=*/NULL, /*encoding_desc=*/NULL); - print_tracepoint_events(print_cb, print_state); - print_sdt_events(print_cb, print_state); metricgroup__print(print_cb, print_state); diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index d7fab411e75c..eabba5d4a1fd 
100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -12,7 +12,7 @@ struct print_callbacks { void (*print_start)(void *print_state); void (*print_end)(void *print_state); void (*print_event)(void *print_state, const char *topic, - const char *pmu_name, + const char *pmu_name, u32 pmu_type, const char *event_name, const char *event_alias, const char *scale_unit, bool deprecated, const char *event_type_desc, @@ -25,18 +25,15 @@ struct print_callbacks { const char *long_desc, const char *expr, const char *threshold, - const char *unit); + const char *unit, + const char *pmu_name); + bool (*skip_duplicate_pmus)(void *print_state); }; /** Print all events, the default when no options are specified. */ void print_events(const struct print_callbacks *print_cb, void *print_state); -int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state); void print_sdt_events(const struct print_callbacks *print_cb, void *print_state); -void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, - unsigned int type, const struct event_symbol *syms, - unsigned int max); -void print_tool_events(const struct print_callbacks *print_cb, void *print_state); -void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state); +void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); bool is_event_supported(u8 type, u64 config); #endif /* __PERF_PRINT_EVENTS_H */ diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c new file mode 100644 index 000000000000..02e6fbb8ca04 --- /dev/null +++ b/tools/perf/util/print_insn.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Instruction binary disassembler based on capstone. + * + * Author(s): Changbin Du <changbin.du@huawei.com> + */ +#include <inttypes.h> +#include <string.h> +#include <stdbool.h> +#include "capstone.h" +#include "debug.h" +#include "sample.h" +#include "symbol.h" +#include "machine.h" +#include "thread.h" +#include "print_insn.h" +#include "dump-insn.h" +#include "map.h" +#include "dso.h" + +size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp) +{ + int printed = 0; + + for (int i = 0; i < sample->insn_len; i++) { + printed += fprintf(fp, "%02x", (unsigned char)sample->insn[i]); + if (sample->insn_len - i > 1) + printed += fprintf(fp, " "); + } + return printed; +} + +static bool is64bitip(struct machine *machine, struct addr_location *al) +{ + const struct dso *dso = al->map ? 
map__dso(al->map) : NULL; + + if (dso) + return dso__is_64_bit(dso); + + return machine__is(machine, "x86_64") || + machine__normalized_is(machine, "arm64") || + machine__normalized_is(machine, "s390"); +} + +ssize_t fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpumode, + bool is64bit, const uint8_t *code, size_t code_size, + uint64_t ip, int *lenp, int print_opts, FILE *fp) +{ + return capstone__fprintf_insn_asm(machine, thread, cpumode, is64bit, code, code_size, + ip, lenp, print_opts, fp); +} + +size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, + struct machine *machine, FILE *fp, + struct addr_location *al) +{ + bool is64bit = is64bitip(machine, al); + ssize_t printed; + + printed = fprintf_insn_asm(machine, thread, sample->cpumode, is64bit, + (uint8_t *)sample->insn, sample->insn_len, + sample->ip, NULL, 0, fp); + if (printed < 0) + return sample__fprintf_insn_raw(sample, fp); + + return printed; +} diff --git a/tools/perf/util/print_insn.h b/tools/perf/util/print_insn.h new file mode 100644 index 000000000000..07d11af3fc1c --- /dev/null +++ b/tools/perf/util/print_insn.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef PERF_PRINT_INSN_H +#define PERF_PRINT_INSN_H + +#include <stddef.h> +#include <stdio.h> + +struct perf_sample; +struct thread; +struct machine; +struct perf_insn; + +#define PRINT_INSN_IMM_HEX (1<<0) + +size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, + struct machine *machine, FILE *fp, struct addr_location *al); +size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp); +ssize_t fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpumode, + bool is64bit, const uint8_t *code, size_t code_size, + uint64_t ip, int *lenp, int print_opts, FILE *fp); + +#endif /* PERF_PRINT_INSN_H */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 16822a8a540f..710e4620923e 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -11,6 +11,7 @@ #include <sys/stat.h> #include <fcntl.h> #include <errno.h> +#include <libgen.h> #include <stdio.h> #include <unistd.h> #include <stdlib.h> @@ -39,6 +40,7 @@ #include "session.h" #include "string2.h" #include "strbuf.h" +#include "parse-events.h" #include <subcmd/pager.h> #include <linux/ctype.h> @@ -50,9 +52,14 @@ #define PERFPROBE_GROUP "probe" +/* Defined in kernel/trace/trace.h */ +#define MAX_EVENT_NAME_LEN 64 + bool probe_event_dry_run; /* Dry run flag */ struct probe_conf probe_conf = { .magic_num = DEFAULT_PROBE_MAGIC_NUM }; +static char *synthesize_perf_probe_point(struct perf_probe_point *pp); + #define semantic_error(msg ...) pr_err("Semantic error :" msg) int e_snprintf(char *str, size_t size, const char *format, ...) @@ -68,12 +75,14 @@ int e_snprintf(char *str, size_t size, const char *format, ...) 
} static struct machine *host_machine; +static struct perf_env host_env; /* Initialize symbol maps and path of vmlinux/modules */ int init_probe_symbol_maps(bool user_only) { int ret; + perf_env__init(&host_env); symbol_conf.allow_aliases = true; ret = symbol__init(NULL); if (ret < 0) { @@ -87,7 +96,7 @@ int init_probe_symbol_maps(bool user_only) if (symbol_conf.vmlinux_name) pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name); - host_machine = machine__new_host(); + host_machine = machine__new_host(&host_env); if (!host_machine) { pr_debug("machine__new_host() failed.\n"); symbol__exit(); @@ -104,6 +113,7 @@ void exit_probe_symbol_maps(void) machine__delete(host_machine); host_machine = NULL; symbol__exit(); + perf_env__exit(&host_env); } static struct ref_reloc_sym *kernel_get_ref_reloc_sym(struct map **pmap) @@ -147,10 +157,32 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr, return 0; } +struct kernel_get_module_map_cb_args { + const char *module; + struct map *result; +}; + +static int kernel_get_module_map_cb(struct map *map, void *data) +{ + struct kernel_get_module_map_cb_args *args = data; + struct dso *dso = map__dso(map); + const char *short_name = dso__short_name(dso); + u16 short_name_len = dso__short_name_len(dso); + + if (strncmp(short_name + 1, args->module, short_name_len - 2) == 0 && + args->module[short_name_len - 2] == '\0') { + args->result = map__get(map); + return 1; + } + return 0; +} + static struct map *kernel_get_module_map(const char *module) { - struct maps *maps = machine__kernel_maps(host_machine); - struct map_rb_node *pos; + struct kernel_get_module_map_cb_args args = { + .module = module, + .result = NULL, + }; /* A file path -- this is an offline module */ if (module && strchr(module, '/')) @@ -162,19 +194,9 @@ static struct map *kernel_get_module_map(const char *module) return map__get(map); } - maps__for_each_entry(maps, pos) { - /* short_name is "[module]" */ - struct dso *dso = map__dso(pos->map); - const char *short_name = dso->short_name; - u16 short_name_len = dso->short_name_len; + maps__for_each_map(machine__kernel_maps(host_machine), kernel_get_module_map_cb, &args); - if (strncmp(short_name + 1, module, - short_name_len - 2) == 0 && - module[short_name_len - 2] == '\0') { - return map__get(pos->map); - } - } - return NULL; + return args.result; } struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user) @@ -187,10 +209,9 @@ struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user) map = dso__new_map(target); dso = map ? 
map__dso(map) : NULL; if (dso) { - mutex_lock(&dso->lock); - nsinfo__put(dso->nsinfo); - dso->nsinfo = nsinfo__get(nsi); - mutex_unlock(&dso->lock); + mutex_lock(dso__lock(dso)); + dso__set_nsinfo(dso, nsinfo__get(nsi)); + mutex_unlock(dso__lock(dso)); } return map; } else { @@ -221,7 +242,7 @@ static int convert_exec_to_group(const char *exec, char **result) } } - ret = e_snprintf(buf, 64, "%s_%s", PERFPROBE_GROUP, ptr1); + ret = e_snprintf(buf, sizeof(buf), "%s_%s", PERFPROBE_GROUP, ptr1); if (ret < 0) goto out; @@ -328,7 +349,7 @@ elf_err: return mod_name; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT static int kernel_get_module_dso(const char *module, struct dso **pdso) { @@ -344,6 +365,7 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso) map = maps__find_by_name(machine__kernel_maps(host_machine), module_name); if (map) { dso = map__dso(map); + map__put(map); goto found; } pr_debug("Failed to find module %s.\n", module); @@ -352,11 +374,11 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso) map = machine__kernel_map(host_machine); dso = map__dso(map); - if (!dso->has_build_id) + if (!dso__has_build_id(dso)) dso__read_running_kernel_build_id(dso, host_machine); vmlinux_name = symbol_conf.vmlinux_name; - dso->load_errno = 0; + *dso__load_errno(dso) = 0; if (vmlinux_name) ret = dso__load_vmlinux(dso, map, vmlinux_name, false); else @@ -483,7 +505,7 @@ static struct debuginfo *open_from_debuginfod(struct dso *dso, struct nsinfo *ns if (!c) return NULL; - build_id__sprintf(&dso->bid, sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); fd = debuginfod_find_debuginfo(c, (const unsigned char *)sbuild_id, 0, &path); if (fd >= 0) @@ -526,7 +548,7 @@ static struct debuginfo *open_debuginfo(const char *module, struct nsinfo *nsi, if (!module || !strchr(module, '/')) { err = kernel_get_module_dso(module, &dso); if (err < 0) { - if (!dso || dso->load_errno == 0) { + if (!dso || *dso__load_errno(dso) == 0) { if (!str_error_r(-err, reason, STRERR_BUFSIZE)) strcpy(reason, "(unknown)"); } else @@ -543,7 +565,7 @@ static struct debuginfo *open_debuginfo(const char *module, struct nsinfo *nsi, } return NULL; } - path = dso->long_name; + path = dso__long_name(dso); } nsinfo__mountns_enter(nsi, &nsc); ret = debuginfo__new(path); @@ -961,8 +983,9 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, debuginfo__delete(dinfo); if (ntevs == 0) { /* No error but failed to find probe point. 
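*/

The hunk that follows fixes a leak: synthesize_perf_probe_point() returns heap memory, so a caller that only wants it for a warning message still owns it. The pattern in miniature, with a hypothetical stand-in helper:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>

static char *make_label(const char *fn, int line)	/* caller frees */
{
	char *s = NULL;

	if (asprintf(&s, "%s:%d", fn, line) < 0)
		return NULL;
	return s;
}

int main(void)
{
	char *label = make_label("vfs_read", 42);

	if (label) {
		fprintf(stderr, "Probe point '%s' not found.\n", label);
		free(label);	/* the point of the fix: don't leak it */
	}
	return 0;
}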
*/ - pr_warning("Probe point '%s' not found.\n", - synthesize_perf_probe_point(&pev->point)); + char *probe_point = synthesize_perf_probe_point(&pev->point); + pr_warning("Probe point '%s' not found.\n", probe_point); + free(probe_point); return -ENODEV; } else if (ntevs < 0) { /* Error path : ntevs < 0 */ @@ -1020,6 +1043,17 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) return rv; } +static int sprint_line_description(char *sbuf, size_t size, struct line_range *lr) +{ + if (!lr->function) + return snprintf(sbuf, size, "file: %s, line: %d", lr->file, lr->start); + + if (lr->file) + return snprintf(sbuf, size, "function: %s, file:%s, line: %d", lr->function, lr->file, lr->start); + + return snprintf(sbuf, size, "function: %s, line:%d", lr->function, lr->start); +} + #define show_one_line_with_num(f,l) _show_one_line(f,l,false,true) #define show_one_line(f,l) _show_one_line(f,l,false,false) #define skip_one_line(f,l) _show_one_line(f,l,true,false) @@ -1032,7 +1066,6 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) static int __show_line_range(struct line_range *lr, const char *module, bool user) { - struct build_id bid; int l = 1; struct int_node *ln; struct debuginfo *dinfo; @@ -1049,17 +1082,23 @@ static int __show_line_range(struct line_range *lr, const char *module, ret = debuginfo__find_line_range(dinfo, lr); if (!ret) { /* Not found, retry with an alternative */ + pr_debug2("Failed to find line range in debuginfo. Fallback to alternative\n"); ret = get_alternative_line_range(dinfo, lr, module, user); if (!ret) ret = debuginfo__find_line_range(dinfo, lr); + else /* Ignore error, we just failed to find it. */ + ret = -ENOENT; } if (dinfo->build_id) { + struct build_id bid; + build_id__init(&bid, dinfo->build_id, BUILD_ID_SIZE); - build_id__sprintf(&bid, sbuild_id); + build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id)); } debuginfo__delete(dinfo); if (ret == 0 || ret == -ENOENT) { - pr_warning("Specified source line is not found.\n"); + sprint_line_description(sbuf, sizeof(sbuf), lr); + pr_warning("Specified source line(%s) is not found.\n", sbuf); return -ENOENT; } else if (ret < 0) { pr_warning("Debuginfo analysis failed.\n"); @@ -1234,7 +1273,7 @@ out: return ret; } -#else /* !HAVE_DWARF_SUPPORT */ +#else /* !HAVE_LIBDW_SUPPORT */ static void debuginfo_cache__exit(void) { @@ -1327,32 +1366,41 @@ static bool is_c_func_name(const char *name) * * SRC[:SLN[+NUM|-ELN]] * FNC[@SRC][:SLN[+NUM|-ELN]] + * + * FNC@SRC accepts `FNC@*` which forcibly specify FNC as function name. + * SRC and FUNC can be quoted by double/single quotes. 
int parse_line_range_desc(const char *arg, struct line_range *lr) { - char *range, *file, *name = strdup(arg); - int err; + char *buf = strdup(arg); + char *p; + int err = 0; - if (!name) + if (!buf) return -ENOMEM; lr->start = 0; lr->end = INT_MAX; - range = strchr(name, ':'); - if (range) { - *range++ = '\0'; + p = strpbrk_esq(buf, ":"); + if (p) { + if (p == buf) { + semantic_error("No file/function name in '%s'.\n", p); + err = -EINVAL; + goto out; + } + *(p++) = '\0'; - err = parse_line_num(&range, &lr->start, "start line"); + err = parse_line_num(&p, &lr->start, "start line"); if (err) - goto err; + goto out; - if (*range == '+' || *range == '-') { - const char c = *range++; + if (*p == '+' || *p == '-') { + const char c = *(p++); - err = parse_line_num(&range, &lr->end, "end line"); + err = parse_line_num(&p, &lr->end, "end line"); if (err) - goto err; + goto out; if (c == '+') { lr->end += lr->start; @@ -1372,36 +1420,43 @@ int parse_line_range_desc(const char *arg, struct line_range *lr) if (lr->start > lr->end) { semantic_error("Start line must be smaller" " than end line.\n"); - goto err; + goto out; } - if (*range != '\0') { - semantic_error("Tailing with invalid str '%s'.\n", range); - goto err; + if (*p != '\0') { + semantic_error("Trailing with invalid str '%s'.\n", p); + goto out; } } - file = strchr(name, '@'); - if (file) { - *file = '\0'; - lr->file = strdup(++file); - if (lr->file == NULL) { - err = -ENOMEM; - goto err; + p = strpbrk_esq(buf, "@"); + if (p) { + *p++ = '\0'; + if (strcmp(p, "*")) { + lr->file = strdup_esq(p); + if (lr->file == NULL) { + err = -ENOMEM; + goto out; + } } - lr->function = name; - } else if (strchr(name, '/') || strchr(name, '.')) - lr->file = name; - else if (is_c_func_name(name))/* We reuse it for checking funcname */ - lr->function = name; + if (*buf != '\0') + lr->function = strdup_esq(buf); + if (!lr->function && !lr->file) { + semantic_error("Only '@*' is not allowed.\n"); + err = -EINVAL; + goto out; + } + } else if (strpbrk_esq(buf, "/.")) + lr->file = strdup_esq(buf); + else if (is_c_func_name(buf)) /* We reuse it for checking funcname */ + lr->function = strdup_esq(buf); else { /* Invalid name */ - semantic_error("'%s' is not a valid function name.\n", name); + semantic_error("'%s' is not a valid function name.\n", buf); err = -EINVAL; - goto err; + goto out; } - return 0; -err: - free(name); +out: + free(buf); return err; } @@ -1409,19 +1464,19 @@ static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev) { char *ptr; - ptr = strpbrk_esc(*arg, ":"); + ptr = strpbrk_esq(*arg, ":"); if (ptr) { *ptr = '\0'; if (!pev->sdt && !is_c_func_name(*arg)) goto ng_name; - pev->group = strdup_esc(*arg); + pev->group = strdup_esq(*arg); if (!pev->group) return -ENOMEM; *arg = ptr + 1; } else pev->group = NULL; - pev->event = strdup_esc(*arg); + pev->event = strdup_esq(*arg); if (pev->event == NULL) return -ENOMEM; @@ -1460,7 +1515,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg++; } - ptr = strpbrk_esc(arg, ";=@+%"); + ptr = strpbrk_esq(arg, ";=@+%"); if (pev->sdt) { if (ptr) { if (*ptr != '@') { @@ -1474,7 +1529,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) pev->target = build_id_cache__origname(tmp); free(tmp); } else - pev->target = strdup_esc(ptr + 1); + pev->target = strdup_esq(ptr + 1); if (!pev->target) return -ENOMEM; *ptr = '\0'; @@ -1515,7 +1570,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) file_spec = true; }
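Further down, get_new_event_name() gains a not_C_symname path that squashes runs of non-alphanumeric characters into single underscores, so that e.g. a C++ or Rust symbol can serve as an event name. A standalone rendition of that loop, with a made-up symbol:

#include <ctype.h>
#include <stdio.h>

static void sanitize(char *d, const char *s)
{
	char *start = d;

	do {
		if (*s && !isalnum((unsigned char)*s)) {
			if (d != start && *(d - 1) != '_')
				*d++ = '_';	/* collapse runs to one '_' */
		} else {
			*d++ = *s;		/* copies the final NUL too */
		}
	} while (*s++);
}

int main(void)
{
	char out[64];

	sanitize(out, "core::ptr::drop_in_place");
	printf("%s\n", out);	/* prints core_ptr_drop_in_place */
	return 0;
}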
- ptr = strpbrk_esc(arg, ";:+@%"); + ptr = strpbrk_esq(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1524,7 +1579,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (arg[0] == '\0') tmp = NULL; else { - tmp = strdup_esc(arg); + tmp = strdup_esq(arg); if (tmp == NULL) return -ENOMEM; } @@ -1562,7 +1617,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) return -ENOMEM; break; } - ptr = strpbrk_esc(arg, ";:+@%"); + ptr = strpbrk_esq(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1589,7 +1644,9 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) semantic_error("SRC@SRC is not allowed.\n"); return -EINVAL; } - pp->file = strdup_esc(arg); + if (!strcmp(arg, "*")) + break; + pp->file = strdup_esq(arg); if (pp->file == NULL) return -ENOMEM; break; @@ -2009,7 +2066,7 @@ out: } /* Compose only probe point (not argument) */ -char *synthesize_perf_probe_point(struct perf_probe_point *pp) +static char *synthesize_perf_probe_point(struct perf_probe_point *pp) { struct strbuf buf; char *tmp, *ret = NULL; @@ -2062,14 +2119,18 @@ char *synthesize_perf_probe_command(struct perf_probe_event *pev) goto out; tmp = synthesize_perf_probe_point(&pev->point); - if (!tmp || strbuf_addstr(&buf, tmp) < 0) + if (!tmp || strbuf_addstr(&buf, tmp) < 0) { + free(tmp); goto out; + } free(tmp); for (i = 0; i < pev->nargs; i++) { tmp = synthesize_perf_probe_arg(pev->args + i); - if (!tmp || strbuf_addf(&buf, " %s", tmp) < 0) + if (!tmp || strbuf_addf(&buf, " %s", tmp) < 0) { + free(tmp); goto out; + } free(tmp); } @@ -2254,9 +2315,7 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp, ret = pp->function ? 0 : -ENOMEM; out: - if (map && !is_kprobe) { - map__put(map); - } + map__put(map); return ret; } @@ -2360,6 +2419,7 @@ void clear_perf_probe_event(struct perf_probe_event *pev) } pev->nargs = 0; zfree(&pev->args); + nsinfo__zput(pev->nsi); } #define strdup_or_goto(str, label) \ @@ -2720,7 +2780,7 @@ int show_perf_probe_events(struct strfilter *filter) static int get_new_event_name(char *buf, size_t len, const char *base, struct strlist *namelist, bool ret_event, - bool allow_suffix) + bool allow_suffix, bool not_C_symname) { int i, ret; char *p, *nbase; @@ -2731,15 +2791,32 @@ static int get_new_event_name(char *buf, size_t len, const char *base, if (!nbase) return -ENOMEM; - /* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */ - p = strpbrk(nbase, ".@"); - if (p && p != nbase) - *p = '\0'; + if (not_C_symname) { + /* Replace non-alnum with '_' */ + char *s, *d; + + s = d = nbase; + do { + if (*s && !isalnum(*s)) { + if (d != nbase && *(d - 1) != '_') + *d++ = '_'; + } else + *d++ = *s; + } while (*s++); + } else { + /* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */ + p = strpbrk(nbase, ".@"); + if (p && p != nbase) + *p = '\0'; + } /* Try no suffix number */ ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? 
"__return" : ""); if (ret < 0) { - pr_debug("snprintf() failed: %d\n", ret); + pr_warning("snprintf() failed: %d; the event name '%s' is too long\n" + " Hint: Set a shorter event with syntax \"EVENT=PROBEDEF\"\n" + " EVENT: Event name (max length: %d bytes).\n", + ret, nbase, MAX_EVENT_NAME_LEN); goto out; } if (!strlist__has_entry(namelist, buf)) @@ -2759,7 +2836,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base, for (i = 1; i < MAX_EVENT_INDEX; i++) { ret = e_snprintf(buf, len, "%s_%d", nbase, i); if (ret < 0) { - pr_debug("snprintf() failed: %d\n", ret); + pr_warning("Add suffix failed: %d; the event name '%s' is too long\n" + " Hint: Set a shorter event with syntax \"EVENT=PROBEDEF\"\n" + " EVENT: Event name (max length: %d bytes).\n", + ret, nbase, MAX_EVENT_NAME_LEN); goto out; } if (!strlist__has_entry(namelist, buf)) @@ -2800,13 +2880,18 @@ static void warn_uprobe_event_compat(struct probe_trace_event *tev) if (!tev->uprobes || tev->nargs == 0 || !buf) goto out; - for (i = 0; i < tev->nargs; i++) - if (strglobmatch(tev->args[i].value, "[$@+-]*")) { - pr_warning("Please upgrade your kernel to at least " - "3.14 to have access to feature %s\n", + for (i = 0; i < tev->nargs; i++) { + if (strchr(tev->args[i].value, '@')) { + pr_warning("%s accesses a variable by symbol name, but that is not supported for user application probe.\n", + tev->args[i].value); + break; + } + if (strglobmatch(tev->args[i].value, "[$+-]*")) { + pr_warning("Please upgrade your kernel to at least 3.14 to have access to feature %s\n", tev->args[i].value); break; } + } out: free(buf); } @@ -2818,7 +2903,8 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, bool allow_suffix) { const char *event, *group; - char buf[64]; + bool not_C_symname = true; + char buf[MAX_EVENT_NAME_LEN]; int ret; /* If probe_event or trace_event already have the name, reuse it */ @@ -2832,8 +2918,10 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, (strncmp(pev->point.function, "0x", 2) != 0) && !strisglob(pev->point.function)) event = pev->point.function; - else + else { event = tev->point.realname; + not_C_symname = !is_known_C_lang(tev->lang); + } } if (pev->group && !pev->sdt) group = pev->group; @@ -2842,9 +2930,16 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, else group = PERFPROBE_GROUP; + if (strlen(group) >= MAX_EVENT_NAME_LEN) { + pr_err("Probe group string='%s' is too long (>= %d bytes)\n", + group, MAX_EVENT_NAME_LEN); + return -ENOMEM; + } + /* Get an unused new event name */ - ret = get_new_event_name(buf, 64, event, namelist, - tev->point.retprobe, allow_suffix); + ret = get_new_event_name(buf, sizeof(buf), event, namelist, + tev->point.retprobe, allow_suffix, + not_C_symname); if (ret < 0) return ret; @@ -3673,66 +3768,12 @@ void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs) /* Loop 3: cleanup and free trace events */ for (i = 0; i < npevs; i++) { pev = &pevs[i]; - for (j = 0; j < pevs[i].ntevs; j++) - clear_probe_trace_event(&pevs[i].tevs[j]); - zfree(&pevs[i].tevs); - pevs[i].ntevs = 0; - nsinfo__zput(pev->nsi); - clear_perf_probe_event(&pevs[i]); - } -} - -int add_perf_probe_events(struct perf_probe_event *pevs, int npevs) -{ - int ret; - - ret = init_probe_symbol_maps(pevs->uprobes); - if (ret < 0) - return ret; - - ret = convert_perf_probe_events(pevs, npevs); - if (ret == 0) - ret = apply_perf_probe_events(pevs, npevs); - - cleanup_perf_probe_events(pevs, npevs); - - exit_probe_symbol_maps(); - return 
ret; -} - -int del_perf_probe_events(struct strfilter *filter) -{ - int ret, ret2, ufd = -1, kfd = -1; - char *str = strfilter__string(filter); - - if (!str) - return -EINVAL; - - /* Get current event names */ - ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW); - if (ret < 0) - goto out; - - ret = probe_file__del_events(kfd, filter); - if (ret < 0 && ret != -ENOENT) - goto error; - - ret2 = probe_file__del_events(ufd, filter); - if (ret2 < 0 && ret2 != -ENOENT) { - ret = ret2; - goto error; + for (j = 0; j < pev->ntevs; j++) + clear_probe_trace_event(&pev->tevs[j]); + zfree(&pev->tevs); + pev->ntevs = 0; + clear_perf_probe_event(pev); } - ret = 0; - -error: - if (kfd >= 0) - close(kfd); - if (ufd >= 0) - close(ufd); -out: - free(str); - - return ret; } int show_available_funcs(const char *target, struct nsinfo *nsi, @@ -3771,8 +3812,8 @@ int show_available_funcs(const char *target, struct nsinfo *nsi, /* Show all (filtered) symbols */ setup_pager(); - for (size_t i = 0; i < dso->symbol_names_len; i++) { - struct symbol *pos = dso->symbol_names[i]; + for (size_t i = 0; i < dso__symbol_names_len(dso); i++) { + struct symbol *pos = dso__symbol_names(dso)[i]; if (strfilter__compare(_filter, pos->name)) printf("%s\n", pos->name); diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 8ad5b1579f1d..71905ede0207 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -58,6 +58,7 @@ struct probe_trace_event { char *group; /* Group name */ struct probe_trace_point point; /* Trace point */ int nargs; /* Number of args */ + int lang; /* Dwarf language code */ bool uprobes; /* uprobes only */ struct probe_trace_arg *args; /* Arguments */ }; @@ -137,7 +138,6 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev); char *synthesize_perf_probe_command(struct perf_probe_event *pev); char *synthesize_probe_trace_command(struct probe_trace_event *tev); char *synthesize_perf_probe_arg(struct perf_probe_arg *pa); -char *synthesize_perf_probe_point(struct perf_probe_point *pp); int perf_probe_event__copy(struct perf_probe_event *dst, struct perf_probe_event *src); @@ -160,7 +160,6 @@ void line_range__clear(struct line_range *lr); /* Initialize line range */ int line_range__init(struct line_range *lr); -int add_perf_probe_events(struct perf_probe_event *pevs, int npevs); int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); int show_probe_trace_events(struct perf_probe_event *pevs, int npevs); @@ -169,8 +168,6 @@ void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); struct strfilter; -int del_perf_probe_events(struct strfilter *filter); - int show_perf_probe_event(const char *group, const char *event, struct perf_probe_event *pev, const char *module, bool use_stdout); diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 3d50de3217d5..5069fb61f48c 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -366,25 +366,6 @@ int probe_file__del_strlist(int fd, struct strlist *namelist) return ret; } -int probe_file__del_events(int fd, struct strfilter *filter) -{ - struct strlist *namelist; - int ret; - - namelist = strlist__new(NULL, NULL); - if (!namelist) - return -ENOMEM; - - ret = probe_file__get_events(fd, filter, namelist); - if (ret < 0) - goto out; - - ret = probe_file__del_strlist(fd, namelist); -out: - strlist__delete(namelist); - return ret; -} - /* Caller must ensure to 
remove this entry from list */ static void probe_cache_entry__delete(struct probe_cache_entry *entry) { @@ -467,10 +448,10 @@ static int probe_cache__open(struct probe_cache *pcache, const char *target, if (!target || !strcmp(target, DSO__NAME_KALLSYMS)) { target = DSO__NAME_KALLSYMS; is_kallsyms = true; - ret = sysfs__sprintf_build_id("/", sbuildid); + ret = sysfs__snprintf_build_id("/", sbuildid, sizeof(sbuildid)); } else { nsinfo__mountns_enter(nsi, &nsc); - ret = filename__sprintf_build_id(target, sbuildid); + ret = filename__snprintf_build_id(target, sbuildid, sizeof(sbuildid)); nsinfo__mountns_exit(&nsc); } diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index 0dba88c0f5f0..c2bb6a5b9dcc 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -44,7 +44,6 @@ struct strlist *probe_file__get_namelist(int fd); struct strlist *probe_file__get_rawlist(int fd); int probe_file__add_event(int fd, struct probe_trace_event *tev); -int probe_file__del_events(int fd, struct strfilter *filter); int probe_file__get_events(int fd, struct strfilter *filter, struct strlist *plist); int probe_file__del_strlist(int fd, struct strlist *namelist); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index f171360b0ef4..5ffd97ee4898 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -23,6 +23,7 @@ #include "event.h" #include "dso.h" #include "debug.h" +#include "debuginfo.h" #include "intlist.h" #include "strbuf.h" #include "strlist.h" @@ -31,125 +32,19 @@ #include "probe-file.h" #include "string2.h" -#ifdef HAVE_DEBUGINFOD_SUPPORT -#include <elfutils/debuginfod.h> -#endif - /* Kprobe tracer basic type is up to u64 */ #define MAX_BASIC_TYPE_BITS 64 -/* Dwarf FL wrappers */ -static char *debuginfo_path; /* Currently dummy */ - -static const Dwfl_Callbacks offline_callbacks = { - .find_debuginfo = dwfl_standard_find_debuginfo, - .debuginfo_path = &debuginfo_path, - - .section_address = dwfl_offline_section_address, - - /* We use this table for core files too. 
*/ - .find_elf = dwfl_build_id_find_elf, -}; - -/* Get a Dwarf from offline image */ -static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, - const char *path) +bool is_known_C_lang(int lang) { - GElf_Addr dummy; - int fd; - - fd = open(path, O_RDONLY); - if (fd < 0) - return fd; - - dbg->dwfl = dwfl_begin(&offline_callbacks); - if (!dbg->dwfl) - goto error; - - dwfl_report_begin(dbg->dwfl); - dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd); - if (!dbg->mod) - goto error; - - dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias); - if (!dbg->dbg) - goto error; - - dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy); - - dwfl_report_end(dbg->dwfl, NULL, NULL); - - return 0; -error: - if (dbg->dwfl) - dwfl_end(dbg->dwfl); - else - close(fd); - memset(dbg, 0, sizeof(*dbg)); - - return -ENOENT; -} - -static struct debuginfo *__debuginfo__new(const char *path) -{ - struct debuginfo *dbg = zalloc(sizeof(*dbg)); - if (!dbg) - return NULL; - - if (debuginfo__init_offline_dwarf(dbg, path) < 0) - zfree(&dbg); - if (dbg) - pr_debug("Open Debuginfo file: %s\n", path); - return dbg; -} - -enum dso_binary_type distro_dwarf_types[] = { - DSO_BINARY_TYPE__FEDORA_DEBUGINFO, - DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, - DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, - DSO_BINARY_TYPE__BUILDID_DEBUGINFO, - DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, - DSO_BINARY_TYPE__NOT_FOUND, -}; - -struct debuginfo *debuginfo__new(const char *path) -{ - enum dso_binary_type *type; - char buf[PATH_MAX], nil = '\0'; - struct dso *dso; - struct debuginfo *dinfo = NULL; - struct build_id bid; - - /* Try to open distro debuginfo files */ - dso = dso__new(path); - if (!dso) - goto out; - - /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ - if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) - dso__set_build_id(dso, &bid); - - for (type = distro_dwarf_types; - !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND; - type++) { - if (dso__read_binary_type_filename(dso, *type, &nil, - buf, PATH_MAX) < 0) - continue; - dinfo = __debuginfo__new(buf); - } - dso__put(dso); - -out: - /* if failed to open all distro debuginfo, open given binary */ - return dinfo ? 
: __debuginfo__new(path); -} - -void debuginfo__delete(struct debuginfo *dbg) -{ - if (dbg) { - if (dbg->dwfl) - dwfl_end(dbg->dwfl); - free(dbg); + switch (lang) { + case DW_LANG_C89: + case DW_LANG_C: + case DW_LANG_C99: + case DW_LANG_C11: + return true; + default: + return false; } } @@ -174,7 +69,7 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs) */ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, Dwarf_Op *fb_ops, Dwarf_Die *sp_die, - unsigned int machine, + const struct probe_finder *pf, struct probe_trace_arg *tvar) { Dwarf_Attribute attr; @@ -284,7 +179,7 @@ static_var: if (!tvar) return ret2; - regs = get_dwarf_regstr(regn, machine); + regs = get_dwarf_regstr(regn, pf->e_machine, pf->e_flags); if (!regs) { /* This should be a bug in DWARF or this tool */ pr_warning("Mapping for the register number %u " @@ -304,8 +199,6 @@ static_var: return ret2; } -#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_LONG / sizeof(long)) - static int convert_variable_type(Dwarf_Die *vr_die, struct probe_trace_arg *tvar, const char *cast, bool user_access) @@ -335,7 +228,7 @@ static int convert_variable_type(Dwarf_Die *vr_die, total = dwarf_bytesize(vr_die); if (boffs < 0 || total < 0) return -ENOENT; - ret = snprintf(buf, 16, "b%d@%d/%zd", bsize, boffs, + ret = snprintf(buf, 16, "b%d@%d/%d", bsize, boffs, BYTES_TO_BITS(total)); goto formatted; } @@ -571,7 +464,7 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) dwarf_diename(vr_die)); ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops, - &pf->sp_die, pf->machine, pf->tvar); + &pf->sp_die, pf, pf->tvar); if (ret == -ENOENT && pf->skip_empty_arg) /* This can be found in other place. skip it */ return 0; @@ -722,7 +615,6 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); if (ret <= 0 || nops == 0) { pf->fb_ops = NULL; -#if _ELFUTILS_PREREQ(0, 142) } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa && (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) { if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 && @@ -733,7 +625,6 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) free(frame); return -ENOENT; } -#endif } /* Call finder's callback handler */ @@ -957,7 +848,6 @@ static int probe_point_lazy_walker(const char *fname, int lineno, /* Find probe points from lazy pattern */ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) { - struct build_id bid; char sbuild_id[SBUILD_ID_SIZE] = ""; int ret = 0; char *fpath; @@ -967,8 +857,10 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) comp_dir = cu_get_comp_dir(&pf->cu_die); if (pf->dbg->build_id) { + struct build_id bid; + build_id__init(&bid, pf->dbg->build_id, BUILD_ID_SIZE); - build_id__sprintf(&bid, sbuild_id); + build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id)); } ret = find_source_path(pf->fname, sbuild_id, comp_dir, &fpath); if (ret < 0) { @@ -1082,6 +974,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die), (unsigned long)dwarf_dieoffset(sp_die)); pf->fname = fname; + pf->abstrace_dieoffset = dwarf_dieoffset(sp_die); if (pp->line) { /* Function relative line */ dwarf_decl_line(sp_die, &pf->lno); pf->lno += pp->line; @@ -1256,9 +1149,9 @@ static int debuginfo__find_probes(struct debuginfo *dbg, if (gelf_getehdr(elf, &ehdr) == NULL) return -EINVAL; - 
pf->machine = ehdr.e_machine; + pf->e_machine = ehdr.e_machine; + pf->e_flags = ehdr.e_flags; -#if _ELFUTILS_PREREQ(0, 142) do { GElf_Shdr shdr; @@ -1268,7 +1161,6 @@ static int debuginfo__find_probes(struct debuginfo *dbg, pf->cfi_dbg = dwarf_getcfi(dbg->dbg); } while (0); -#endif ret = debuginfo__find_probe_location(dbg, pf); return ret; @@ -1289,13 +1181,15 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data) struct local_vars_finder *vf = data; struct probe_finder *pf = vf->pf; int tag; + Dwarf_Attribute attr; + Dwarf_Die var_die; tag = dwarf_tag(die_mem); if (tag == DW_TAG_formal_parameter || (tag == DW_TAG_variable && vf->vars)) { if (convert_variable_location(die_mem, vf->pf->addr, vf->pf->fb_ops, &pf->sp_die, - pf->machine, NULL) == 0) { + pf, /*tvar=*/NULL) == 0) { vf->args[vf->nargs].var = (char *)dwarf_diename(die_mem); if (vf->args[vf->nargs].var == NULL) { vf->ret = -ENOMEM; @@ -1306,10 +1200,22 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data) } } - if (dwarf_haspc(die_mem, vf->pf->addr)) + if (dwarf_haspc(die_mem, vf->pf->addr)) { + /* + * When DW_AT_entry_pc contains an instruction address, + * also check that the DW_AT_abstract_origin of die_mem + * points to the correct DIE. + */ + if (dwarf_attr(die_mem, DW_AT_abstract_origin, &attr)) { + dwarf_formref_die(&attr, &var_die); + if (pf->abstrace_dieoffset != dwarf_dieoffset(&var_die)) + goto out; + } return DIE_FIND_CB_CONTINUE; - else - return DIE_FIND_CB_SIBLING; + } + +out: + return DIE_FIND_CB_SIBLING; } static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf, @@ -1393,6 +1299,8 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) goto end; } + tev->lang = dwarf_srclang(dwarf_diecu(sc_die, &pf->cu_die, NULL, NULL)); + pr_debug("Probe point found: %s+%lu\n", tev->point.symbol, tev->point.offset); @@ -1499,6 +1407,8 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, if (ret >= 0 && tf.pf.skip_empty_arg) ret = fill_empty_trace_arg(pev, tf.tevs, tf.ntevs); + dwarf_cfi_end(tf.pf.cfi_eh); + if (ret < 0 || tf.ntevs == 0) { for (i = 0; i < tf.ntevs; i++) clear_probe_trace_event(&tf.tevs[i]); @@ -1524,7 +1434,7 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) tag == DW_TAG_variable) { ret = convert_variable_location(die_mem, af->pf.addr, af->pf.fb_ops, &af->pf.sp_die, - af->pf.machine, NULL); + &af->pf, /*tvar=*/NULL); if (ret == 0 || ret == -ERANGE) { int ret2; bool externs = !af->child; @@ -1677,44 +1587,6 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg, return (ret < 0) ? 
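/*
 * (Threading the whole probe_finder through convert_variable_location()
 * instead of a bare machine number is what lets get_dwarf_regstr() see
 * both pf->e_machine and pf->e_flags; on some ABIs the ELF flags word
 * is needed in addition to the machine type to pick the right
 * register-name table.)
 */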
ret : af.nvls; } -/* For the kernel module, we need a special code to get a DIE */ -int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, - bool adjust_offset) -{ - int n, i; - Elf32_Word shndx; - Elf_Scn *scn; - Elf *elf; - GElf_Shdr mem, *shdr; - const char *p; - - elf = dwfl_module_getelf(dbg->mod, &dbg->bias); - if (!elf) - return -EINVAL; - - /* Get the number of relocations */ - n = dwfl_module_relocations(dbg->mod); - if (n < 0) - return -ENOENT; - /* Search the relocation related .text section */ - for (i = 0; i < n; i++) { - p = dwfl_module_relocation_info(dbg->mod, i, &shndx); - if (strcmp(p, ".text") == 0) { - /* OK, get the section header */ - scn = elf_getscn(elf, shndx); - if (!scn) - return -ENOENT; - shdr = gelf_getshdr(scn, &mem); - if (!shdr) - return -ENOENT; - *offs = shdr->sh_addr; - if (adjust_offset) - *offs -= shdr->sh_offset; - } - } - return 0; -} - /* Reverse search */ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, struct perf_probe_point *ppt) @@ -1741,8 +1613,21 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, /* Find a corresponding function (name, baseline and baseaddr) */ if (die_find_realfunc(&cudie, (Dwarf_Addr)addr, &spdie)) { - /* Get function entry information */ - func = basefunc = dwarf_diename(&spdie); + /* + * Get function entry information. + * + * As described in the document DWARF Debugging Information + * Format Version 5, section 2.22 Linkage Names, "mangled names, + * are used in various ways, ... to distinguish multiple + * entities that have the same name". + * + * First try to get the distinct linkage name; if that fails, + * fall back to the name associated with the DIE. + */ + func = basefunc = die_get_linkage_name(&spdie); + if (!func) + func = basefunc = dwarf_diename(&spdie); + if (!func || die_entrypc(&spdie, &baseaddr) != 0 || dwarf_decl_line(&spdie, &baseline) != 0) { @@ -2009,41 +1894,6 @@ found: return (ret < 0) ? ret : lf.found; } -#ifdef HAVE_DEBUGINFOD_SUPPORT -/* debuginfod doesn't require the comp_dir but buildid is required */ -static int get_source_from_debuginfod(const char *raw_path, - const char *sbuild_id, char **new_path) -{ - debuginfod_client *c = debuginfod_begin(); - const char *p = raw_path; - int fd; - - if (!c) - return -ENOMEM; - - fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id, - 0, p, new_path); - pr_debug("Search %s from debuginfod -> %d\n", p, fd); - if (fd >= 0) - close(fd); - debuginfod_end(c); - if (fd < 0) { - pr_debug("Failed to find %s in debuginfod (%s)\n", - raw_path, sbuild_id); - return -ENOENT; - } - pr_debug("Got a source %s\n", *new_path); - - return 0; -} -#else -static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused, - const char *sbuild_id __maybe_unused, - char **new_path __maybe_unused) -{ - return -ENOTSUP; -} -#endif /* * Find a src file from a DWARF tag path. Prepend optional source path prefix * and chop off leading directories that do not exist. 
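 *
 * (A sketch of the comp_dir prefixing that the hunk below adds in
 * front of the debuginfod query -- hypothetical values, but the same
 * path__join() helper the code uses:
 *
 *	char key[PATH_MAX];
 *
 *	path__join(key, sizeof(key), comp_dir, raw_path);
 *	// "/usr/src/debug" + "util/str.c" -> "/usr/src/debug/util/str.c"
 *
 * so the lookup key matches the compilation-time path recorded in the
 * DWARF data.)
 *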
Result is passed back as @@ -2056,7 +1906,11 @@ int find_source_path(const char *raw_path, const char *sbuild_id, const char *prefix = symbol_conf.source_prefix; if (sbuild_id && !prefix) { - if (!get_source_from_debuginfod(raw_path, sbuild_id, new_path)) + char prefixed_raw_path[PATH_MAX]; + + path__join(prefixed_raw_path, sizeof(prefixed_raw_path), comp_dir, raw_path); + + if (!get_source_from_debuginfod(prefixed_raw_path, sbuild_id, new_path)) return 0; } diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 8bc1c80d3c1c..ecd6d937c592 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -21,24 +21,13 @@ static inline int is_c_varname(const char *name) return isalpha(name[0]) || name[0] == '_'; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include "dwarf-aux.h" +#include "debuginfo.h" -/* TODO: export debuginfo data structure even if no dwarf support */ - -/* debug information structure */ -struct debuginfo { - Dwarf *dbg; - Dwfl_Module *mod; - Dwfl *dwfl; - Dwarf_Addr bias; - const unsigned char *build_id; -}; - -/* This also tries to open distro debuginfo */ -struct debuginfo *debuginfo__new(const char *path); -void debuginfo__delete(struct debuginfo *dbg); +/* Check the language code is known C */ +bool is_known_C_lang(int lang); /* Find probe_trace_events specified by perf_probe_event from debuginfo */ int debuginfo__find_trace_events(struct debuginfo *dbg, @@ -49,9 +38,6 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, struct perf_probe_point *ppt); -int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, - bool adjust_offset); - /* Find a line range */ int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr); @@ -77,17 +63,17 @@ struct probe_finder { const char *fname; /* Real file name */ Dwarf_Die cu_die; /* Current CU */ Dwarf_Die sp_die; + Dwarf_Off abstrace_dieoffset; struct intlist *lcache; /* Line cache for lazy match */ /* For variable searching */ -#if _ELFUTILS_PREREQ(0, 142) - /* Call Frame Information from .eh_frame */ + /* Call Frame Information from .eh_frame. Owned by this struct. */ Dwarf_CFI *cfi_eh; - /* Call Frame Information from .debug_frame */ + /* Call Frame Information from .debug_frame. Not owned. 
*/ Dwarf_CFI *cfi_dbg; -#endif Dwarf_Op *fb_ops; /* Frame base attribute */ - unsigned int machine; /* Target machine arch */ + unsigned int e_machine; /* ELF target machine arch */ + unsigned int e_flags; /* ELF target machine flags */ struct perf_probe_arg *pvar; /* Current target variable */ struct probe_trace_arg *tvar; /* Current result variable */ bool skip_empty_arg; /* Skip non-exist args */ @@ -121,6 +107,8 @@ struct line_finder { int found; }; -#endif /* HAVE_DWARF_SUPPORT */ +#else +#define is_known_C_lang(lang) (false) +#endif /* HAVE_LIBDW_SUPPORT */ #endif /*_PROBE_FINDER_H */ diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c index a1d1e4ef6257..141ffa129c69 100644 --- a/tools/perf/util/pstack.c +++ b/tools/perf/util/pstack.c @@ -63,20 +63,6 @@ void pstack__push(struct pstack *pstack, void *key) pstack->entries[pstack->top++] = key; } -void *pstack__pop(struct pstack *pstack) -{ - void *ret; - - if (pstack->top == 0) { - pr_err("%s: underflow!\n", __func__); - return NULL; - } - - ret = pstack->entries[--pstack->top]; - pstack->entries[pstack->top] = NULL; - return ret; -} - void *pstack__peek(struct pstack *pstack) { if (pstack->top == 0) diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h index 8729b8be061d..712051b8130f 100644 --- a/tools/perf/util/pstack.h +++ b/tools/perf/util/pstack.h @@ -10,7 +10,6 @@ void pstack__delete(struct pstack *pstack); bool pstack__empty(const struct pstack *pstack); void pstack__remove(struct pstack *pstack, void *key); void pstack__push(struct pstack *pstack, void *key); -void *pstack__pop(struct pstack *pstack); void *pstack__peek(struct pstack *pstack); #endif /* _PERF_PSTACK_ */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources deleted file mode 100644 index d4c9b4cd35ef..000000000000 --- a/tools/perf/util/python-ext-sources +++ /dev/null @@ -1,42 +0,0 @@ -# -# List of files needed by perf python extension -# -# Each source file must be placed on its own line so that it can be -# processed by Makefile and util/setup.py accordingly. 
-# - -util/python.c -../lib/ctype.c -util/cap.c -util/evlist.c -util/evsel.c -util/evsel_fprintf.c -util/perf_event_attr_fprintf.c -util/cpumap.c -util/memswap.c -util/mmap.c -util/namespaces.c -../lib/bitmap.c -../lib/find_bit.c -../lib/list_sort.c -../lib/hweight.c -../lib/string.c -../lib/vsprintf.c -util/thread_map.c -util/util.c -util/cgroup.c -util/parse-branch-options.c -util/rblist.c -util/counts.c -util/print_binary.c -util/strlist.c -util/trace-event.c -../lib/rbtree.c -util/string.c -util/symbol_fprintf.c -util/units.c -util/affinity.c -util/rwsem.c -util/hashmap.c -util/perf_regs.c -util/fncache.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 4eed8ec23994..cc1019d29a5d 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -6,204 +6,27 @@ #include <linux/err.h> #include <perf/cpumap.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #include <perf/mmap.h> -#include "evlist.h" #include "callchain.h" +#include "counts.h" +#include "evlist.h" #include "evsel.h" #include "event.h" +#include "expr.h" #include "print_binary.h" +#include "record.h" +#include "strbuf.h" #include "thread_map.h" +#include "tp_pmu.h" #include "trace-event.h" -#include "mmap.h" -#include "stat.h" #include "metricgroup.h" -#include "util/bpf-filter.h" -#include "util/env.h" -#include "util/pmu.h" -#include "util/pmus.h" +#include "mmap.h" +#include "util/sample.h" #include <internal/lib.h> -#include "util.h" - -#if PY_MAJOR_VERSION < 3 -#define _PyUnicode_FromString(arg) \ - PyString_FromString(arg) -#define _PyUnicode_AsString(arg) \ - PyString_AsString(arg) -#define _PyUnicode_FromFormat(...) \ - PyString_FromFormat(__VA_ARGS__) -#define _PyLong_FromLong(arg) \ - PyInt_FromLong(arg) - -#else - -#define _PyUnicode_FromString(arg) \ - PyUnicode_FromString(arg) -#define _PyUnicode_FromFormat(...) \ - PyUnicode_FromFormat(__VA_ARGS__) -#define _PyLong_FromLong(arg) \ - PyLong_FromLong(arg) -#endif -#ifndef Py_TYPE -#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) -#endif - -/* - * Avoid bringing in event parsing. - */ -int parse_event(struct evlist *evlist __maybe_unused, const char *str __maybe_unused) -{ - return 0; -} - -/* - * Provide these two so that we don't have to link against callchain.c and - * start dragging hist.c, etc. - */ -struct callchain_param callchain_param; - -int parse_callchain_record(const char *arg __maybe_unused, - struct callchain_param *param __maybe_unused) -{ - return 0; -} - -/* - * Add these not to drag util/env.c - */ -struct perf_env perf_env; - -const char *perf_env__cpuid(struct perf_env *env __maybe_unused) -{ - return NULL; -} - -// This one is a bit easier, wouldn't drag too much, but leave it as a stub we need it here -const char *perf_env__arch(struct perf_env *env __maybe_unused) -{ - return NULL; -} - -/* - * These ones are needed not to drag the PMU bandwagon, jevents generated - * pmu_sys_event_tables, etc and evsel__find_pmu() is used so far just for - * doing per PMU perf_event_attr.exclude_guest handling, not really needed, so - * far, for the perf python binding known usecases, revisit if this become - * necessary. - */ -struct perf_pmu *evsel__find_pmu(const struct evsel *evsel __maybe_unused) -{ - return NULL; -} - -int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) 
-{ - return EOF; -} - -int perf_pmus__num_core_pmus(void) -{ - return 1; -} - -bool evsel__is_aux_event(const struct evsel *evsel __maybe_unused) -{ - return false; -} - -/* - * Add this one here not to drag util/metricgroup.c - */ -int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, - struct rblist *new_metric_events, - struct rblist *old_metric_events) -{ - return 0; -} - -/* - * Add this one here not to drag util/trace-event-info.c - */ -char *tracepoint_id_to_name(u64 config) -{ - return NULL; -} - -/* - * XXX: All these evsel destructors need some better mechanism, like a linked - * list of destructors registered when the relevant code indeed is used instead - * of having more and more calls in perf_evsel__delete(). -- acme - * - * For now, add some more: - * - * Not to drag the BPF bandwagon... - */ -void bpf_counter__destroy(struct evsel *evsel); -int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); -int bpf_counter__disable(struct evsel *evsel); - -void bpf_counter__destroy(struct evsel *evsel __maybe_unused) -{ -} - -int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, int cpu __maybe_unused, int fd __maybe_unused) -{ - return 0; -} - -int bpf_counter__disable(struct evsel *evsel __maybe_unused) -{ - return 0; -} - -// not to drag util/bpf-filter.c -#ifdef HAVE_BPF_SKEL -int perf_bpf_filter__prepare(struct evsel *evsel __maybe_unused) -{ - return 0; -} - -int perf_bpf_filter__destroy(struct evsel *evsel __maybe_unused) -{ - return 0; -} -#endif - -/* - * Support debug printing even though util/debug.c is not linked. That means - * implementing 'verbose' and 'eprintf'. - */ -int verbose; -int debug_peo_args; - -int eprintf(int level, int var, const char *fmt, ...); - -int eprintf(int level, int var, const char *fmt, ...) 
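/*
 * (This stub, like the rest deleted in this hunk, existed only so the
 * extension could emit pr_debug() output without linking util/debug.c;
 * with the stubs gone the module presumably links the real 'verbose'
 * and 'eprintf' implementations instead.)
 */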
-{ - va_list args; - int ret = 0; - - if (var >= level) { - va_start(args, fmt); - ret = vfprintf(stderr, fmt, args); - va_end(args); - } - - return ret; -} - -/* Define PyVarObject_HEAD_INIT for python 2.5 */ -#ifndef PyVarObject_HEAD_INIT -# define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, -#endif - -#if PY_MAJOR_VERSION < 3 -PyMODINIT_FUNC initperf(void); -#else PyMODINIT_FUNC PyInit_perf(void); -#endif #define member_def(type, member, ptype, help) \ { #member, ptype, \ @@ -223,7 +46,7 @@ struct pyrf_event { }; #define sample_members \ - sample_member_def(sample_ip, ip, T_ULONGLONG, "event type"), \ + sample_member_def(sample_ip, ip, T_ULONGLONG, "event ip"), \ sample_member_def(sample_pid, pid, T_INT, "event pid"), \ sample_member_def(sample_tid, tid, T_INT, "event tid"), \ sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"), \ @@ -233,7 +56,7 @@ struct pyrf_event { sample_member_def(sample_period, period, T_ULONGLONG, "event period"), \ sample_member_def(sample_cpu, cpu, T_UINT, "event cpu"), -static char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object."); +static const char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object."); static PyMemberDef pyrf_mmap_event__members[] = { sample_members @@ -248,7 +71,7 @@ static PyMemberDef pyrf_mmap_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent) +static PyObject *pyrf_mmap_event__repr(const struct pyrf_event *pevent) { PyObject *ret; char *s; @@ -261,7 +84,7 @@ static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent) pevent->event.mmap.pgoff, pevent->event.mmap.filename) < 0) { ret = PyErr_NoMemory(); } else { - ret = _PyUnicode_FromString(s); + ret = PyUnicode_FromString(s); free(s); } return ret; @@ -277,7 +100,7 @@ static PyTypeObject pyrf_mmap_event__type = { .tp_repr = (reprfunc)pyrf_mmap_event__repr, }; -static char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object."); +static const char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object."); static PyMemberDef pyrf_task_event__members[] = { sample_members @@ -290,9 +113,9 @@ static PyMemberDef pyrf_task_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent) +static PyObject *pyrf_task_event__repr(const struct pyrf_event *pevent) { - return _PyUnicode_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, " + return PyUnicode_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, " "ptid: %u, time: %" PRI_lu64 "}", pevent->event.header.type == PERF_RECORD_FORK ? 
"fork" : "exit", pevent->event.fork.pid, @@ -312,7 +135,7 @@ static PyTypeObject pyrf_task_event__type = { .tp_repr = (reprfunc)pyrf_task_event__repr, }; -static char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object."); +static const char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object."); static PyMemberDef pyrf_comm_event__members[] = { sample_members @@ -323,9 +146,9 @@ static PyMemberDef pyrf_comm_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent) +static PyObject *pyrf_comm_event__repr(const struct pyrf_event *pevent) { - return _PyUnicode_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }", + return PyUnicode_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }", pevent->event.comm.pid, pevent->event.comm.tid, pevent->event.comm.comm); @@ -341,7 +164,7 @@ static PyTypeObject pyrf_comm_event__type = { .tp_repr = (reprfunc)pyrf_comm_event__repr, }; -static char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object."); +static const char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object."); static PyMemberDef pyrf_throttle_event__members[] = { sample_members @@ -352,11 +175,12 @@ static PyMemberDef pyrf_throttle_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent) +static PyObject *pyrf_throttle_event__repr(const struct pyrf_event *pevent) { - struct perf_record_throttle *te = (struct perf_record_throttle *)(&pevent->event.header + 1); + const struct perf_record_throttle *te = (const struct perf_record_throttle *) + (&pevent->event.header + 1); - return _PyUnicode_FromFormat("{ type: %sthrottle, time: %" PRI_lu64 ", id: %" PRI_lu64 + return PyUnicode_FromFormat("{ type: %sthrottle, time: %" PRI_lu64 ", id: %" PRI_lu64 ", stream_id: %" PRI_lu64 " }", pevent->event.header.type == PERF_RECORD_THROTTLE ? 
"" : "un", te->time, te->id, te->stream_id); @@ -372,7 +196,7 @@ static PyTypeObject pyrf_throttle_event__type = { .tp_repr = (reprfunc)pyrf_throttle_event__repr, }; -static char pyrf_lost_event__doc[] = PyDoc_STR("perf lost event object."); +static const char pyrf_lost_event__doc[] = PyDoc_STR("perf lost event object."); static PyMemberDef pyrf_lost_event__members[] = { sample_members @@ -381,7 +205,7 @@ static PyMemberDef pyrf_lost_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent) +static PyObject *pyrf_lost_event__repr(const struct pyrf_event *pevent) { PyObject *ret; char *s; @@ -391,7 +215,7 @@ static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent) pevent->event.lost.id, pevent->event.lost.lost) < 0) { ret = PyErr_NoMemory(); } else { - ret = _PyUnicode_FromString(s); + ret = PyUnicode_FromString(s); free(s); } return ret; @@ -407,7 +231,7 @@ static PyTypeObject pyrf_lost_event__type = { .tp_repr = (reprfunc)pyrf_lost_event__repr, }; -static char pyrf_read_event__doc[] = PyDoc_STR("perf read event object."); +static const char pyrf_read_event__doc[] = PyDoc_STR("perf read event object."); static PyMemberDef pyrf_read_event__members[] = { sample_members @@ -416,9 +240,9 @@ static PyMemberDef pyrf_read_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_read_event__repr(struct pyrf_event *pevent) +static PyObject *pyrf_read_event__repr(const struct pyrf_event *pevent) { - return _PyUnicode_FromFormat("{ type: read, pid: %u, tid: %u }", + return PyUnicode_FromFormat("{ type: read, pid: %u, tid: %u }", pevent->event.read.pid, pevent->event.read.tid); /* @@ -437,7 +261,7 @@ static PyTypeObject pyrf_read_event__type = { .tp_repr = (reprfunc)pyrf_read_event__repr, }; -static char pyrf_sample_event__doc[] = PyDoc_STR("perf sample event object."); +static const char pyrf_sample_event__doc[] = PyDoc_STR("perf sample event object."); static PyMemberDef pyrf_sample_event__members[] = { sample_members @@ -445,7 +269,13 @@ static PyMemberDef pyrf_sample_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent) +static void pyrf_sample_event__delete(struct pyrf_event *pevent) +{ + perf_sample__exit(&pevent->sample); + Py_TYPE(pevent)->tp_free((PyObject*)pevent); +} + +static PyObject *pyrf_sample_event__repr(const struct pyrf_event *pevent) { PyObject *ret; char *s; @@ -453,20 +283,20 @@ static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent) if (asprintf(&s, "{ type: sample }") < 0) { ret = PyErr_NoMemory(); } else { - ret = _PyUnicode_FromString(s); + ret = PyUnicode_FromString(s); free(s); } return ret; } #ifdef HAVE_LIBTRACEEVENT -static bool is_tracepoint(struct pyrf_event *pevent) +static bool is_tracepoint(const struct pyrf_event *pevent) { return pevent->evsel->core.attr.type == PERF_TYPE_TRACEPOINT; } static PyObject* -tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) +tracepoint_field(const struct pyrf_event *pe, struct tep_format_field *field) { struct tep_handle *pevent = field->event->tep; void *data = pe->sample.raw_data; @@ -487,7 +317,7 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { - ret = _PyUnicode_FromString((char *)data + offset); + ret = PyUnicode_FromString((char *)data + offset); } else { ret = PyByteArray_FromStringAndSize((const char *) data + offset, len); field->flags &= 
~TEP_FIELD_IS_STRING; @@ -509,25 +339,26 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) static PyObject* get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) { - const char *str = _PyUnicode_AsString(PyObject_Str(attr_name)); struct evsel *evsel = pevent->evsel; + struct tep_event *tp_format = evsel__tp_format(evsel); struct tep_format_field *field; - if (!evsel->tp_format) { - struct tep_event *tp_format; - - tp_format = trace_event__tp_format_id(evsel->core.attr.config); - if (IS_ERR_OR_NULL(tp_format)) - return NULL; + if (IS_ERR_OR_NULL(tp_format)) + return NULL; - evsel->tp_format = tp_format; - } + PyObject *obj = PyObject_Str(attr_name); + if (obj == NULL) + return NULL; - field = tep_find_any_field(evsel->tp_format, str); - if (!field) + const char *str = PyUnicode_AsUTF8(obj); + if (str == NULL) { + Py_DECREF(obj); return NULL; + } - return tracepoint_field(pevent, field); + field = tep_find_any_field(tp_format, str); + Py_DECREF(obj); + return field ? tracepoint_field(pevent, field) : NULL; } #endif /* HAVE_LIBTRACEEVENT */ @@ -555,7 +386,7 @@ static PyTypeObject pyrf_sample_event__type = { .tp_getattro = (getattrofunc) pyrf_sample_event__getattro, }; -static char pyrf_context_switch_event__doc[] = PyDoc_STR("perf context_switch event object."); +static const char pyrf_context_switch_event__doc[] = PyDoc_STR("perf context_switch event object."); static PyMemberDef pyrf_context_switch_event__members[] = { sample_members @@ -565,7 +396,7 @@ static PyMemberDef pyrf_context_switch_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent) +static PyObject *pyrf_context_switch_event__repr(const struct pyrf_event *pevent) { PyObject *ret; char *s; @@ -576,7 +407,7 @@ static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent) !!(pevent->event.header.misc & PERF_RECORD_MISC_SWITCH_OUT)) < 0) { ret = PyErr_NoMemory(); } else { - ret = _PyUnicode_FromString(s); + ret = PyUnicode_FromString(s); free(s); } return ret; @@ -603,6 +434,9 @@ static int pyrf_event__setup_types(void) pyrf_sample_event__type.tp_new = pyrf_context_switch_event__type.tp_new = pyrf_throttle_event__type.tp_new = PyType_GenericNew; + + pyrf_sample_event__type.tp_dealloc = (destructor)pyrf_sample_event__delete, + err = PyType_Ready(&pyrf_mmap_event__type); if (err < 0) goto out; @@ -645,7 +479,7 @@ static PyTypeObject *pyrf_event__type[] = { [PERF_RECORD_SWITCH_CPU_WIDE] = &pyrf_context_switch_event__type, }; -static PyObject *pyrf_event__new(union perf_event *event) +static PyObject *pyrf_event__new(const union perf_event *event) { struct pyrf_event *pevent; PyTypeObject *ptype; @@ -653,8 +487,19 @@ static PyObject *pyrf_event__new(union perf_event *event) if ((event->header.type < PERF_RECORD_MMAP || event->header.type > PERF_RECORD_SAMPLE) && !(event->header.type == PERF_RECORD_SWITCH || - event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)) + event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)) { + PyErr_Format(PyExc_TypeError, "Unexpected header type %u", + event->header.type); + return NULL; + } + + // FIXME this better be dynamic or we need to parse everything + // before calling perf_mmap__consume(), including tracepoint fields. 
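// (A dynamically sized copy, as that FIXME suggests, could look like
// this sketch -- hypothetical, not what the code below does:
//
//	union perf_event *copy = malloc(event->header.size);
//
//	if (copy != NULL)
//		memcpy(copy, event, event->header.size);
//
// For now the record is copied into the fixed-size pevent->event
// union, so oversized records have to be rejected up front:)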
+ if (sizeof(pevent->event) < event->header.size) { + PyErr_Format(PyExc_TypeError, "Unexpected event size: %zd < %u", + sizeof(pevent->event), event->header.size); return NULL; + } ptype = pyrf_event__type[event->header.type]; pevent = PyObject_New(struct pyrf_event, ptype); @@ -702,8 +547,10 @@ static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i) { struct pyrf_cpu_map *pcpus = (void *)obj; - if (i >= perf_cpu_map__nr(pcpus->cpus)) + if (i >= perf_cpu_map__nr(pcpus->cpus)) { + PyErr_SetString(PyExc_IndexError, "Index out of range"); return NULL; + } return Py_BuildValue("i", perf_cpu_map__cpu(pcpus->cpus, i).cpu); } @@ -713,7 +560,7 @@ static PySequenceMethods pyrf_cpu_map__sequence_methods = { .sq_item = pyrf_cpu_map__item, }; -static char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object."); +static const char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object."); static PyTypeObject pyrf_cpu_map__type = { PyVarObject_HEAD_INIT(NULL, 0) @@ -741,14 +588,14 @@ struct pyrf_thread_map { static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = { "pid", "tid", "uid", NULL }; - int pid = -1, tid = -1, uid = UINT_MAX; + static char *kwlist[] = { "pid", "tid", NULL }; + int pid = -1, tid = -1; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii", - kwlist, &pid, &tid, &uid)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", + kwlist, &pid, &tid)) return -1; - pthreads->threads = thread_map__new(pid, tid, uid); + pthreads->threads = thread_map__new(pid, tid); if (pthreads->threads == NULL) return -1; return 0; @@ -771,8 +618,10 @@ static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i) { struct pyrf_thread_map *pthreads = (void *)obj; - if (i >= perf_thread_map__nr(pthreads->threads)) + if (i >= perf_thread_map__nr(pthreads->threads)) { + PyErr_SetString(PyExc_IndexError, "Index out of range"); return NULL; + } return Py_BuildValue("i", perf_thread_map__pid(pthreads->threads, i)); } @@ -782,7 +631,7 @@ static PySequenceMethods pyrf_thread_map__sequence_methods = { .sq_item = pyrf_thread_map__item, }; -static char pyrf_thread_map__doc[] = PyDoc_STR("thread map object."); +static const char pyrf_thread_map__doc[] = PyDoc_STR("thread map object."); static PyTypeObject pyrf_thread_map__type = { PyVarObject_HEAD_INIT(NULL, 0) @@ -801,6 +650,295 @@ static int pyrf_thread_map__setup_types(void) return PyType_Ready(&pyrf_thread_map__type); } +/** + * A python wrapper for perf_pmus that are globally owned by the pmus.c code. 
+ */ +struct pyrf_pmu { + PyObject_HEAD + + struct perf_pmu *pmu; +}; + +static void pyrf_pmu__delete(struct pyrf_pmu *ppmu) +{ + Py_TYPE(ppmu)->tp_free((PyObject *)ppmu); +} + +static PyObject *pyrf_pmu__name(PyObject *self) +{ + struct pyrf_pmu *ppmu = (void *)self; + + return PyUnicode_FromString(ppmu->pmu->name); +} + +static bool add_to_dict(PyObject *dict, const char *key, const char *value) +{ + PyObject *pkey, *pvalue; + bool ret; + + if (value == NULL) + return true; + + pkey = PyUnicode_FromString(key); + pvalue = PyUnicode_FromString(value); + + ret = pkey && pvalue && PyDict_SetItem(dict, pkey, pvalue) == 0; + Py_XDECREF(pkey); + Py_XDECREF(pvalue); + return ret; +} + +static int pyrf_pmu__events_cb(void *state, struct pmu_event_info *info) +{ + PyObject *py_list = state; + PyObject *dict = PyDict_New(); + + if (!dict) + return -ENOMEM; + + if (!add_to_dict(dict, "name", info->name) || + !add_to_dict(dict, "alias", info->alias) || + !add_to_dict(dict, "scale_unit", info->scale_unit) || + !add_to_dict(dict, "desc", info->desc) || + !add_to_dict(dict, "long_desc", info->long_desc) || + !add_to_dict(dict, "encoding_desc", info->encoding_desc) || + !add_to_dict(dict, "topic", info->topic) || + !add_to_dict(dict, "event_type_desc", info->event_type_desc) || + !add_to_dict(dict, "str", info->str) || + !add_to_dict(dict, "deprecated", info->deprecated ? "deprecated" : NULL) || + PyList_Append(py_list, dict) != 0) { + Py_DECREF(dict); + return -ENOMEM; + } + Py_DECREF(dict); + return 0; +} + +static PyObject *pyrf_pmu__events(PyObject *self) +{ + struct pyrf_pmu *ppmu = (void *)self; + PyObject *py_list = PyList_New(0); + int ret; + + if (!py_list) + return NULL; + + ret = perf_pmu__for_each_event(ppmu->pmu, + /*skip_duplicate_pmus=*/false, + py_list, + pyrf_pmu__events_cb); + if (ret) { + Py_DECREF(py_list); + errno = -ret; + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + return py_list; +} + +static PyObject *pyrf_pmu__repr(PyObject *self) +{ + struct pyrf_pmu *ppmu = (void *)self; + + return PyUnicode_FromFormat("pmu(%s)", ppmu->pmu->name); +} + +static const char pyrf_pmu__doc[] = PyDoc_STR("perf Performance Monitoring Unit (PMU) object."); + +static PyMethodDef pyrf_pmu__methods[] = { + { + .ml_name = "events", + .ml_meth = (PyCFunction)pyrf_pmu__events, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("Returns a sequence of events encoded as dictionaries.") + }, + { + .ml_name = "name", + .ml_meth = (PyCFunction)pyrf_pmu__name, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("Name of the PMU including suffixes.") + }, + { .ml_name = NULL, } +}; + +/** The python type for a perf.pmu. */ +static PyTypeObject pyrf_pmu__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "perf.pmu", + .tp_basicsize = sizeof(struct pyrf_pmu), + .tp_dealloc = (destructor)pyrf_pmu__delete, + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + .tp_doc = pyrf_pmu__doc, + .tp_methods = pyrf_pmu__methods, + .tp_str = pyrf_pmu__name, + .tp_repr = pyrf_pmu__repr, +}; + +static int pyrf_pmu__setup_types(void) +{ + pyrf_pmu__type.tp_new = PyType_GenericNew; + return PyType_Ready(&pyrf_pmu__type); +} + + +/** A python iterator for pmus that has no equivalent in the C code. 
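 *
 * (It wraps the C-side scan idiom, which has no iterator object and is
 * instead driven by repeated perf_pmus__scan() calls:
 *
 *	struct perf_pmu *pmu = NULL;
 *
 *	while ((pmu = perf_pmus__scan(pmu)) != NULL)
 *		;	// visit pmu
 *
 * tp_iternext below hands back one wrapped pmu per step and raises
 * StopIteration once the scan returns NULL.)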
*/ +struct pyrf_pmu_iterator { + PyObject_HEAD + struct perf_pmu *pmu; +}; + +static void pyrf_pmu_iterator__dealloc(struct pyrf_pmu_iterator *self) +{ + Py_TYPE(self)->tp_free((PyObject *) self); +} + +static PyObject *pyrf_pmu_iterator__new(PyTypeObject *type, PyObject *args __maybe_unused, + PyObject *kwds __maybe_unused) +{ + struct pyrf_pmu_iterator *itr = (void *)type->tp_alloc(type, 0); + + if (itr != NULL) + itr->pmu = perf_pmus__scan(/*pmu=*/NULL); + + return (PyObject *) itr; +} + +static PyObject *pyrf_pmu_iterator__iter(PyObject *self) +{ + Py_INCREF(self); + return self; +} + +static PyObject *pyrf_pmu_iterator__iternext(PyObject *self) +{ + struct pyrf_pmu_iterator *itr = (void *)self; + struct pyrf_pmu *ppmu; + + if (itr->pmu == NULL) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + // Create object to return. + ppmu = PyObject_New(struct pyrf_pmu, &pyrf_pmu__type); + if (ppmu) { + ppmu->pmu = itr->pmu; + // Advance iterator. + itr->pmu = perf_pmus__scan(itr->pmu); + } + return (PyObject *)ppmu; +} + +/** The python type for the PMU iterator. */ +static PyTypeObject pyrf_pmu_iterator__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "pmus.iterator", + .tp_doc = "Iterator for the pmus string sequence.", + .tp_basicsize = sizeof(struct pyrf_pmu_iterator), + .tp_itemsize = 0, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_new = pyrf_pmu_iterator__new, + .tp_dealloc = (destructor) pyrf_pmu_iterator__dealloc, + .tp_iter = pyrf_pmu_iterator__iter, + .tp_iternext = pyrf_pmu_iterator__iternext, +}; + +static int pyrf_pmu_iterator__setup_types(void) +{ + return PyType_Ready(&pyrf_pmu_iterator__type); +} + +static PyObject *pyrf__pmus(PyObject *self, PyObject *args) +{ + // Calling the class creates an instance of the iterator. + return PyObject_CallObject((PyObject *) &pyrf_pmu_iterator__type, /*args=*/NULL); +} + +struct pyrf_counts_values { + PyObject_HEAD + + struct perf_counts_values values; +}; + +static const char pyrf_counts_values__doc[] = PyDoc_STR("perf counts values object."); + +static void pyrf_counts_values__delete(struct pyrf_counts_values *pcounts_values) +{ + Py_TYPE(pcounts_values)->tp_free((PyObject *)pcounts_values); +} + +#define counts_values_member_def(member, ptype, help) \ + { #member, ptype, \ + offsetof(struct pyrf_counts_values, values.member), \ + 0, help } + +static PyMemberDef pyrf_counts_values_members[] = { + counts_values_member_def(val, T_ULONG, "Value of event"), + counts_values_member_def(ena, T_ULONG, "Time for which enabled"), + counts_values_member_def(run, T_ULONG, "Time for which running"), + counts_values_member_def(id, T_ULONG, "Unique ID for an event"), + counts_values_member_def(lost, T_ULONG, "Num of lost samples"), + { .name = NULL, }, +}; + +static PyObject *pyrf_counts_values_get_values(struct pyrf_counts_values *self, void *closure) +{ + PyObject *vals = PyList_New(5); + + if (!vals) + return NULL; + for (int i = 0; i < 5; i++) + PyList_SetItem(vals, i, PyLong_FromLong(self->values.values[i])); + + return vals; +} + +static int pyrf_counts_values_set_values(struct pyrf_counts_values *self, PyObject *list, + void *closure) +{ + Py_ssize_t size; + PyObject *item = NULL; + + if (!PyList_Check(list)) { + PyErr_SetString(PyExc_TypeError, "Value assigned must be a list"); + return -1; + } + + size = PyList_Size(list); + for (Py_ssize_t i = 0; i < size; i++) { + item = PyList_GetItem(list, i); + if (!PyLong_Check(item)) { + PyErr_SetString(PyExc_TypeError, "List members should be numbers"); + return -1; + } + 
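		/*
		 * (values[] aliases the perf_counts_values union, so
		 * indices 0..4 land on val, ena, run, id and lost --
		 * the same five fields the member table above exposes
		 * individually.)
		 */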
self->values.values[i] = PyLong_AsLong(item); + } + + return 0; +} + +static PyGetSetDef pyrf_counts_values_getset[] = { + {"values", (getter)pyrf_counts_values_get_values, (setter)pyrf_counts_values_set_values, + "Name field", NULL}, + { .name = NULL, }, +}; + +static PyTypeObject pyrf_counts_values__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "perf.counts_values", + .tp_basicsize = sizeof(struct pyrf_counts_values), + .tp_dealloc = (destructor)pyrf_counts_values__delete, + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + .tp_doc = pyrf_counts_values__doc, + .tp_members = pyrf_counts_values_members, + .tp_getset = pyrf_counts_values_getset, +}; + +static int pyrf_counts_values__setup_types(void) +{ + pyrf_counts_values__type.tp_new = PyType_GenericNew; + return PyType_Ready(&pyrf_counts_values__type); +} + struct pyrf_evsel { PyObject_HEAD @@ -956,6 +1094,106 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel, return Py_None; } +static PyObject *pyrf_evsel__cpus(struct pyrf_evsel *pevsel) +{ + struct pyrf_cpu_map *pcpu_map = PyObject_New(struct pyrf_cpu_map, &pyrf_cpu_map__type); + + if (pcpu_map) + pcpu_map->cpus = perf_cpu_map__get(pevsel->evsel.core.cpus); + + return (PyObject *)pcpu_map; +} + +static PyObject *pyrf_evsel__threads(struct pyrf_evsel *pevsel) +{ + struct pyrf_thread_map *pthread_map = + PyObject_New(struct pyrf_thread_map, &pyrf_thread_map__type); + + if (pthread_map) + pthread_map->threads = perf_thread_map__get(pevsel->evsel.core.threads); + + return (PyObject *)pthread_map; +} + +/* + * Ensure evsel's counts and prev_raw_counts are allocated, the latter + * used by tool PMUs to compute the cumulative count as expected by + * stat's process_counter_values. + */ +static int evsel__ensure_counts(struct evsel *evsel) +{ + int nthreads, ncpus; + + if (evsel->counts != NULL) + return 0; + + nthreads = perf_thread_map__nr(evsel->core.threads); + ncpus = perf_cpu_map__nr(evsel->core.cpus); + + evsel->counts = perf_counts__new(ncpus, nthreads); + if (evsel->counts == NULL) + return -ENOMEM; + + evsel->prev_raw_counts = perf_counts__new(ncpus, nthreads); + if (evsel->prev_raw_counts == NULL) + return -ENOMEM; + + return 0; +} + +static PyObject *pyrf_evsel__read(struct pyrf_evsel *pevsel, + PyObject *args, PyObject *kwargs) +{ + struct evsel *evsel = &pevsel->evsel; + int cpu = 0, cpu_idx, thread = 0, thread_idx; + struct perf_counts_values *old_count, *new_count; + struct pyrf_counts_values *count_values = PyObject_New(struct pyrf_counts_values, + &pyrf_counts_values__type); + + if (!count_values) + return NULL; + + if (!PyArg_ParseTuple(args, "ii", &cpu, &thread)) + return NULL; + + cpu_idx = perf_cpu_map__idx(evsel->core.cpus, (struct perf_cpu){.cpu = cpu}); + if (cpu_idx < 0) { + PyErr_Format(PyExc_TypeError, "CPU %d is not part of evsel's CPUs", cpu); + return NULL; + } + thread_idx = perf_thread_map__idx(evsel->core.threads, thread); + if (thread_idx < 0) { + PyErr_Format(PyExc_TypeError, "Thread %d is not part of evsel's threads", + thread); + return NULL; + } + + if (evsel__ensure_counts(evsel)) + return PyErr_NoMemory(); + + /* Set up pointers to the old and newly read counter values. */ + old_count = perf_counts(evsel->prev_raw_counts, cpu_idx, thread_idx); + new_count = perf_counts(evsel->counts, cpu_idx, thread_idx); + /* Update the value in evsel->counts. */ + evsel__read_counter(evsel, cpu_idx, thread_idx); + /* Copy the value and turn it into the delta from old_count. 
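 * Each read() therefore reports what accumulated since the previous
 * read() on that (cpu, thread), much like `perf stat -I` interval
 * output: if the raw counter moved from 1000 to 1500 between two
 * calls, val comes back as 500, not 1500.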
*/ + count_values->values = *new_count; + count_values->values.val -= old_count->val; + count_values->values.ena -= old_count->ena; + count_values->values.run -= old_count->run; + /* Save the new count over the old_count for the next read. */ + *old_count = *new_count; + return (PyObject *)count_values; +} + +static PyObject *pyrf_evsel__str(PyObject *self) +{ + struct pyrf_evsel *pevsel = (void *)self; + struct evsel *evsel = &pevsel->evsel; + + return PyUnicode_FromFormat("evsel(%s/%s/)", evsel__pmu_name(evsel), evsel__name(evsel)); +} + static PyMethodDef pyrf_evsel__methods[] = { { .ml_name = "open", @@ -963,10 +1201,50 @@ static PyMethodDef pyrf_evsel__methods[] = { .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = PyDoc_STR("open the event selector file descriptor table.") }, + { + .ml_name = "cpus", + .ml_meth = (PyCFunction)pyrf_evsel__cpus, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("CPUs the event is to be used with.") + }, + { + .ml_name = "threads", + .ml_meth = (PyCFunction)pyrf_evsel__threads, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("threads the event is to be used with.") + }, + { + .ml_name = "read", + .ml_meth = (PyCFunction)pyrf_evsel__read, + .ml_flags = METH_VARARGS | METH_KEYWORDS, + .ml_doc = PyDoc_STR("read counters") + }, { .ml_name = NULL, } }; -static char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object."); +#define evsel_member_def(member, ptype, help) \ + { #member, ptype, \ + offsetof(struct pyrf_evsel, evsel.member), \ + 0, help } + +#define evsel_attr_member_def(member, ptype, help) \ + { #member, ptype, \ + offsetof(struct pyrf_evsel, evsel.core.attr.member), \ + 0, help } + +static PyMemberDef pyrf_evsel__members[] = { + evsel_member_def(tracking, T_BOOL, "tracking event."), + evsel_attr_member_def(type, T_UINT, "attribute type."), + evsel_attr_member_def(size, T_UINT, "attribute size."), + evsel_attr_member_def(config, T_ULONGLONG, "attribute config."), + evsel_attr_member_def(sample_period, T_ULONGLONG, "attribute sample_period."), + evsel_attr_member_def(sample_type, T_ULONGLONG, "attribute sample_type."), + evsel_attr_member_def(read_format, T_ULONGLONG, "attribute read_format."), + evsel_attr_member_def(wakeup_events, T_UINT, "attribute wakeup_events."), + { .name = NULL, }, +}; + +static const char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object."); static PyTypeObject pyrf_evsel__type = { PyVarObject_HEAD_INIT(NULL, 0) @@ -975,8 +1253,11 @@ static PyTypeObject pyrf_evsel__type = { .tp_dealloc = (destructor)pyrf_evsel__delete, .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, .tp_doc = pyrf_evsel__doc, + .tp_members = pyrf_evsel__members, .tp_methods = pyrf_evsel__methods, .tp_init = (initproc)pyrf_evsel__init, + .tp_str = pyrf_evsel__str, + .tp_repr = pyrf_evsel__str, }; static int pyrf_evsel__setup_types(void) @@ -1013,6 +1294,188 @@ static void pyrf_evlist__delete(struct pyrf_evlist *pevlist) Py_TYPE(pevlist)->tp_free((PyObject*)pevlist); } +static PyObject *pyrf_evlist__all_cpus(struct pyrf_evlist *pevlist) +{ + struct pyrf_cpu_map *pcpu_map = PyObject_New(struct pyrf_cpu_map, &pyrf_cpu_map__type); + + if (pcpu_map) + pcpu_map->cpus = perf_cpu_map__get(pevlist->evlist.core.all_cpus); + + return (PyObject *)pcpu_map; +} + +static PyObject *pyrf_evlist__metrics(struct pyrf_evlist *pevlist) +{ + PyObject *list = PyList_New(/*len=*/0); + struct rb_node *node; + + if (!list) + return NULL; + + for (node = rb_first_cached(&pevlist->evlist.metric_events.entries); node; + node = rb_next(node)) { + struct 
metric_event *me = container_of(node, struct metric_event, nd); + struct list_head *pos; + + list_for_each(pos, &me->head) { + struct metric_expr *expr = container_of(pos, struct metric_expr, nd); + PyObject *str = PyUnicode_FromString(expr->metric_name); + + if (!str || PyList_Append(list, str) != 0) { + Py_DECREF(list); + return NULL; + } + Py_DECREF(str); + } + } + return list; +} + +static int prepare_metric(const struct metric_expr *mexp, + const struct evsel *evsel, + struct expr_parse_ctx *pctx, + int cpu_idx, int thread_idx) +{ + struct evsel * const *metric_events = mexp->metric_events; + struct metric_ref *metric_refs = mexp->metric_refs; + + for (int i = 0; metric_events[i]; i++) { + struct evsel *cur = metric_events[i]; + double val, ena, run; + int ret, source_count = 0; + struct perf_counts_values *old_count, *new_count; + char *n = strdup(evsel__metric_id(cur)); + + if (!n) + return -ENOMEM; + + /* + * If there are multiple uncore PMUs and we're not reading the + * leader's stats, determine the stats for the appropriate + * uncore PMU. + */ + if (evsel && evsel->metric_leader && + evsel->pmu != evsel->metric_leader->pmu && + cur->pmu == evsel->metric_leader->pmu) { + struct evsel *pos; + + evlist__for_each_entry(evsel->evlist, pos) { + if (pos->pmu != evsel->pmu) + continue; + if (pos->metric_leader != cur) + continue; + cur = pos; + source_count = 1; + break; + } + } + + if (source_count == 0) + source_count = evsel__source_count(cur); + + ret = evsel__ensure_counts(cur); + if (ret) + return ret; + + /* Set up pointers to the old and newly read counter values. */ + old_count = perf_counts(cur->prev_raw_counts, cpu_idx, thread_idx); + new_count = perf_counts(cur->counts, cpu_idx, thread_idx); + /* Update the value in cur->counts. */ + evsel__read_counter(cur, cpu_idx, thread_idx); + + val = new_count->val - old_count->val; + ena = new_count->ena - old_count->ena; + run = new_count->run - old_count->run; + + if (ena != run && run != 0) + val = val * ena / run; + ret = expr__add_id_val_source_count(pctx, n, val, source_count); + if (ret) + return ret; + } + + for (int i = 0; metric_refs && metric_refs[i].metric_name; i++) { + int ret = expr__add_ref(pctx, &metric_refs[i]); + + if (ret) + return ret; + } + + return 0; +} + +static PyObject *pyrf_evlist__compute_metric(struct pyrf_evlist *pevlist, + PyObject *args, PyObject *kwargs) +{ + int ret, cpu = 0, cpu_idx = 0, thread = 0, thread_idx = 0; + const char *metric; + struct rb_node *node; + struct metric_expr *mexp = NULL; + struct expr_parse_ctx *pctx; + double result = 0; + struct evsel *metric_evsel = NULL; + + if (!PyArg_ParseTuple(args, "sii", &metric, &cpu, &thread)) + return NULL; + + for (node = rb_first_cached(&pevlist->evlist.metric_events.entries); + mexp == NULL && node; + node = rb_next(node)) { + struct metric_event *me = container_of(node, struct metric_event, nd); + struct list_head *pos; + + list_for_each(pos, &me->head) { + struct metric_expr *e = container_of(pos, struct metric_expr, nd); + struct evsel *pos2; + + if (strcmp(e->metric_name, metric)) + continue; + + if (e->metric_events[0] == NULL) + continue; + + evlist__for_each_entry(&pevlist->evlist, pos2) { + if (pos2->metric_leader != e->metric_events[0]) + continue; + cpu_idx = perf_cpu_map__idx(pos2->core.cpus, + (struct perf_cpu){.cpu = cpu}); + if (cpu_idx < 0) + continue; + + thread_idx = perf_thread_map__idx(pos2->core.threads, thread); + if (thread_idx < 0) + continue; + metric_evsel = pos2; + mexp = e; + goto done; + } + } + } +done: + if 
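/*
 * (prepare_metric() above rescales multiplexed counts: with
 * ena = 2000000 and run = 1000000 the event was scheduled in for only
 * half of the enabled window, so a raw delta of 300 is scaled by
 * ena / run to an estimated 600 before being fed into the expression
 * context.)
 */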
(!mexp) { + PyErr_Format(PyExc_TypeError, "Unknown metric '%s' for CPU '%d' and thread '%d'", + metric, cpu, thread); + return NULL; + } + + pctx = expr__ctx_new(); + if (!pctx) + return PyErr_NoMemory(); + + ret = prepare_metric(mexp, metric_evsel, pctx, cpu_idx, thread_idx); + if (ret) { + expr__ctx_free(pctx); + errno = -ret; + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + if (expr__parse(&result, pctx, mexp->metric_expr)) + result = 0.0; + + expr__ctx_free(pctx); + return PyFloat_FromDouble(result); +} + static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist, PyObject *args, PyObject *kwargs) { @@ -1062,17 +1525,8 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, for (i = 0; i < evlist->core.pollfd.nr; ++i) { PyObject *file; -#if PY_MAJOR_VERSION < 3 - FILE *fp = fdopen(evlist->core.pollfd.entries[i].fd, "r"); - - if (fp == NULL) - goto free_list; - - file = PyFile_FromFile(fp, "perf", "r", NULL); -#else file = PyFile_FromFd(evlist->core.pollfd.entries[i].fd, "perf", "r", -1, NULL, NULL, NULL, 0); -#endif if (file == NULL) goto free_list; @@ -1138,8 +1592,10 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, return NULL; md = get_md(evlist, cpu); - if (!md) + if (!md) { + PyErr_Format(PyExc_TypeError, "Unknown CPU '%d'", cpu); return NULL; + } if (perf_mmap__read_init(&md->core) < 0) goto end; @@ -1155,20 +1611,22 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, evsel = evlist__event2evsel(evlist, event); if (!evsel) { + Py_DECREF(pyevent); Py_INCREF(Py_None); return Py_None; } pevent->evsel = evsel; - err = evsel__parse_sample(evsel, event, &pevent->sample); - - /* Consume the even only after we parsed it out. */ perf_mmap__consume(&md->core); - if (err) + err = evsel__parse_sample(evsel, &pevent->event, &pevent->sample); + if (err) { + Py_DECREF(pyevent); return PyErr_Format(PyExc_OSError, "perf: can't parse sample, err=%d", err); + } + return pyevent; } end: @@ -1190,8 +1648,75 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist, return Py_None; } +static PyObject *pyrf_evlist__close(struct pyrf_evlist *pevlist) +{ + struct evlist *evlist = &pevlist->evlist; + + evlist__close(evlist); + + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject *pyrf_evlist__config(struct pyrf_evlist *pevlist) +{ + struct record_opts opts = { + .sample_time = true, + .mmap_pages = UINT_MAX, + .user_freq = UINT_MAX, + .user_interval = ULLONG_MAX, + .freq = 4000, + .target = { + .uses_mmap = true, + .default_per_cpu = true, + }, + .nr_threads_synthesize = 1, + .ctl_fd = -1, + .ctl_fd_ack = -1, + .no_buffering = true, + .no_inherit = true, + }; + struct evlist *evlist = &pevlist->evlist; + + evlist__config(evlist, &opts, &callchain_param); + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject *pyrf_evlist__disable(struct pyrf_evlist *pevlist) +{ + evlist__disable(&pevlist->evlist); + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject *pyrf_evlist__enable(struct pyrf_evlist *pevlist) +{ + evlist__enable(&pevlist->evlist); + Py_INCREF(Py_None); + return Py_None; +} + static PyMethodDef pyrf_evlist__methods[] = { { + .ml_name = "all_cpus", + .ml_meth = (PyCFunction)pyrf_evlist__all_cpus, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("CPU map union of all evsel CPU maps.") + }, + { + .ml_name = "metrics", + .ml_meth = (PyCFunction)pyrf_evlist__metrics, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("List of metric names within the evlist.") + }, + { + .ml_name = "compute_metric", 
+ .ml_meth = (PyCFunction)pyrf_evlist__compute_metric, + .ml_flags = METH_VARARGS | METH_KEYWORDS, + .ml_doc = PyDoc_STR("compute metric for given name, cpu and thread") + }, + { .ml_name = "mmap", .ml_meth = (PyCFunction)pyrf_evlist__mmap, .ml_flags = METH_VARARGS | METH_KEYWORDS, @@ -1204,6 +1729,12 @@ static PyMethodDef pyrf_evlist__methods[] = { .ml_doc = PyDoc_STR("open the file descriptors.") }, { + .ml_name = "close", + .ml_meth = (PyCFunction)pyrf_evlist__close, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("close the file descriptors.") + }, + { .ml_name = "poll", .ml_meth = (PyCFunction)pyrf_evlist__poll, .ml_flags = METH_VARARGS | METH_KEYWORDS, @@ -1227,6 +1758,24 @@ static PyMethodDef pyrf_evlist__methods[] = { .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = PyDoc_STR("reads an event.") }, + { + .ml_name = "config", + .ml_meth = (PyCFunction)pyrf_evlist__config, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("Apply default record options to the evlist.") + }, + { + .ml_name = "disable", + .ml_meth = (PyCFunction)pyrf_evlist__disable, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("Disable the evsels in the evlist.") + }, + { + .ml_name = "enable", + .ml_meth = (PyCFunction)pyrf_evlist__enable, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("Enable the evsels in the evlist.") + }, { .ml_name = NULL, } }; @@ -1242,8 +1791,10 @@ static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i) struct pyrf_evlist *pevlist = (void *)obj; struct evsel *pos; - if (i >= pevlist->evlist.core.nr_entries) + if (i >= pevlist->evlist.core.nr_entries) { + PyErr_SetString(PyExc_IndexError, "Index out of range"); return NULL; + } evlist__for_each_entry(&pevlist->evlist, pos) { if (i-- == 0) @@ -1253,12 +1804,36 @@ static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i) return Py_BuildValue("O", container_of(pos, struct pyrf_evsel, evsel)); } +static PyObject *pyrf_evlist__str(PyObject *self) +{ + struct pyrf_evlist *pevlist = (void *)self; + struct evsel *pos; + struct strbuf sb = STRBUF_INIT; + bool first = true; + PyObject *result; + + strbuf_addstr(&sb, "evlist(["); + evlist__for_each_entry(&pevlist->evlist, pos) { + if (!first) + strbuf_addch(&sb, ','); + if (!pos->pmu) + strbuf_addstr(&sb, evsel__name(pos)); + else + strbuf_addf(&sb, "%s/%s/", pos->pmu->name, evsel__name(pos)); + first = false; + } + strbuf_addstr(&sb, "])"); + result = PyUnicode_FromString(sb.buf); + strbuf_release(&sb); + return result; +} + static PySequenceMethods pyrf_evlist__sequence_methods = { .sq_length = pyrf_evlist__length, .sq_item = pyrf_evlist__item, }; -static char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object."); +static const char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object."); static PyTypeObject pyrf_evlist__type = { PyVarObject_HEAD_INIT(NULL, 0) @@ -1270,6 +1845,8 @@ static PyTypeObject pyrf_evlist__type = { .tp_doc = pyrf_evlist__doc, .tp_methods = pyrf_evlist__methods, .tp_init = (initproc)pyrf_evlist__init, + .tp_repr = pyrf_evlist__str, + .tp_str = pyrf_evlist__str, }; static int pyrf_evlist__setup_types(void) @@ -1280,10 +1857,12 @@ static int pyrf_evlist__setup_types(void) #define PERF_CONST(name) { #name, PERF_##name } -static struct { +struct perf_constant { const char *name; int value; -} perf__constants[] = { +}; + +static const struct perf_constant perf__constants[] = { PERF_CONST(TYPE_HARDWARE), PERF_CONST(TYPE_SOFTWARE), PERF_CONST(TYPE_TRACEPOINT), @@ -1364,10 +1943,6 @@ static struct { static PyObject *pyrf__tracepoint(struct 
pyrf_evsel *pevsel, PyObject *args, PyObject *kwargs) { -#ifndef HAVE_LIBTRACEEVENT - return NULL; -#else - struct tep_event *tp_format; static char *kwlist[] = { "sys", "name", NULL }; char *sys = NULL; char *name = NULL; @@ -1376,36 +1951,296 @@ static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel, &sys, &name)) return NULL; - tp_format = trace_event__tp_format(sys, name); - if (IS_ERR(tp_format)) - return _PyLong_FromLong(-1); + return PyLong_FromLong(tp_pmu__id(sys, name)); +} + +static PyObject *pyrf_evsel__from_evsel(struct evsel *evsel) +{ + struct pyrf_evsel *pevsel = PyObject_New(struct pyrf_evsel, &pyrf_evsel__type); + + if (!pevsel) + return NULL; + + memset(&pevsel->evsel, 0, sizeof(pevsel->evsel)); + evsel__init(&pevsel->evsel, &evsel->core.attr, evsel->core.idx); + + evsel__clone(&pevsel->evsel, evsel); + if (evsel__is_group_leader(evsel)) + evsel__set_leader(&pevsel->evsel, &pevsel->evsel); + return (PyObject *)pevsel; +} + +static int evlist__pos(struct evlist *evlist, struct evsel *evsel) +{ + struct evsel *pos; + int idx = 0; + + evlist__for_each_entry(evlist, pos) { + if (evsel == pos) + return idx; + idx++; + } + return -1; +} + +static struct evsel *evlist__at(struct evlist *evlist, int idx) +{ + struct evsel *pos; + int idx2 = 0; + + evlist__for_each_entry(evlist, pos) { + if (idx == idx2) + return pos; + idx2++; + } + return NULL; +} + +static PyObject *pyrf_evlist__from_evlist(struct evlist *evlist) +{ + struct pyrf_evlist *pevlist = PyObject_New(struct pyrf_evlist, &pyrf_evlist__type); + struct evsel *pos; + struct rb_node *node; + + if (!pevlist) + return NULL; + + memset(&pevlist->evlist, 0, sizeof(pevlist->evlist)); + evlist__init(&pevlist->evlist, evlist->core.all_cpus, evlist->core.threads); + evlist__for_each_entry(evlist, pos) { + struct pyrf_evsel *pevsel = (void *)pyrf_evsel__from_evsel(pos); + + evlist__add(&pevlist->evlist, &pevsel->evsel); + } + evlist__for_each_entry(&pevlist->evlist, pos) { + struct evsel *leader = evsel__leader(pos); + + if (pos != leader) { + int idx = evlist__pos(evlist, leader); + + if (idx >= 0) + evsel__set_leader(pos, evlist__at(&pevlist->evlist, idx)); + else if (leader == NULL) + evsel__set_leader(pos, pos); + } + + leader = pos->metric_leader; + + if (pos != leader) { + int idx = evlist__pos(evlist, leader); + + if (idx >= 0) + pos->metric_leader = evlist__at(&pevlist->evlist, idx); + else if (leader == NULL) + pos->metric_leader = pos; + } + } + metricgroup__copy_metric_events(&pevlist->evlist, /*cgrp=*/NULL, + &pevlist->evlist.metric_events, + &evlist->metric_events); + for (node = rb_first_cached(&pevlist->evlist.metric_events.entries); node; + node = rb_next(node)) { + struct metric_event *me = container_of(node, struct metric_event, nd); + struct list_head *mpos; + int idx = evlist__pos(evlist, me->evsel); + + if (idx >= 0) + me->evsel = evlist__at(&pevlist->evlist, idx); + list_for_each(mpos, &me->head) { + struct metric_expr *e = container_of(mpos, struct metric_expr, nd); + + for (int j = 0; e->metric_events[j]; j++) { + idx = evlist__pos(evlist, e->metric_events[j]); + if (idx >= 0) + e->metric_events[j] = evlist__at(&pevlist->evlist, idx); + } + } + } + return (PyObject *)pevlist; +} + +static PyObject *pyrf__parse_events(PyObject *self, PyObject *args) +{ + const char *input; + struct evlist evlist = {}; + struct parse_events_error err; + PyObject *result; + PyObject *pcpus = NULL, *pthreads = NULL; + struct perf_cpu_map *cpus; + struct perf_thread_map *threads; + + if (!PyArg_ParseTuple(args, "s|OO", 
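/*
 * ("s|OO" means one required event string followed by two optional
 * objects -- a pyrf_cpu_map and a pyrf_thread_map -- mirroring the
 * evlist, cpus and threads that parse_events() operates on.)
 */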
&input, &pcpus, &pthreads)) + return NULL; + + threads = pthreads ? ((struct pyrf_thread_map *)pthreads)->threads : NULL; + cpus = pcpus ? ((struct pyrf_cpu_map *)pcpus)->cpus : NULL; - return _PyLong_FromLong(tp_format->id); -#endif // HAVE_LIBTRACEEVENT + parse_events_error__init(&err); + evlist__init(&evlist, cpus, threads); + if (parse_events(&evlist, input, &err)) { + parse_events_error__print(&err, input); + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + result = pyrf_evlist__from_evlist(&evlist); + evlist__exit(&evlist); + return result; +} + +static PyObject *pyrf__parse_metrics(PyObject *self, PyObject *args) +{ + const char *input, *pmu = NULL; + struct evlist evlist = {}; + PyObject *result; + PyObject *pcpus = NULL, *pthreads = NULL; + struct perf_cpu_map *cpus; + struct perf_thread_map *threads; + int ret; + + if (!PyArg_ParseTuple(args, "s|sOO", &input, &pmu, &pcpus, &pthreads)) + return NULL; + + threads = pthreads ? ((struct pyrf_thread_map *)pthreads)->threads : NULL; + cpus = pcpus ? ((struct pyrf_cpu_map *)pcpus)->cpus : NULL; + + evlist__init(&evlist, cpus, threads); + ret = metricgroup__parse_groups(&evlist, pmu ?: "all", input, + /*metric_no_group=*/ false, + /*metric_no_merge=*/ false, + /*metric_no_threshold=*/ true, + /*user_requested_cpu_list=*/ NULL, + /*system_wide=*/true, + /*hardware_aware_grouping=*/ false); + if (ret) { + errno = -ret; + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + result = pyrf_evlist__from_evlist(&evlist); + evlist__exit(&evlist); + return result; +} + +static PyObject *pyrf__metrics_groups(const struct pmu_metric *pm) +{ + PyObject *groups = PyList_New(/*len=*/0); + const char *mg = pm->metric_group; + + if (!groups) + return NULL; + + while (mg) { + PyObject *val = NULL; + const char *sep = strchr(mg, ';'); + size_t len = sep ? (size_t)(sep - mg) : strlen(mg); + + if (len > 0) { + val = PyUnicode_FromStringAndSize(mg, len); + if (val) + PyList_Append(groups, val); + + Py_XDECREF(val); + } + mg = sep ? sep + 1 : NULL; + } + return groups; +} + +static int pyrf__metrics_cb(const struct pmu_metric *pm, + const struct pmu_metrics_table *table __maybe_unused, + void *vdata) +{ + PyObject *py_list = vdata; + PyObject *dict = PyDict_New(); + PyObject *key = dict ? PyUnicode_FromString("MetricGroup") : NULL; + PyObject *value = key ? 
pyrf__metrics_groups(pm) : NULL; + + if (!value || PyDict_SetItem(dict, key, value) != 0) { + Py_XDECREF(key); + Py_XDECREF(value); + Py_XDECREF(dict); + return -ENOMEM; + } + + if (!add_to_dict(dict, "MetricName", pm->metric_name) || + !add_to_dict(dict, "PMU", pm->pmu) || + !add_to_dict(dict, "MetricExpr", pm->metric_expr) || + !add_to_dict(dict, "MetricThreshold", pm->metric_threshold) || + !add_to_dict(dict, "ScaleUnit", pm->unit) || + !add_to_dict(dict, "Compat", pm->compat) || + !add_to_dict(dict, "BriefDescription", pm->desc) || + !add_to_dict(dict, "PublicDescription", pm->long_desc) || + PyList_Append(py_list, dict) != 0) { + Py_DECREF(dict); + return -ENOMEM; + } + Py_DECREF(dict); + return 0; +} + +static PyObject *pyrf__metrics(PyObject *self, PyObject *args) +{ + const struct pmu_metrics_table *table = pmu_metrics_table__find(); + PyObject *list = PyList_New(/*len=*/0); + int ret; + + if (!list) + return NULL; + + ret = pmu_metrics_table__for_each_metric(table, pyrf__metrics_cb, list); + if (!ret) + ret = pmu_for_each_sys_metric(pyrf__metrics_cb, list); + + if (ret) { + Py_DECREF(list); + errno = -ret; + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + return list; } static PyMethodDef perf__methods[] = { { + .ml_name = "metrics", + .ml_meth = (PyCFunction) pyrf__metrics, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR( + "Returns a list of metrics represented as string values in dictionaries.") + }, + { .ml_name = "tracepoint", .ml_meth = (PyCFunction) pyrf__tracepoint, .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = PyDoc_STR("Get tracepoint config.") }, + { + .ml_name = "parse_events", + .ml_meth = (PyCFunction) pyrf__parse_events, + .ml_flags = METH_VARARGS, + .ml_doc = PyDoc_STR("Parse a string of events and return an evlist.") + }, + { + .ml_name = "parse_metrics", + .ml_meth = (PyCFunction) pyrf__parse_metrics, + .ml_flags = METH_VARARGS, + .ml_doc = PyDoc_STR( + "Parse a string of metrics or metric groups and return an evlist.") + }, + { + .ml_name = "pmus", + .ml_meth = (PyCFunction) pyrf__pmus, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("Returns a sequence of pmus.") + }, { .ml_name = NULL, } }; -#if PY_MAJOR_VERSION < 3 -PyMODINIT_FUNC initperf(void) -#else PyMODINIT_FUNC PyInit_perf(void) -#endif { PyObject *obj; int i; PyObject *dict; -#if PY_MAJOR_VERSION < 3 - PyObject *module = Py_InitModule("perf", perf__methods); -#else static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "perf", /* m_name */ @@ -1418,19 +2253,17 @@ PyMODINIT_FUNC PyInit_perf(void) NULL, /* m_free */ }; PyObject *module = PyModule_Create(&moduledef); -#endif if (module == NULL || pyrf_event__setup_types() < 0 || pyrf_evlist__setup_types() < 0 || pyrf_evsel__setup_types() < 0 || pyrf_thread_map__setup_types() < 0 || - pyrf_cpu_map__setup_types() < 0) -#if PY_MAJOR_VERSION < 3 - return; -#else + pyrf_cpu_map__setup_types() < 0 || + pyrf_pmu_iterator__setup_types() < 0 || + pyrf_pmu__setup_types() < 0 || + pyrf_counts_values__setup_types() < 0) return module; -#endif /* The page_size is placed in util object. 
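 *
 * Illustrative usage sketch (editor's addition, not part of the patch):
 * with the methods registered in perf__methods above, the module might
 * be driven from Python roughly as below; the event string and the
 * workload placeholder are assumptions for illustration:
 *
 *   import perf
 *   evlist = perf.parse_events("cycles,instructions")
 *   evlist.open()
 *   evlist.enable()
 *   # ... run the workload ...
 *   evlist.disable()
 *   evlist.close()
 *   for metric in perf.metrics():
 *       print(metric["MetricName"], metric.get("MetricExpr"))
 *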
*/ page_size = sysconf(_SC_PAGE_SIZE); @@ -1474,12 +2307,15 @@ PyMODINIT_FUNC PyInit_perf(void) Py_INCREF(&pyrf_cpu_map__type); PyModule_AddObject(module, "cpu_map", (PyObject*)&pyrf_cpu_map__type); + Py_INCREF(&pyrf_counts_values__type); + PyModule_AddObject(module, "counts_values", (PyObject *)&pyrf_counts_values__type); + dict = PyModule_GetDict(module); if (dict == NULL) goto error; for (i = 0; perf__constants[i].name != NULL; i++) { - obj = _PyLong_FromLong(perf__constants[i].value); + obj = PyLong_FromLong(perf__constants[i].value); if (obj == NULL) goto error; PyDict_SetItemString(dict, perf__constants[i].name, obj); @@ -1489,20 +2325,5 @@ PyMODINIT_FUNC PyInit_perf(void) error: if (PyErr_Occurred()) PyErr_SetString(PyExc_ImportError, "perf: Init failed!"); -#if PY_MAJOR_VERSION >= 3 return module; -#endif -} - -/* - * Dummy, to avoid dragging all the test_attr infrastructure in the python - * binding. - */ -void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, - int fd, int group_fd, unsigned long flags) -{ -} - -void evlist__free_stats(struct evlist *evlist) -{ } diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h deleted file mode 100644 index 376e86cb4c3c..000000000000 --- a/tools/perf/util/rb_resort.h +++ /dev/null @@ -1,151 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PERF_RESORT_RB_H_ -#define _PERF_RESORT_RB_H_ -/* - * Template for creating a class to resort an existing rb_tree according to - * a new sort criteria, that must be present in the entries of the source - * rb_tree. - * - * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Quick example, resorting threads by its shortname: - * - * First define the prefix (threads) to be used for the functions and data - * structures created, and provide an expression for the sorting, then the - * fields to be present in each of the entries in the new, sorted, rb_tree. - * - * The body of the init function should collect the fields, maybe - * pre-calculating them from multiple entries in the original 'entry' from - * the rb_tree used as a source for the entries to be sorted: - -DEFINE_RB_RESORT_RB(threads, strcmp(a->thread->shortname, - b->thread->shortname) < 0, - struct thread *thread; -) -{ - entry->thread = rb_entry(nd, struct thread, rb_node); -} - - * After this it is just a matter of instantiating it and iterating it, - * for a few data structures with existing rb_trees, such as 'struct machine', - * helpers are available to get the rb_root and the nr_entries: - - DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr); - - * This will instantiate the new rb_tree and a cursor for it, that can be used as: - - struct rb_node *nd; - - resort_rb__for_each_entry(nd, threads) { - struct thread *t = threads_entry; - printf("%s: %d\n", t->shortname, t->tid); - } - - * Then delete it: - - resort_rb__delete(threads); - - * The name of the data structures and functions will have a _sorted suffix - * right before the method names, i.e. will look like: - * - * struct threads_sorted_entry {} - * threads_sorted__insert() - */ - -#define DEFINE_RESORT_RB(__name, __comp, ...) 
\ -struct __name##_sorted_entry { \ - struct rb_node rb_node; \ - __VA_ARGS__ \ -}; \ -static void __name##_sorted__init_entry(struct rb_node *nd, \ - struct __name##_sorted_entry *entry); \ - \ -static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb) \ -{ \ - struct __name##_sorted_entry *a, *b; \ - a = rb_entry(nda, struct __name##_sorted_entry, rb_node); \ - b = rb_entry(ndb, struct __name##_sorted_entry, rb_node); \ - return __comp; \ -} \ - \ -struct __name##_sorted { \ - struct rb_root entries; \ - struct __name##_sorted_entry nd[0]; \ -}; \ - \ -static void __name##_sorted__insert(struct __name##_sorted *sorted, \ - struct rb_node *sorted_nd) \ -{ \ - struct rb_node **p = &sorted->entries.rb_node, *parent = NULL; \ - while (*p != NULL) { \ - parent = *p; \ - if (__name##_sorted__cmp(sorted_nd, parent)) \ - p = &(*p)->rb_left; \ - else \ - p = &(*p)->rb_right; \ - } \ - rb_link_node(sorted_nd, parent, p); \ - rb_insert_color(sorted_nd, &sorted->entries); \ -} \ - \ -static void __name##_sorted__sort(struct __name##_sorted *sorted, \ - struct rb_root *entries) \ -{ \ - struct rb_node *nd; \ - unsigned int i = 0; \ - for (nd = rb_first(entries); nd; nd = rb_next(nd)) { \ - struct __name##_sorted_entry *snd = &sorted->nd[i++]; \ - __name##_sorted__init_entry(nd, snd); \ - __name##_sorted__insert(sorted, &snd->rb_node); \ - } \ -} \ - \ -static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries, \ - int nr_entries) \ -{ \ - struct __name##_sorted *sorted; \ - sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries); \ - if (sorted) { \ - sorted->entries = RB_ROOT; \ - __name##_sorted__sort(sorted, entries); \ - } \ - return sorted; \ -} \ - \ -static void __name##_sorted__delete(struct __name##_sorted *sorted) \ -{ \ - free(sorted); \ -} \ - \ -static void __name##_sorted__init_entry(struct rb_node *nd, \ - struct __name##_sorted_entry *entry) - -#define DECLARE_RESORT_RB(__name) \ -struct __name##_sorted_entry *__name##_entry; \ -struct __name##_sorted *__name = __name##_sorted__new - -#define resort_rb__for_each_entry(__nd, __name) \ - for (__nd = rb_first(&__name->entries); \ - __name##_entry = rb_entry(__nd, struct __name##_sorted_entry, \ - rb_node), __nd; \ - __nd = rb_next(__nd)) - -#define resort_rb__delete(__name) \ - __name##_sorted__delete(__name), __name = NULL - -/* - * Helpers for other classes that contains both an rbtree and the - * number of entries in it: - */ - -/* For 'struct intlist' */ -#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \ - DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root, \ - __ilist->rblist.nr_entries) - -/* For 'struct machine->threads' */ -#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \ - DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \ - __machine->threads[hash_bucket].nr) - -#endif /* _PERF_RESORT_RB_H_ */ diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 9eb5c6a08999..e867de8ddaaa 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -237,8 +237,8 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str) evsel = evlist__last(temp_evlist); - if (!evlist || perf_cpu_map__empty(evlist->core.user_requested_cpus)) { - struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); + if (!evlist || perf_cpu_map__is_any_cpu_or_is_empty(evlist->core.user_requested_cpus)) { + struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); if (cpus) cpu = perf_cpu_map__cpu(cpus, 0); diff --git 
a/tools/perf/util/record.h b/tools/perf/util/record.h index a6566134e09e..ea3a6c4657ee 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -28,6 +28,7 @@ struct record_opts { bool sample_time_set; bool sample_cpu; bool sample_identifier; + bool sample_data_src; bool period; bool period_set; bool running_time; @@ -79,6 +80,7 @@ struct record_opts { int synth; int threads_spec; const char *threads_user_spec; + u64 off_cpu_thresh_ns; }; extern const char * const *record_usage; diff --git a/tools/perf/util/rlimit.c b/tools/perf/util/rlimit.c index 13521d392a22..f857405fe1aa 100644 --- a/tools/perf/util/rlimit.c +++ b/tools/perf/util/rlimit.c @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1 */ +#include <errno.h> #include "util/debug.h" #include "util/rlimit.h" #include <sys/time.h> @@ -27,3 +28,30 @@ void rlimit__bump_memlock(void) } } } + +bool rlimit__increase_nofile(enum rlimit_action *set_rlimit) +{ + int old_errno; + struct rlimit l; + + if (*set_rlimit < INCREASED_MAX) { + old_errno = errno; + + if (getrlimit(RLIMIT_NOFILE, &l) == 0) { + if (*set_rlimit == NO_CHANGE) { + l.rlim_cur = l.rlim_max; + } else { + l.rlim_cur = l.rlim_max + 1000; + l.rlim_max = l.rlim_cur; + } + if (setrlimit(RLIMIT_NOFILE, &l) == 0) { + (*set_rlimit) += 1; + errno = old_errno; + return true; + } + } + errno = old_errno; + } + + return false; +} diff --git a/tools/perf/util/rlimit.h b/tools/perf/util/rlimit.h index 9f59d8e710a3..19050d7fb9d7 100644 --- a/tools/perf/util/rlimit.h +++ b/tools/perf/util/rlimit.h @@ -1,6 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1 */ #ifndef __PERF_RLIMIT_H_ #define __PERF_RLIMIT_H_ -/* SPDX-License-Identifier: LGPL-2.1 */ + +enum rlimit_action { + NO_CHANGE, + SET_TO_MAX, + INCREASED_MAX +}; void rlimit__bump_memlock(void); + +bool rlimit__increase_nofile(enum rlimit_action *set_rlimit); + #endif // __PERF_RLIMIT_H_ diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c index f3d29d8ddc99..9d26832398db 100644 --- a/tools/perf/util/rwsem.c +++ b/tools/perf/util/rwsem.c @@ -2,32 +2,70 @@ #include "util.h" #include "rwsem.h" +#if RWS_ERRORCHECK +#include "mutex.h" +#endif + int init_rwsem(struct rw_semaphore *sem) { +#if RWS_ERRORCHECK + mutex_init(&sem->mtx); + return 0; +#else return pthread_rwlock_init(&sem->lock, NULL); +#endif } int exit_rwsem(struct rw_semaphore *sem) { +#if RWS_ERRORCHECK + mutex_destroy(&sem->mtx); + return 0; +#else return pthread_rwlock_destroy(&sem->lock); +#endif } int down_read(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { +#if RWS_ERRORCHECK + mutex_lock(&sem->mtx); + return 0; +#else return perf_singlethreaded ? 0 : pthread_rwlock_rdlock(&sem->lock); +#endif } int up_read(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { +#if RWS_ERRORCHECK + mutex_unlock(&sem->mtx); + return 0; +#else return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock); +#endif } int down_write(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { +#if RWS_ERRORCHECK + mutex_lock(&sem->mtx); + return 0; +#else return perf_singlethreaded ? 0 : pthread_rwlock_wrlock(&sem->lock); +#endif } int up_write(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { +#if RWS_ERRORCHECK + mutex_unlock(&sem->mtx); + return 0; +#else return perf_singlethreaded ? 
0 : pthread_rwlock_unlock(&sem->lock); +#endif } diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h index 94565ad4d494..b102d8143181 100644 --- a/tools/perf/util/rwsem.h +++ b/tools/perf/util/rwsem.h @@ -2,18 +2,29 @@ #define _PERF_RWSEM_H #include <pthread.h> +#include "mutex.h" -struct rw_semaphore { +/* + * Mutexes have additional error checking. Enable to use a mutex rather than a + * rwlock for debugging. + */ +#define RWS_ERRORCHECK 0 + +struct LOCKABLE rw_semaphore { +#if RWS_ERRORCHECK + struct mutex mtx; +#else pthread_rwlock_t lock; +#endif }; int init_rwsem(struct rw_semaphore *sem); int exit_rwsem(struct rw_semaphore *sem); -int down_read(struct rw_semaphore *sem); -int up_read(struct rw_semaphore *sem); +int down_read(struct rw_semaphore *sem) SHARED_LOCK_FUNCTION(sem); +int up_read(struct rw_semaphore *sem) UNLOCK_FUNCTION(sem); -int down_write(struct rw_semaphore *sem); -int up_write(struct rw_semaphore *sem); +int down_write(struct rw_semaphore *sem) EXCLUSIVE_LOCK_FUNCTION(sem); +int up_write(struct rw_semaphore *sem) UNLOCK_FUNCTION(sem); #endif /* _PERF_RWSEM_H */ diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h index f55ca07f3ca1..74b36644e384 100644 --- a/tools/perf/util/s390-cpumcf-kernel.h +++ b/tools/perf/util/s390-cpumcf-kernel.h @@ -12,6 +12,8 @@ #define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */ #define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */ #define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */ +#define PERF_EVENT_PAI_CRYPTO_ALL 0x1000 /* Event: CRYPTO_ALL */ +#define PERF_EVENT_PAI_NNPA_ALL 0x1800 /* Event: NNPA_ALL */ struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ unsigned int def:16; /* 0-15 Data Entry Format */ diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 6fe478b0b61b..c17dbe232c54 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -345,7 +345,7 @@ static bool s390_cpumsf_trailer_show(const char *color, size_t pos, } color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d" " dsdes:%d Overflow:%lld Time:%#llx\n" - "\t\tC:%d TOD:%#lx\n", + "\t\tC:%d TOD:%#llx\n", pos, te->f ? 'F' : ' ', te->a ? 
'A' : ' ', @@ -513,6 +513,7 @@ static bool s390_cpumsf_make_event(size_t pos, .period = 1 }; union perf_event event; + int ret; memset(&event, 0, sizeof(event)); if (basic->CL == 1) /* Native LPAR mode */ @@ -536,8 +537,9 @@ static bool s390_cpumsf_make_event(size_t pos, pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n", __func__, pos, sample.ip, basic->P, basic->CL, sample.pid, sample.tid, sample.cpumode, sample.cpu); - if (perf_session__deliver_synth_event(sfq->sf->session, &event, - &sample)) { + ret = perf_session__deliver_synth_event(sfq->sf->session, &event, &sample); + perf_sample__exit(&sample); + if (ret) { pr_err("s390 Auxiliary Trace: failed to deliver event\n"); return false; } @@ -912,7 +914,7 @@ static int s390_cpumsf_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, - struct perf_tool *tool) + const struct perf_tool *tool) { struct s390_cpumsf *sf = container_of(session->auxtrace, struct s390_cpumsf, @@ -952,15 +954,10 @@ s390_cpumsf_process_event(struct perf_session *session, return err; } -struct s390_cpumsf_synth { - struct perf_tool cpumsf_tool; - struct perf_session *session; -}; - static int s390_cpumsf_process_auxtrace_event(struct perf_session *session, union perf_event *event __maybe_unused, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct s390_cpumsf *sf = container_of(session->auxtrace, struct s390_cpumsf, @@ -1003,7 +1000,7 @@ static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused) } static int s390_cpumsf_flush(struct perf_session *session __maybe_unused, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { return 0; } @@ -1145,7 +1142,7 @@ int s390_cpumsf_process_auxtrace_info(union perf_event *event, sf->machine = &session->machines.host; /* No kvm support */ sf->auxtrace_type = auxtrace_info->type; sf->pmu_type = PERF_TYPE_RAW; - sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid); + sf->machine_type = s390_cpumsf_get_type(perf_session__env(session)->cpuid); sf->auxtrace.process_event = s390_cpumsf_process_event; sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event; diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index c10b891dbad6..c6ae0ae8d86a 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -19,15 +19,17 @@ #include <sys/stat.h> #include <linux/compiler.h> +#include <linux/err.h> #include <asm/byteorder.h> #include "debug.h" #include "session.h" #include "evlist.h" #include "color.h" +#include "hashmap.h" #include "sample-raw.h" #include "s390-cpumcf-kernel.h" -#include "pmu-events/pmu-events.h" +#include "util/pmu.h" #include "util/sample.h" static size_t ctrset_size(struct cf_ctrset_entry *set) @@ -51,8 +53,6 @@ static bool s390_cpumcfdg_testctr(struct perf_sample *sample) struct cf_trailer_entry *te; struct cf_ctrset_entry *cep, ce; - if (!len) - return false; while (offset < len) { cep = (struct cf_ctrset_entry *)(buf + offset); ce.def = be16_to_cpu(cep->def); @@ -100,12 +100,12 @@ static void s390_cpumcfdg_dumptrail(const char *color, size_t offset, te.res2 = be32_to_cpu(tep->res2); color_fprintf(stdout, color, " [%#08zx] Trailer:%c%c%c%c%c" - " Cfvn:%d Csvn:%d Speed:%d TOD:%#llx\n", + " Cfvn:%d Csvn:%d Speed:%d TOD:%#lx\n", offset, te.clock_base ? 'T' : ' ', te.speed ? 'S' : ' ', te.mtda ? 'M' : ' ', te.caca ? 'C' : ' ', te.lcda ? 
'L' : ' ', te.cfvn, te.csvn, te.cpu_speed, te.timestamp); - color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#llx" + color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#lx" " Type:%x\n\n", te.progusage1, te.progusage2, te.progusage3, te.tod_base, te.mach_type); @@ -125,63 +125,111 @@ static int get_counterset_start(int setnr) return 128; case CPUMF_CTR_SET_MT_DIAG: /* Diagnostic counter set */ return 448; + case PERF_EVENT_PAI_NNPA_ALL: /* PAI NNPA counter set */ + case PERF_EVENT_PAI_CRYPTO_ALL: /* PAI CRYPTO counter set */ + return setnr; default: return -1; } } struct get_counter_name_data { - int wanted; + long wanted; const char *result; }; -static int get_counter_name_callback(const struct pmu_event *evp, - const struct pmu_events_table *table __maybe_unused, - void *vdata) +static int get_counter_name_callback(void *vdata, struct pmu_event_info *info) { struct get_counter_name_data *data = vdata; int rc, event_nr; + const char *event_str; + + if (info->str == NULL) + return 0; - if (evp->name == NULL || evp->event == NULL) + event_str = strstr(info->str, "event="); + if (!event_str) return 0; - rc = sscanf(evp->event, "event=%x", &event_nr); + + rc = sscanf(event_str, "event=%x", &event_nr); if (rc == 1 && event_nr == data->wanted) { - data->result = evp->name; + data->result = info->name; return 1; /* Terminate the search. */ } return 0; } -/* Scan the PMU table and extract the logical name of a counter from the - * PMU events table. Input is the counter set and counter number with in the - * set. Construct the event number and use this as key. If they match return - * the name of this counter. +static size_t get_counter_name_hash_fn(long key, void *ctx __maybe_unused) +{ + return key; +} + +static bool get_counter_name_hashmap_equal_fn(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + +/* Scan the PMU and extract the logical name of a counter from the event. Input + * is the counter set and counter number within the set. Construct the event + * number and use this as key. If they match return the name of this counter. * If no match is found a NULL pointer is returned. */ -static const char *get_counter_name(int set, int nr, const struct pmu_events_table *table) +static char *get_counter_name(int set, int nr, struct perf_pmu *pmu) { + static struct hashmap *cache; + static struct perf_pmu *cache_pmu; + long cache_key = get_counterset_start(set) + nr; struct get_counter_name_data data = { - .wanted = get_counterset_start(set) + nr, + .wanted = cache_key, .result = NULL, }; + char *result = NULL; - if (!table) + if (!pmu) return NULL; - pmu_events_table_for_each_event(table, get_counter_name_callback, &data); - return data.result; + if (cache_pmu == pmu && hashmap__find(cache, cache_key, &result)) + return strdup(result); + + perf_pmu__for_each_event(pmu, /*skip_duplicate_pmus=*/ true, + &data, get_counter_name_callback); + + result = strdup(data.result ?: "<unknown>"); + + if (cache_pmu == NULL) { + struct hashmap *tmp = hashmap__new(get_counter_name_hash_fn, + get_counter_name_hashmap_equal_fn, + /*ctx=*/NULL); + + if (!IS_ERR(tmp)) { + cache = tmp; + cache_pmu = pmu; + } + } + + if (cache_pmu == pmu && result) { + char *old_value = NULL, *new_value = strdup(result); + + if (new_value) { + hashmap__set(cache, cache_key, new_value, /*old_key=*/NULL, &old_value); + /* + * Free in case of a race, but resizing would be broken + * in that case. 
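+ *
+ * Worked example (editor's sketch, derived from the code above): the
+ * cache key is get_counterset_start(set) + nr, so counter nr 7 of
+ * CPUMF_CTR_SET_MT_DIAG (start 448) yields key 455 and matches a PMU
+ * event string containing "event=0x1c7", while for the PAI sets the
+ * set number itself is the base, e.g. PERF_EVENT_PAI_CRYPTO_ALL
+ * (0x1000) with nr 7 looks for "event=0x1007".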
+ */ + free(old_value); + } + } + return result; } -static void s390_cpumcfdg_dump(struct perf_sample *sample) +static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample) { size_t i, len = sample->raw_size, offset = 0; unsigned char *buf = sample->raw_data; const char *color = PERF_COLOR_BLUE; struct cf_ctrset_entry *cep, ce; - const struct pmu_events_table *table; u64 *p; - table = pmu_events_table__find(); while (offset < len) { cep = (struct cf_ctrset_entry *)(buf + offset); @@ -199,37 +247,131 @@ static void s390_cpumcfdg_dump(struct perf_sample *sample) color_fprintf(stdout, color, " [%#08zx] Counterset:%d" " Counters:%d\n", offset, ce.set, ce.ctr); for (i = 0, p = (u64 *)(cep + 1); i < ce.ctr; ++i, ++p) { - const char *ev_name = get_counter_name(ce.set, i, table); + char *ev_name = get_counter_name(ce.set, i, pmu); color_fprintf(stdout, color, - "\tCounter:%03d %s Value:%#018lx\n", i, + "\tCounter:%03zd %s Value:%#018"PRIx64"\n", i, ev_name ?: "<unknown>", be64_to_cpu(*p)); + free(ev_name); } offset += ctrset_size(&ce); } } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpacked" +#pragma GCC diagnostic ignored "-Wattributes" +/* + * Check for consistency of PAI_CRYPTO/PAI_NNPA raw data. + */ +struct pai_data { /* Event number and value */ + u16 event_nr; + u64 event_val; +} __packed; + +#pragma GCC diagnostic pop + +/* + * Test for valid raw data. At least one PAI event should be in the raw + * data section. + */ +static bool s390_pai_all_test(struct perf_sample *sample) +{ + size_t len = sample->raw_size; + + if (len < 0xa) + return false; + return true; +} + +static void s390_pai_all_dump(struct evsel *evsel, struct perf_sample *sample) +{ + size_t len = sample->raw_size, offset = 0; + unsigned char *p = sample->raw_data; + const char *color = PERF_COLOR_BLUE; + struct pai_data pai_data; + char *ev_name; + + while (offset < len) { + memcpy(&pai_data.event_nr, p, sizeof(pai_data.event_nr)); + pai_data.event_nr = be16_to_cpu(pai_data.event_nr); + p += sizeof(pai_data.event_nr); + offset += sizeof(pai_data.event_nr); + + memcpy(&pai_data.event_val, p, sizeof(pai_data.event_val)); + pai_data.event_val = be64_to_cpu(pai_data.event_val); + p += sizeof(pai_data.event_val); + offset += sizeof(pai_data.event_val); + + ev_name = get_counter_name(evsel->core.attr.config, + pai_data.event_nr, evsel->pmu); + color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018"PRIx64"\n", + pai_data.event_nr, ev_name ?: "<unknown>", + pai_data.event_val); + free(ev_name); + + if (offset + 0xa > len) + break; + } + color_fprintf(stdout, color, "\n"); +} + /* S390 specific trace event function. Check for PERF_RECORD_SAMPLE events - * and if the event was triggered by a counter set diagnostic event display - * its raw data. + * and if the event was triggered by a + * - counter set diagnostic event + * - processor activity assist (PAI) crypto counter event + * - processor activity assist (PAI) neural network processor assist (NNPA) + * counter event + * display its raw data. * The function is only invoked when the dump flag -D is set. + * + * Function evlist__s390_sample_raw() is defined as call back after it has + * been verified that the perf.data file was created on s390 platform. 
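+ *
+ * Illustrative output (editor's sketch following the format strings in
+ * s390_cpumcfdg_dump() above; the values are made up): a CF_DIAG sample
+ * dumped with 'perf report -D' prints lines such as
+ *
+ *   [0x000008] Counterset:0 Counters:6
+ *       Counter:000 CPU_CYCLES Value:0x00000000000004d2
+ *
+ * with names resolved through get_counter_name() where the PMU knows
+ * the event and "<unknown>" otherwise.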
*/ -void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) +void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, + struct perf_sample *sample) { - struct evsel *ev_bc000; + const char *pai_name; + struct evsel *evsel; if (event->header.type != PERF_RECORD_SAMPLE) return; - ev_bc000 = evlist__event2evsel(evlist, event); - if (ev_bc000 == NULL || - ev_bc000->core.attr.config != PERF_EVENT_CPUM_CF_DIAG) + evsel = evlist__event2evsel(evlist, event); + if (!evsel) + return; + + /* Check for raw data in sample */ + if (!sample->raw_size || !sample->raw_data) return; /* Display raw data on screen */ - if (!s390_cpumcfdg_testctr(sample)) { - pr_err("Invalid counter set data encountered\n"); + if (evsel->core.attr.config == PERF_EVENT_CPUM_CF_DIAG) { + if (!evsel->pmu) + evsel->pmu = perf_pmus__find("cpum_cf"); + if (!s390_cpumcfdg_testctr(sample)) + pr_err("Invalid counter set data encountered\n"); + else + s390_cpumcfdg_dump(evsel->pmu, sample); return; } - s390_cpumcfdg_dump(sample); + + switch (evsel->core.attr.config) { + case PERF_EVENT_PAI_NNPA_ALL: + pai_name = "NNPA_ALL"; + break; + case PERF_EVENT_PAI_CRYPTO_ALL: + pai_name = "CRYPTO_ALL"; + break; + default: + return; + } + + if (!s390_pai_all_test(sample)) { + pr_err("Invalid %s raw data encountered\n", pai_name); + } else { + if (!evsel->pmu) + evsel->pmu = perf_pmus__find_by_type(evsel->core.attr.type); + s390_pai_all_dump(evsel, sample); + } } diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c index f3f6bd9d290e..bcf442574d6e 100644 --- a/tools/perf/util/sample-raw.c +++ b/tools/perf/util/sample-raw.c @@ -6,15 +6,16 @@ #include "env.h" #include "header.h" #include "sample-raw.h" +#include "session.h" /* * Check platform the perf data file was created on and perform platform * specific interpretation. 
*/ -void evlist__init_trace_event_sample_raw(struct evlist *evlist) +void evlist__init_trace_event_sample_raw(struct evlist *evlist, struct perf_env *env) { - const char *arch_pf = perf_env__arch(evlist->env); - const char *cpuid = perf_env__cpuid(evlist->env); + const char *arch_pf = perf_env__arch(env); + const char *cpuid = perf_env__cpuid(env); if (arch_pf && !strcmp("s390", arch_pf)) evlist->trace_event_sample_raw = evlist__s390_sample_raw; diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h index ea01c5811503..896e9a87e373 100644 --- a/tools/perf/util/sample-raw.h +++ b/tools/perf/util/sample-raw.h @@ -11,5 +11,5 @@ void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, bool evlist__has_amd_ibs(struct evlist *evlist); void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); -void evlist__init_trace_event_sample_raw(struct evlist *evlist); +void evlist__init_trace_event_sample_raw(struct evlist *evlist, struct perf_env *env); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/sample.c b/tools/perf/util/sample.c new file mode 100644 index 000000000000..605fee971f55 --- /dev/null +++ b/tools/perf/util/sample.c @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "sample.h" +#include "debug.h" +#include <linux/zalloc.h> +#include <stdlib.h> +#include <string.h> + +void perf_sample__init(struct perf_sample *sample, bool all) +{ + if (all) { + memset(sample, 0, sizeof(*sample)); + } else { + sample->user_regs = NULL; + sample->intr_regs = NULL; + } +} + +void perf_sample__exit(struct perf_sample *sample) +{ + free(sample->user_regs); + free(sample->intr_regs); +} + +struct regs_dump *perf_sample__user_regs(struct perf_sample *sample) +{ + if (!sample->user_regs) { + sample->user_regs = zalloc(sizeof(*sample->user_regs)); + if (!sample->user_regs) + pr_err("Failure to allocate sample user_regs"); + } + return sample->user_regs; +} + + +struct regs_dump *perf_sample__intr_regs(struct perf_sample *sample) +{ + if (!sample->intr_regs) { + sample->intr_regs = zalloc(sizeof(*sample->intr_regs)); + if (!sample->intr_regs) + pr_err("Failure to allocate sample intr_regs"); + } + return sample->intr_regs; +} diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h index c92ad0f51ecd..a8307b20a9ea 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -67,7 +67,7 @@ struct aux_sample { }; struct simd_flags { - u64 arch:1, /* architecture (isa) */ + u8 arch:1, /* architecture (isa) */ pred:2; /* predication */ }; @@ -104,23 +104,29 @@ struct perf_sample { u8 cpumode; u16 misc; u16 ins_lat; - union { - u16 p_stage_cyc; - u16 retire_lat; - }; + /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. 
*/ + u16 weight3; bool no_hw_idx; /* No hw_idx collected in branch_stack */ + bool deferred_callchain; /* Has deferred user callchains */ + u64 deferred_cookie; char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; struct branch_stack *branch_stack; - struct regs_dump user_regs; - struct regs_dump intr_regs; + u64 *branch_stack_cntr; + struct regs_dump *user_regs; + struct regs_dump *intr_regs; struct stack_dump user_stack; struct sample_read read; struct aux_sample aux_sample; struct simd_flags simd_flags; }; +void perf_sample__init(struct perf_sample *sample, bool all); +void perf_sample__exit(struct perf_sample *sample); +struct regs_dump *perf_sample__user_regs(struct perf_sample *sample); +struct regs_dump *perf_sample__intr_regs(struct perf_sample *sample); + /* * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get * 8-byte alignment. diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build index c220fec97032..24f087b0cd11 100644 --- a/tools/perf/util/scripting-engines/Build +++ b/tools/perf/util/scripting-engines/Build @@ -1,8 +1,9 @@ ifeq ($(CONFIG_LIBTRACEEVENT),y) - perf-$(CONFIG_LIBPERL) += trace-event-perl.o + perf-util-$(CONFIG_LIBPERL) += trace-event-perl.o endif -perf-$(CONFIG_LIBPYTHON) += trace-event-python.o +perf-util-$(CONFIG_LIBPYTHON) += trace-event-python.o -CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum +CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum -Wno-thread-safety-analysis -CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations -Wno-switch-enum +# -Wno-declaration-after-statement: The python headers have mixed code with declarations (decls after asserts, for instance) +CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations -Wno-switch-enum -Wno-declaration-after-statement diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 603091317bed..e261a57b87d4 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -27,7 +27,7 @@ #include <errno.h> #include <linux/bitmap.h> #include <linux/time64.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <stdbool.h> /* perl needs the following define, right after including stdbool.h */ @@ -320,10 +320,10 @@ static SV *perl_process_callchain(struct perf_sample *sample, const char *dsoname = "[unknown]"; if (dso) { - if (symbol_conf.show_kernel_path && dso->long_name) - dsoname = dso->long_name; + if (symbol_conf.show_kernel_path && dso__long_name(dso)) + dsoname = dso__long_name(dso); else - dsoname = dso->name; + dsoname = dso__name(dso); } if (!hv_stores(elem, "dso", newSVpv(dsoname,0))) { hv_undef(elem); @@ -344,7 +344,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, struct addr_location *al) { struct thread *thread = al->thread; - struct tep_event *event = 
evsel->tp_format; + struct tep_event *event; struct tep_format_field *field; static char handler[256]; unsigned long long val; @@ -362,6 +362,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) return; + event = evsel__tp_format(evsel); if (!event) { pr_debug("ug! no event found for type %" PRIu64, (u64)evsel->core.attr.config); return; @@ -490,6 +491,9 @@ static int perl_start_script(const char *script, int argc, const char **argv, scripting_context->session = session; command_line = malloc((argc + 2) * sizeof(const char *)); + if (!command_line) + return -ENOMEM; + command_line[0] = ""; command_line[1] = script; for (i = 2; i < argc + 2; i++) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 94312741443a..6655c0bbe0d8 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -31,7 +31,7 @@ #include <linux/compiler.h> #include <linux/time64.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #include "../build-id.h" @@ -45,6 +45,7 @@ #include "../thread.h" #include "../comm.h" #include "../machine.h" +#include "../mem-info.h" #include "../db-export.h" #include "../thread-stack.h" #include "../trace-event.h" @@ -57,22 +58,6 @@ #include "mem-events.h" #include "util/perf_regs.h" -#if PY_MAJOR_VERSION < 3 -#define _PyUnicode_FromString(arg) \ - PyString_FromString(arg) -#define _PyUnicode_FromStringAndSize(arg1, arg2) \ - PyString_FromStringAndSize((arg1), (arg2)) -#define _PyBytes_FromStringAndSize(arg1, arg2) \ - PyString_FromStringAndSize((arg1), (arg2)) -#define _PyLong_FromLong(arg) \ - PyInt_FromLong(arg) -#define _PyLong_AsLong(arg) \ - PyInt_AsLong(arg) -#define _PyCapsule_New(arg1, arg2, arg3) \ - PyCObject_FromVoidPtr((arg1), (arg2)) - -PyMODINIT_FUNC initperf_trace_context(void); -#else #define _PyUnicode_FromString(arg) \ PyUnicode_FromString(arg) #define _PyUnicode_FromStringAndSize(arg1, arg2) \ @@ -87,7 +72,6 @@ PyMODINIT_FUNC initperf_trace_context(void); PyCapsule_New((arg1), (arg2), (arg3)) PyMODINIT_FUNC PyInit_perf_trace_context(void); -#endif #ifdef HAVE_LIBTRACEEVENT #define TRACE_EVENT_TYPE_MAX \ @@ -180,17 +164,7 @@ static int get_argument_count(PyObject *handler) { int arg_count = 0; - /* - * The attribute for the code object is func_code in Python 2, - * whereas it is __code__ in Python 3.0+. - */ - PyObject *code_obj = PyObject_GetAttrString(handler, - "func_code"); - if (PyErr_Occurred()) { - PyErr_Clear(); - code_obj = PyObject_GetAttrString(handler, - "__code__"); - } + PyObject *code_obj = PyObject_GetAttrString(handler, "__code__"); PyErr_Clear(); if (code_obj) { PyObject *arg_count_obj = PyObject_GetAttrString(code_obj, @@ -353,6 +327,8 @@ static PyObject *get_field_numeric_entry(struct tep_event *event, if (is_array) { list = PyList_New(field->arraylen); + if (!list) + Py_FatalError("couldn't create Python list"); item_size = field->size / field->arraylen; n_items = field->arraylen; } else { @@ -391,10 +367,10 @@ static const char *get_dsoname(struct map *map) struct dso *dso = map ? 
map__dso(map) : NULL; if (dso) { - if (symbol_conf.show_kernel_path && dso->long_name) - dsoname = dso->long_name; + if (symbol_conf.show_kernel_path && dso__long_name(dso)) + dsoname = dso__long_name(dso); else - dsoname = dso->name; + dsoname = dso__name(dso); } return dsoname; @@ -718,15 +694,20 @@ static void set_sample_read_in_dict(PyObject *dict_sample, } static void set_sample_datasrc_in_dict(PyObject *dict, - struct perf_sample *sample) + struct perf_sample *sample) { - struct mem_info mi = { .data_src.val = sample->data_src }; + struct mem_info *mi = mem_info__new(); char decode[100]; + if (!mi) + Py_FatalError("couldn't create mem-info"); + pydict_set_item_string_decref(dict, "datasrc", PyLong_FromUnsignedLongLong(sample->data_src)); - perf_script__meminfo_scnprintf(decode, 100, &mi); + mem_info__data_src(mi)->val = sample->data_src; + perf_script__meminfo_scnprintf(decode, 100, mi); + mem_info__put(mi); pydict_set_item_string_decref(dict, "datasrc_decode", _PyUnicode_FromString(decode)); @@ -754,53 +735,66 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, ch } } -static void set_regs_in_dict(PyObject *dict, +#define MAX_REG_SIZE 128 + +static int set_regs_in_dict(PyObject *dict, struct perf_sample *sample, struct evsel *evsel) { struct perf_event_attr *attr = &evsel->core.attr; const char *arch = perf_env__arch(evsel__env(evsel)); - /* - * Here value 28 is a constant size which can be used to print - * one register value and its corresponds to: - * 16 chars is to specify 64 bit register in hexadecimal. - * 2 chars is for appending "0x" to the hexadecimal value and - * 10 chars is for register name. - */ - int size = __sw_hweight64(attr->sample_regs_intr) * 28; - char *bf = malloc(size); + int size = (__sw_hweight64(attr->sample_regs_intr) * MAX_REG_SIZE) + 1; + char *bf = NULL; - regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, size); + if (sample->intr_regs) { + bf = malloc(size); + if (!bf) + return -1; - pydict_set_item_string_decref(dict, "iregs", - _PyUnicode_FromString(bf)); + regs_map(sample->intr_regs, attr->sample_regs_intr, arch, bf, size); - regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, size); + pydict_set_item_string_decref(dict, "iregs", + _PyUnicode_FromString(bf)); + } + + if (sample->user_regs) { + if (!bf) { + bf = malloc(size); + if (!bf) + return -1; + } + regs_map(sample->user_regs, attr->sample_regs_user, arch, bf, size); - pydict_set_item_string_decref(dict, "uregs", - _PyUnicode_FromString(bf)); + pydict_set_item_string_decref(dict, "uregs", + _PyUnicode_FromString(bf)); + } free(bf); + + return 0; } static void set_sym_in_dict(PyObject *dict, struct addr_location *al, const char *dso_field, const char *dso_bid_field, const char *dso_map_start, const char *dso_map_end, - const char *sym_field, const char *symoff_field) + const char *sym_field, const char *symoff_field, + const char *map_pgoff) { - char sbuild_id[SBUILD_ID_SIZE]; - if (al->map) { + char sbuild_id[SBUILD_ID_SIZE]; struct dso *dso = map__dso(al->map); - pydict_set_item_string_decref(dict, dso_field, _PyUnicode_FromString(dso->name)); - build_id__sprintf(&dso->bid, sbuild_id); + pydict_set_item_string_decref(dict, dso_field, + _PyUnicode_FromString(dso__name(dso))); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); pydict_set_item_string_decref(dict, dso_bid_field, _PyUnicode_FromString(sbuild_id)); pydict_set_item_string_decref(dict, dso_map_start, PyLong_FromUnsignedLong(map__start(al->map))); 
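	/*
	 * Illustrative sketch (editor's addition, not part of the patch): the
	 * dict keys exported here surface in perf script Python handlers; the
	 * handler body below is an assumption showing how the "map_pgoff" key
	 * and the "id"/"stream_id"/"ins_lat" sample fields added in this hunk
	 * might be read:
	 *
	 *   def process_event(param_dict):
	 *       s = param_dict["sample"]
	 *       print("id=%#x stream=%#x ins_lat=%d pgoff=%#x" % (
	 *           s["id"], s["stream_id"], s["ins_lat"],
	 *           param_dict.get("map_pgoff", 0)))
	 */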
pydict_set_item_string_decref(dict, dso_map_end, PyLong_FromUnsignedLong(map__end(al->map))); + pydict_set_item_string_decref(dict, map_pgoff, + PyLong_FromUnsignedLongLong(map__pgoff(al->map))); } if (al->sym) { pydict_set_item_string_decref(dict, sym_field, @@ -852,6 +846,10 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(evsel__name(evsel))); pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->core.attr, sizeof(evsel->core.attr))); + pydict_set_item_string_decref(dict_sample, "id", + PyLong_FromUnsignedLongLong(sample->id)); + pydict_set_item_string_decref(dict_sample, "stream_id", + PyLong_FromUnsignedLongLong(sample->stream_id)); pydict_set_item_string_decref(dict_sample, "pid", _PyLong_FromLong(sample->pid)); pydict_set_item_string_decref(dict_sample, "tid", @@ -871,6 +869,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, set_sample_read_in_dict(dict_sample, sample, evsel); pydict_set_item_string_decref(dict_sample, "weight", PyLong_FromUnsignedLongLong(sample->weight)); + pydict_set_item_string_decref(dict_sample, "ins_lat", + PyLong_FromUnsignedLong(sample->ins_lat)); pydict_set_item_string_decref(dict_sample, "transaction", PyLong_FromUnsignedLongLong(sample->transaction)); set_sample_datasrc_in_dict(dict_sample, sample); @@ -881,7 +881,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, pydict_set_item_string_decref(dict, "comm", _PyUnicode_FromString(thread__comm_str(al->thread))); set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end", - "symbol", "symoff"); + "symbol", "symoff", "map_pgoff"); pydict_set_item_string_decref(dict, "callchain", callchain); @@ -906,7 +906,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyBool_FromLong(1)); set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid", "addr_dso_map_start", "addr_dso_map_end", - "addr_symbol", "addr_symoff"); + "addr_symbol", "addr_symoff", "addr_map_pgoff"); } if (sample->flags) @@ -920,7 +920,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyLong_FromUnsignedLongLong(sample->cyc_cnt)); } - set_regs_in_dict(dict, sample, evsel); + if (set_regs_in_dict(dict, sample, evsel)) + Py_FatalError("Failed to set regs in dict"); return dict; } @@ -931,7 +932,7 @@ static void python_process_tracepoint(struct perf_sample *sample, struct addr_location *al, struct addr_location *addr_al) { - struct tep_event *event = evsel->tp_format; + struct tep_event *event; PyObject *handler, *context, *t, *obj = NULL, *callchain; PyObject *dict = NULL, *all_entries_dict = NULL; static char handler_name[256]; @@ -948,6 +949,7 @@ static void python_process_tracepoint(struct perf_sample *sample, bitmap_zero(events_defined, TRACE_EVENT_TYPE_MAX); + event = evsel__tp_format(evsel); if (!event) { snprintf(handler_name, sizeof(handler_name), "ug! 
no event found for type %" PRIu64, (u64)evsel->core.attr.config); @@ -1235,14 +1237,14 @@ static int python_export_dso(struct db_export *dbe, struct dso *dso, char sbuild_id[SBUILD_ID_SIZE]; PyObject *t; - build_id__sprintf(&dso->bid, sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); t = tuple_new(5); - tuple_set_d64(t, 0, dso->db_id); + tuple_set_d64(t, 0, dso__db_id(dso)); tuple_set_d64(t, 1, machine->db_id); - tuple_set_string(t, 2, dso->short_name); - tuple_set_string(t, 3, dso->long_name); + tuple_set_string(t, 2, dso__short_name(dso)); + tuple_set_string(t, 3, dso__long_name(dso)); tuple_set_string(t, 4, sbuild_id); call_object(tables->dso_handler, t, "dso_table"); @@ -1262,7 +1264,7 @@ static int python_export_symbol(struct db_export *dbe, struct symbol *sym, t = tuple_new(6); tuple_set_d64(t, 0, *sym_db_id); - tuple_set_d64(t, 1, dso->db_id); + tuple_set_d64(t, 1, dso__db_id(dso)); tuple_set_d64(t, 2, sym->start); tuple_set_d64(t, 3, sym->end); tuple_set_s32(t, 4, sym->binding); @@ -1299,11 +1301,11 @@ static void python_export_sample_table(struct db_export *dbe, struct tables *tables = container_of(dbe, struct tables, dbe); PyObject *t; - t = tuple_new(25); + t = tuple_new(28); tuple_set_d64(t, 0, es->db_id); tuple_set_d64(t, 1, es->evsel->db_id); - tuple_set_d64(t, 2, maps__machine(es->al->maps)->db_id); + tuple_set_d64(t, 2, maps__machine(thread__maps(es->al->thread))->db_id); tuple_set_d64(t, 3, thread__db_id(es->al->thread)); tuple_set_d64(t, 4, es->comm_db_id); tuple_set_d64(t, 5, es->dso_db_id); @@ -1326,6 +1328,9 @@ static void python_export_sample_table(struct db_export *dbe, tuple_set_d64(t, 22, es->sample->insn_cnt); tuple_set_d64(t, 23, es->sample->cyc_cnt); tuple_set_s32(t, 24, es->sample->flags); + tuple_set_d64(t, 25, es->sample->id); + tuple_set_d64(t, 26, es->sample->stream_id); + tuple_set_u32(t, 27, es->sample->ins_lat); call_object(tables->sample_handler, t, "sample_table"); @@ -1686,13 +1691,15 @@ static void python_process_stat(struct perf_stat_config *config, { struct perf_thread_map *threads = counter->core.threads; struct perf_cpu_map *cpus = counter->core.cpus; - int cpu, thread; - for (thread = 0; thread < perf_thread_map__nr(threads); thread++) { - for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) { - process_stat(counter, perf_cpu_map__cpu(cpus, cpu), + for (int thread = 0; thread < perf_thread_map__nr(threads); thread++) { + int idx; + struct perf_cpu cpu; + + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + process_stat(counter, cpu, perf_thread_map__pid(threads, thread), tstamp, - perf_counts(counter->counts, cpu, thread)); + perf_counts(counter->counts, idx, thread)); } } } @@ -1879,12 +1886,6 @@ static void set_table_handlers(struct tables *tables) tables->synth_handler = get_handler("synth_data"); } -#if PY_MAJOR_VERSION < 3 -static void _free_command_line(const char **command_line, int num) -{ - free(command_line); -} -#else static void _free_command_line(wchar_t **command_line, int num) { int i; @@ -1892,7 +1893,6 @@ static void _free_command_line(wchar_t **command_line, int num) PyMem_RawFree(command_line[i]); free(command_line); } -#endif /* @@ -1902,40 +1902,23 @@ static int python_start_script(const char *script, int argc, const char **argv, struct perf_session *session) { struct tables *tables = &tables_global; -#if PY_MAJOR_VERSION < 3 - const char **command_line; -#else wchar_t **command_line; -#endif - /* - * Use a non-const name variable to cope with python 2.6's - * PyImport_AppendInittab prototype - */ - 
char buf[PATH_MAX], name[19] = "perf_trace_context"; + char buf[PATH_MAX]; int i, err = 0; FILE *fp; scripting_context->session = session; -#if PY_MAJOR_VERSION < 3 - command_line = malloc((argc + 1) * sizeof(const char *)); - command_line[0] = script; - for (i = 1; i < argc + 1; i++) - command_line[i] = argv[i - 1]; - PyImport_AppendInittab(name, initperf_trace_context); -#else command_line = malloc((argc + 1) * sizeof(wchar_t *)); + if (!command_line) + return -1; + command_line[0] = Py_DecodeLocale(script, NULL); for (i = 1; i < argc + 1; i++) command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); - PyImport_AppendInittab(name, PyInit_perf_trace_context); -#endif + PyImport_AppendInittab("perf_trace_context", PyInit_perf_trace_context); Py_Initialize(); -#if PY_MAJOR_VERSION < 3 - PySys_SetArgv(argc + 1, (char **)command_line); -#else PySys_SetArgv(argc + 1, command_line); -#endif fp = fopen(script, "r"); if (!fp) { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 00d18c74c090..4236503c8f6c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -12,6 +12,7 @@ #include <sys/types.h> #include <sys/mman.h> #include <perf/cpumap.h> +#include <perf/event.h> #include "map_symbol.h" #include "branch.h" @@ -36,85 +37,30 @@ #include "util.h" #include "arch/common.h" #include "units.h" +#include "annotate.h" +#include "perf.h" #include <internal/lib.h> -#ifdef HAVE_ZSTD_SUPPORT -static int perf_session__process_compressed_event(struct perf_session *session, - union perf_event *event, u64 file_offset, - const char *file_path) -{ - void *src; - size_t decomp_size, src_size; - u64 decomp_last_rem = 0; - size_t mmap_len, decomp_len = session->header.env.comp_mmap_len; - struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last; - - if (decomp_last) { - decomp_last_rem = decomp_last->size - decomp_last->head; - decomp_len += decomp_last_rem; - } - - mmap_len = sizeof(struct decomp) + decomp_len; - decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - if (decomp == MAP_FAILED) { - pr_err("Couldn't allocate memory for decompression\n"); - return -1; - } - - decomp->file_pos = file_offset; - decomp->file_path = file_path; - decomp->mmap_len = mmap_len; - decomp->head = 0; - - if (decomp_last_rem) { - memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem); - decomp->size = decomp_last_rem; - } - - src = (void *)event + sizeof(struct perf_record_compressed); - src_size = event->pack.header.size - sizeof(struct perf_record_compressed); - - decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size, - &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem); - if (!decomp_size) { - munmap(decomp, mmap_len); - pr_err("Couldn't decompress data\n"); - return -1; - } - - decomp->size += decomp_size; - - if (session->active_decomp->decomp == NULL) - session->active_decomp->decomp = decomp; - else - session->active_decomp->decomp_last->next = decomp; - - session->active_decomp->decomp_last = decomp; - - pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size); - - return 0; -} -#else /* !HAVE_ZSTD_SUPPORT */ -#define perf_session__process_compressed_event perf_session__process_compressed_event_stub -#endif - static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool, + const struct perf_tool *tool, u64 file_offset, const char *file_path); -static int perf_session__open(struct perf_session *session, 
int repipe_fd) +static int perf_session__open(struct perf_session *session) { struct perf_data *data = session->data; - if (perf_session__read_header(session, repipe_fd) < 0) { + if (perf_session__read_header(session) < 0) { pr_err("incompatible file format (rerun with -v to learn more)\n"); return -1; } + if (perf_header__has_feat(&session->header, HEADER_AUXTRACE)) { + /* Auxiliary events may reference exited threads, hold onto dead ones. */ + symbol_conf.keep_exited_threads = true; + } + if (perf_data__is_pipe(data)) return 0; @@ -191,8 +137,9 @@ static int ordered_events__deliver_event(struct ordered_events *oe, } struct perf_session *__perf_session__new(struct perf_data *data, - bool repipe, int repipe_fd, - struct perf_tool *tool) + struct perf_tool *tool, + bool trace_event_repipe, + struct perf_env *host_env) { int ret = -ENOMEM; struct perf_session *session = zalloc(sizeof(*session)); @@ -200,7 +147,7 @@ struct perf_session *__perf_session__new(struct perf_data *data, if (!session) goto out; - session->repipe = repipe; + session->trace_event_repipe = trace_event_repipe; session->tool = tool; session->decomp_data.zstd_decomp = &session->zstd_data; session->active_decomp = &session->decomp_data; @@ -218,7 +165,7 @@ struct perf_session *__perf_session__new(struct perf_data *data, session->data = data; if (perf_data__is_read(data)) { - ret = perf_session__open(session, repipe_fd); + ret = perf_session__open(session); if (ret < 0) goto out_delete; @@ -231,7 +178,7 @@ struct perf_session *__perf_session__new(struct perf_data *data, perf_session__set_comm_exec(session); } - evlist__init_trace_event_sample_raw(session->evlist); + evlist__init_trace_event_sample_raw(session->evlist, &session->header.env); /* Open the directory data. */ if (data->is_dir) { @@ -245,8 +192,11 @@ struct perf_session *__perf_session__new(struct perf_data *data, symbol_conf.kallsyms_name = perf_data__kallsyms_name(data); } } else { - session->machines.host.env = &perf_env; + assert(host_env != NULL); + session->machines.host.env = host_env; } + if (session->evlist) + session->evlist->session = session; session->machines.host.single_address_space = perf_env__single_address_space(session->machines.host.env); @@ -299,6 +249,7 @@ void perf_session__delete(struct perf_session *session) return; auxtrace__free(session); auxtrace_index__free(&session->auxtrace_index); + debuginfo_cache__delete(); perf_session__destroy_kernel_maps(session); perf_decomp__release_events(session->decomp_data.decomp); perf_env__exit(&session->header.env); @@ -314,251 +265,6 @@ void perf_session__delete(struct perf_session *session) free(session); } -static int process_event_synth_tracing_data_stub(struct perf_session *session - __maybe_unused, - union perf_event *event - __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct evlist **pevlist - __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_event_synth_event_update_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct evlist **pevlist - __maybe_unused) -{ - if (dump_trace) - perf_event__fprintf_event_update(event, stdout); - - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_event_sample_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_sample *sample __maybe_unused, - struct evsel *evsel 
__maybe_unused, - struct machine *machine __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_event_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_sample *sample __maybe_unused, - struct machine *machine __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct ordered_events *oe __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -static int skipn(int fd, off_t n) -{ - char buf[4096]; - ssize_t ret; - - while (n > 0) { - ret = read(fd, buf, min(n, (off_t)sizeof(buf))); - if (ret <= 0) - return ret; - n -= ret; - } - - return 0; -} - -static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused, - union perf_event *event) -{ - dump_printf(": unhandled!\n"); - if (perf_data__is_pipe(session->data)) - skipn(perf_data__fd(session->data), event->auxtrace.size); - return event->auxtrace.size; -} - -static int process_event_op2_stub(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - - -static -int process_event_thread_map_stub(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused) -{ - if (dump_trace) - perf_event__fprintf_thread_map(event, stdout); - - dump_printf(": unhandled!\n"); - return 0; -} - -static -int process_event_cpu_map_stub(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused) -{ - if (dump_trace) - perf_event__fprintf_cpu_map(event, stdout); - - dump_printf(": unhandled!\n"); - return 0; -} - -static -int process_event_stat_config_stub(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused) -{ - if (dump_trace) - perf_event__fprintf_stat_config(event, stdout); - - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_stat_stub(struct perf_session *perf_session __maybe_unused, - union perf_event *event) -{ - if (dump_trace) - perf_event__fprintf_stat(event, stdout); - - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_stat_round_stub(struct perf_session *perf_session __maybe_unused, - union perf_event *event) -{ - if (dump_trace) - perf_event__fprintf_stat_round(event, stdout); - - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_event_time_conv_stub(struct perf_session *perf_session __maybe_unused, - union perf_event *event) -{ - if (dump_trace) - perf_event__fprintf_time_conv(event, stdout); - - dump_printf(": unhandled!\n"); - return 0; -} - -static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused, - u64 file_offset __maybe_unused, - const char *file_path __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -void perf_tool__fill_defaults(struct perf_tool *tool) -{ - if (tool->sample == NULL) - tool->sample = process_event_sample_stub; - if (tool->mmap == NULL) - tool->mmap = process_event_stub; - if (tool->mmap2 == NULL) - tool->mmap2 = process_event_stub; - if (tool->comm == NULL) - tool->comm = process_event_stub; - if (tool->namespaces == NULL) - tool->namespaces = process_event_stub; - if (tool->cgroup == NULL) - tool->cgroup = process_event_stub; - if (tool->fork == NULL) - tool->fork = process_event_stub; - if (tool->exit == NULL) - tool->exit = process_event_stub; - if 
(tool->lost == NULL) - tool->lost = perf_event__process_lost; - if (tool->lost_samples == NULL) - tool->lost_samples = perf_event__process_lost_samples; - if (tool->aux == NULL) - tool->aux = perf_event__process_aux; - if (tool->itrace_start == NULL) - tool->itrace_start = perf_event__process_itrace_start; - if (tool->context_switch == NULL) - tool->context_switch = perf_event__process_switch; - if (tool->ksymbol == NULL) - tool->ksymbol = perf_event__process_ksymbol; - if (tool->bpf == NULL) - tool->bpf = perf_event__process_bpf; - if (tool->text_poke == NULL) - tool->text_poke = perf_event__process_text_poke; - if (tool->aux_output_hw_id == NULL) - tool->aux_output_hw_id = perf_event__process_aux_output_hw_id; - if (tool->read == NULL) - tool->read = process_event_sample_stub; - if (tool->throttle == NULL) - tool->throttle = process_event_stub; - if (tool->unthrottle == NULL) - tool->unthrottle = process_event_stub; - if (tool->attr == NULL) - tool->attr = process_event_synth_attr_stub; - if (tool->event_update == NULL) - tool->event_update = process_event_synth_event_update_stub; - if (tool->tracing_data == NULL) - tool->tracing_data = process_event_synth_tracing_data_stub; - if (tool->build_id == NULL) - tool->build_id = process_event_op2_stub; - if (tool->finished_round == NULL) { - if (tool->ordered_events) - tool->finished_round = perf_event__process_finished_round; - else - tool->finished_round = process_finished_round_stub; - } - if (tool->id_index == NULL) - tool->id_index = process_event_op2_stub; - if (tool->auxtrace_info == NULL) - tool->auxtrace_info = process_event_op2_stub; - if (tool->auxtrace == NULL) - tool->auxtrace = process_event_auxtrace_stub; - if (tool->auxtrace_error == NULL) - tool->auxtrace_error = process_event_op2_stub; - if (tool->thread_map == NULL) - tool->thread_map = process_event_thread_map_stub; - if (tool->cpu_map == NULL) - tool->cpu_map = process_event_cpu_map_stub; - if (tool->stat_config == NULL) - tool->stat_config = process_event_stat_config_stub; - if (tool->stat == NULL) - tool->stat = process_stat_stub; - if (tool->stat_round == NULL) - tool->stat_round = process_stat_round_stub; - if (tool->time_conv == NULL) - tool->time_conv = process_event_time_conv_stub; - if (tool->feature == NULL) - tool->feature = process_event_op2_stub; - if (tool->compressed == NULL) - tool->compressed = perf_session__process_compressed_event; - if (tool->finished_init == NULL) - tool->finished_init = process_event_op2_stub; -} - static void swap_sample_id_all(union perf_event *event, void *data) { void *end = (void *) event + event->header.size; @@ -833,8 +539,8 @@ static void perf_event__hdr_attr_swap(union perf_event *event, perf_event__attr_swap(&event->attr.attr); size = event->header.size; - size -= (void *)&event->attr.id - (void *)event; - mem_bswap_64(event->attr.id, size); + size -= perf_record_header_attr_id(event) - (void *)event; + mem_bswap_64(perf_record_header_attr_id(event), size); } static void perf_event__event_update_swap(union perf_event *event, @@ -1014,6 +720,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_CGROUP] = perf_event__cgroup_swap, [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap, [PERF_RECORD_AUX_OUTPUT_HW_ID] = perf_event__all64_swap, + [PERF_RECORD_CALLCHAIN_DEFERRED] = perf_event__all64_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, @@ -1071,7 +778,7 @@ static 
perf_event__swap_op perf_event__swap_ops[] = { * Flush every events below timestamp 7 * etc... */ -int perf_event__process_finished_round(struct perf_tool *tool __maybe_unused, +int perf_event__process_finished_round(const struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct ordered_events *oe) { @@ -1148,11 +855,17 @@ static void callchain__printf(struct evsel *evsel, for (i = 0; i < callchain->nr; i++) printf("..... %2d: %016" PRIx64 "\n", i, callchain->ips[i]); + + if (sample->deferred_callchain) + printf("...... (deferred)\n"); } -static void branch_stack__printf(struct perf_sample *sample, bool callstack) +static void branch_stack__printf(struct perf_sample *sample, + struct evsel *evsel) { struct branch_entry *entries = perf_sample__branch_entries(sample); + bool callstack = evsel__has_branch_callstack(evsel); + u64 *branch_stack_cntr = sample->branch_stack_cntr; uint64_t i; if (!callstack) { @@ -1194,6 +907,16 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) } } } + + if (branch_stack_cntr) { + unsigned int br_cntr_width, br_cntr_nr; + + perf_env__find_br_cntr_info(evsel__env(evsel), &br_cntr_nr, &br_cntr_width); + printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n", + sample->branch_stack->nr, br_cntr_width, br_cntr_nr); + for (i = 0; i < sample->branch_stack->nr; i++) + printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]); + } } static void regs_dump__printf(u64 mask, u64 *regs, const char *arch) @@ -1236,7 +959,12 @@ static void regs__printf(const char *type, struct regs_dump *regs, const char *a static void regs_user__printf(struct perf_sample *sample, const char *arch) { - struct regs_dump *user_regs = &sample->user_regs; + struct regs_dump *user_regs; + + if (!sample->user_regs) + return; + + user_regs = perf_sample__user_regs(sample); if (user_regs->regs) regs__printf("user", user_regs, arch); @@ -1244,7 +972,12 @@ static void regs_user__printf(struct perf_sample *sample, const char *arch) static void regs_intr__printf(struct perf_sample *sample, const char *arch) { - struct regs_dump *intr_regs = &sample->intr_regs; + struct regs_dump *intr_regs; + + if (!sample->intr_regs) + return; + + intr_regs = perf_sample__intr_regs(sample); if (intr_regs->regs) regs__printf("intr", intr_regs, arch); @@ -1355,7 +1088,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, callchain__printf(evsel, sample); if (evsel__has_br_stack(evsel)) - branch_stack__printf(sample, evsel__has_branch_callstack(evsel)); + branch_stack__printf(sample, evsel); if (sample_type & PERF_SAMPLE_REGS_USER) regs_user__printf(sample, arch); @@ -1370,7 +1103,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, printf("... 
weight: %" PRIu64 "", sample->weight); if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { printf(",0x%"PRIx16"", sample->ins_lat); - printf(",0x%"PRIx16"", sample->p_stage_cyc); + printf(",0x%"PRIx16"", sample->weight3); } printf("\n"); } @@ -1394,6 +1127,19 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, sample_read__printf(sample, evsel->core.attr.read_format); } +static void dump_deferred_callchain(struct evsel *evsel, union perf_event *event, + struct perf_sample *sample) +{ + if (!dump_trace) + return; + + printf("(IP, 0x%x): %d/%d: %#" PRIx64 "\n", + event->header.misc, sample->pid, sample->tid, sample->deferred_cookie); + + if (evsel__has_callchain(evsel)) + callchain__printf(evsel, sample); +} + static void dump_read(struct evsel *evsel, union perf_event *event) { struct perf_record_read *read_event = &event->read; @@ -1454,22 +1200,28 @@ static struct machine *machines__find_for_cpumode(struct machines *machines, } static int deliver_sample_value(struct evlist *evlist, - struct perf_tool *tool, + const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct sample_read_value *v, - struct machine *machine) + struct machine *machine, + bool per_thread) { struct perf_sample_id *sid = evlist__id2sid(evlist, v->id); struct evsel *evsel; + u64 *storage = NULL; if (sid) { + storage = perf_sample_id__get_period_storage(sid, sample->tid, per_thread); + } + + if (storage) { sample->id = v->id; - sample->period = v->value - sid->period; - sid->period = v->value; + sample->period = v->value - *storage; + *storage = v->value; } - if (!sid || sid->evsel == NULL) { + if (!storage || sid->evsel == NULL) { ++evlist->stats.nr_unknown_id; return 0; } @@ -1486,18 +1238,23 @@ static int deliver_sample_value(struct evlist *evlist, } static int deliver_sample_group(struct evlist *evlist, - struct perf_tool *tool, + const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine, - u64 read_format) + u64 read_format, + bool per_thread) { int ret = -EINVAL; struct sample_read_value *v = sample->read.group.values; + if (tool->dont_split_sample_group) + return deliver_sample_value(evlist, tool, event, sample, v, machine, + per_thread); + sample_read_group__for_each(v, sample->read.group.nr, read_format) { ret = deliver_sample_value(evlist, tool, event, sample, v, - machine); + machine, per_thread); if (ret) break; } @@ -1505,13 +1262,14 @@ static int deliver_sample_group(struct evlist *evlist, return ret; } -static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool, +static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct evsel *evsel, struct machine *machine) { /* We know evsel != NULL. */ u64 sample_type = evsel->core.attr.sample_type; u64 read_format = evsel->core.attr.read_format; + bool per_thread = perf_evsel__attr_has_per_thread_sample_period(&evsel->core); /* Standard sample delivery. */ if (!(sample_type & PERF_SAMPLE_READ)) @@ -1520,17 +1278,118 @@ static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool, /* For PERF_SAMPLE_READ we have either single or group mode. 
*/ if (read_format & PERF_FORMAT_GROUP) return deliver_sample_group(evlist, tool, event, sample, - machine, read_format); + machine, read_format, per_thread); else return deliver_sample_value(evlist, tool, event, sample, - &sample->read.one, machine); + &sample->read.one, machine, + per_thread); +} + +/* + * Samples with deferred callchains should wait for the next matching + * PERF_RECORD_CALLCHAIN_RECORD entries. Keep the events in a list and + * deliver them once it finds the callchains. + */ +struct deferred_event { + struct list_head list; + union perf_event *event; +}; + +/* + * This is called when a deferred callchain record comes up. Find all matching + * samples, merge the callchains and process them. + */ +static int evlist__deliver_deferred_callchain(struct evlist *evlist, + const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct deferred_event *de, *tmp; + struct evsel *evsel; + int ret = 0; + + if (!tool->merge_deferred_callchains) { + evsel = evlist__id2evsel(evlist, sample->id); + return tool->callchain_deferred(tool, event, sample, + evsel, machine); + } + + list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) { + struct perf_sample orig_sample; + + ret = evlist__parse_sample(evlist, de->event, &orig_sample); + if (ret < 0) { + pr_err("failed to parse original sample\n"); + break; + } + + if (sample->tid != orig_sample.tid) + continue; + + if (event->callchain_deferred.cookie == orig_sample.deferred_cookie) + sample__merge_deferred_callchain(&orig_sample, sample); + else + orig_sample.deferred_callchain = false; + + evsel = evlist__id2evsel(evlist, orig_sample.id); + ret = evlist__deliver_sample(evlist, tool, de->event, + &orig_sample, evsel, machine); + + if (orig_sample.deferred_callchain) + free(orig_sample.callchain); + + list_del(&de->list); + free(de->event); + free(de); + + if (ret) + break; + } + return ret; +} + +/* + * This is called at the end of the data processing for the session. Flush the + * remaining samples as there's no hope for matching deferred callchains. 
+ */ +static int session__flush_deferred_samples(struct perf_session *session, + const struct perf_tool *tool) +{ + struct evlist *evlist = session->evlist; + struct machine *machine = &session->machines.host; + struct deferred_event *de, *tmp; + struct evsel *evsel; + int ret = 0; + + list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) { + struct perf_sample sample; + + ret = evlist__parse_sample(evlist, de->event, &sample); + if (ret < 0) { + pr_err("failed to parse original sample\n"); + break; + } + + evsel = evlist__id2evsel(evlist, sample.id); + ret = evlist__deliver_sample(evlist, tool, de->event, + &sample, evsel, machine); + + list_del(&de->list); + free(de->event); + free(de); + + if (ret) + break; + } + return ret; } static int machines__deliver_event(struct machines *machines, struct evlist *evlist, union perf_event *event, struct perf_sample *sample, - struct perf_tool *tool, u64 file_offset, + const struct perf_tool *tool, u64 file_offset, const char *file_path) { struct evsel *evsel; @@ -1554,6 +1413,22 @@ static int machines__deliver_event(struct machines *machines, return 0; } dump_sample(evsel, event, sample, perf_env__arch(machine->env)); + if (sample->deferred_callchain && tool->merge_deferred_callchains) { + struct deferred_event *de = malloc(sizeof(*de)); + size_t sz = event->header.size; + + if (de == NULL) + return -ENOMEM; + + de->event = malloc(sz); + if (de->event == NULL) { + free(de); + return -ENOMEM; + } + memcpy(de->event, event, sz); + list_add_tail(&de->list, &evlist->deferred_samples); + return 0; + } return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); @@ -1576,8 +1451,9 @@ static int machines__deliver_event(struct machines *machines, evlist->stats.total_lost += event->lost.lost; return tool->lost(tool, event, sample, machine); case PERF_RECORD_LOST_SAMPLES: - if (tool->lost_samples == perf_event__process_lost_samples && - !(event->header.misc & PERF_RECORD_MISC_LOST_SAMPLES_BPF)) + if (event->header.misc & PERF_RECORD_MISC_LOST_SAMPLES_BPF) + evlist->stats.total_dropped_samples += event->lost_samples.lost; + else if (tool->lost_samples == perf_event__process_lost_samples) evlist->stats.total_lost_samples += event->lost_samples.lost; return tool->lost_samples(tool, event, sample, machine); case PERF_RECORD_READ: @@ -1610,6 +1486,10 @@ static int machines__deliver_event(struct machines *machines, return tool->text_poke(tool, event, sample, machine); case PERF_RECORD_AUX_OUTPUT_HW_ID: return tool->aux_output_hw_id(tool, event, sample, machine); + case PERF_RECORD_CALLCHAIN_DEFERRED: + dump_deferred_callchain(evsel, event, sample); + return evlist__deliver_deferred_callchain(evlist, tool, event, + sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; @@ -1618,30 +1498,35 @@ static int machines__deliver_event(struct machines *machines, static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool, + const struct perf_tool *tool, u64 file_offset, const char *file_path) { struct perf_sample sample; - int ret = evlist__parse_sample(session->evlist, event, &sample); + int ret; + perf_sample__init(&sample, /*all=*/false); + ret = evlist__parse_sample(session->evlist, event, &sample); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); - return ret; + goto out; } ret = auxtrace__process_event(session, event, &sample, tool); if (ret < 0) - return ret; - if (ret > 0) - return 
0; + goto out; + if (ret > 0) { + ret = 0; + goto out; + } ret = machines__deliver_event(&session->machines, session->evlist, event, &sample, tool, file_offset, file_path); if (dump_trace && sample.aux_sample.size) auxtrace__dump_auxtrace_sample(session, &sample); - +out: + perf_sample__exit(&sample); return ret; } @@ -1651,13 +1536,15 @@ static s64 perf_session__process_user_event(struct perf_session *session, const char *file_path) { struct ordered_events *oe = &session->ordered_events; - struct perf_tool *tool = session->tool; - struct perf_sample sample = { .time = 0, }; + const struct perf_tool *tool = session->tool; + struct perf_sample sample; int fd = perf_data__fd(session->data); - int err; + s64 err; - if (event->header.type != PERF_RECORD_COMPRESSED || - tool->compressed == perf_session__process_compressed_event_stub) + perf_sample__init(&sample, /*all=*/true); + if ((event->header.type != PERF_RECORD_COMPRESSED && + event->header.type != PERF_RECORD_COMPRESSED2) || + perf_tool__compressed_is_stub(tool)) dump_event(session->evlist, event, file_offset, &sample, file_path); /* These events are processed right away */ @@ -1668,15 +1555,17 @@ static s64 perf_session__process_user_event(struct perf_session *session, perf_session__set_id_hdr_size(session); perf_session__set_comm_exec(session); } - return err; + break; case PERF_RECORD_EVENT_UPDATE: - return tool->event_update(tool, event, &session->evlist); + err = tool->event_update(tool, event, &session->evlist); + break; case PERF_RECORD_HEADER_EVENT_TYPE: /* * Deprecated, but we need to handle it for the sake * of old data files created in pipe mode. */ - return 0; + err = 0; + break; case PERF_RECORD_HEADER_TRACING_DATA: /* * Setup for reading amidst mmap, but only when we @@ -1685,15 +1574,20 @@ static s64 perf_session__process_user_event(struct perf_session *session, */ if (!perf_data__is_pipe(session->data)) lseek(fd, file_offset, SEEK_SET); - return tool->tracing_data(session, event); + err = tool->tracing_data(tool, session, event); + break; case PERF_RECORD_HEADER_BUILD_ID: - return tool->build_id(session, event); + err = tool->build_id(tool, session, event); + break; case PERF_RECORD_FINISHED_ROUND: - return tool->finished_round(tool, event, oe); + err = tool->finished_round(tool, event, oe); + break; case PERF_RECORD_ID_INDEX: - return tool->id_index(session, event); + err = tool->id_index(tool, session, event); + break; case PERF_RECORD_AUXTRACE_INFO: - return tool->auxtrace_info(session, event); + err = tool->auxtrace_info(tool, session, event); + break; case PERF_RECORD_AUXTRACE: /* * Setup for reading amidst mmap, but only when we @@ -1702,35 +1596,52 @@ static s64 perf_session__process_user_event(struct perf_session *session, */ if (!perf_data__is_pipe(session->data)) lseek(fd, file_offset + event->header.size, SEEK_SET); - return tool->auxtrace(session, event); + err = tool->auxtrace(tool, session, event); + break; case PERF_RECORD_AUXTRACE_ERROR: perf_session__auxtrace_error_inc(session, event); - return tool->auxtrace_error(session, event); + err = tool->auxtrace_error(tool, session, event); + break; case PERF_RECORD_THREAD_MAP: - return tool->thread_map(session, event); + err = tool->thread_map(tool, session, event); + break; case PERF_RECORD_CPU_MAP: - return tool->cpu_map(session, event); + err = tool->cpu_map(tool, session, event); + break; case PERF_RECORD_STAT_CONFIG: - return tool->stat_config(session, event); + err = tool->stat_config(tool, session, event); + break; case PERF_RECORD_STAT: - return
tool->stat(session, event); + err = tool->stat(tool, session, event); + break; case PERF_RECORD_STAT_ROUND: - return tool->stat_round(session, event); + err = tool->stat_round(tool, session, event); + break; case PERF_RECORD_TIME_CONV: session->time_conv = event->time_conv; - return tool->time_conv(session, event); + err = tool->time_conv(tool, session, event); + break; case PERF_RECORD_HEADER_FEATURE: - return tool->feature(session, event); + err = tool->feature(tool, session, event); + break; case PERF_RECORD_COMPRESSED: - err = tool->compressed(session, event, file_offset, file_path); + case PERF_RECORD_COMPRESSED2: + err = tool->compressed(tool, session, event, file_offset, file_path); if (err) dump_event(session->evlist, event, file_offset, &sample, file_path); - return err; + break; case PERF_RECORD_FINISHED_INIT: - return tool->finished_init(session, event); + err = tool->finished_init(tool, session, event); + break; + case PERF_RECORD_BPF_METADATA: + err = tool->bpf_metadata(tool, session, event); + break; default: - return -EINVAL; + err = -EINVAL; + break; } + perf_sample__exit(&sample); + return err; } int perf_session__deliver_synth_event(struct perf_session *session, @@ -1738,7 +1649,7 @@ int perf_session__deliver_synth_event(struct perf_session *session, struct perf_sample *sample) { struct evlist *evlist = session->evlist; - struct perf_tool *tool = session->tool; + const struct perf_tool *tool = session->tool; events_stats__inc(&evlist->stats, event->header.type); @@ -1748,6 +1659,30 @@ int perf_session__deliver_synth_event(struct perf_session *session, return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0, NULL); } +int perf_session__deliver_synth_attr_event(struct perf_session *session, + const struct perf_event_attr *attr, + u64 id) +{ + union { + struct { + struct perf_record_header_attr attr; + u64 ids[1]; + } attr_id; + union perf_event ev; + } ev = { + .attr_id.attr.header.type = PERF_RECORD_HEADER_ATTR, + .attr_id.attr.header.size = sizeof(ev.attr_id), + .attr_id.ids[0] = id, + }; + + if (attr->size != sizeof(ev.attr_id.attr.attr)) { + pr_debug("Unexpected perf_event_attr size\n"); + return -EINVAL; + } + ev.attr_id.attr.attr = *attr; + return perf_session__deliver_synth_event(session, &ev.ev, NULL); +} + static void event_swap(union perf_event *event, bool sample_id_all) { perf_event__swap_op swap; @@ -1846,14 +1781,23 @@ static s64 perf_session__process_event(struct perf_session *session, const char *file_path) { struct evlist *evlist = session->evlist; - struct perf_tool *tool = session->tool; + const struct perf_tool *tool = session->tool; int ret; if (session->header.needs_swap) event_swap(event, evlist__sample_id_all(evlist)); - if (event->header.type >= PERF_RECORD_HEADER_MAX) - return -EINVAL; + if (event->header.type >= PERF_RECORD_HEADER_MAX) { + /* perf should not support unaligned event, stop here. */ + if (event->header.size % sizeof(u64)) + return -EINVAL; + + /* This perf is outdated and does not support the latest event type. */ + ui__warning("Unsupported header type %u, please consider updating perf.\n", + event->header.type); + /* Skip unsupported event by returning its size. 
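+ * so a newer data file stays (partially) readable instead of aborting the session.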
*/ + return event->header.size; + } events_stats__inc(&evlist->stats, event->header.type); @@ -2033,7 +1977,8 @@ static int __perf_session__process_decomp_events(struct perf_session *session); static int __perf_session__process_pipe_events(struct perf_session *session) { struct ordered_events *oe = &session->ordered_events; - struct perf_tool *tool = session->tool; + const struct perf_tool *tool = session->tool; + struct ui_progress prog; union perf_event *event; uint32_t size, cur_size = 0; void *buf = NULL; @@ -2041,8 +1986,18 @@ static int __perf_session__process_pipe_events(struct perf_session *session) u64 head; ssize_t err; void *p; + bool update_prog = false; - perf_tool__fill_defaults(tool); + /* + * If it's from a file saving pipe data (by redirection), it would have + * a file name other than "-". Then we can get the total size and show + * the progress. + */ + if (strcmp(session->data->path, "-") && session->data->file.size) { + ui_progress__init_size(&prog, session->data->file.size, + "Processing events..."); + update_prog = true; + } head = 0; cur_size = sizeof(union perf_event); @@ -2115,6 +2070,9 @@ more: if (err) goto out_err; + if (update_prog) + ui_progress__update(&prog, size); + if (!session_done()) goto more; done: @@ -2122,12 +2080,17 @@ done: err = ordered_events__flush(oe, OE_FLUSH__FINAL); if (err) goto out_err; + err = session__flush_deferred_samples(session, tool); + if (err) + goto out_err; err = auxtrace__flush_events(session, tool); if (err) goto out_err; err = perf_session__flush_thread_stacks(session); out_err: free(buf); + if (update_prog) + ui_progress__finish(); if (!tool->no_warn) perf_session__warn_about_errors(session); ordered_events__free(&session->ordered_events); @@ -2447,12 +2410,10 @@ static int __perf_session__process_events(struct perf_session *session) .in_place_update = session->data->in_place_update, }; struct ordered_events *oe = &session->ordered_events; - struct perf_tool *tool = session->tool; + const struct perf_tool *tool = session->tool; struct ui_progress prog; int err; - perf_tool__fill_defaults(tool); - if (rd.data_size == 0) return -1; @@ -2468,6 +2429,9 @@ static int __perf_session__process_events(struct perf_session *session) err = auxtrace__flush_events(session, tool); if (err) goto out_err; + err = session__flush_deferred_samples(session, tool); + if (err) + goto out_err; err = perf_session__flush_thread_stacks(session); out_err: ui_progress__finish(); @@ -2499,15 +2463,13 @@ out_err: static int __perf_session__process_dir_events(struct perf_session *session) { struct perf_data *data = session->data; - struct perf_tool *tool = session->tool; + const struct perf_tool *tool = session->tool; int i, ret, readers, nr_readers; struct ui_progress prog; u64 total_size = perf_data__size(session->data); struct reader *rd; - perf_tool__fill_defaults(tool); - - ui_progress__init_size(&prog, total_size, "Sorting events..."); + ui_progress__init_size(&prog, total_size, "Processing events..."); nr_readers = 1; for (i = 0; i < data->dir.nr; i++) { @@ -2590,6 +2552,10 @@ static int __perf_session__process_dir_events(struct perf_session *session) if (ret) goto out_err; + ret = session__flush_deferred_samples(session, tool); + if (ret) + goto out_err; + ret = perf_session__flush_thread_stacks(session); out_err: ui_progress__finish(); @@ -2640,6 +2606,18 @@ bool perf_session__has_traces(struct perf_session *session, const char *msg) return false; } +bool perf_session__has_switch_events(struct perf_session *session) +{ + struct evsel *evsel; + + 
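/* attr.context_switch is set for events recorded with context switches enabled, e.g. 'perf record --switch-events'. */ +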
evlist__for_each_entry(session->evlist, evsel) { + if (evsel->core.attr.context_switch) + return true; + } + + return false; +} + int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr) { char *bracket; @@ -2680,8 +2658,7 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp return machines__fprintf_dsos_buildid(&session->machines, fp, skip, parm); } -size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp, - bool skip_empty) +size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) { size_t ret; const char *msg = ""; @@ -2691,7 +2668,7 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp, ret = fprintf(fp, "\nAggregated stats:%s\n", msg); - ret += events_stats__fprintf(&session->evlist->stats, fp, skip_empty); + ret += events_stats__fprintf(&session->evlist->stats, fp); return ret; } @@ -2704,6 +2681,17 @@ size_t perf_session__fprintf(struct perf_session *session, FILE *fp) return machine__fprintf(&session->machines.host, fp); } +void perf_session__dump_kmaps(struct perf_session *session) +{ + int save_verbose = verbose; + + fflush(stdout); + fprintf(stderr, "Kernel and module maps:\n"); + verbose = 0; /* Suppress verbose to print a summary only */ + maps__fprintf(machine__kernel_maps(&session->machines.host), stderr); + verbose = save_verbose; +} + struct evsel *perf_session__find_first_evtype(struct perf_session *session, unsigned int type) { @@ -2721,7 +2709,8 @@ int perf_session__cpu_bitmap(struct perf_session *session, { int i, err = -1; struct perf_cpu_map *map; - int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS); + int nr_cpus = min(perf_session__env(session)->nr_cpus_avail, MAX_NR_CPUS); + struct perf_cpu cpu; for (i = 0; i < PERF_TYPE_MAX; ++i) { struct evsel *evsel; @@ -2743,9 +2732,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, return -1; } - for (i = 0; i < perf_cpu_map__nr(map); i++) { - struct perf_cpu cpu = perf_cpu_map__cpu(map, i); - + perf_cpu_map__for_each_cpu(cpu, i, map) { if (cpu.cpu >= nr_cpus) { pr_err("Requested CPU %d too large. 
" "Consider raising MAX_NR_CPUS\n", cpu.cpu); @@ -2807,7 +2794,8 @@ static int perf_session__set_guest_cpu(struct perf_session *session, pid_t pid, return 0; } -int perf_event__process_id_index(struct perf_session *session, +int perf_event__process_id_index(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { struct evlist *evlist = session->evlist; @@ -2890,3 +2878,29 @@ int perf_event__process_id_index(struct perf_session *session, } return 0; } + +int perf_session__dsos_hit_all(struct perf_session *session) +{ + struct rb_node *nd; + int err; + + err = machine__hit_all_dsos(&session->machines.host); + if (err) + return err; + + for (nd = rb_first_cached(&session->machines.guests); nd; + nd = rb_next(nd)) { + struct machine *pos = rb_entry(nd, struct machine, rb_node); + + err = machine__hit_all_dsos(pos); + if (err) + return err; + } + + return 0; +} + +struct perf_env *perf_session__env(struct perf_session *session) +{ + return &session->header.env; +} diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index ee3715e8563b..22d3ff877e83 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -26,26 +26,68 @@ struct decomp_data { struct zstd_data *zstd_decomp; }; +/** + * struct perf_session- A Perf session holds the main state when the program is + * working with live perf events or reading data from an input file. + * + * The rough organization of a perf_session is: + * ``` + * +--------------+ +-----------+ +------------+ + * | Session |1..* ----->| Machine |1..* ----->| Thread | + * +--------------+ +-----------+ +------------+ + * ``` + */ struct perf_session { + /** + * @header: The read version of a perf_file_header, or captures global + * information from a live session. + */ struct perf_header header; + /** @machines: Machines within the session a host and 0 or more guests. */ struct machines machines; + /** @evlist: List of evsels/events of the session. */ struct evlist *evlist; - struct auxtrace *auxtrace; + /** @auxtrace: callbacks to allow AUX area data decoding. */ + const struct auxtrace *auxtrace; + /** @itrace_synth_opts: AUX area tracing synthesis options. */ struct itrace_synth_opts *itrace_synth_opts; + /** @auxtrace_index: index of AUX area tracing events within a perf.data file. */ struct list_head auxtrace_index; #ifdef HAVE_LIBTRACEEVENT + /** @tevent: handles for libtraceevent and plugins. */ struct trace_event tevent; #endif + /** @time_conv: Holds contents of last PERF_RECORD_TIME_CONV event. */ struct perf_record_time_conv time_conv; - bool repipe; + /** @trace_event_repipe: When set causes read trace events to be written to stdout. */ + bool trace_event_repipe; + /** + * @one_mmap: The reader will use a single mmap by default. There may be + * multiple data files in particular for aux events. If this is true + * then the single big mmap for the data file can be assumed. + */ bool one_mmap; + /** @one_mmap_addr: Address of initial perf data file reader mmap. */ void *one_mmap_addr; + /** @one_mmap_offset: File offset in perf.data file when mapped. */ u64 one_mmap_offset; + /** @ordered_events: Used to turn unordered events into ordered ones. */ struct ordered_events ordered_events; + /** @data: Optional perf data file being read from. */ struct perf_data *data; - struct perf_tool *tool; + /** @tool: callbacks for event handling. */ + const struct perf_tool *tool; + /** + * @bytes_transferred: Used by perf record to count written bytes before + * compression. 
+ */ u64 bytes_transferred; + /** + * @bytes_compressed: Used by perf record to count written bytes after + * compression. + */ u64 bytes_compressed; + /** @zstd_data: Owner of global compression state, buffers, etc. */ struct zstd_data zstd_data; struct decomp_data decomp_data; struct decomp_data *active_decomp; @@ -64,13 +106,14 @@ struct decomp { struct perf_tool; struct perf_session *__perf_session__new(struct perf_data *data, - bool repipe, int repipe_fd, - struct perf_tool *tool); + struct perf_tool *tool, + bool trace_event_repipe, + struct perf_env *host_env); static inline struct perf_session *perf_session__new(struct perf_data *data, struct perf_tool *tool) { - return __perf_session__new(data, false, -1, tool); + return __perf_session__new(data, tool, /*trace_event_repipe=*/false, /*host_env=*/NULL); } void perf_session__delete(struct perf_session *session); @@ -92,8 +135,6 @@ int perf_session__process_events(struct perf_session *session); int perf_session__queue_event(struct perf_session *s, union perf_event *event, u64 timestamp, u64 file_offset, const char *file_path); -void perf_tool__fill_defaults(struct perf_tool *tool); - int perf_session__resolve_callchain(struct perf_session *session, struct evsel *evsel, struct thread *thread, @@ -101,6 +142,7 @@ int perf_session__resolve_callchain(struct perf_session *session, struct symbol **parent); bool perf_session__has_traces(struct perf_session *session, const char *msg); +bool perf_session__has_switch_events(struct perf_session *session); void perf_event__attr_swap(struct perf_event_attr *attr); @@ -130,8 +172,9 @@ size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp); size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp, bool (fn)(struct dso *dso, int parm), int parm); -size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp, - bool skip_empty); +size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp); + +void perf_session__dump_kmaps(struct perf_session *session); struct evsel *perf_session__find_first_evtype(struct perf_session *session, unsigned int type); @@ -153,12 +196,20 @@ extern volatile int session_done; int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample); +int perf_session__deliver_synth_attr_event(struct perf_session *session, + const struct perf_event_attr *attr, + u64 id); -int perf_event__process_id_index(struct perf_session *session, +int perf_session__dsos_hit_all(struct perf_session *session); + +int perf_event__process_id_index(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); -int perf_event__process_finished_round(struct perf_tool *tool, +int perf_event__process_finished_round(const struct perf_tool *tool, union perf_event *event, struct ordered_events *oe); +struct perf_env *perf_session__env(struct perf_session *session); + #endif /* __PERF_SESSION_H */ diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 869738fc06c3..b65b1792ca05 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -1,8 +1,10 @@ from os import getenv, path from subprocess import Popen, PIPE from re import sub +import shlex cc = getenv("CC") +assert cc, "Environment variable CC not set" # Check if CC has options, as is the case in yocto, where it uses CC="cc --sysroot..." 
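# In that case the first token is the compiler itself and the rest are options that must be passed along to every invocation.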
cc_tokens = cc.split() @@ -12,12 +14,26 @@ if len(cc_tokens) > 1: else: cc_options = "" +# ignore optional stderr could be None as it is set to PIPE to avoid that. +# mypy: disable-error-code="union-attr" cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline() -src_feature_tests = getenv('srctree') + '/tools/build/feature' + +srctree = getenv('srctree') +assert srctree, "Environment variable srctree, for the Linux sources, not set" +src_feature_tests = f'{srctree}/tools/build/feature' def clang_has_option(option): - cc_output = Popen([cc, cc_options + option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines() - return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ] + error_substrings = ( + b"unknown argument", + b"is not supported", + b"unknown warning option" + ) + cmd = shlex.split(f"{cc} {cc_options} {option}") + [ + "-o", "/dev/null", + path.join(src_feature_tests, "test-hello.c") + ] + cc_output = Popen(cmd, stderr=PIPE).stderr.readlines() + return not any(any(error in line for error in error_substrings) for line in cc_output) if cc_is_clang: from sysconfig import get_config_vars @@ -60,44 +76,25 @@ class install_lib(_install_lib): cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) -cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls', '-DPYTHON_PERF' ] +cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ] if cc_is_clang: cflags += ["-Wno-unused-command-line-argument" ] + if clang_has_option("-Wno-cast-function-type-mismatch"): + cflags += ["-Wno-cast-function-type-mismatch" ] else: cflags += ['-Wno-cast-function-type' ] -src_perf = getenv('srctree') + '/tools/perf' +# The python headers have mixed code with declarations (decls after asserts, for instance) +cflags += [ "-Wno-declaration-after-statement" ] + +src_perf = f'{srctree}/tools/perf' build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') -libtraceevent = getenv('LIBTRACEEVENT') -libapikfs = getenv('LIBAPI') -libperf = getenv('LIBPERF') - -ext_sources = [f.strip() for f in open('util/python-ext-sources') - if len(f.strip()) > 0 and f[0] != '#'] - -extra_libraries = [] - -if '-DHAVE_LIBTRACEEVENT' in cflags: - extra_libraries += [ 'traceevent' ] -else: - ext_sources.remove('util/trace-event.c') - -# use full paths with source files -ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)) - -if '-DHAVE_LIBNUMA_SUPPORT' in cflags: - extra_libraries += [ 'numa' ] -if '-DHAVE_LIBCAP_SUPPORT' in cflags: - extra_libraries += [ 'cap' ] perf = Extension('perf', - sources = ext_sources, - include_dirs = ['util/include'], - libraries = extra_libraries, - extra_compile_args = cflags, - extra_objects = [ x for x in [libtraceevent, libapikfs, libperf] - if x is not None], + sources = [ src_perf + '/util/python.c' ], + include_dirs = ['util/include'], + extra_compile_args = cflags, ) setup(name='perf', diff --git a/tools/perf/util/sha1.c b/tools/perf/util/sha1.c new file mode 100644 index 000000000000..7032fa4ff3fd --- /dev/null +++ b/tools/perf/util/sha1.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * SHA-1 message digest algorithm + * + * Copyright 2025 Google LLC + */ +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/unaligned.h> +#include <string.h> + +#include "sha1.h" + +#define SHA1_BLOCK_SIZE 
64 + +static const u32 sha1_K[4] = { 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6 }; + +#define SHA1_ROUND(i, a, b, c, d, e) \ + do { \ + if ((i) >= 16) \ + w[i] = rol32(w[(i) - 16] ^ w[(i) - 14] ^ w[(i) - 8] ^ \ + w[(i) - 3], \ + 1); \ + e += w[i] + rol32(a, 5) + sha1_K[(i) / 20]; \ + if ((i) < 20) \ + e += (b & (c ^ d)) ^ d; \ + else if ((i) < 40 || (i) >= 60) \ + e += b ^ c ^ d; \ + else \ + e += (c & d) ^ (b & (c ^ d)); \ + b = rol32(b, 30); \ + /* The new (a, b, c, d, e) is the old (e, a, b, c, d). */ \ + } while (0) + +#define SHA1_5ROUNDS(i) \ + do { \ + SHA1_ROUND((i) + 0, a, b, c, d, e); \ + SHA1_ROUND((i) + 1, e, a, b, c, d); \ + SHA1_ROUND((i) + 2, d, e, a, b, c); \ + SHA1_ROUND((i) + 3, c, d, e, a, b); \ + SHA1_ROUND((i) + 4, b, c, d, e, a); \ + } while (0) + +#define SHA1_20ROUNDS(i) \ + do { \ + SHA1_5ROUNDS((i) + 0); \ + SHA1_5ROUNDS((i) + 5); \ + SHA1_5ROUNDS((i) + 10); \ + SHA1_5ROUNDS((i) + 15); \ + } while (0) + +static void sha1_blocks(u32 h[5], const u8 *data, size_t nblocks) +{ + while (nblocks--) { + u32 a = h[0]; + u32 b = h[1]; + u32 c = h[2]; + u32 d = h[3]; + u32 e = h[4]; + u32 w[80]; + + for (int i = 0; i < 16; i++) + w[i] = get_unaligned_be32(&data[i * 4]); + SHA1_20ROUNDS(0); + SHA1_20ROUNDS(20); + SHA1_20ROUNDS(40); + SHA1_20ROUNDS(60); + + h[0] += a; + h[1] += b; + h[2] += c; + h[3] += d; + h[4] += e; + data += SHA1_BLOCK_SIZE; + } +} + +/* Calculate the SHA-1 message digest of the given data. */ +void sha1(const void *data, size_t len, u8 out[SHA1_DIGEST_SIZE]) +{ + u32 h[5] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, + 0xC3D2E1F0 }; + u8 final_data[2 * SHA1_BLOCK_SIZE] = { 0 }; + size_t final_len = len % SHA1_BLOCK_SIZE; + + sha1_blocks(h, data, len / SHA1_BLOCK_SIZE); + + memcpy(final_data, data + len - final_len, final_len); + final_data[final_len] = 0x80; + final_len = round_up(final_len + 9, SHA1_BLOCK_SIZE); + put_unaligned_be64((u64)len * 8, &final_data[final_len - 8]); + + sha1_blocks(h, final_data, final_len / SHA1_BLOCK_SIZE); + + for (int i = 0; i < 5; i++) + put_unaligned_be32(h[i], &out[i * 4]); +} diff --git a/tools/perf/util/sha1.h b/tools/perf/util/sha1.h new file mode 100644 index 000000000000..e92c9966e1d5 --- /dev/null +++ b/tools/perf/util/sha1.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include <linux/types.h> + +#define SHA1_DIGEST_SIZE 20 + +void sha1(const void *data, size_t len, u8 out[SHA1_DIGEST_SIZE]); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6aa1c7f2b444..f3a565b0e230 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -23,7 +23,9 @@ #include "strlist.h" #include "strbuf.h" #include "mem-events.h" +#include "mem-info.h" #include "annotate.h" +#include "annotate-data.h" #include "event.h" #include "time-utils.h" #include "cgroup.h" @@ -33,7 +35,7 @@ #include <linux/string.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif regex_t parent_regex; @@ -128,7 +130,7 @@ static int hist_entry__thread_filter(struct hist_entry *he, int type, const void if (type != HIST_FILTER__THREAD) return -1; - return th && RC_CHK_ACCESS(he->thread) != RC_CHK_ACCESS(th); + return th && !RC_CHK_EQUAL(he->thread, th); } struct sort_entry sort_thread = { @@ -139,6 +141,43 @@ struct sort_entry sort_thread = { .se_width_idx = HISTC_THREAD, }; +/* --sort tgid */ + +static int64_t +sort__tgid_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return thread__pid(right->thread) - thread__pid(left->thread); +} + +static int 
hist_entry__tgid_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + int tgid = thread__pid(he->thread); + const char *comm = NULL; + + /* display comm of the thread-group leader */ + if (thread__pid(he->thread) == thread__tid(he->thread)) { + comm = thread__comm_str(he->thread); + } else { + struct maps *maps = thread__maps(he->thread); + struct thread *leader = machine__find_thread(maps__machine(maps), + tgid, tgid); + if (leader) { + comm = thread__comm_str(leader); + thread__put(leader); + } + } + width = max(7U, width) - 8; + return repsep_snprintf(bf, size, "%7d:%-*.*s", tgid, width, width, comm ?: ""); +} + +struct sort_entry sort_tgid = { + .se_header = " Tgid:Command", + .se_cmp = sort__tgid_cmp, + .se_snprintf = hist_entry__tgid_snprintf, + .se_width_idx = HISTC_TGID, +}; + /* --sort simd */ static int64_t @@ -238,11 +277,11 @@ static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r) return cmp_null(dso_r, dso_l); if (verbose > 0) { - dso_name_l = dso_l->long_name; - dso_name_r = dso_r->long_name; + dso_name_l = dso__long_name(dso_l); + dso_name_r = dso__long_name(dso_r); } else { - dso_name_l = dso_l->short_name; - dso_name_r = dso_r->short_name; + dso_name_l = dso__short_name(dso_l); + dso_name_r = dso__short_name(dso_r); } return strcmp(dso_name_l, dso_name_r); @@ -261,7 +300,7 @@ static int _hist_entry__dso_snprintf(struct map *map, char *bf, const char *dso_name = "[unknown]"; if (dso) - dso_name = verbose > 0 ? dso->long_name : dso->short_name; + dso_name = verbose > 0 ? dso__long_name(dso) : dso__short_name(dso); return repsep_snprintf(bf, size, "%-*.*s", width, width, dso_name); } @@ -332,7 +371,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) * comparing symbol address alone is not enough since it's a * relative address within a dso. */ - if (!hists__has(left->hists, dso) || hists__has(right->hists, dso)) { + if (!hists__has(left->hists, dso)) { ret = sort__dso_cmp(left, right); if (ret != 0) return ret; @@ -363,7 +402,7 @@ static int _hist_entry__sym_snprintf(struct map_symbol *ms, char o = dso ? 
dso__symtab_origin(dso) : '!'; u64 rip = ip; - if (dso && dso->kernel && dso->adjust_symbols) + if (dso && dso__kernel(dso) && dso__adjust_symbols(dso)) rip = map__unmap_ip(map, ip); ret += repsep_snprintf(bf, size, "%-#*llx %c ", @@ -418,6 +457,52 @@ struct sort_entry sort_sym = { .se_width_idx = HISTC_SYMBOL, }; +/* --sort symoff */ + +static int64_t +sort__symoff_cmp(struct hist_entry *left, struct hist_entry *right) +{ + int64_t ret; + + ret = sort__sym_cmp(left, right); + if (ret) + return ret; + + return left->ip - right->ip; +} + +static int64_t +sort__symoff_sort(struct hist_entry *left, struct hist_entry *right) +{ + int64_t ret; + + ret = sort__sym_sort(left, right); + if (ret) + return ret; + + return left->ip - right->ip; +} + +static int +hist_entry__symoff_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) +{ + struct symbol *sym = he->ms.sym; + + if (sym == NULL) + return repsep_snprintf(bf, size, "[%c] %-#.*llx", he->level, width - 4, he->ip); + + return repsep_snprintf(bf, size, "[%c] %s+0x%llx", he->level, sym->name, he->ip - sym->start); +} + +struct sort_entry sort_sym_offset = { + .se_header = "Symbol Offset", + .se_cmp = sort__symoff_cmp, + .se_sort = sort__symoff_sort, + .se_snprintf = hist_entry__symoff_snprintf, + .se_filter = hist_entry__sym_filter, + .se_width_idx = HISTC_SYMBOL_OFFSET, +}; + /* --sort srcline */ char *hist_entry__srcline(struct hist_entry *he) @@ -583,21 +668,21 @@ static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf, { struct symbol *sym = he->ms.sym; - struct annotation *notes; + struct annotated_branch *branch; double ipc = 0.0, coverage = 0.0; char tmp[64]; if (!sym) return repsep_snprintf(bf, size, "%-*s", width, "-"); - notes = symbol__annotation(sym); + branch = symbol__annotation(sym)->branch; - if (notes->hit_cycles) - ipc = notes->hit_insn / ((double)notes->hit_cycles); + if (branch && branch->hit_cycles) + ipc = branch->hit_insn / ((double)branch->hit_cycles); - if (notes->total_insn) { - coverage = notes->cover_insn * 100.0 / - ((double)notes->total_insn); + if (branch && branch->total_insn) { + coverage = branch->cover_insn * 100.0 / + ((double)branch->total_insn); } snprintf(tmp, sizeof(tmp), "%-5.2f [%5.1f%%]", ipc, coverage); @@ -629,6 +714,102 @@ struct sort_entry sort_sym_ipc_null = { .se_width_idx = HISTC_SYMBOL_IPC, }; +/* --sort callchain_branch_predicted */ + +static int64_t +sort__callchain_branch_predicted_cmp(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int hist_entry__callchain_branch_predicted_snprintf( + struct hist_entry *he, char *bf, size_t size, unsigned int width) +{ + u64 branch_count, predicted_count; + double percent = 0.0; + char str[32]; + + callchain_branch_counts(he->callchain, &branch_count, + &predicted_count, NULL, NULL); + + if (branch_count) + percent = predicted_count * 100.0 / branch_count; + + snprintf(str, sizeof(str), "%.1f%%", percent); + return repsep_snprintf(bf, size, "%-*.*s", width, width, str); +} + +struct sort_entry sort_callchain_branch_predicted = { + .se_header = "Predicted", + .se_cmp = sort__callchain_branch_predicted_cmp, + .se_snprintf = hist_entry__callchain_branch_predicted_snprintf, + .se_width_idx = HISTC_CALLCHAIN_BRANCH_PREDICTED, +}; + +/* --sort callchain_branch_abort */ + +static int64_t +sort__callchain_branch_abort_cmp(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int 
hist_entry__callchain_branch_abort_snprintf(struct hist_entry *he, + char *bf, size_t size, + unsigned int width) +{ + u64 branch_count, abort_count; + char str[32]; + + callchain_branch_counts(he->callchain, &branch_count, + NULL, &abort_count, NULL); + + snprintf(str, sizeof(str), "%" PRId64, abort_count); + return repsep_snprintf(bf, size, "%-*.*s", width, width, str); +} + +struct sort_entry sort_callchain_branch_abort = { + .se_header = "Abort", + .se_cmp = sort__callchain_branch_abort_cmp, + .se_snprintf = hist_entry__callchain_branch_abort_snprintf, + .se_width_idx = HISTC_CALLCHAIN_BRANCH_ABORT, +}; + +/* --sort callchain_branch_cycles */ + +static int64_t +sort__callchain_branch_cycles_cmp(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int hist_entry__callchain_branch_cycles_snprintf(struct hist_entry *he, + char *bf, size_t size, + unsigned int width) +{ + u64 branch_count, cycles_count, cycles = 0; + char str[32]; + + callchain_branch_counts(he->callchain, &branch_count, + NULL, NULL, &cycles_count); + + if (branch_count) + cycles = cycles_count / branch_count; + + snprintf(str, sizeof(str), "%" PRId64 "", cycles); + return repsep_snprintf(bf, size, "%-*.*s", width, width, str); +} + +struct sort_entry sort_callchain_branch_cycles = { + .se_header = "Cycles", + .se_cmp = sort__callchain_branch_cycles_cmp, + .se_snprintf = hist_entry__callchain_branch_cycles_snprintf, + .se_width_idx = HISTC_CALLCHAIN_BRANCH_CYCLES, +}; + /* --sort srcfile */ static char no_srcfile[1]; @@ -748,6 +929,38 @@ struct sort_entry sort_cpu = { .se_width_idx = HISTC_CPU, }; +/* --sort parallelism */ + +static int64_t +sort__parallelism_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->parallelism - left->parallelism; +} + +static int hist_entry__parallelism_filter(struct hist_entry *he, int type, const void *arg) +{ + const unsigned long *parallelism_filter = arg; + + if (type != HIST_FILTER__PARALLELISM) + return -1; + + return test_bit(he->parallelism, parallelism_filter); +} + +static int hist_entry__parallelism_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%*d", width, he->parallelism); +} + +struct sort_entry sort_parallelism = { + .se_header = "Parallelism", + .se_cmp = sort__parallelism_cmp, + .se_filter = hist_entry__parallelism_filter, + .se_snprintf = hist_entry__parallelism_snprintf, + .se_width_idx = HISTC_PARALLELISM, +}; + /* --sort cgroup_id */ static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev) @@ -894,17 +1107,19 @@ static char *get_trace_output(struct hist_entry *he) .data = he->raw_data, .size = he->raw_size, }; + struct tep_event *tp_format; evsel = hists_to_evsel(he->hists); trace_seq_init(&seq); - if (symbol_conf.raw_trace) { - tep_print_fields(&seq, he->raw_data, he->raw_size, - evsel->tp_format); - } else { - tep_print_event(evsel->tp_format->tep, - &seq, &rec, "%s", TEP_PRINT_INFO); + tp_format = evsel__tp_format(evsel); + if (tp_format) { + if (symbol_conf.raw_trace) + tep_print_fields(&seq, he->raw_data, he->raw_size, tp_format); + else + tep_print_event(tp_format->tep, &seq, &rec, "%s", TEP_PRINT_INFO); } + /* * Trim the buffer, it starts at 4KB and we're not going to * add anything more to this buffer. 
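Worth noting between these hunks: the rest of the sort.c diff mechanically replaces direct struct member accesses (he->mem_info->daddr.addr, dso->long_name, ...) with accessor calls (mem_info__daddr(), dso__long_name(), ...); sort.c now includes mem-info.h for them. Below is a minimal sketch of the accessor shape, assuming a simplified non-reference-checked layout for illustration only; the real definitions sit behind the RC_CHK helpers in tools/perf/util/mem-info.h.

```c
/*
 * Sketch only, not the upstream definition: the real struct mem_info is
 * wrapped by RC_CHK_ACCESS() so debug builds can catch reference-count
 * misuse; a plain layout is assumed here for illustration.
 */
#include <linux/refcount.h>
#include <linux/perf_event.h>	/* union perf_mem_data_src */
#include "map_symbol.h"		/* struct addr_map_symbol */

struct mem_info {
	struct addr_map_symbol	daddr;		/* data (load/store) address */
	struct addr_map_symbol	iaddr;		/* instruction address */
	union perf_mem_data_src	data_src;	/* level/snoop/TLB/lock info */
	refcount_t		refcnt;
};

static inline struct addr_map_symbol *mem_info__daddr(struct mem_info *mi)
{
	return &mi->daddr;	/* upstream: &RC_CHK_ACCESS(mi)->daddr */
}

static inline struct addr_map_symbol *mem_info__iaddr(struct mem_info *mi)
{
	return &mi->iaddr;
}

static inline union perf_mem_data_src *mem_info__data_src(struct mem_info *mi)
{
	return &mi->data_src;
}
```

With that shape, sort__daddr_cmp() below reads mem_info__daddr(left->mem_info)->addr instead of left->mem_info->daddr.addr, funneling every access through one point that can be re-pointed or instrumented without touching the many call sites updated in the following hunks.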
@@ -1317,9 +1532,9 @@ sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right) uint64_t l = 0, r = 0; if (left->mem_info) - l = left->mem_info->daddr.addr; + l = mem_info__daddr(left->mem_info)->addr; if (right->mem_info) - r = right->mem_info->daddr.addr; + r = mem_info__daddr(right->mem_info)->addr; return (int64_t)(r - l); } @@ -1331,8 +1546,8 @@ static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf, struct map_symbol *ms = NULL; if (he->mem_info) { - addr = he->mem_info->daddr.addr; - ms = &he->mem_info->daddr.ms; + addr = mem_info__daddr(he->mem_info)->addr; + ms = &mem_info__daddr(he->mem_info)->ms; } return _hist_entry__sym_snprintf(ms, addr, he->level, bf, size, width); } @@ -1343,9 +1558,9 @@ sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right) uint64_t l = 0, r = 0; if (left->mem_info) - l = left->mem_info->iaddr.addr; + l = mem_info__iaddr(left->mem_info)->addr; if (right->mem_info) - r = right->mem_info->iaddr.addr; + r = mem_info__iaddr(right->mem_info)->addr; return (int64_t)(r - l); } @@ -1357,8 +1572,8 @@ static int hist_entry__iaddr_snprintf(struct hist_entry *he, char *bf, struct map_symbol *ms = NULL; if (he->mem_info) { - addr = he->mem_info->iaddr.addr; - ms = &he->mem_info->iaddr.ms; + addr = mem_info__iaddr(he->mem_info)->addr; + ms = &mem_info__iaddr(he->mem_info)->ms; } return _hist_entry__sym_snprintf(ms, addr, he->level, bf, size, width); } @@ -1370,9 +1585,9 @@ sort__dso_daddr_cmp(struct hist_entry *left, struct hist_entry *right) struct map *map_r = NULL; if (left->mem_info) - map_l = left->mem_info->daddr.ms.map; + map_l = mem_info__daddr(left->mem_info)->ms.map; if (right->mem_info) - map_r = right->mem_info->daddr.ms.map; + map_r = mem_info__daddr(right->mem_info)->ms.map; return _sort__dso_cmp(map_l, map_r); } @@ -1383,7 +1598,7 @@ static int hist_entry__dso_daddr_snprintf(struct hist_entry *he, char *bf, struct map *map = NULL; if (he->mem_info) - map = he->mem_info->daddr.ms.map; + map = mem_info__daddr(he->mem_info)->ms.map; return _hist_entry__dso_snprintf(map, bf, size, width); } @@ -1395,12 +1610,12 @@ sort__locked_cmp(struct hist_entry *left, struct hist_entry *right) union perf_mem_data_src data_src_r; if (left->mem_info) - data_src_l = left->mem_info->data_src; + data_src_l = *mem_info__data_src(left->mem_info); else data_src_l.mem_lock = PERF_MEM_LOCK_NA; if (right->mem_info) - data_src_r = right->mem_info->data_src; + data_src_r = *mem_info__data_src(right->mem_info); else data_src_r.mem_lock = PERF_MEM_LOCK_NA; @@ -1423,12 +1638,12 @@ sort__tlb_cmp(struct hist_entry *left, struct hist_entry *right) union perf_mem_data_src data_src_r; if (left->mem_info) - data_src_l = left->mem_info->data_src; + data_src_l = *mem_info__data_src(left->mem_info); else data_src_l.mem_dtlb = PERF_MEM_TLB_NA; if (right->mem_info) - data_src_r = right->mem_info->data_src; + data_src_r = *mem_info__data_src(right->mem_info); else data_src_r.mem_dtlb = PERF_MEM_TLB_NA; @@ -1451,12 +1666,12 @@ sort__lvl_cmp(struct hist_entry *left, struct hist_entry *right) union perf_mem_data_src data_src_r; if (left->mem_info) - data_src_l = left->mem_info->data_src; + data_src_l = *mem_info__data_src(left->mem_info); else data_src_l.mem_lvl = PERF_MEM_LVL_NA; if (right->mem_info) - data_src_r = right->mem_info->data_src; + data_src_r = *mem_info__data_src(right->mem_info); else data_src_r.mem_lvl = PERF_MEM_LVL_NA; @@ -1479,12 +1694,12 @@ sort__snoop_cmp(struct hist_entry *left, struct hist_entry *right) union perf_mem_data_src data_src_r; if 
(left->mem_info) - data_src_l = left->mem_info->data_src; + data_src_l = *mem_info__data_src(left->mem_info); else data_src_l.mem_snoop = PERF_MEM_SNOOP_NA; if (right->mem_info) - data_src_r = right->mem_info->data_src; + data_src_r = *mem_info__data_src(right->mem_info); else data_src_r.mem_snoop = PERF_MEM_SNOOP_NA; @@ -1515,8 +1730,8 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) if (left->cpumode > right->cpumode) return -1; if (left->cpumode < right->cpumode) return 1; - l_map = left->mem_info->daddr.ms.map; - r_map = right->mem_info->daddr.ms.map; + l_map = mem_info__daddr(left->mem_info)->ms.map; + r_map = mem_info__daddr(right->mem_info)->ms.map; /* if both are NULL, jump to sort on al_addr instead */ if (!l_map && !r_map) @@ -1531,28 +1746,33 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) if (rc) return rc; /* - * Addresses with no major/minor numbers are assumed to be + * Addresses with no major/minor numbers or build ID are assumed to be * anonymous in userspace. Sort those on pid then address. * * The kernel and non-zero major/minor mapped areas are * assumed to be unity mapped. Sort those on address. */ + if (left->cpumode != PERF_RECORD_MISC_KERNEL && (map__flags(l_map) & MAP_SHARED) == 0) { + const struct dso_id *dso_id = dso__id_const(l_dso); - if ((left->cpumode != PERF_RECORD_MISC_KERNEL) && - (!(map__flags(l_map) & MAP_SHARED)) && !l_dso->id.maj && !l_dso->id.min && - !l_dso->id.ino && !l_dso->id.ino_generation) { - /* userspace anonymous */ + if (!dso_id->mmap2_valid) + dso_id = dso__id_const(r_dso); - if (thread__pid(left->thread) > thread__pid(right->thread)) - return -1; - if (thread__pid(left->thread) < thread__pid(right->thread)) - return 1; + if (!build_id__is_defined(&dso_id->build_id) && + (!dso_id->mmap2_valid || (dso_id->maj == 0 && dso_id->min == 0))) { + /* userspace anonymous */ + + if (thread__pid(left->thread) > thread__pid(right->thread)) + return -1; + if (thread__pid(left->thread) < thread__pid(right->thread)) + return 1; + } } addr: /* al_addr does all the right addr - start + offset calculations */ - l = cl_address(left->mem_info->daddr.al_addr, chk_double_cl); - r = cl_address(right->mem_info->daddr.al_addr, chk_double_cl); + l = cl_address(mem_info__daddr(left->mem_info)->al_addr, chk_double_cl); + r = cl_address(mem_info__daddr(right->mem_info)->al_addr, chk_double_cl); if (l > r) return -1; if (l < r) return 1; @@ -1569,17 +1789,18 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf, char level = he->level; if (he->mem_info) { - struct map *map = he->mem_info->daddr.ms.map; + struct map *map = mem_info__daddr(he->mem_info)->ms.map; struct dso *dso = map ? map__dso(map) : NULL; + const struct dso_id *dso_id = dso ? 
dso__id_const(dso) : &dso_id_empty; - addr = cl_address(he->mem_info->daddr.al_addr, chk_double_cl); - ms = &he->mem_info->daddr.ms; + addr = cl_address(mem_info__daddr(he->mem_info)->al_addr, chk_double_cl); + ms = &mem_info__daddr(he->mem_info)->ms; /* print [s] for shared data mmaps */ if ((he->cpumode != PERF_RECORD_MISC_KERNEL) && map && !(map__prot(map) & PROT_EXEC) && (map__flags(map) & MAP_SHARED) && - (dso->id.maj || dso->id.min || dso->id.ino || dso->id.ino_generation)) + (!dso_id->mmap2_valid || (dso_id->maj == 0 && dso_id->min == 0))) level = 's'; else if (!map) level = 'X'; @@ -1663,21 +1884,20 @@ struct sort_entry sort_global_ins_lat = { static int64_t sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) { - return left->p_stage_cyc - right->p_stage_cyc; + return left->weight3 - right->weight3; } static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, - he->p_stage_cyc * he->stat.nr_events); + return repsep_snprintf(bf, size, "%-*u", width, he->weight3 * he->stat.nr_events); } static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc); + return repsep_snprintf(bf, size, "%-*u", width, he->weight3); } struct sort_entry sort_local_p_stage_cyc = { @@ -1757,12 +1977,12 @@ sort__blocked_cmp(struct hist_entry *left, struct hist_entry *right) union perf_mem_data_src data_src_r; if (left->mem_info) - data_src_l = left->mem_info->data_src; + data_src_l = *mem_info__data_src(left->mem_info); else data_src_l.mem_blk = PERF_MEM_BLK_NA; if (right->mem_info) - data_src_r = right->mem_info->data_src; + data_src_r = *mem_info__data_src(right->mem_info); else data_src_r.mem_blk = PERF_MEM_BLK_NA; @@ -1791,9 +2011,9 @@ sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right) uint64_t l = 0, r = 0; if (left->mem_info) - l = left->mem_info->daddr.phys_addr; + l = mem_info__daddr(left->mem_info)->phys_addr; if (right->mem_info) - r = right->mem_info->daddr.phys_addr; + r = mem_info__daddr(right->mem_info)->phys_addr; return (int64_t)(r - l); } @@ -1805,7 +2025,7 @@ static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf, size_t ret = 0; size_t len = BITS_PER_LONG / 4; - addr = he->mem_info->daddr.phys_addr; + addr = mem_info__daddr(he->mem_info)->phys_addr; ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", he->level); @@ -1832,9 +2052,9 @@ sort__data_page_size_cmp(struct hist_entry *left, struct hist_entry *right) uint64_t l = 0, r = 0; if (left->mem_info) - l = left->mem_info->daddr.data_page_size; + l = mem_info__daddr(left->mem_info)->data_page_size; if (right->mem_info) - r = right->mem_info->daddr.data_page_size; + r = mem_info__daddr(right->mem_info)->data_page_size; return (int64_t)(r - l); } @@ -1845,7 +2065,7 @@ static int hist_entry__data_page_size_snprintf(struct hist_entry *he, char *bf, char str[PAGE_SIZE_NAME_LEN]; return repsep_snprintf(bf, size, "%-*s", width, - get_page_size_name(he->mem_info->daddr.data_page_size, str)); + get_page_size_name(mem_info__daddr(he->mem_info)->data_page_size, str)); } struct sort_entry sort_mem_data_page_size = { @@ -2094,7 +2314,7 @@ struct sort_entry sort_dso_size = { .se_width_idx = HISTC_DSO_SIZE, }; -/* --sort dso_size */ +/* --sort addr */ static int64_t sort__addr_cmp(struct hist_entry *left, struct hist_entry *right) @@ -2131,6 +2351,178 @@ struct 
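Per the updated comment, a mapping with neither a build ID nor non-zero mmap2 major/minor numbers is now treated as anonymous. The predicate from the comparator hunk, boiled down with stand-in types (build_id__is_defined() becomes a plain flag here):

    #include <stdbool.h>
    #include <stdio.h>

    struct dso_id_like {
    	bool has_build_id;	/* stand-in for build_id__is_defined() */
    	bool mmap2_valid;	/* maj/min came from an MMAP2 event */
    	unsigned int maj, min;
    };

    static bool is_anon(const struct dso_id_like *id)
    {
    	if (id->has_build_id)
    		return false;	/* identified by content, file-backed */

    	return !id->mmap2_valid || (id->maj == 0 && id->min == 0);
    }

    int main(void)
    {
    	struct dso_id_like heap = { .mmap2_valid = true };	/* maj/min zero */
    	struct dso_id_like lib  = { .mmap2_valid = true, .maj = 8, .min = 1 };

    	printf("heap anon=%d, lib anon=%d\n", is_anon(&heap), is_anon(&lib));
    	return 0;
    }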
sort_entry sort_addr = { .se_width_idx = HISTC_ADDR, }; +/* --sort type */ + +struct annotated_data_type unknown_type = { + .self = { + .type_name = (char *)"(unknown)", + .children = LIST_HEAD_INIT(unknown_type.self.children), + }, +}; + +static int64_t +sort__type_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return sort__addr_cmp(left, right); +} + +static void sort__type_init(struct hist_entry *he) +{ + if (he->mem_type) + return; + + he->mem_type = hist_entry__get_data_type(he); + if (he->mem_type == NULL) { + he->mem_type = &unknown_type; + he->mem_type_off = 0; + } +} + +static int64_t +sort__type_collapse(struct hist_entry *left, struct hist_entry *right) +{ + struct annotated_data_type *left_type = left->mem_type; + struct annotated_data_type *right_type = right->mem_type; + + if (!left_type) { + sort__type_init(left); + left_type = left->mem_type; + } + + if (!right_type) { + sort__type_init(right); + right_type = right->mem_type; + } + + return strcmp(left_type->self.type_name, right_type->self.type_name); +} + +static int64_t +sort__type_sort(struct hist_entry *left, struct hist_entry *right) +{ + return sort__type_collapse(left, right); +} + +static int hist_entry__type_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*s", width, he->mem_type->self.type_name); +} + +struct sort_entry sort_type = { + .se_header = "Data Type", + .se_cmp = sort__type_cmp, + .se_collapse = sort__type_collapse, + .se_sort = sort__type_sort, + .se_init = sort__type_init, + .se_snprintf = hist_entry__type_snprintf, + .se_width_idx = HISTC_TYPE, +}; + +/* --sort typeoff */ + +static int64_t +sort__typeoff_sort(struct hist_entry *left, struct hist_entry *right) +{ + struct annotated_data_type *left_type = left->mem_type; + struct annotated_data_type *right_type = right->mem_type; + int64_t ret; + + if (!left_type) { + sort__type_init(left); + left_type = left->mem_type; + } + + if (!right_type) { + sort__type_init(right); + right_type = right->mem_type; + } + + ret = strcmp(left_type->self.type_name, right_type->self.type_name); + if (ret) + return ret; + return left->mem_type_off - right->mem_type_off; +} + +static int hist_entry__typeoff_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width __maybe_unused) +{ + struct annotated_data_type *he_type = he->mem_type; + char buf[4096]; + + if (he_type == &unknown_type || he_type == &stackop_type || + he_type == &canary_type) + return repsep_snprintf(bf, size, "%s", he_type->self.type_name); + + if (!annotated_data_type__get_member_name(he_type, buf, sizeof(buf), + he->mem_type_off)) + scnprintf(buf, sizeof(buf), "no field"); + + return repsep_snprintf(bf, size, "%s +%#x (%s)", he_type->self.type_name, + he->mem_type_off, buf); +} + +struct sort_entry sort_type_offset = { + .se_header = "Data Type Offset", + .se_cmp = sort__type_cmp, + .se_collapse = sort__typeoff_sort, + .se_sort = sort__typeoff_sort, + .se_init = sort__type_init, + .se_snprintf = hist_entry__typeoff_snprintf, + .se_width_idx = HISTC_TYPE_OFFSET, +}; + +/* --sort typecln */ + +/* TODO: use actual value in the system */ +#define TYPE_CACHELINE_SIZE 64 + +static int64_t +sort__typecln_sort(struct hist_entry *left, struct hist_entry *right) +{ + struct annotated_data_type *left_type = left->mem_type; + struct annotated_data_type *right_type = right->mem_type; + int64_t left_cln, right_cln; + int64_t ret; + + if (!left_type) { + sort__type_init(left); + left_type = left->mem_type; + } + + 
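sort__type_init resolves the data type on first use and substitutes the shared unknown_type sentinel, so later comparisons never see NULL. The idiom in isolation, with stand-in types and a dummy lookup in place of hist_entry__get_data_type():

    #include <stdio.h>
    #include <string.h>

    struct data_type { const char *name; };

    static struct data_type unknown_type = { .name = "(unknown)" };
    static struct data_type foo_type = { .name = "struct foo" };

    struct entry {
    	unsigned long ip;
    	struct data_type *type;	/* NULL until first needed */
    };

    /* stand-in for hist_entry__get_data_type(); may fail */
    static struct data_type *expensive_lookup(struct entry *e)
    {
    	return (e->ip & 1) ? NULL : &foo_type;
    }

    static void entry__init_type(struct entry *e)
    {
    	if (e->type)
    		return;			/* already resolved */

    	e->type = expensive_lookup(e);
    	if (!e->type)
    		e->type = &unknown_type;	/* never leave it NULL */
    }

    static int entry__type_cmp(struct entry *l, struct entry *r)
    {
    	/* collapse can run before init, so resolve on demand */
    	entry__init_type(l);
    	entry__init_type(r);
    	return strcmp(l->type->name, r->type->name);
    }

    int main(void)
    {
    	struct entry a = { .ip = 2 }, b = { .ip = 3 };

    	printf("cmp=%d (%s vs %s)\n", entry__type_cmp(&a, &b),
    	       a.type->name, b.type->name);
    	return 0;
    }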
if (!right_type) { + sort__type_init(right); + right_type = right->mem_type; + } + + ret = strcmp(left_type->self.type_name, right_type->self.type_name); + if (ret) + return ret; + + left_cln = left->mem_type_off / TYPE_CACHELINE_SIZE; + right_cln = right->mem_type_off / TYPE_CACHELINE_SIZE; + return left_cln - right_cln; +} + +static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width __maybe_unused) +{ + struct annotated_data_type *he_type = he->mem_type; + + return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name, + he->mem_type_off / TYPE_CACHELINE_SIZE); +} + +struct sort_entry sort_type_cacheline = { + .se_header = "Data Type Cacheline", + .se_cmp = sort__type_cmp, + .se_collapse = sort__typecln_sort, + .se_sort = sort__typecln_sort, + .se_init = sort__type_init, + .se_snprintf = hist_entry__typecln_snprintf, + .se_width_idx = HISTC_TYPE_CACHELINE, +}; + struct sort_dimension { const char *name; @@ -2138,25 +2530,51 @@ struct sort_dimension { int taken; }; -int __weak arch_support_sort_key(const char *sort_key __maybe_unused) +static int arch_support_sort_key(const char *sort_key, struct perf_env *env) { + const char *arch = perf_env__arch(env); + + if (!strcmp("x86", arch) || !strcmp("powerpc", arch)) { + if (!strcmp(sort_key, "p_stage_cyc")) + return 1; + if (!strcmp(sort_key, "local_p_stage_cyc")) + return 1; + } return 0; } -const char * __weak arch_perf_header_entry(const char *se_header) -{ +static const char *arch_perf_header_entry(const char *se_header, struct perf_env *env) +{ + const char *arch = perf_env__arch(env); + + if (!strcmp("x86", arch)) { + if (!strcmp(se_header, "Local Pipeline Stage Cycle")) + return "Local Retire Latency"; + else if (!strcmp(se_header, "Pipeline Stage Cycle")) + return "Retire Latency"; + } else if (!strcmp("powerpc", arch)) { + if (!strcmp(se_header, "Local INSTR Latency")) + return "Finish Cyc"; + else if (!strcmp(se_header, "INSTR Latency")) + return "Global Finish_cyc"; + else if (!strcmp(se_header, "Local Pipeline Stage Cycle")) + return "Dispatch Cyc"; + else if (!strcmp(se_header, "Pipeline Stage Cycle")) + return "Global Dispatch_cyc"; + } return se_header; } -static void sort_dimension_add_dynamic_header(struct sort_dimension *sd) +static void sort_dimension_add_dynamic_header(struct sort_dimension *sd, struct perf_env *env) { - sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header); + sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header, env); } #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_PID, "pid", sort_thread), + DIM(SORT_TGID, "tgid", sort_tgid), DIM(SORT_COMM, "comm", sort_comm), DIM(SORT_DSO, "dso", sort_dso), DIM(SORT_SYM, "symbol", sort_sym), @@ -2185,7 +2603,12 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_ADDR, "addr", sort_addr), DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc), DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc), - DIM(SORT_SIMD, "simd", sort_simd) + DIM(SORT_SIMD, "simd", sort_simd), + DIM(SORT_ANNOTATE_DATA_TYPE, "type", sort_type), + DIM(SORT_ANNOTATE_DATA_TYPE_OFFSET, "typeoff", sort_type_offset), + DIM(SORT_SYM_OFFSET, "symoff", sort_sym_offset), + DIM(SORT_ANNOTATE_DATA_TYPE_CACHELINE, "typecln", sort_type_cacheline), + DIM(SORT_PARALLELISM, "parallelism", sort_parallelism), }; #undef DIM @@ -2206,6 +2629,15 @@ static struct sort_dimension 
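Where typeoff sorts on (type name, member offset), typecln groups offsets into cache-line buckets. A worked example of the bucketing, using the hunk's fixed TYPE_CACHELINE_SIZE (the in-tree TODO about reading the real value applies here too):

    #include <stdio.h>

    #define TYPE_CACHELINE_SIZE 64

    int main(void)
    {
    	int offsets[] = { 0, 8, 60, 64, 130 };

    	for (unsigned int i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++)
    		printf("offset %3d -> cache-line %d\n", offsets[i],
    		       offsets[i] / TYPE_CACHELINE_SIZE);
    	/* buckets: 0, 0, 0, 1, 2 -- offsets 0..63 collapse into line 0 */
    	return 0;
    }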
bstack_sort_dimensions[] = { DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc), DIM(SORT_ADDR_FROM, "addr_from", sort_addr_from), DIM(SORT_ADDR_TO, "addr_to", sort_addr_to), + DIM(SORT_CALLCHAIN_BRANCH_PREDICTED, + "callchain_branch_predicted", + sort_callchain_branch_predicted), + DIM(SORT_CALLCHAIN_BRANCH_ABORT, + "callchain_branch_abort", + sort_callchain_branch_abort), + DIM(SORT_CALLCHAIN_BRANCH_CYCLES, + "callchain_branch_cycles", + sort_callchain_branch_cycles) }; #undef DIM @@ -2232,21 +2664,40 @@ struct hpp_dimension { const char *name; struct perf_hpp_fmt *fmt; int taken; + int was_taken; + int mem_mode; }; #define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], } +#define DIM_MEM(d, n) { .name = n, .fmt = &perf_hpp__format[d], .mem_mode = 1, } static struct hpp_dimension hpp_sort_dimensions[] = { DIM(PERF_HPP__OVERHEAD, "overhead"), + DIM(PERF_HPP__LATENCY, "latency"), DIM(PERF_HPP__OVERHEAD_SYS, "overhead_sys"), DIM(PERF_HPP__OVERHEAD_US, "overhead_us"), DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"), DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"), DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"), + DIM(PERF_HPP__LATENCY_ACC, "latency_children"), DIM(PERF_HPP__SAMPLES, "sample"), DIM(PERF_HPP__PERIOD, "period"), + DIM(PERF_HPP__WEIGHT1, "weight1"), + DIM(PERF_HPP__WEIGHT2, "weight2"), + DIM(PERF_HPP__WEIGHT3, "weight3"), + /* aliases for weight_struct */ + DIM(PERF_HPP__WEIGHT2, "ins_lat"), + DIM(PERF_HPP__WEIGHT3, "retire_lat"), + DIM(PERF_HPP__WEIGHT3, "p_stage_cyc"), + /* used for output only when SORT_MODE__MEM */ + DIM_MEM(PERF_HPP__MEM_STAT_OP, "op"), + DIM_MEM(PERF_HPP__MEM_STAT_CACHE, "cache"), + DIM_MEM(PERF_HPP__MEM_STAT_MEMORY, "memory"), + DIM_MEM(PERF_HPP__MEM_STAT_SNOOP, "snoop"), + DIM_MEM(PERF_HPP__MEM_STAT_DTLB, "dtlb"), }; +#undef DIM_MEM #undef DIM struct hpp_sort_entry { @@ -2266,18 +2717,22 @@ void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists) } static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hists *hists, int line __maybe_unused, + struct hists *hists, int line, int *span __maybe_unused) { struct hpp_sort_entry *hse; size_t len = fmt->user_len; + const char *hdr = ""; + + if (line == hists->hpp_list->nr_header_lines - 1) + hdr = fmt->name; hse = container_of(fmt, struct hpp_sort_entry, hpp); if (!len) len = hists__col_len(hists, hse->se->se_width_idx); - return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, fmt->name); + return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, hdr); } static int __sort__hpp_width(struct perf_hpp_fmt *fmt, @@ -2371,6 +2826,7 @@ MK_SORT_ENTRY_CHK(thread) MK_SORT_ENTRY_CHK(comm) MK_SORT_ENTRY_CHK(dso) MK_SORT_ENTRY_CHK(sym) +MK_SORT_ENTRY_CHK(parallelism) static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) @@ -2508,9 +2964,10 @@ static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd, } static int __sort_dimension__add_hpp_output(struct sort_dimension *sd, - struct perf_hpp_list *list) + struct perf_hpp_list *list, + int level) { - struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0); + struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level); if (hse == NULL) return -1; @@ -2931,9 +3388,8 @@ static int __dynamic_dimension__add(struct evsel *evsel, static int add_evsel_fields(struct evsel *evsel, bool raw_trace, int level) { int ret; - struct tep_format_field *field; - - field = evsel->tp_format->format.fields; + struct tep_event *tp_format = evsel__tp_format(evsel); + struct 
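__sort__hpp_header now emits the column caption only on the last header line, leaving earlier lines blank so multi-line headers (used with the new mem-stat output columns) can carry group captions elsewhere. A reduced sketch of that rendering, assuming two header lines:

    #include <stdio.h>

    static void print_header_line(const char *names[], int ncols, int line,
    			      int nr_header_lines)
    {
    	for (int i = 0; i < ncols; i++) {
    		const char *hdr = "";

    		if (line == nr_header_lines - 1)	/* last line only */
    			hdr = names[i];
    		printf("%-10.10s", hdr);
    	}
    	putchar('\n');
    }

    int main(void)
    {
    	const char *names[] = { "Overhead", "Samples", "Symbol" };

    	for (int line = 0; line < 2; line++)
    		print_header_line(names, 3, line, 2);
    	return 0;
    }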
tep_format_field *field = tp_format ? tp_format->format.fields : NULL; while (field) { ret = __dynamic_dimension__add(evsel, field, raw_trace, level); if (ret < 0) @@ -2966,13 +3422,19 @@ static int add_all_matching_fields(struct evlist *evlist, { int ret = -ESRCH; struct evsel *evsel; - struct tep_format_field *field; evlist__for_each_entry(evlist, evsel) { + struct tep_event *tp_format; + struct tep_format_field *field; + if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) continue; - field = tep_find_any_field(evsel->tp_format, field_name); + tp_format = evsel__tp_format(evsel); + if (tp_format == NULL) + continue; + + field = tep_find_any_field(tp_format, field_name); if (field == NULL) continue; @@ -3054,7 +3516,9 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok, if (!strcmp(field_name, "*")) { ret = add_evsel_fields(evsel, raw_trace, level); } else { - struct tep_format_field *field = tep_find_any_field(evsel->tp_format, field_name); + struct tep_event *tp_format = evsel__tp_format(evsel); + struct tep_format_field *field = + tp_format ? tep_find_any_field(tp_format, field_name) : NULL; if (field == NULL) { pr_debug("Cannot find event field for %s.%s\n", @@ -3106,17 +3570,19 @@ static int __hpp_dimension__add(struct hpp_dimension *hd, return -1; hd->taken = 1; + hd->was_taken = 1; perf_hpp_list__register_sort_field(list, fmt); return 0; } static int __sort_dimension__add_output(struct perf_hpp_list *list, - struct sort_dimension *sd) + struct sort_dimension *sd, + int level) { if (sd->taken) return 0; - if (__sort_dimension__add_hpp_output(sd, list) < 0) + if (__sort_dimension__add_hpp_output(sd, list, level) < 0) return -1; sd->taken = 1; @@ -3124,14 +3590,15 @@ static int __sort_dimension__add_output(struct perf_hpp_list *list, } static int __hpp_dimension__add_output(struct perf_hpp_list *list, - struct hpp_dimension *hd) + struct hpp_dimension *hd, + int level) { struct perf_hpp_fmt *fmt; if (hd->taken) return 0; - fmt = __hpp_dimension__alloc_hpp(hd, 0); + fmt = __hpp_dimension__alloc_hpp(hd, level); if (!fmt) return -1; @@ -3140,14 +3607,19 @@ static int __hpp_dimension__add_output(struct perf_hpp_list *list, return 0; } -int hpp_dimension__add_output(unsigned col) +int hpp_dimension__add_output(unsigned col, bool implicit) { + struct hpp_dimension *hd; + BUG_ON(col >= PERF_HPP__MAX_INDEX); - return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]); + hd = &hpp_sort_dimensions[col]; + if (implicit && !hd->was_taken) + return 0; + return __hpp_dimension__add_output(&perf_hpp_list, hd, /*level=*/0); } int sort_dimension__add(struct perf_hpp_list *list, const char *tok, - struct evlist *evlist, + struct evlist *evlist, struct perf_env *env, int level) { unsigned int i, j; @@ -3160,7 +3632,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, */ for (j = 0; j < ARRAY_SIZE(arch_specific_sort_keys); j++) { if (!strcmp(arch_specific_sort_keys[j], tok) && - !arch_support_sort_key(tok)) { + !arch_support_sort_key(tok, env)) { return 0; } } @@ -3173,10 +3645,10 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) { if (sd->name && !strcmp(dynamic_headers[j], sd->name)) - sort_dimension_add_dynamic_header(sd); + sort_dimension_add_dynamic_header(sd, env); } - if (sd->entry == &sort_parent) { + if (sd->entry == &sort_parent && parent_pattern) { int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); if (ret) { char err[BUFSIZ]; @@ -3205,27 +3677,26 @@ 
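These hunks replace bare evsel->tp_format dereferences with evsel__tp_format(), which can return NULL. The guard pattern in a generic, standalone form; the types and names below are stand-ins, not perf's:

    #include <stddef.h>
    #include <stdio.h>

    struct tp_format_like { const char *first_field; };

    struct evsel_like {
    	struct tp_format_like *fmt;	/* lazily parsed, may stay NULL */
    };

    /* stand-in for evsel__tp_format(): may fail and return NULL */
    static struct tp_format_like *evsel_like__format(struct evsel_like *e)
    {
    	return e->fmt;
    }

    static const char *first_field_name(struct evsel_like *e)
    {
    	struct tp_format_like *fmt = evsel_like__format(e);

    	return fmt ? fmt->first_field : NULL;	/* guard every deref */
    }

    int main(void)
    {
    	struct tp_format_like f = { .first_field = "common_pid" };
    	struct evsel_like ok = { .fmt = &f }, bad = { .fmt = NULL };
    	const char *n = first_field_name(&bad);

    	printf("%s / %s\n", first_field_name(&ok), n ? n : "(no format)");
    	return 0;
    }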
int sort_dimension__add(struct perf_hpp_list *list, const char *tok, list->thread = 1; } else if (sd->entry == &sort_comm) { list->comm = 1; + } else if (sd->entry == &sort_type_offset) { + symbol_conf.annotate_data_member = true; } return __sort_dimension__add(sd, list, level); } - for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { - struct hpp_dimension *hd = &hpp_sort_dimensions[i]; - - if (strncasecmp(tok, hd->name, strlen(tok))) - continue; - - return __hpp_dimension__add(hd, list, level); - } - for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { struct sort_dimension *sd = &bstack_sort_dimensions[i]; if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sort__mode != SORT_MODE__BRANCH) + if ((sort__mode != SORT_MODE__BRANCH) && + strncasecmp(tok, "callchain_branch_predicted", + strlen(tok)) && + strncasecmp(tok, "callchain_branch_abort", + strlen(tok)) && + strncasecmp(tok, "callchain_branch_cycles", + strlen(tok))) return -EINVAL; if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) @@ -3254,20 +3725,59 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, return 0; } + for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { + struct hpp_dimension *hd = &hpp_sort_dimensions[i]; + + if (strncasecmp(tok, hd->name, strlen(tok))) + continue; + + return __hpp_dimension__add(hd, list, level); + } + if (!add_dynamic_entry(evlist, tok, level)) return 0; return -ESRCH; } +/* This should match with sort_dimension__add() above */ +static bool is_hpp_sort_key(const char *key, struct perf_env *env) +{ + unsigned i; + + for (i = 0; i < ARRAY_SIZE(arch_specific_sort_keys); i++) { + if (!strcmp(arch_specific_sort_keys[i], key) && + !arch_support_sort_key(key, env)) { + return false; + } + } + + for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { + struct sort_dimension *sd = &common_sort_dimensions[i]; + + if (sd->name && !strncasecmp(key, sd->name, strlen(key))) + return false; + } + + for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { + struct hpp_dimension *hd = &hpp_sort_dimensions[i]; + + if (!strncasecmp(key, hd->name, strlen(key))) + return true; + } + return false; +} + static int setup_sort_list(struct perf_hpp_list *list, char *str, - struct evlist *evlist) + struct evlist *evlist, struct perf_env *env) { char *tmp, *tok; int ret = 0; int level = 0; int next_level = 1; + int prev_level = 0; bool in_group = false; + bool prev_was_hpp = false; do { tok = str; @@ -3288,7 +3798,20 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str, } if (*tok) { - ret = sort_dimension__add(list, tok, evlist, level); + if (is_hpp_sort_key(tok, env)) { + /* keep output (hpp) sort keys in the same level */ + if (prev_was_hpp) { + bool next_same = (level == next_level); + + level = prev_level; + next_level = next_same ? 
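All the dimension tables are matched with strncasecmp(tok, name, strlen(tok)): a --sort token matches any dimension it is a case-insensitive prefix of, and table scan order decides ambiguous prefixes (one reason the hpp table lookup moved below the branch and memory tables). A small demo of the matching rule:

    #include <stdio.h>
    #include <string.h>
    #include <strings.h>

    static const char *dims[] = { "symbol", "symoff", "sym_from" };

    static const char *resolve(const char *tok)
    {
    	for (unsigned int i = 0; i < sizeof(dims) / sizeof(dims[0]); i++) {
    		if (!strncasecmp(tok, dims[i], strlen(tok)))
    			return dims[i];	/* first prefix match wins */
    	}
    	return NULL;
    }

    int main(void)
    {
    	printf("sym    -> %s\n", resolve("sym"));	/* symbol */
    	printf("symoff -> %s\n", resolve("symoff"));	/* symoff */
    	printf("SYM_F  -> %s\n", resolve("SYM_F"));	/* sym_from */
    	return 0;
    }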
level : level+1; + } + prev_was_hpp = true; + } else { + prev_was_hpp = false; + } + + ret = sort_dimension__add(list, tok, evlist, env, level); if (ret == -EINVAL) { if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok))) ui__error("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system"); @@ -3299,6 +3822,7 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str, ui__error("Unknown --sort key: `%s'", tok); break; } + prev_level = level; } level = next_level; @@ -3394,15 +3918,29 @@ static char *setup_overhead(char *keys) if (sort__mode == SORT_MODE__DIFF) return keys; - keys = prefix_if_not_in("overhead", keys); - - if (symbol_conf.cumulate_callchain) - keys = prefix_if_not_in("overhead_children", keys); + if (symbol_conf.prefer_latency) { + keys = prefix_if_not_in("overhead", keys); + keys = prefix_if_not_in("latency", keys); + if (symbol_conf.cumulate_callchain) { + keys = prefix_if_not_in("overhead_children", keys); + keys = prefix_if_not_in("latency_children", keys); + } + } else if (!keys || (!strstr(keys, "overhead") && + !strstr(keys, "latency"))) { + if (symbol_conf.enable_latency) + keys = prefix_if_not_in("latency", keys); + keys = prefix_if_not_in("overhead", keys); + if (symbol_conf.cumulate_callchain) { + if (symbol_conf.enable_latency) + keys = prefix_if_not_in("latency_children", keys); + keys = prefix_if_not_in("overhead_children", keys); + } + } return keys; } -static int __setup_sorting(struct evlist *evlist) +static int __setup_sorting(struct evlist *evlist, struct perf_env *env) { char *str; const char *sort_keys; @@ -3442,7 +3980,7 @@ static int __setup_sorting(struct evlist *evlist) } } - ret = setup_sort_list(&perf_hpp_list, str, evlist); + ret = setup_sort_list(&perf_hpp_list, str, evlist, env); free(str); return ret; @@ -3545,26 +4083,38 @@ void sort__setup_elide(FILE *output) } } -int output_field_add(struct perf_hpp_list *list, char *tok) +int output_field_add(struct perf_hpp_list *list, const char *tok, int *level) { unsigned int i; - for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { - struct sort_dimension *sd = &common_sort_dimensions[i]; + for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { + struct hpp_dimension *hd = &hpp_sort_dimensions[i]; - if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) + if (strncasecmp(tok, hd->name, strlen(tok))) continue; - return __sort_dimension__add_output(list, sd); + if (!strcasecmp(tok, "weight")) + ui__warning("--fields weight shows the average value unlike in the --sort key.\n"); + + if (hd->mem_mode && sort__mode != SORT_MODE__MEMORY) + continue; + + return __hpp_dimension__add_output(list, hd, *level); } - for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { - struct hpp_dimension *hd = &hpp_sort_dimensions[i]; + /* + * A non-output field will increase level so that it can be in a + * different hierarchy. 
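setup_overhead() builds the default key string by conditionally prepending keys. A sketch of the prefix_if_not_in() contract it relies on, reconstructed from its use here rather than copied verbatim from perf:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static char *prefix_if_not_in(const char *pre, char *str)
    {
    	char *n;

    	if (!str || strstr(str, pre))
    		return str;		/* key already present */

    	if (asprintf(&n, "%s,%s", pre, str) < 0)
    		n = NULL;
    	free(str);
    	return n;
    }

    int main(void)
    {
    	char *keys = strdup("comm,dso,symbol");

    	keys = prefix_if_not_in("overhead", keys);
    	keys = prefix_if_not_in("overhead", keys);	/* no-op, already there */
    	printf("%s\n", keys);		/* overhead,comm,dso,symbol */
    	free(keys);
    	return 0;
    }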
+ */ + (*level)++; - if (strncasecmp(tok, hd->name, strlen(tok))) + for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { + struct sort_dimension *sd = &common_sort_dimensions[i]; + + if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; - return __hpp_dimension__add_output(list, hd); + return __sort_dimension__add_output(list, sd, *level); } for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { @@ -3576,7 +4126,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) if (sort__mode != SORT_MODE__BRANCH) return -EINVAL; - return __sort_dimension__add_output(list, sd); + return __sort_dimension__add_output(list, sd, *level); } for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) { @@ -3588,7 +4138,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) if (sort__mode != SORT_MODE__MEMORY) return -EINVAL; - return __sort_dimension__add_output(list, sd); + return __sort_dimension__add_output(list, sd, *level); } return -ESRCH; @@ -3598,10 +4148,11 @@ static int setup_output_list(struct perf_hpp_list *list, char *str) { char *tmp, *tok; int ret = 0; + int level = 0; for (tok = strtok_r(str, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { - ret = output_field_add(list, tok); + ret = output_field_add(list, tok, &level); if (ret == -EINVAL) { ui__error("Invalid --fields key: `%s'", tok); break; @@ -3665,16 +4216,16 @@ out: return ret; } -int setup_sorting(struct evlist *evlist) +int setup_sorting(struct evlist *evlist, struct perf_env *env) { int err; - err = __setup_sorting(evlist); + err = __setup_sorting(evlist, env); if (err < 0) return err; if (parent_pattern != default_parent_pattern) { - err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1); + err = sort_dimension__add(&perf_hpp_list, "parent", evlist, env, -1); if (err < 0) return err; } @@ -3691,6 +4242,10 @@ int setup_sorting(struct evlist *evlist) if (err < 0) return err; + err = perf_hpp__alloc_mem_stats(&perf_hpp_list, evlist); + if (err < 0) + return err; + /* copy sort keys to output fields */ perf_hpp__setup_output_field(&perf_hpp_list); /* and then copy output fields to sort keys */ @@ -3750,7 +4305,7 @@ static void add_hpp_sort_string(struct strbuf *sb, struct hpp_dimension *s, int add_key(sb, s[i].name, llen); } -char *sort_help(const char *prefix) +char *sort_help(const char *prefix, enum sort_mode mode) { struct strbuf sb; char *s; @@ -3762,10 +4317,12 @@ char *sort_help(const char *prefix) ARRAY_SIZE(hpp_sort_dimensions), &len); add_sort_string(&sb, common_sort_dimensions, ARRAY_SIZE(common_sort_dimensions), &len); - add_sort_string(&sb, bstack_sort_dimensions, - ARRAY_SIZE(bstack_sort_dimensions), &len); - add_sort_string(&sb, memory_sort_dimensions, - ARRAY_SIZE(memory_sort_dimensions), &len); + if (mode == SORT_MODE__NORMAL || mode == SORT_MODE__BRANCH) + add_sort_string(&sb, bstack_sort_dimensions, + ARRAY_SIZE(bstack_sort_dimensions), &len); + if (mode == SORT_MODE__NORMAL || mode == SORT_MODE__MEMORY) + add_sort_string(&sb, memory_sort_dimensions, + ARRAY_SIZE(memory_sort_dimensions), &len); s = strbuf_detach(&sb, NULL); strbuf_release(&sb); return s; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index ecfb7f1359d5..d7787958e06b 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -3,18 +3,10 @@ #define __PERF_SORT_H #include <regex.h> #include <stdbool.h> -#include <linux/list.h> -#include <linux/rbtree.h> -#include "map_symbol.h" -#include "symbol_conf.h" -#include "callchain.h" -#include "values.h" #include "hist.h" -#include 
"stat.h" -#include "spark.h" struct option; -struct thread; +struct perf_env; extern regex_t parent_regex; extern const char *sort_order; @@ -34,176 +26,10 @@ extern struct sort_entry sort_dso_to; extern struct sort_entry sort_sym_from; extern struct sort_entry sort_sym_to; extern struct sort_entry sort_srcline; +extern struct sort_entry sort_type; extern const char default_mem_sort_order[]; extern bool chk_double_cl; -struct res_sample { - u64 time; - int cpu; - int tid; -}; - -struct he_stat { - u64 period; - u64 period_sys; - u64 period_us; - u64 period_guest_sys; - u64 period_guest_us; - u32 nr_events; -}; - -struct namespace_id { - u64 dev; - u64 ino; -}; - -struct hist_entry_diff { - bool computed; - union { - /* PERF_HPP__DELTA */ - double period_ratio_delta; - - /* PERF_HPP__RATIO */ - double period_ratio; - - /* HISTC_WEIGHTED_DIFF */ - s64 wdiff; - - /* PERF_HPP_DIFF__CYCLES */ - s64 cycles; - }; - struct stats stats; - unsigned long svals[NUM_SPARKS]; -}; - -struct hist_entry_ops { - void *(*new)(size_t size); - void (*free)(void *ptr); -}; - -/** - * struct hist_entry - histogram entry - * - * @row_offset - offset from the first callchain expanded to appear on screen - * @nr_rows - rows expanded in callchain, recalculated on folding/unfolding - */ -struct hist_entry { - struct rb_node rb_node_in; - struct rb_node rb_node; - union { - struct list_head node; - struct list_head head; - } pairs; - struct he_stat stat; - struct he_stat *stat_acc; - struct map_symbol ms; - struct thread *thread; - struct comm *comm; - struct namespace_id cgroup_id; - u64 cgroup; - u64 ip; - u64 transaction; - s32 socket; - s32 cpu; - u64 code_page_size; - u64 weight; - u64 ins_lat; - u64 p_stage_cyc; - u8 cpumode; - u8 depth; - struct simd_flags simd_flags; - - /* We are added by hists__add_dummy_entry. */ - bool dummy; - bool leaf; - - char level; - u8 filtered; - - u16 callchain_size; - union { - /* - * Since perf diff only supports the stdio output, TUI - * fields are only accessed from perf report (or perf - * top). So make it a union to reduce memory usage. 
- */ - struct hist_entry_diff diff; - struct /* for TUI */ { - u16 row_offset; - u16 nr_rows; - bool init_have_children; - bool unfolded; - bool has_children; - bool has_no_entry; - }; - }; - char *srcline; - char *srcfile; - struct symbol *parent; - struct branch_info *branch_info; - long time; - struct hists *hists; - struct mem_info *mem_info; - struct block_info *block_info; - struct kvm_info *kvm_info; - void *raw_data; - u32 raw_size; - int num_res; - struct res_sample *res_samples; - void *trace_output; - struct perf_hpp_list *hpp_list; - struct hist_entry *parent_he; - struct hist_entry_ops *ops; - union { - /* this is for hierarchical entry structure */ - struct { - struct rb_root_cached hroot_in; - struct rb_root_cached hroot_out; - }; /* non-leaf entries */ - struct rb_root sorted_chain; /* leaf entry has callchains */ - }; - struct callchain_root callchain[0]; /* must be last member */ -}; - -static __pure inline bool hist_entry__has_callchains(struct hist_entry *he) -{ - return he->callchain_size != 0; -} - -int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width); - -static inline bool hist_entry__has_pairs(struct hist_entry *he) -{ - return !list_empty(&he->pairs.node); -} - -static inline struct hist_entry *hist_entry__next_pair(struct hist_entry *he) -{ - if (hist_entry__has_pairs(he)) - return list_entry(he->pairs.node.next, struct hist_entry, pairs.node); - return NULL; -} - -static inline void hist_entry__add_pair(struct hist_entry *pair, - struct hist_entry *he) -{ - list_add_tail(&pair->pairs.node, &he->pairs.head); -} - -static inline float hist_entry__get_percent_limit(struct hist_entry *he) -{ - u64 period = he->stat.period; - u64 total_period = hists__total_period(he->hists); - - if (unlikely(total_period == 0)) - return 0; - - if (symbol_conf.cumulate_callchain) - period = he->stat_acc->period; - - return period * 100.0 / total_period; -} - enum sort_mode { SORT_MODE__NORMAL, SORT_MODE__BRANCH, @@ -243,6 +69,12 @@ enum sort_type { SORT_LOCAL_RETIRE_LAT, SORT_GLOBAL_RETIRE_LAT, SORT_SIMD, + SORT_ANNOTATE_DATA_TYPE, + SORT_ANNOTATE_DATA_TYPE_OFFSET, + SORT_SYM_OFFSET, + SORT_ANNOTATE_DATA_TYPE_CACHELINE, + SORT_PARALLELISM, + SORT_TGID, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, @@ -259,6 +91,9 @@ enum sort_type { SORT_SYM_IPC, SORT_ADDR_FROM, SORT_ADDR_TO, + SORT_CALLCHAIN_BRANCH_PREDICTED, + SORT_CALLCHAIN_BRANCH_ABORT, + SORT_CALLCHAIN_BRANCH_CYCLES, /* memory mode specific sort keys */ __SORT_MEMORY_MODE, @@ -292,37 +127,28 @@ struct sort_entry { u8 se_width_idx; }; -struct block_hist { - struct hists block_hists; - struct perf_hpp_list block_list; - struct perf_hpp_fmt block_fmt; - int block_idx; - bool valid; - struct hist_entry he; -}; - extern struct sort_entry sort_thread; struct evlist; struct tep_handle; -int setup_sorting(struct evlist *evlist); +int setup_sorting(struct evlist *evlist, struct perf_env *env); int setup_output_field(void); void reset_output_field(void); void sort__setup_elide(FILE *fp); void perf_hpp__set_elide(int idx, bool elide); -char *sort_help(const char *prefix); +char *sort_help(const char *prefix, enum sort_mode mode); int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset); bool is_strict_order(const char *order); -int hpp_dimension__add_output(unsigned col); +int hpp_dimension__add_output(unsigned col, bool implicit); void reset_dimensions(void); int sort_dimension__add(struct perf_hpp_list *list, const char *tok, - struct evlist *evlist, + 
struct evlist *evlist, struct perf_env *env, int level); -int output_field_add(struct perf_hpp_list *list, char *tok); +int output_field_add(struct perf_hpp_list *list, const char *tok, int *level); int64_t sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right); int64_t diff --git a/tools/perf/util/spark.c b/tools/perf/util/spark.c index 70272a8b81a6..65ca253cc22e 100644 --- a/tools/perf/util/spark.c +++ b/tools/perf/util/spark.c @@ -1,9 +1,7 @@ -#include <stdio.h> -#include <limits.h> -#include <string.h> -#include <stdlib.h> +// SPDX-License-Identifier: GPL-2.0 #include "spark.h" -#include "stat.h" +#include <limits.h> +#include <linux/kernel.h> #define SPARK_SHIFT 8 diff --git a/tools/perf/util/spark.h b/tools/perf/util/spark.h index 25402d7d7a64..78597c38ef35 100644 --- a/tools/perf/util/spark.h +++ b/tools/perf/util/spark.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef SPARK_H #define SPARK_H 1 diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c index 476e99896d5e..0f4907843ac1 100644 --- a/tools/perf/util/srccode.c +++ b/tools/perf/util/srccode.c @@ -16,7 +16,7 @@ #include "srccode.h" #include "debug.h" #include <internal/lib.h> // page_size -#include "fncache.h" +#include "hashmap.h" #define MAXSRCCACHE (32*1024*1024) #define MAXSRCFILES 64 @@ -92,7 +92,7 @@ static struct srcfile *find_srcfile(char *fn) struct srcfile *h; int fd; unsigned long sz; - unsigned hval = shash((unsigned char *)fn) % SRC_HTAB_SZ; + size_t hval = str_hash(fn) % SRC_HTAB_SZ; hlist_for_each_entry (h, &srcfile_htab[hval], hash_nd) { if (!strcmp(fn, h->fn)) { diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 034b496df297..27c0966611ab 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -1,52 +1,38 @@ // SPDX-License-Identifier: GPL-2.0 -#include <inttypes.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> - -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/zalloc.h> - -#include <api/io.h> - -#include "util/dso.h" -#include "util/debug.h" -#include "util/callchain.h" -#include "util/symbol_conf.h" #include "srcline.h" -#include "string2.h" +#include "addr2line.h" +#include "dso.h" +#include "callchain.h" +#include "libbfd.h" +#include "llvm.h" #include "symbol.h" -#include "subcmd/run-command.h" -/* If addr2line doesn't return data for 1 second then timeout. */ -int addr2line_timeout_ms = 1 * 1000; +#include <inttypes.h> +#include <string.h> + bool srcline_full_filename; char *srcline__unknown = (char *)"??:0"; -static const char *dso__name(struct dso *dso) +static const char *srcline_dso_name(struct dso *dso) { const char *dso_name; - if (dso->symsrc_filename) - dso_name = dso->symsrc_filename; + if (dso__symsrc_filename(dso)) + dso_name = dso__symsrc_filename(dso); else - dso_name = dso->long_name; + dso_name = dso__long_name(dso); if (dso_name[0] == '[') return NULL; - if (!strncmp(dso_name, "/tmp/perf-", 10)) + if (is_perf_pid_map_name(dso_name)) return NULL; return dso_name; } -static int inline_list__append(struct symbol *symbol, char *srcline, - struct inline_node *node) +int inline_list__append(struct symbol *symbol, char *srcline, struct inline_node *node) { struct inline_list *ilist; @@ -73,7 +59,7 @@ static const char *gnu_basename(const char *path) return base ? 
base + 1 : path; } -static char *srcline_from_fileline(const char *file, unsigned int line) +char *srcline_from_fileline(const char *file, unsigned int line) { char *srcline; @@ -89,9 +75,9 @@ static char *srcline_from_fileline(const char *file, unsigned int line) return srcline; } -static struct symbol *new_inline_sym(struct dso *dso, - struct symbol *base_sym, - const char *funcname) +struct symbol *new_inline_sym(struct dso *dso, + struct symbol *base_sym, + const char *funcname) { struct symbol *inline_sym; char *demangled = NULL; @@ -128,666 +114,23 @@ static struct symbol *new_inline_sym(struct dso *dso, return inline_sym; } -#define MAX_INLINE_NEST 1024 - -#ifdef HAVE_LIBBFD_SUPPORT - -/* - * Implement addr2line using libbfd. - */ -#define PACKAGE "perf" -#include <bfd.h> - -struct a2l_data { - const char *input; - u64 addr; - - bool found; - const char *filename; - const char *funcname; - unsigned line; - - bfd *abfd; - asymbol **syms; -}; - -static int bfd_error(const char *string) -{ - const char *errmsg; - - errmsg = bfd_errmsg(bfd_get_error()); - fflush(stdout); - - if (string) - pr_debug("%s: %s\n", string, errmsg); - else - pr_debug("%s\n", errmsg); - - return -1; -} - -static int slurp_symtab(bfd *abfd, struct a2l_data *a2l) -{ - long storage; - long symcount; - asymbol **syms; - bfd_boolean dynamic = FALSE; - - if ((bfd_get_file_flags(abfd) & HAS_SYMS) == 0) - return bfd_error(bfd_get_filename(abfd)); - - storage = bfd_get_symtab_upper_bound(abfd); - if (storage == 0L) { - storage = bfd_get_dynamic_symtab_upper_bound(abfd); - dynamic = TRUE; - } - if (storage < 0L) - return bfd_error(bfd_get_filename(abfd)); - - syms = malloc(storage); - if (dynamic) - symcount = bfd_canonicalize_dynamic_symtab(abfd, syms); - else - symcount = bfd_canonicalize_symtab(abfd, syms); - - if (symcount < 0) { - free(syms); - return bfd_error(bfd_get_filename(abfd)); - } - - a2l->syms = syms; - return 0; -} - -static void find_address_in_section(bfd *abfd, asection *section, void *data) -{ - bfd_vma pc, vma; - bfd_size_type size; - struct a2l_data *a2l = data; - flagword flags; - - if (a2l->found) - return; - -#ifdef bfd_get_section_flags - flags = bfd_get_section_flags(abfd, section); -#else - flags = bfd_section_flags(section); -#endif - if ((flags & SEC_ALLOC) == 0) - return; - - pc = a2l->addr; -#ifdef bfd_get_section_vma - vma = bfd_get_section_vma(abfd, section); -#else - vma = bfd_section_vma(section); -#endif -#ifdef bfd_get_section_size - size = bfd_get_section_size(section); -#else - size = bfd_section_size(section); -#endif - - if (pc < vma || pc >= vma + size) - return; - - a2l->found = bfd_find_nearest_line(abfd, section, a2l->syms, pc - vma, - &a2l->filename, &a2l->funcname, - &a2l->line); - - if (a2l->filename && !strlen(a2l->filename)) - a2l->filename = NULL; -} - -static struct a2l_data *addr2line_init(const char *path) -{ - bfd *abfd; - struct a2l_data *a2l = NULL; - - abfd = bfd_openr(path, NULL); - if (abfd == NULL) - return NULL; - - if (!bfd_check_format(abfd, bfd_object)) - goto out; - - a2l = zalloc(sizeof(*a2l)); - if (a2l == NULL) - goto out; - - a2l->abfd = abfd; - a2l->input = strdup(path); - if (a2l->input == NULL) - goto out; - - if (slurp_symtab(abfd, a2l)) - goto out; - - return a2l; - -out: - if (a2l) { - zfree((char **)&a2l->input); - free(a2l); - } - bfd_close(abfd); - return NULL; -} - -static void addr2line_cleanup(struct a2l_data *a2l) -{ - if (a2l->abfd) - bfd_close(a2l->abfd); - zfree((char **)&a2l->input); - zfree(&a2l->syms); - free(a2l); -} - -static int 
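srcline_from_fileline(), now exported above, formats "file:line" and trims to the basename unless srcline_full_filename is set. The hunk shows only the signature change, so the body below is a plausible self-contained rendering of that behaviour:

    #define _GNU_SOURCE
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static bool srcline_full_filename;	/* global toggle, kept false here */

    static const char *gnu_basename(const char *path)
    {
    	const char *base = strrchr(path, '/');

    	return base ? base + 1 : path;
    }

    static char *srcline_from_fileline(const char *file, unsigned int line)
    {
    	char *srcline;

    	if (!file)
    		return NULL;

    	if (!srcline_full_filename)
    		file = gnu_basename(file);

    	return asprintf(&srcline, "%s:%u", file, line) < 0 ? NULL : srcline;
    }

    int main(void)
    {
    	char *s = srcline_from_fileline("tools/perf/util/sort.c", 1532);

    	if (s) {
    		puts(s);	/* sort.c:1532 */
    		free(s);
    	}
    	return 0;
    }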
inline_list__append_dso_a2l(struct dso *dso, - struct inline_node *node, - struct symbol *sym) -{ - struct a2l_data *a2l = dso->a2l; - struct symbol *inline_sym = new_inline_sym(dso, sym, a2l->funcname); - char *srcline = NULL; - - if (a2l->filename) - srcline = srcline_from_fileline(a2l->filename, a2l->line); - - return inline_list__append(inline_sym, srcline, node); -} - -static int addr2line(const char *dso_name, u64 addr, - char **file, unsigned int *line, struct dso *dso, - bool unwind_inlines, struct inline_node *node, +static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line_nr, + struct dso *dso, bool unwind_inlines, struct inline_node *node, struct symbol *sym) { - int ret = 0; - struct a2l_data *a2l = dso->a2l; - - if (!a2l) { - dso->a2l = addr2line_init(dso_name); - a2l = dso->a2l; - } - - if (a2l == NULL) { - if (!symbol_conf.disable_add2line_warn) - pr_warning("addr2line_init failed for %s\n", dso_name); - return 0; - } - - a2l->addr = addr; - a2l->found = false; - - bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l); - - if (!a2l->found) - return 0; - - if (unwind_inlines) { - int cnt = 0; - - if (node && inline_list__append_dso_a2l(dso, node, sym)) - return 0; - - while (bfd_find_inliner_info(a2l->abfd, &a2l->filename, - &a2l->funcname, &a2l->line) && - cnt++ < MAX_INLINE_NEST) { - - if (a2l->filename && !strlen(a2l->filename)) - a2l->filename = NULL; - - if (node != NULL) { - if (inline_list__append_dso_a2l(dso, node, sym)) - return 0; - // found at least one inline frame - ret = 1; - } - } - } - - if (file) { - *file = a2l->filename ? strdup(a2l->filename) : NULL; - ret = *file ? 1 : 0; - } - - if (line) - *line = a2l->line; - - return ret; -} - -void dso__free_a2l(struct dso *dso) -{ - struct a2l_data *a2l = dso->a2l; - - if (!a2l) - return; - - addr2line_cleanup(a2l); - - dso->a2l = NULL; -} - -#else /* HAVE_LIBBFD_SUPPORT */ - -static int filename_split(char *filename, unsigned int *line_nr) -{ - char *sep; - - sep = strchr(filename, '\n'); - if (sep) - *sep = '\0'; - - if (!strcmp(filename, "??:0")) - return 0; - - sep = strchr(filename, ':'); - if (sep) { - *sep++ = '\0'; - *line_nr = strtoul(sep, NULL, 0); - return 1; - } - pr_debug("addr2line missing ':' in filename split\n"); - return 0; -} - -static void addr2line_subprocess_cleanup(struct child_process *a2l) -{ - if (a2l->pid != -1) { - kill(a2l->pid, SIGKILL); - finish_command(a2l); /* ignore result, we don't care */ - a2l->pid = -1; - } - - free(a2l); -} - -static struct child_process *addr2line_subprocess_init(const char *addr2line_path, - const char *binary_path) -{ - const char *argv[] = { - addr2line_path ?: "addr2line", - "-e", binary_path, - "-a", "-i", "-f", NULL - }; - struct child_process *a2l = zalloc(sizeof(*a2l)); - int start_command_status = 0; - - if (a2l == NULL) { - pr_err("Failed to allocate memory for addr2line"); - return NULL; - } - - a2l->pid = -1; - a2l->in = -1; - a2l->out = -1; - a2l->no_stderr = 1; - - a2l->argv = argv; - start_command_status = start_command(a2l); - a2l->argv = NULL; /* it's not used after start_command; avoid dangling pointers */ - - if (start_command_status != 0) { - pr_warning("could not start addr2line (%s) for %s: start_command return code %d\n", - addr2line_path, binary_path, start_command_status); - addr2line_subprocess_cleanup(a2l); - return NULL; - } - - return a2l; -} - -enum a2l_style { - BROKEN, - GNU_BINUTILS, - LLVM, -}; - -static enum a2l_style addr2line_configure(struct child_process *a2l, const char *dso_name) -{ 
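The subprocess backend being removed parses addr2line's "file:line" output with filename_split(). A standalone copy of that helper as it appears in the deleted hunk, with the debug print swapped for stderr:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int filename_split(char *filename, unsigned int *line_nr)
    {
    	char *sep = strchr(filename, '\n');

    	if (sep)
    		*sep = '\0';	/* strip trailing newline */

    	if (!strcmp(filename, "??:0"))
    		return 0;	/* addr2line found nothing */

    	sep = strchr(filename, ':');
    	if (sep) {
    		*sep++ = '\0';
    		*line_nr = strtoul(sep, NULL, 0);
    		return 1;
    	}
    	fprintf(stderr, "addr2line missing ':' in filename split\n");
    	return 0;
    }

    int main(void)
    {
    	char buf[] = "kernel/sched/core.c:1234\n";
    	unsigned int line;

    	if (filename_split(buf, &line))
    		printf("file=%s line=%u\n", buf, line);
    	return 0;
    }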
- static bool cached; - static enum a2l_style style; - - if (!cached) { - char buf[128]; - struct io io; - int ch; - int lines; - - if (write(a2l->in, ",\n", 2) != 2) - return BROKEN; - - io__init(&io, a2l->out, buf, sizeof(buf)); - ch = io__get_char(&io); - if (ch == ',') { - style = LLVM; - cached = true; - lines = 1; - pr_debug("Detected LLVM addr2line style\n"); - } else if (ch == '0') { - style = GNU_BINUTILS; - cached = true; - lines = 3; - pr_debug("Detected binutils addr2line style\n"); - } else { - if (!symbol_conf.disable_add2line_warn) { - char *output = NULL; - size_t output_len; - - io__getline(&io, &output, &output_len); - pr_warning("%s %s: addr2line configuration failed\n", - __func__, dso_name); - pr_warning("\t%c%s", ch, output); - } - pr_debug("Unknown/broken addr2line style\n"); - return BROKEN; - } - while (lines) { - ch = io__get_char(&io); - if (ch <= 0) - break; - if (ch == '\n') - lines--; - } - /* Ignore SIGPIPE in the event addr2line exits. */ - signal(SIGPIPE, SIG_IGN); - } - return style; -} - -static int read_addr2line_record(struct io *io, - enum a2l_style style, - const char *dso_name, - u64 addr, - bool first, - char **function, - char **filename, - unsigned int *line_nr) -{ - /* - * Returns: - * -1 ==> error - * 0 ==> sentinel (or other ill-formed) record read - * 1 ==> a genuine record read - */ - char *line = NULL; - size_t line_len = 0; - unsigned int dummy_line_nr = 0; - int ret = -1; - - if (function != NULL) - zfree(function); - - if (filename != NULL) - zfree(filename); - - if (line_nr != NULL) - *line_nr = 0; - - /* - * Read the first line. Without an error this will be: - * - for the first line an address like 0x1234, - * - the binutils sentinel 0x0000000000000000, - * - the llvm-addr2line the sentinel ',' character, - * - the function name line for an inlined function. - */ - if (io__getline(io, &line, &line_len) < 0 || !line_len) - goto error; - - pr_debug("%s %s: addr2line read address for sentinel: %s", __func__, dso_name, line); - if (style == LLVM && line_len == 2 && line[0] == ',') { - /* Found the llvm-addr2line sentinel character. */ - zfree(&line); - return 0; - } else if (style == GNU_BINUTILS && (!first || addr != 0)) { - int zero_count = 0, non_zero_count = 0; - /* - * Check for binutils sentinel ignoring it for the case the - * requested address is 0. - */ - - /* A given address should always start 0x. */ - if (line_len >= 2 || line[0] != '0' || line[1] != 'x') { - for (size_t i = 2; i < line_len; i++) { - if (line[i] == '0') - zero_count++; - else if (line[i] != '\n') - non_zero_count++; - } - if (!non_zero_count) { - int ch; - - if (first && !zero_count) { - /* Line was erroneous just '0x'. */ - goto error; - } - /* - * Line was 0x0..0, the sentinel for binutils. Remove - * the function and filename lines. - */ - zfree(&line); - do { - ch = io__get_char(io); - } while (ch > 0 && ch != '\n'); - do { - ch = io__get_char(io); - } while (ch > 0 && ch != '\n'); - return 0; - } - } - } - /* Read the second function name line (if inline data then this is the first line). */ - if (first && (io__getline(io, &line, &line_len) < 0 || !line_len)) - goto error; - - pr_debug("%s %s: addr2line read line: %s", __func__, dso_name, line); - if (function != NULL) - *function = strdup(strim(line)); - - zfree(&line); - line_len = 0; - - /* Read the third filename and line number line. 
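The removed backend probed which addr2line flavour it had spawned by sending the bogus query ",\n" and sniffing the first reply character: llvm-addr2line echoes the ',' back, while GNU binutils prints a 0x0000... address line. A string-level sketch of just that classification, detached from the pipe plumbing:

    #include <stdio.h>

    enum a2l_style { BROKEN, GNU_BINUTILS, LLVM };

    static enum a2l_style classify(const char *reply)
    {
    	switch (reply[0]) {
    	case ',': return LLVM;		/* sentinel echoed back */
    	case '0': return GNU_BINUTILS;	/* 0x0...0 address line */
    	default:  return BROKEN;
    	}
    }

    int main(void)
    {
    	printf("%d %d %d\n",
    	       classify(",\n"),					/* LLVM */
    	       classify("0x0000000000000000\n??\n??:0\n"),	/* GNU */
    	       classify("garbage"));				/* BROKEN */
    	return 0;
    }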
*/ - if (io__getline(io, &line, &line_len) < 0 || !line_len) - goto error; - - pr_debug("%s %s: addr2line filename:number : %s", __func__, dso_name, line); - if (filename_split(line, line_nr == NULL ? &dummy_line_nr : line_nr) == 0 && - style == GNU_BINUTILS) { - ret = 0; - goto error; - } - - if (filename != NULL) - *filename = strdup(line); - - zfree(&line); - line_len = 0; - - return 1; - -error: - free(line); - if (function != NULL) - zfree(function); - if (filename != NULL) - zfree(filename); - return ret; -} - -static int inline_list__append_record(struct dso *dso, - struct inline_node *node, - struct symbol *sym, - const char *function, - const char *filename, - unsigned int line_nr) -{ - struct symbol *inline_sym = new_inline_sym(dso, sym, function); - - return inline_list__append(inline_sym, srcline_from_fileline(filename, line_nr), node); -} - -static int addr2line(const char *dso_name, u64 addr, - char **file, unsigned int *line_nr, - struct dso *dso, - bool unwind_inlines, - struct inline_node *node, - struct symbol *sym __maybe_unused) -{ - struct child_process *a2l = dso->a2l; - char *record_function = NULL; - char *record_filename = NULL; - unsigned int record_line_nr = 0; - int record_status = -1; - int ret = 0; - size_t inline_count = 0; - int len; - char buf[128]; - ssize_t written; - struct io io = { .eof = false }; - enum a2l_style a2l_style; - - if (!a2l) { - if (!filename__has_section(dso_name, ".debug_line")) - goto out; - - dso->a2l = addr2line_subprocess_init(symbol_conf.addr2line_path, dso_name); - a2l = dso->a2l; - } - - if (a2l == NULL) { - if (!symbol_conf.disable_add2line_warn) - pr_warning("%s %s: addr2line_subprocess_init failed\n", __func__, dso_name); - goto out; - } - a2l_style = addr2line_configure(a2l, dso_name); - if (a2l_style == BROKEN) - goto out; + int ret; - /* - * Send our request and then *deliberately* send something that can't be - * interpreted as a valid address to ask addr2line about (namely, - * ","). This causes addr2line to first write out the answer to our - * request, in an unbounded/unknown number of records, and then to write - * out the lines "0x0...0", "??" and "??:0", for GNU binutils, or "," - * for llvm-addr2line, so that we can detect when it has finished giving - * us anything useful. - */ - len = snprintf(buf, sizeof(buf), "%016"PRIx64"\n,\n", addr); - written = len > 0 ? write(a2l->in, buf, len) : -1; - if (written != len) { - if (!symbol_conf.disable_add2line_warn) - pr_warning("%s %s: could not send request\n", __func__, dso_name); - goto out; - } - io__init(&io, a2l->out, buf, sizeof(buf)); - io.timeout_ms = addr2line_timeout_ms; - switch (read_addr2line_record(&io, a2l_style, dso_name, addr, /*first=*/true, - &record_function, &record_filename, &record_line_nr)) { - case -1: - if (!symbol_conf.disable_add2line_warn) - pr_warning("%s %s: could not read first record\n", __func__, dso_name); - goto out; - case 0: - /* - * The first record was invalid, so return failure, but first - * read another record, since we sent a sentinel ',' for the - * sake of detected the last inlined function. Treat this as the - * first of a record as the ',' generates a new start with GNU - * binutils, also force a non-zero address as we're no longer - * reading that record. 
- */ - switch (read_addr2line_record(&io, a2l_style, dso_name, - /*addr=*/1, /*first=*/true, - NULL, NULL, NULL)) { - case -1: - if (!symbol_conf.disable_add2line_warn) - pr_warning("%s %s: could not read sentinel record\n", - __func__, dso_name); - break; - case 0: - /* The sentinel as expected. */ - break; - default: - if (!symbol_conf.disable_add2line_warn) - pr_warning("%s %s: unexpected record instead of sentinel", - __func__, dso_name); - break; - } - goto out; - default: - /* First record as expected. */ - break; - } + ret = llvm__addr2line(dso_name, addr, file, line_nr, dso, unwind_inlines, node, sym); + if (ret > 0) + return ret; - if (file) { - *file = strdup(record_filename); - ret = 1; - } - if (line_nr) - *line_nr = record_line_nr; - - if (unwind_inlines) { - if (node && inline_list__append_record(dso, node, sym, - record_function, - record_filename, - record_line_nr)) { - ret = 0; - goto out; - } - } + ret = libbfd__addr2line(dso_name, addr, file, line_nr, dso, unwind_inlines, node, sym); + if (ret > 0) + return ret; - /* - * We have to read the records even if we don't care about the inline - * info. This isn't the first record and force the address to non-zero - * as we're reading records beyond the first. - */ - while ((record_status = read_addr2line_record(&io, - a2l_style, - dso_name, - /*addr=*/1, - /*first=*/false, - &record_function, - &record_filename, - &record_line_nr)) == 1) { - if (unwind_inlines && node && inline_count++ < MAX_INLINE_NEST) { - if (inline_list__append_record(dso, node, sym, - record_function, - record_filename, - record_line_nr)) { - ret = 0; - goto out; - } - ret = 1; /* found at least one inline frame */ - } - } - -out: - free(record_function); - free(record_filename); - if (io.eof) { - dso->a2l = NULL; - addr2line_subprocess_cleanup(a2l); - } - return ret; -} - -void dso__free_a2l(struct dso *dso) -{ - struct child_process *a2l = dso->a2l; - - if (!a2l) - return; - - addr2line_subprocess_cleanup(a2l); - - dso->a2l = NULL; + return cmd__addr2line(dso_name, addr, file, line_nr, dso, unwind_inlines, node, sym); } -#endif /* HAVE_LIBBFD_SUPPORT */ - static struct inline_node *addr2inlines(const char *dso_name, u64 addr, struct dso *dso, struct symbol *sym) { @@ -802,7 +145,9 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr, INIT_LIST_HEAD(&node->val); node->addr = addr; - addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym); + addr2line(dso_name, addr, /*file=*/NULL, /*line_nr=*/NULL, dso, + /*unwind_inlines=*/true, node, sym); + return node; } @@ -821,33 +166,34 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, char *srcline; const char *dso_name; - if (!dso->has_srcline) + if (!dso__has_srcline(dso)) goto out; - dso_name = dso__name(dso); + dso_name = srcline_dso_name(dso); if (dso_name == NULL) - goto out; + goto out_err; if (!addr2line(dso_name, addr, &file, &line, dso, - unwind_inlines, NULL, sym)) - goto out; + unwind_inlines, /*node=*/NULL, sym)) + goto out_err; srcline = srcline_from_fileline(file, line); free(file); if (!srcline) - goto out; + goto out_err; - dso->a2l_fails = 0; + dso__set_a2l_fails(dso, 0); return srcline; -out: - if (dso->a2l_fails && ++dso->a2l_fails > A2L_FAIL_LIMIT) { - dso->has_srcline = 0; +out_err: + dso__set_a2l_fails(dso, dso__a2l_fails(dso) + 1); + if (dso__a2l_fails(dso) > A2L_FAIL_LIMIT) { + dso__set_has_srcline(dso, false); dso__free_a2l(dso); } - +out: if (!show_addr) return (show_sym && sym) ? 
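The out_err paths above implement a per-DSO back-off: each failed srcline lookup bumps a counter and, past A2L_FAIL_LIMIT, srcline support is switched off for that DSO so a binary without line info stops paying the lookup cost. A reduced sketch with a stand-in dso struct (the limit value is assumed here; perf defines the real one elsewhere):

    #include <stdbool.h>
    #include <stdio.h>

    #define A2L_FAIL_LIMIT 123	/* assumed for this sketch */

    struct dso_like {
    	unsigned int a2l_fails;
    	bool has_srcline;
    };

    static const char *get_srcline(struct dso_like *dso, unsigned long addr)
    {
    	if (!dso->has_srcline)
    		return NULL;	/* permanently disabled for this DSO */

    	if (addr & 1) {		/* stand-in for a failed lookup */
    		if (++dso->a2l_fails > A2L_FAIL_LIMIT)
    			dso->has_srcline = false;
    		return NULL;
    	}

    	dso->a2l_fails = 0;	/* success resets the counter */
    	return "file.c:42";
    }

    int main(void)
    {
    	struct dso_like dso = { .has_srcline = true };

    	for (unsigned long a = 1; a < 300; a += 2)
    		get_srcline(&dso, a);
    	printf("still enabled: %d\n", dso.has_srcline);	/* 0 */
    	return 0;
    }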
strndup(sym->name, sym->namelen) : SRCLINE_UNKNOWN; @@ -856,7 +202,7 @@ out: if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "", ip - sym->start) < 0) return SRCLINE_UNKNOWN; - } else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0) + } else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso__short_name(dso), addr) < 0) return SRCLINE_UNKNOWN; return srcline; } @@ -867,22 +213,24 @@ char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line) char *file = NULL; const char *dso_name; - if (!dso->has_srcline) - goto out; + if (!dso__has_srcline(dso)) + return NULL; - dso_name = dso__name(dso); + dso_name = srcline_dso_name(dso); if (dso_name == NULL) - goto out; + goto out_err; - if (!addr2line(dso_name, addr, &file, line, dso, true, NULL, NULL)) - goto out; + if (!addr2line(dso_name, addr, &file, line, dso, /*unwind_inlines=*/true, + /*node=*/NULL, /*sym=*/NULL)) + goto out_err; - dso->a2l_fails = 0; + dso__set_a2l_fails(dso, 0); return file; -out: - if (dso->a2l_fails && ++dso->a2l_fails > A2L_FAIL_LIMIT) { - dso->has_srcline = 0; +out_err: + dso__set_a2l_fails(dso, dso__a2l_fails(dso) + 1); + if (dso__a2l_fails(dso) > A2L_FAIL_LIMIT) { + dso__set_has_srcline(dso, false); dso__free_a2l(dso); } @@ -980,7 +328,7 @@ struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr, { const char *dso_name; - dso_name = dso__name(dso); + dso_name = srcline_dso_name(dso); if (dso_name == NULL) return NULL; diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index 75010d39ea28..c36f573cd339 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -9,7 +9,6 @@ struct dso; struct symbol; -extern int addr2line_timeout_ms; extern bool srcline_full_filename; char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym, bool show_addr, u64 ip); @@ -29,6 +28,8 @@ void srcline__tree_delete(struct rb_root_cached *tree); extern char *srcline__unknown; #define SRCLINE_UNKNOWN srcline__unknown +#define MAX_INLINE_NEST 1024 + struct inline_list { struct symbol *symbol; char *srcline; @@ -55,4 +56,10 @@ struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr); /* delete all nodes within the tree of inline_node s */ void inlines__tree_delete(struct rb_root_cached *tree); +int inline_list__append(struct symbol *symbol, char *srcline, struct inline_node *node); +char *srcline_from_fileline(const char *file, unsigned int line); +struct symbol *new_inline_sym(struct dso *dso, + struct symbol *base_sym, + const char *funcname); + #endif /* PERF_SRCLINE_H */ diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index d45d5dcb0e2b..6d02f84c5691 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -7,6 +7,7 @@ #include <perf/cpumap.h> #include "color.h" #include "counts.h" +#include "debug.h" #include "evlist.h" #include "evsel.h" #include "stat.h" @@ -21,6 +22,7 @@ #include "iostat.h" #include "pmu.h" #include "pmus.h" +#include "tool_pmu.h" #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" @@ -38,6 +40,7 @@ static int aggr_header_lens[] = { [AGGR_CORE] = 18, [AGGR_CACHE] = 22, + [AGGR_CLUSTER] = 20, [AGGR_DIE] = 12, [AGGR_SOCKET] = 6, [AGGR_NODE] = 6, @@ -47,19 +50,21 @@ static int aggr_header_lens[] = { }; static const char *aggr_header_csv[] = { - [AGGR_CORE] = "core,cpus,", - [AGGR_CACHE] = "cache,cpus,", - [AGGR_DIE] = "die,cpus,", - [AGGR_SOCKET] = "socket,cpus,", - [AGGR_NONE] = "cpu,", - [AGGR_THREAD] = 
"comm-pid,", - [AGGR_NODE] = "node,", - [AGGR_GLOBAL] = "" + [AGGR_CORE] = "core,ctrs,", + [AGGR_CACHE] = "cache,ctrs,", + [AGGR_CLUSTER] = "cluster,ctrs,", + [AGGR_DIE] = "die,ctrs,", + [AGGR_SOCKET] = "socket,ctrs,", + [AGGR_NONE] = "cpu,", + [AGGR_THREAD] = "comm-pid,", + [AGGR_NODE] = "node,", + [AGGR_GLOBAL] = "" }; static const char *aggr_header_std[] = { [AGGR_CORE] = "core", [AGGR_CACHE] = "cache", + [AGGR_CLUSTER] = "cluster", [AGGR_DIE] = "die", [AGGR_SOCKET] = "socket", [AGGR_NONE] = "cpu", @@ -68,6 +73,32 @@ static const char *aggr_header_std[] = { [AGGR_GLOBAL] = "" }; +const char *metric_threshold_classify__color(enum metric_threshold_classify thresh) +{ + const char * const colors[] = { + "", /* unknown */ + PERF_COLOR_RED, /* bad */ + PERF_COLOR_MAGENTA, /* nearly bad */ + PERF_COLOR_YELLOW, /* less good */ + PERF_COLOR_GREEN, /* good */ + }; + static_assert(ARRAY_SIZE(colors) - 1 == METRIC_THRESHOLD_GOOD, "missing enum value"); + return colors[thresh]; +} + +static const char *metric_threshold_classify__str(enum metric_threshold_classify thresh) +{ + const char * const strs[] = { + "unknown", + "bad", + "nearly bad", + "less good", + "good", + }; + static_assert(ARRAY_SIZE(strs) - 1 == METRIC_THRESHOLD_GOOD, "missing enum value"); + return strs[thresh]; +} + static void print_running_std(struct perf_stat_config *config, u64 run, u64 ena) { if (run != ena) @@ -83,23 +114,59 @@ static void print_running_csv(struct perf_stat_config *config, u64 run, u64 ena) fprintf(config->output, "%s%" PRIu64 "%s%.2f", config->csv_sep, run, config->csv_sep, enabled_percent); } +struct outstate { + /* Std mode: insert a newline before the next metric */ + bool newline; + /* JSON mode: track need for comma for a previous field or not */ + bool first; + /* Num CSV separators remaining to pad out when not all fields are printed */ + int csv_col_pad; + + /* + * The following don't track state across fields, but are here as a shortcut to + * pass data to the print functions. The alternative would be to update the + * function signatures of the entire print stack to pass them through. + */ + /* Place to output to */ + FILE * const fh; + /* Lines are timestamped in --interval-print mode */ + char timestamp[64]; + /* Num items aggregated in current line. See struct perf_stat_aggr.nr */ + int aggr_nr; + /* Core/socket/die etc ID for the current line */ + struct aggr_cpu_id id; + /* Event for current line */ + struct evsel *evsel; + /* Cgroup for current line */ + struct cgroup *cgrp; +}; + +static const char *json_sep(struct outstate *os) +{ + const char *sep = os->first ? "" : ", "; + + os->first = false; + return sep; +} + +#define json_out(os, format, ...) 
-static void print_running_json(struct perf_stat_config *config, u64 run, u64 ena) +static void print_running_json(struct outstate *os, u64 run, u64 ena) { double enabled_percent = 100; if (run != ena) enabled_percent = 100 * run / ena; - fprintf(config->output, "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f, ", - run, enabled_percent); + json_out(os, "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f", + run, enabled_percent); } -static void print_running(struct perf_stat_config *config, +static void print_running(struct perf_stat_config *config, struct outstate *os, u64 run, u64 ena, bool before_metric) { if (config->json_output) { if (before_metric) - print_running_json(config, run, ena); + print_running_json(os, run, ena); } else if (config->csv_output) { if (before_metric) print_running_csv(config, run, ena); @@ -122,20 +189,20 @@ static void print_noise_pct_csv(struct perf_stat_config *config, fprintf(config->output, "%s%.2f%%", config->csv_sep, pct); } -static void print_noise_pct_json(struct perf_stat_config *config, +static void print_noise_pct_json(struct outstate *os, double pct) { - fprintf(config->output, "\"variance\" : %.2f, ", pct); + json_out(os, "\"variance\" : %.2f", pct); } -static void print_noise_pct(struct perf_stat_config *config, +static void print_noise_pct(struct perf_stat_config *config, struct outstate *os, double total, double avg, bool before_metric) { double pct = rel_stddev_stats(total, avg); if (config->json_output) { if (before_metric) - print_noise_pct_json(config, pct); + print_noise_pct_json(os, pct); } else if (config->csv_output) { if (before_metric) print_noise_pct_csv(config, pct); @@ -145,7 +212,7 @@ static void print_noise_pct(struct perf_stat_config *config, } } -static void print_noise(struct perf_stat_config *config, +static void print_noise(struct perf_stat_config *config, struct outstate *os, struct evsel *evsel, double avg, bool before_metric) { struct perf_stat_evsel *ps; @@ -154,7 +221,7 @@ static void print_noise(struct perf_stat_config *config, return; ps = evsel->stats; - print_noise_pct(config, stddev_stats(&ps->res_stats), avg, before_metric); + print_noise_pct(config, os, stddev_stats(&ps->res_stats), avg, before_metric); } static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name) @@ -167,18 +234,19 @@ static void print_cgroup_csv(struct perf_stat_config *config, const char *cgrp_n fprintf(config->output, "%s%s", config->csv_sep, cgrp_name); } -static void print_cgroup_json(struct perf_stat_config *config, const char *cgrp_name) +static void print_cgroup_json(struct outstate *os, const char *cgrp_name) { - fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name); + json_out(os, "\"cgroup\" : \"%s\"", cgrp_name); } -static void print_cgroup(struct perf_stat_config *config, struct cgroup *cgrp) +static void print_cgroup(struct perf_stat_config *config, struct outstate *os, + struct cgroup *cgrp) { if (nr_cgroups || config->cgroup_list) { const char *cgrp_name = cgrp ?
cgrp->name : ""; if (config->json_output) - print_cgroup_json(config, cgrp_name); + print_cgroup_json(os, cgrp_name); else if (config->csv_output) print_cgroup_csv(config, cgrp_name); else @@ -201,6 +269,9 @@ static void print_aggr_id_std(struct perf_stat_config *config, snprintf(buf, sizeof(buf), "S%d-D%d-L%d-ID%d", id.socket, id.die, id.cache_lvl, id.cache); break; + case AGGR_CLUSTER: + snprintf(buf, sizeof(buf), "S%d-D%d-CLS%d", id.socket, id.die, id.cluster); + break; case AGGR_DIE: snprintf(buf, sizeof(buf), "S%d-D%d", id.socket, id.die); break; @@ -233,7 +304,7 @@ static void print_aggr_id_std(struct perf_stat_config *config, return; } - fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, 4, aggr_nr); + fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, /*strlen("ctrs")*/ 4, aggr_nr); } static void print_aggr_id_csv(struct perf_stat_config *config, @@ -251,6 +322,10 @@ static void print_aggr_id_csv(struct perf_stat_config *config, fprintf(config->output, "S%d-D%d-L%d-ID%d%s%d%s", id.socket, id.die, id.cache_lvl, id.cache, sep, aggr_nr, sep); break; + case AGGR_CLUSTER: + fprintf(config->output, "S%d-D%d-CLS%d%s%d%s", + id.socket, id.die, id.cluster, sep, aggr_nr, sep); + break; case AGGR_DIE: fprintf(output, "S%d-D%d%s%d%s", id.socket, id.die, sep, aggr_nr, sep); @@ -286,43 +361,45 @@ static void print_aggr_id_csv(struct perf_stat_config *config, } } -static void print_aggr_id_json(struct perf_stat_config *config, +static void print_aggr_id_json(struct perf_stat_config *config, struct outstate *os, struct evsel *evsel, struct aggr_cpu_id id, int aggr_nr) { - FILE *output = config->output; - switch (config->aggr_mode) { case AGGR_CORE: - fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", + json_out(os, "\"core\" : \"S%d-D%d-C%d\", \"counters\" : %d", id.socket, id.die, id.core, aggr_nr); break; case AGGR_CACHE: - fprintf(output, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d, ", + json_out(os, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"counters\" : %d", id.socket, id.die, id.cache_lvl, id.cache, aggr_nr); break; + case AGGR_CLUSTER: + json_out(os, "\"cluster\" : \"S%d-D%d-CLS%d\", \"counters\" : %d", + id.socket, id.die, id.cluster, aggr_nr); + break; case AGGR_DIE: - fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ", + json_out(os, "\"die\" : \"S%d-D%d\", \"counters\" : %d", id.socket, id.die, aggr_nr); break; case AGGR_SOCKET: - fprintf(output, "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ", + json_out(os, "\"socket\" : \"S%d\", \"counters\" : %d", id.socket, aggr_nr); break; case AGGR_NODE: - fprintf(output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ", + json_out(os, "\"node\" : \"N%d\", \"counters\" : %d", id.node, aggr_nr); break; case AGGR_NONE: if (evsel->percore && !config->percore_show_thread) { - fprintf(output, "\"core\" : \"S%d-D%d-C%d\"", + json_out(os, "\"core\" : \"S%d-D%d-C%d\"", id.socket, id.die, id.core); } else if (id.cpu.cpu > -1) { - fprintf(output, "\"cpu\" : \"%d\", ", + json_out(os, "\"cpu\" : \"%d\"", id.cpu.cpu); } break; case AGGR_THREAD: - fprintf(output, "\"thread\" : \"%s-%d\", ", + json_out(os, "\"thread\" : \"%s-%d\"", perf_thread_map__comm(evsel->core.threads, id.thread_idx), perf_thread_map__pid(evsel->core.threads, id.thread_idx)); break; @@ -334,29 +411,17 @@ static void print_aggr_id_json(struct perf_stat_config *config, } } -static void aggr_printout(struct perf_stat_config *config, +static void aggr_printout(struct perf_stat_config *config, struct outstate *os, struct 
evsel *evsel, struct aggr_cpu_id id, int aggr_nr) { if (config->json_output) - print_aggr_id_json(config, evsel, id, aggr_nr); + print_aggr_id_json(config, os, evsel, id, aggr_nr); else if (config->csv_output) print_aggr_id_csv(config, evsel, id, aggr_nr); else print_aggr_id_std(config, evsel, id, aggr_nr); } -struct outstate { - FILE *fh; - bool newline; - bool first; - const char *prefix; - int nfields; - int aggr_nr; - struct aggr_cpu_id id; - struct evsel *evsel; - struct cgroup *cgrp; -}; - static void new_line_std(struct perf_stat_config *config __maybe_unused, void *ctx) { @@ -369,14 +434,14 @@ static inline void __new_line_std_csv(struct perf_stat_config *config, struct outstate *os) { fputc('\n', os->fh); - if (os->prefix) - fputs(os->prefix, os->fh); - aggr_printout(config, os->evsel, os->id, os->aggr_nr); + if (config->interval) + fputs(os->timestamp, os->fh); + aggr_printout(config, os, os->evsel, os->id, os->aggr_nr); } -static inline void __new_line_std(struct outstate *os) +static inline void __new_line_std(struct perf_stat_config *config, struct outstate *os) { - fprintf(os->fh, " "); + fprintf(os->fh, "%*s", COUNTS_LEN + EVNAME_LEN + config->unit_width + 2, ""); } static void do_new_line_std(struct perf_stat_config *config, @@ -385,17 +450,18 @@ static void do_new_line_std(struct perf_stat_config *config, __new_line_std_csv(config, os); if (config->aggr_mode == AGGR_NONE) fprintf(os->fh, " "); - __new_line_std(os); + __new_line_std(config, os); } static void print_metric_std(struct perf_stat_config *config, - void *ctx, const char *color, const char *fmt, - const char *unit, double val) + void *ctx, enum metric_threshold_classify thresh, + const char *fmt, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; int n; bool newline = os->newline; + const char *color = metric_threshold_classify__color(thresh); os->newline = false; @@ -421,13 +487,13 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx) int i; __new_line_std_csv(config, os); - for (i = 0; i < os->nfields; i++) + for (i = 0; i < os->csv_col_pad; i++) fputs(config->csv_sep, os->fh); } static void print_metric_csv(struct perf_stat_config *config __maybe_unused, void *ctx, - const char *color __maybe_unused, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt, const char *unit, double val) { struct outstate *os = ctx; @@ -448,15 +514,20 @@ static void print_metric_csv(struct perf_stat_config *config __maybe_unused, static void print_metric_json(struct perf_stat_config *config __maybe_unused, void *ctx, - const char *color __maybe_unused, + enum metric_threshold_classify thresh, const char *fmt __maybe_unused, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; - fprintf(out, "\"metric-value\" : \"%f\", ", val); - fprintf(out, "\"metric-unit\" : \"%s\"", unit); + if (unit) { + json_out(os, "\"metric-value\" : \"%f\", \"metric-unit\" : \"%s\"", val, unit); + if (thresh != METRIC_THRESHOLD_UNKNOWN) { + json_out(os, "\"metric-threshold\" : \"%s\"", + metric_threshold_classify__str(thresh)); + } + } if (!config->metric_only) fprintf(out, "}"); } @@ -466,9 +537,11 @@ static void new_line_json(struct perf_stat_config *config, void *ctx) struct outstate *os = ctx; fputs("\n{", os->fh); - if (os->prefix) - fprintf(os->fh, "%s", os->prefix); - aggr_printout(config, os->evsel, os->id, os->aggr_nr); + os->first = true; + if (config->interval) + json_out(os, "%s", os->timestamp); + + aggr_printout(config, os, os->evsel, os->id, 
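os->aggr_nr); }

Putting the helpers above together: print_counter_aggrdata() (further below) opens each record with '{' and resets os->first, the timestamp and aggregate ID come first, and every subsequent helper appends fields through json_out(). A hand-assembled illustration of one interval row, built only from the format strings in this file (not captured output; all values are invented):

        {"interval" : 1.001019387, "core" : "S0-D0-C0", "counters" : 2, "counter-value" : "12345678.000000", "event" : "cycles", "event-runtime" : 2002038, "pcnt-running" : 100.00, "metric-value" : "1.234567", "metric-unit" : "GHz"}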
static void print_metricgroup_header_json(struct perf_stat_config *config, @@ -478,7 +551,7 @@ static void print_metricgroup_header_json(struct perf_stat_config *config, if (!metricgroup_name) return; - fprintf(config->output, "\"metricgroup\" : \"%s\"}", metricgroup_name); + json_out((struct outstate *) ctx, "\"metricgroup\" : \"%s\"}", metricgroup_name); new_line_json(config, ctx); } @@ -491,12 +564,12 @@ static void print_metricgroup_header_csv(struct perf_stat_config *config, int i; if (!metricgroup_name) { /* Leave space for running and enabling */ - for (i = 0; i < os->nfields - 2; i++) + for (i = 0; i < os->csv_col_pad - 2; i++) fputs(config->csv_sep, os->fh); return; } - for (i = 0; i < os->nfields; i++) + for (i = 0; i < os->csv_col_pad; i++) fputs(config->csv_sep, os->fh); fprintf(config->output, "%s", metricgroup_name); new_line_csv(config, ctx); @@ -510,75 +583,52 @@ static void print_metricgroup_header_std(struct perf_stat_config *config, int n; if (!metricgroup_name) { - __new_line_std(os); + __new_line_std(config, os); return; } n = fprintf(config->output, " %*s", EVNAME_LEN, metricgroup_name); - fprintf(config->output, "%*s", MGROUP_LEN - n - 1, ""); -} - -/* Filter out some columns that don't work well in metrics only mode */ - -static bool valid_only_metric(const char *unit) -{ - if (!unit) - return false; - if (strstr(unit, "/sec") || - strstr(unit, "CPUs utilized")) - return false; - return true; -} - -static const char *fixunit(char *buf, struct evsel *evsel, - const char *unit) -{ - if (!strncmp(unit, "of all", 6)) { - snprintf(buf, 1024, "%s %s", evsel__name(evsel), - unit); - return buf; - } - return unit; + fprintf(config->output, "%*s", MGROUP_LEN + config->unit_width + 2 - n, ""); } static void print_metric_only(struct perf_stat_config *config, - void *ctx, const char *color, const char *fmt, - const char *unit, double val) + void *ctx, enum metric_threshold_classify thresh, + const char *fmt, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; - char buf[1024], str[1024]; + char str[1024]; unsigned mlen = config->metric_only_len; + const char *color = metric_threshold_classify__color(thresh); - if (!valid_only_metric(unit)) - return; - unit = fixunit(buf, os->evsel, unit); + if (!unit) + unit = ""; if (mlen < strlen(unit)) mlen = strlen(unit) + 1; if (color) mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1; - color_snprintf(str, sizeof(str), color ?: "", fmt, val); + color_snprintf(str, sizeof(str), color ?: "", fmt ?: "", val); fprintf(out, "%*s ", mlen, str); os->first = false; } static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused, - void *ctx, const char *color __maybe_unused, + void *ctx, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt, - const char *unit, double val) + const char *unit __maybe_unused, double val) { struct outstate *os = ctx; FILE *out = os->fh; char buf[64], *vals, *ends; - char tbuf[1024]; - if (!valid_only_metric(unit)) + if (!unit) return; - unit = fixunit(tbuf, os->evsel, unit); - snprintf(buf, sizeof buf, fmt, val); + + snprintf(buf, sizeof(buf), fmt ?: "", val); ends = vals = skip_spaces(buf); while (isdigit(*ends) || *ends == '.') ends++; @@ -588,41 +638,34 @@ static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused } static void print_metric_only_json(struct perf_stat_config *config __maybe_unused, - void *ctx, const char *color __maybe_unused, + void *ctx, + enum metric_threshold_classify thresh __maybe_unused, const
char *fmt, const char *unit, double val) { struct outstate *os = ctx; - FILE *out = os->fh; - char buf[64], *vals, *ends; - char tbuf[1024]; + char buf[64], *ends; + const char *vals; - if (!valid_only_metric(unit)) + if (!unit || !unit[0]) return; - unit = fixunit(tbuf, os->evsel, unit); - snprintf(buf, sizeof(buf), fmt, val); - ends = vals = skip_spaces(buf); + snprintf(buf, sizeof(buf), fmt ?: "", val); + vals = ends = skip_spaces(buf); while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; - if (!unit[0] || !vals[0]) - return; - fprintf(out, "%s\"%s\" : \"%s\"", os->first ? "" : ", ", unit, vals); - os->first = false; -} - -static void new_line_metric(struct perf_stat_config *config __maybe_unused, - void *ctx __maybe_unused) -{ + if (!vals[0]) + vals = "none"; + json_out(os, "\"%s\" : \"%s\"", unit, vals); } static void print_metric_header(struct perf_stat_config *config, - void *ctx, const char *color __maybe_unused, + void *ctx, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt __maybe_unused, const char *unit, double val __maybe_unused) { struct outstate *os = ctx; - char tbuf[1024]; /* In case of iostat, print metric header for first root port only */ if (config->iostat_run && @@ -632,9 +675,8 @@ static void print_metric_header(struct perf_stat_config *config, if (os->evsel->cgrp != os->cgrp) return; - if (!valid_only_metric(unit)) + if (!unit) return; - unit = fixunit(tbuf, os->evsel, unit); if (config->json_output) return; @@ -688,28 +730,27 @@ static void print_counter_value_csv(struct perf_stat_config *config, fprintf(output, "%s", evsel__name(evsel)); } -static void print_counter_value_json(struct perf_stat_config *config, +static void print_counter_value_json(struct outstate *os, struct evsel *evsel, double avg, bool ok) { - FILE *output = config->output; const char *bad_count = evsel->supported ? 
CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED; if (ok) - fprintf(output, "\"counter-value\" : \"%f\", ", avg); + json_out(os, "\"counter-value\" : \"%f\"", avg); else - fprintf(output, "\"counter-value\" : \"%s\", ", bad_count); + json_out(os, "\"counter-value\" : \"%s\"", bad_count); if (evsel->unit) - fprintf(output, "\"unit\" : \"%s\", ", evsel->unit); + json_out(os, "\"unit\" : \"%s\"", evsel->unit); - fprintf(output, "\"event\" : \"%s\", ", evsel__name(evsel)); + json_out(os, "\"event\" : \"%s\"", evsel__name(evsel)); } -static void print_counter_value(struct perf_stat_config *config, +static void print_counter_value(struct perf_stat_config *config, struct outstate *os, struct evsel *evsel, double avg, bool ok) { if (config->json_output) - print_counter_value_json(config, evsel, avg, ok); + print_counter_value_json(os, evsel, avg, ok); else if (config->csv_output) print_counter_value_csv(config, evsel, avg, ok); else @@ -717,48 +758,37 @@ static void print_counter_value(struct perf_stat_config *config, } static void abs_printout(struct perf_stat_config *config, + struct outstate *os, struct aggr_cpu_id id, int aggr_nr, struct evsel *evsel, double avg, bool ok) { - aggr_printout(config, evsel, id, aggr_nr); - print_counter_value(config, evsel, avg, ok); - print_cgroup(config, evsel->cgrp); -} - -static bool is_mixed_hw_group(struct evsel *counter) -{ - struct evlist *evlist = counter->evlist; - u32 pmu_type = counter->core.attr.type; - struct evsel *pos; - - if (counter->core.nr_members < 2) - return false; - - evlist__for_each_entry(evlist, pos) { - /* software events can be part of any hardware group */ - if (pos->core.attr.type == PERF_TYPE_SOFTWARE) - continue; - if (pmu_type == PERF_TYPE_SOFTWARE) { - pmu_type = pos->core.attr.type; - continue; - } - if (pmu_type != pos->core.attr.type) - return true; - } - - return false; + aggr_printout(config, os, evsel, id, aggr_nr); + print_counter_value(config, os, evsel, avg, ok); + print_cgroup(config, os, evsel->cgrp); } -static bool evlist__has_hybrid(struct evlist *evlist) +static bool evlist__has_hybrid_pmus(struct evlist *evlist) { struct evsel *evsel; + struct perf_pmu *last_core_pmu = NULL; if (perf_pmus__num_core_pmus() == 1) return false; evlist__for_each_entry(evlist, evsel) { - if (evsel->core.is_pmu_core) + if (evsel->core.is_pmu_core) { + struct perf_pmu *pmu = evsel__find_pmu(evsel); + + if (pmu == last_core_pmu) + continue; + + if (last_core_pmu == NULL) { + last_core_pmu = pmu; + continue; + } + /* A distinct core PMU. */ return true; + } } return false; @@ -776,32 +806,31 @@ static void printout(struct perf_stat_config *config, struct outstate *os, if (config->csv_output) { pm = config->metric_only ? print_metric_only_csv : print_metric_csv; - nl = config->metric_only ? new_line_metric : new_line_csv; + nl = config->metric_only ? NULL : new_line_csv; pmh = print_metricgroup_header_csv; - os->nfields = 4 + (counter->cgrp ? 1 : 0); + os->csv_col_pad = 4 + (counter->cgrp ? 1 : 0); } else if (config->json_output) { pm = config->metric_only ? print_metric_only_json : print_metric_json; - nl = config->metric_only ? new_line_metric : new_line_json; + nl = config->metric_only ? NULL : new_line_json; pmh = print_metricgroup_header_json; } else { pm = config->metric_only ? print_metric_only : print_metric_std; - nl = config->metric_only ? new_line_metric : new_line_std; + nl = config->metric_only ? 
NULL : new_line_std; pmh = print_metricgroup_header_std; } if (run == 0 || ena == 0 || counter->counts->scaled == -1) { if (config->metric_only) { - pm(config, os, NULL, "", "", 0); + pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, + /*unit=*/NULL, /*val=*/0); return; } ok = false; if (counter->supported) { - if (!evlist__has_hybrid(counter->evlist)) { + if (!evlist__has_hybrid_pmus(counter->evlist)) { config->print_free_counters_hint = 1; - if (is_mixed_hw_group(counter)) - config->print_mixed_hw_group_error = 1; } } } @@ -812,18 +841,18 @@ static void printout(struct perf_stat_config *config, struct outstate *os, out.ctx = os; out.force_header = false; - if (!config->metric_only && !counter->default_metricgroup) { - abs_printout(config, os->id, os->aggr_nr, counter, uval, ok); + if (!config->metric_only && (!counter->default_metricgroup || counter->default_show_events)) { + abs_printout(config, os, os->id, os->aggr_nr, counter, uval, ok); - print_noise(config, counter, noise, /*before_metric=*/true); - print_running(config, run, ena, /*before_metric=*/true); + print_noise(config, os, counter, noise, /*before_metric=*/true); + print_running(config, os, run, ena, /*before_metric=*/true); } if (ok) { - if (!config->metric_only && counter->default_metricgroup) { + if (!config->metric_only && counter->default_metricgroup && !counter->default_show_events) { void *from = NULL; - aggr_printout(config, os->evsel, os->id, os->aggr_nr); + aggr_printout(config, os, os->evsel, os->id, os->aggr_nr); /* Print out all the metricgroup with the same metric event. */ do { int num = 0; @@ -836,70 +865,22 @@ static void printout(struct perf_stat_config *config, struct outstate *os, __new_line_std_csv(config, os); } - print_noise(config, counter, noise, /*before_metric=*/true); - print_running(config, run, ena, /*before_metric=*/true); + print_noise(config, os, counter, noise, /*before_metric=*/true); + print_running(config, os, run, ena, /*before_metric=*/true); from = perf_stat__print_shadow_stats_metricgroup(config, counter, aggr_idx, - &num, from, &out, - &config->metric_events); + &num, from, &out); } while (from != NULL); - } else - perf_stat__print_shadow_stats(config, counter, uval, aggr_idx, - &out, &config->metric_events); + } else { + perf_stat__print_shadow_stats(config, counter, aggr_idx, &out); + } } else { - pm(config, os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0); + pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, /*unit=*/NULL, /*val=*/0); } if (!config->metric_only) { - print_noise(config, counter, noise, /*before_metric=*/false); - print_running(config, run, ena, /*before_metric=*/false); - } -} - -static void uniquify_event_name(struct evsel *counter) -{ - char *new_name; - char *config; - int ret = 0; - - if (counter->uniquified_name || counter->use_config_name || - !counter->pmu_name || !strncmp(evsel__name(counter), counter->pmu_name, - strlen(counter->pmu_name))) - return; - - config = strchr(counter->name, '/'); - if (config) { - if (asprintf(&new_name, - "%s%s", counter->pmu_name, config) > 0) { - free(counter->name); - counter->name = new_name; - } - } else { - if (evsel__is_hybrid(counter)) { - ret = asprintf(&new_name, "%s/%s/", - counter->pmu_name, counter->name); - } else { - ret = asprintf(&new_name, "%s [%s]", - counter->name, counter->pmu_name); - } - - if (ret) { - free(counter->name); - counter->name = new_name; - } + print_noise(config, os, counter, noise, /*before_metric=*/false); + print_running(config, os, run, ena, 
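/*before_metric=*/false); }

printout() above now selects its print-metric (pm), new-line (nl) and metricgroup-header (pmh) callbacks per output mode, and with --metric-only the nl callback is deliberately NULL, replacing the old no-op new_line_metric(). Callers must therefore check the pointer before invoking it; stat-shadow.c later in this diff adds exactly such a guard. A reduced sketch of the contract (the helper name is assumed, not from the patch):

        /* nl may legitimately be NULL in metric-only mode: treat NULL as a no-op. */
        static void maybe_new_line(struct perf_stat_config *config,
                                   struct perf_stat_output_ctx *out, void *ctx)
        {
                if (out->new_line)
                        out->new_line(config, ctx);
        }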
- - counter->uniquified_name = true; -} - -static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config) -{ - return evsel__is_hybrid(evsel) && !config->hybrid_merge; -} - -static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter) -{ - if (config->no_merge || hybrid_uniquify(counter, config)) - uniquify_event_name(counter); } /** @@ -926,24 +907,46 @@ static bool should_skip_zero_counter(struct perf_stat_config *config, int idx; /* + * Skip unsupported default events when not verbose. (Default events + * are all marked 'skippable'.) + */ + if (verbose == 0 && counter->skippable && !counter->supported) + return true; + + /* Metric only counts won't be displayed but the metric wants to be computed. */ + if (config->metric_only) + return false; + /* * Skip value 0 when enabling --per-thread globally, * otherwise it will produce too much 0 output. */ if (config->aggr_mode == AGGR_THREAD && config->system_wide) return true; - /* Tool events have the software PMU but are only gathered on 1. */ - if (evsel__is_tool(counter)) - return true; - /* - * Skip value 0 when it's an uncore event and the given aggr id - * does not belong to the PMU cpumask. + * In per-thread mode the aggr_map and aggr_get_id functions may be + * NULL, assume all 0 values should be output in that case. */ - if (!counter->pmu || !counter->pmu->is_uncore) + if (!config->aggr_map || !config->aggr_get_id) return false; - perf_cpu_map__for_each_cpu(cpu, idx, counter->pmu->cpus) { + /* + * Tool events may be gathered on all logical CPUs, for example + * system_time, but for many the first index is the only one used, for + * example num_cores. Don't skip for the first index. + */ + if (evsel__is_tool(counter)) { + struct aggr_cpu_id own_id = + config->aggr_get_id(config, (struct perf_cpu){ .cpu = 0 }); + + return !aggr_cpu_id__equal(id, &own_id); + } + /* + * Skip value 0 when the counter's cpumask doesn't match the given aggr + * id.
+ */ + + perf_cpu_map__for_each_cpu(cpu, idx, counter->core.cpus) { struct aggr_cpu_id own_id = config->aggr_get_id(config, cpu); if (aggr_cpu_id__equal(id, &own_id)) @@ -970,28 +973,38 @@ static void print_counter_aggrdata(struct perf_stat_config *config, os->evsel = counter; /* Skip already merged uncore/hybrid events */ - if (counter->merged_stat) - return; - - uniquify_counter(config, counter); + if (config->aggr_mode != AGGR_NONE) { + if (evsel__is_hybrid(counter)) { + if (config->hybrid_merge && counter->first_wildcard_match != NULL) + return; + } else { + if (counter->first_wildcard_match != NULL) + return; + } + } val = aggr->counts.val; ena = aggr->counts.ena; run = aggr->counts.run; - if (perf_stat__skip_metric_event(counter, &config->metric_events, ena, run)) + if (perf_stat__skip_metric_event(counter, ena, run)) return; if (val == 0 && should_skip_zero_counter(config, counter, &id)) return; if (!metric_only) { - if (config->json_output) + if (config->json_output) { + os->first = true; fputc('{', output); - if (os->prefix) - fprintf(output, "%s", os->prefix); - else if (config->summary && config->csv_output && - !config->no_csv_summary && !config->interval) + } + if (config->interval) { + if (config->json_output) + json_out(os, "%s", os->timestamp); + else + fprintf(output, "%s", os->timestamp); + } else if (config->summary && config->csv_output && + !config->no_csv_summary) fprintf(output, "%s%s", "summary", config->csv_sep); } @@ -1017,15 +1030,19 @@ static void print_metric_begin(struct perf_stat_config *config, if (config->json_output) fputc('{', config->output); - if (os->prefix) - fprintf(config->output, "%s", os->prefix); + if (config->interval) { + if (config->json_output) + json_out(os, "%s", os->timestamp); + else + fprintf(config->output, "%s", os->timestamp); + } evsel = evlist__first(evlist); id = config->aggr_map->map[aggr_idx]; aggr = &evsel->stats->aggr[aggr_idx]; - aggr_printout(config, evsel, id, aggr->nr); + aggr_printout(config, os, evsel, id, aggr->nr); - print_cgroup(config, os->cgrp ? : evsel->cgrp); + print_cgroup(config, os, os->cgrp ? 
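: evsel->cgrp); }

The tool-event branch of should_skip_zero_counter() above encodes a subtle rule: tool events such as num_cores report their value through CPU0 only, so a zero result must be suppressed everywhere except in the aggregation bucket that owns CPU0. A condensed restatement of that branch; the wrapper itself is hypothetical, the helpers are the ones used above:

        static bool skip_zero_tool_event(struct perf_stat_config *config,
                                         struct aggr_cpu_id id)
        {
                struct perf_cpu cpu0 = { .cpu = 0 };
                struct aggr_cpu_id owner = config->aggr_get_id(config, cpu0);

                /* Keep the zero only in the bucket that contains CPU0. */
                return !aggr_cpu_id__equal(&id, &owner);
        }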
static void print_metric_end(struct perf_stat_config *config, struct outstate *os) @@ -1126,11 +1143,16 @@ static void print_no_aggr_metric(struct perf_stat_config *config, u64 ena, run, val; double uval; struct perf_stat_evsel *ps = counter->stats; - int aggr_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu); + int aggr_idx = 0; - if (aggr_idx < 0) + if (!perf_cpu_map__has(evsel__cpus(counter), cpu)) continue; + cpu_aggr_map__for_each_idx(aggr_idx, config->aggr_map) { + if (config->aggr_map->map[aggr_idx].cpu.cpu == cpu.cpu) + break; + } + os->evsel = counter; os->id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { @@ -1167,10 +1189,21 @@ static void print_metric_headers_std(struct perf_stat_config *config, static void print_metric_headers_csv(struct perf_stat_config *config, bool no_indent __maybe_unused) { + const char *p; + if (config->interval) - fputs("time,", config->output); - if (!config->iostat_run) - fputs(aggr_header_csv[config->aggr_mode], config->output); + fprintf(config->output, "time%s", config->csv_sep); + if (config->iostat_run) + return; + + p = aggr_header_csv[config->aggr_mode]; + while (*p) { + if (*p == ',') + fputs(config->csv_sep, config->output); + else + fputc(*p, config->output); + p++; + } } static void print_metric_headers_json(struct perf_stat_config *config __maybe_unused, @@ -1188,7 +1221,7 @@ static void print_metric_headers(struct perf_stat_config *config, struct perf_stat_output_ctx out = { .ctx = &os, .print_metric = print_metric_header, - .new_line = new_line_metric, + .new_line = NULL, .force_header = true, }; @@ -1207,32 +1240,33 @@ static void print_metric_headers(struct perf_stat_config *config, /* Print metrics headers only */ evlist__for_each_entry(evlist, counter) { + if (!config->iostat_run && + config->aggr_mode != AGGR_NONE && counter->metric_leader != counter) + continue; + os.evsel = counter; - perf_stat__print_shadow_stats(config, counter, 0, - 0, - &out, - &config->metric_events); + perf_stat__print_shadow_stats(config, counter, /*aggr_idx=*/0, &out); } if (!config->json_output) fputc('\n', config->output); } -static void prepare_interval(struct perf_stat_config *config, - char *prefix, size_t len, struct timespec *ts) +static void prepare_timestamp(struct perf_stat_config *config, + struct outstate *os, struct timespec *ts) { if (config->iostat_run) return; if (config->json_output) - scnprintf(prefix, len, "\"interval\" : %lu.%09lu, ", + scnprintf(os->timestamp, sizeof(os->timestamp), "\"interval\" : %lu.%09lu", (unsigned long) ts->tv_sec, ts->tv_nsec); else if (config->csv_output) - scnprintf(prefix, len, "%lu.%09lu%s", + scnprintf(os->timestamp, sizeof(os->timestamp), "%lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); else - scnprintf(prefix, len, "%6lu.%09lu ", + scnprintf(os->timestamp, sizeof(os->timestamp), "%6lu.%09lu ", (unsigned long) ts->tv_sec, ts->tv_nsec); } @@ -1248,9 +1282,10 @@ static void print_header_interval_std(struct perf_stat_config *config, case AGGR_NODE: case AGGR_SOCKET: case AGGR_DIE: + case AGGR_CLUSTER: case AGGR_CACHE: case AGGR_CORE: - fprintf(output, "#%*s %-*s cpus", + fprintf(output, "#%*s %-*s ctrs", INTERVAL_LEN - 1, "time", aggr_header_lens[config->aggr_mode], aggr_header_std[config->aggr_mode]); @@ -1439,7 +1474,7 @@ static void print_footer(struct perf_stat_config *config) fprintf(output, " %17.*f +- %.*f seconds time elapsed", precision, avg, precision, sd); - print_noise_pct(config, sd, avg, /*before_metric=*/false); + print_noise_pct(config, NULL,
sd, avg, /*before_metric=*/false); } fprintf(output, "\n\n"); @@ -1449,11 +1484,6 @@ static void print_footer(struct perf_stat_config *config) " echo 0 > /proc/sys/kernel/nmi_watchdog\n" " perf stat ...\n" " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); - - if (config->print_mixed_hw_group_error) - fprintf(output, - "The events in group usually have to be from " - "the same PMU. Try reorganizing the group.\n"); } static void print_percore(struct perf_stat_config *config, @@ -1529,27 +1559,26 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf int argc, const char **argv) { bool metric_only = config->metric_only; - int interval = config->interval; struct evsel *counter; - char buf[64]; struct outstate os = { .fh = config->output, .first = true, }; + evlist__uniquify_evsel_names(evlist, config); + if (config->iostat_run) evlist->selected = evlist__first(evlist); - if (interval) { - os.prefix = buf; - prepare_interval(config, buf, sizeof(buf), ts); - } + if (config->interval) + prepare_timestamp(config, &os, ts); print_header(config, _target, evlist, argc, argv); switch (config->aggr_mode) { case AGGR_CORE: case AGGR_CACHE: + case AGGR_CLUSTER: case AGGR_DIE: case AGGR_SOCKET: case AGGR_NODE: @@ -1561,7 +1590,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf case AGGR_THREAD: case AGGR_GLOBAL: if (config->iostat_run) { - iostat_print_counters(evlist, config, ts, buf, + iostat_print_counters(evlist, config, ts, os.timestamp, (iostat_print_counter_t)print_counter, &os); } else if (config->cgroup_list) { print_cgroup_counter(config, evlist, &os); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 1c5c3eeba4cf..9c83f7d96caa 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <errno.h> #include <math.h> #include <stdio.h> #include "evsel.h" @@ -15,413 +16,88 @@ #include <linux/zalloc.h> #include "iostat.h" #include "util/hashmap.h" +#include "tool_pmu.h" -struct stats walltime_nsecs_stats; -struct rusage_stats ru_stats; - -enum { - CTX_BIT_USER = 1 << 0, - CTX_BIT_KERNEL = 1 << 1, - CTX_BIT_HV = 1 << 2, - CTX_BIT_HOST = 1 << 3, - CTX_BIT_IDLE = 1 << 4, - CTX_BIT_MAX = 1 << 5, -}; - -enum stat_type { - STAT_NONE = 0, - STAT_NSECS, - STAT_CYCLES, - STAT_INSTRUCTIONS, - STAT_STALLED_CYCLES_FRONT, - STAT_STALLED_CYCLES_BACK, - STAT_BRANCHES, - STAT_BRANCH_MISS, - STAT_CACHE_REFS, - STAT_CACHE_MISSES, - STAT_L1_DCACHE, - STAT_L1_ICACHE, - STAT_LL_CACHE, - STAT_ITLB_CACHE, - STAT_DTLB_CACHE, - STAT_L1D_MISS, - STAT_L1I_MISS, - STAT_LL_MISS, - STAT_DTLB_MISS, - STAT_ITLB_MISS, - STAT_MAX -}; - -static int evsel_context(const struct evsel *evsel) +static bool tool_pmu__is_time_event(const struct perf_stat_config *config, + const struct evsel *evsel, int *tool_aggr_idx) { - int ctx = 0; - - if (evsel->core.attr.exclude_kernel) - ctx |= CTX_BIT_KERNEL; - if (evsel->core.attr.exclude_user) - ctx |= CTX_BIT_USER; - if (evsel->core.attr.exclude_hv) - ctx |= CTX_BIT_HV; - if (evsel->core.attr.exclude_host) - ctx |= CTX_BIT_HOST; - if (evsel->core.attr.exclude_idle) - ctx |= CTX_BIT_IDLE; - - return ctx; -} - -void perf_stat__reset_shadow_stats(void) -{ - memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); - memset(&ru_stats, 0, sizeof(ru_stats)); -} - -static enum stat_type evsel__stat_type(const struct evsel *evsel) -{ - /* Fake perf_hw_cache_op_id values for use with evsel__match. 
*/ - u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); - u64 PERF_COUNT_hw_cache_l1i_miss = PERF_COUNT_HW_CACHE_L1I | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); - u64 PERF_COUNT_hw_cache_ll_miss = PERF_COUNT_HW_CACHE_LL | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); - u64 PERF_COUNT_hw_cache_dtlb_miss = PERF_COUNT_HW_CACHE_DTLB | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); - u64 PERF_COUNT_hw_cache_itlb_miss = PERF_COUNT_HW_CACHE_ITLB | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); - - if (evsel__is_clock(evsel)) - return STAT_NSECS; - else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) - return STAT_CYCLES; - else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) - return STAT_INSTRUCTIONS; - else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) - return STAT_STALLED_CYCLES_FRONT; - else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) - return STAT_STALLED_CYCLES_BACK; - else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - return STAT_BRANCHES; - else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) - return STAT_BRANCH_MISS; - else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES)) - return STAT_CACHE_REFS; - else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) - return STAT_CACHE_MISSES; - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D)) - return STAT_L1_DCACHE; - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I)) - return STAT_L1_ICACHE; - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL)) - return STAT_LL_CACHE; - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB)) - return STAT_DTLB_CACHE; - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB)) - return STAT_ITLB_CACHE; - else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss)) - return STAT_L1D_MISS; - else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss)) - return STAT_L1I_MISS; - else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss)) - return STAT_LL_MISS; - else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss)) - return STAT_DTLB_MISS; - else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss)) - return STAT_ITLB_MISS; - return STAT_NONE; -} - -static const char *get_ratio_color(const double ratios[3], double val) -{ - const char *color = PERF_COLOR_NORMAL; - - if (val > ratios[0]) - color = PERF_COLOR_RED; - else if (val > ratios[1]) - color = PERF_COLOR_MAGENTA; - else if (val > ratios[2]) - color = PERF_COLOR_YELLOW; - - return color; -} - -static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type type) -{ - const struct evsel *cur; - int evsel_ctx = evsel_context(evsel); - - evlist__for_each_entry(evsel->evlist, cur) { - struct perf_stat_aggr *aggr; - - /* Ignore the evsel that is being searched from. */ - if (evsel == cur) - continue; - - /* Ignore evsels that are part of different groups. */ - if (evsel->core.leader->nr_members > 1 && - evsel->core.leader != cur->core.leader) - continue; - /* Ignore evsels with mismatched modifiers. */ - if (evsel_ctx != evsel_context(cur)) - continue; - /* Ignore if not the cgroup we're looking for. */ - if (evsel->cgrp != cur->cgrp) - continue; - /* Ignore if not the stat we're looking for. 
*/ - if (type != evsel__stat_type(cur)) - continue; - - aggr = &cur->stats->aggr[aggr_idx]; - if (type == STAT_NSECS) - return aggr->counts.val; - return aggr->counts.val * cur->scale; - } - return 0.0; -} - -static void print_ratio(struct perf_stat_config *config, - const struct evsel *evsel, int aggr_idx, - double numerator, struct perf_stat_output_ctx *out, - enum stat_type denominator_type, - const double color_ratios[3], const char *unit) -{ - double denominator = find_stat(evsel, aggr_idx, denominator_type); - - if (numerator && denominator) { - double ratio = numerator / denominator * 100.0; - const char *color = get_ratio_color(color_ratios, ratio); - - out->print_metric(config, out->ctx, color, "%7.2f%%", unit, ratio); - } else - out->print_metric(config, out->ctx, NULL, NULL, unit, 0); -} - -static void print_stalled_cycles_front(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double stalled, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {50.0, 30.0, 10.0}; - - print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios, - "frontend cycles idle"); -} - -static void print_stalled_cycles_back(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double stalled, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {75.0, 50.0, 20.0}; - - print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios, - "backend cycles idle"); -} - -static void print_branch_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, color_ratios, - "of all branches"); -} - -static void print_l1d_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, color_ratios, - "of all L1-dcache accesses"); -} - -static void print_l1i_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, color_ratios, - "of all L1-icache accesses"); -} - -static void print_ll_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios, - "of all L1-icache accesses"); -} + enum tool_pmu_event event = evsel__tool_event(evsel); + int aggr_idx; -static void print_dtlb_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, color_ratios, - "of all dTLB cache accesses"); -} - -static void print_itlb_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, 
STAT_ITLB_CACHE, color_ratios, - "of all iTLB cache accesses"); -} - -static void print_cache_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, color_ratios, - "of all cache refs"); -} + if (event != TOOL_PMU__EVENT_DURATION_TIME && + event != TOOL_PMU__EVENT_USER_TIME && + event != TOOL_PMU__EVENT_SYSTEM_TIME) + return false; -static void print_instructions(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double instructions, - struct perf_stat_output_ctx *out) -{ - print_metric_t print_metric = out->print_metric; - void *ctxp = out->ctx; - double cycles = find_stat(evsel, aggr_idx, STAT_CYCLES); - double max_stalled = max(find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_FRONT), - find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_BACK)); - - if (cycles) { - print_metric(config, ctxp, NULL, "%7.2f ", "insn per cycle", - instructions / cycles); - } else - print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); - - if (max_stalled && instructions) { - out->new_line(config, ctxp); - print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn", - max_stalled / instructions); + if (config) { + cpu_aggr_map__for_each_idx(aggr_idx, config->aggr_map) { + if (config->aggr_map->map[aggr_idx].cpu.cpu == 0) { + *tool_aggr_idx = aggr_idx; + return true; + } + } + pr_debug("Unexpected CPU0 missing in aggregation for tool event.\n"); } + *tool_aggr_idx = 0; /* Assume the first aggregation index works. */ + return true; } -static void print_cycles(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double cycles, - struct perf_stat_output_ctx *out) -{ - double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS); - - if (cycles && nsecs) { - double ratio = cycles / nsecs; - - out->print_metric(config, out->ctx, NULL, "%8.3f", "GHz", ratio); - } else - out->print_metric(config, out->ctx, NULL, NULL, "GHz", 0); -} - -static void print_nsecs(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx __maybe_unused, double nsecs, - struct perf_stat_output_ctx *out) -{ - print_metric_t print_metric = out->print_metric; - void *ctxp = out->ctx; - double wall_time = avg_stats(&walltime_nsecs_stats); - - if (wall_time) { - print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", - nsecs / (wall_time * evsel->scale)); - } else - print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); -} - -static int prepare_metric(struct evsel **metric_events, - struct metric_ref *metric_refs, +static int prepare_metric(struct perf_stat_config *config, + const struct metric_expr *mexp, + const struct evsel *evsel, struct expr_parse_ctx *pctx, int aggr_idx) { + struct evsel * const *metric_events = mexp->metric_events; + struct metric_ref *metric_refs = mexp->metric_refs; int i; for (i = 0; metric_events[i]; i++) { + int source_count = 0, tool_aggr_idx; + bool is_tool_time = + tool_pmu__is_time_event(config, metric_events[i], &tool_aggr_idx); + struct perf_stat_evsel *ps = metric_events[i]->stats; + struct perf_stat_aggr *aggr; char *n; double val; - int source_count = 0; - - if (evsel__is_tool(metric_events[i])) { - struct stats *stats; - double scale; - switch (metric_events[i]->tool_event) { - case PERF_TOOL_DURATION_TIME: - stats = &walltime_nsecs_stats; - scale = 1e-9; + /* + * If there are multiple uncore PMUs and we're not reading 
the + * leader's stats, determine the stats for the appropriate + * uncore PMU. + */ + if (evsel && evsel->metric_leader && + evsel->pmu != evsel->metric_leader->pmu && + mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) { + struct evsel *pos; + + evlist__for_each_entry(evsel->evlist, pos) { + if (pos->pmu != evsel->pmu) + continue; + if (pos->metric_leader != mexp->metric_events[i]) + continue; + ps = pos->stats; + source_count = 1; break; - case PERF_TOOL_USER_TIME: - stats = &ru_stats.ru_utime_usec_stat; - scale = 1e-6; - break; - case PERF_TOOL_SYSTEM_TIME: - stats = &ru_stats.ru_stime_usec_stat; - scale = 1e-6; - break; - case PERF_TOOL_NONE: - pr_err("Invalid tool event 'none'"); - abort(); - case PERF_TOOL_MAX: - pr_err("Invalid tool event 'max'"); - abort(); - default: - pr_err("Unknown tool event '%s'", evsel__name(metric_events[i])); - abort(); } - val = avg_stats(stats) * scale; - source_count = 1; + } + /* Time events are always on CPU0, the first aggregation index. */ + aggr = &ps->aggr[is_tool_time ? tool_aggr_idx : aggr_idx]; + if (!aggr || !metric_events[i]->supported) { + /* + * Not supported events will have a count of 0, which + * can be confusing in a metric. Explicitly set the + * value to NAN. Not counted events (enable time of 0) + * are read as 0. + */ + val = NAN; + source_count = 0; } else { - struct perf_stat_evsel *ps = metric_events[i]->stats; - struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx]; - - if (!aggr) - break; - - if (!metric_events[i]->supported) { - /* - * Not supported events will have a count of 0, - * which can be confusing in a - * metric. Explicitly set the value to NAN. Not - * counted events (enable time of 0) are read as - * 0. - */ - val = NAN; - source_count = 0; - } else { - /* - * If an event was scaled during stat gathering, - * reverse the scale before computing the - * metric. - */ - val = aggr->counts.val * (1.0 / metric_events[i]->scale); + val = aggr->counts.val; + if (is_tool_time) + val *= 1e-9; /* Convert time event nanoseconds to seconds. 
*/ + if (!source_count) source_count = evsel__source_count(metric_events[i]); - } } n = strdup(evsel__metric_id(metric_events[i])); if (!n) @@ -441,23 +117,23 @@ static int prepare_metric(struct evsel **metric_events, } static void generic_metric(struct perf_stat_config *config, - const char *metric_expr, - const char *metric_threshold, - struct evsel **metric_events, - struct metric_ref *metric_refs, - char *name, - const char *metric_name, - const char *metric_unit, - int runtime, + struct metric_expr *mexp, + struct evsel *evsel, int aggr_idx, struct perf_stat_output_ctx *out) { print_metric_t print_metric = out->print_metric; + const char *metric_name = mexp->metric_name; + const char *metric_expr = mexp->metric_expr; + const char *metric_threshold = mexp->metric_threshold; + const char *metric_unit = mexp->metric_unit; + struct evsel * const *metric_events = mexp->metric_events; + int runtime = mexp->runtime; struct expr_parse_ctx *pctx; double ratio, scale, threshold; int i; void *ctxp = out->ctx; - const char *color = NULL; + enum metric_threshold_classify thresh = METRIC_THRESHOLD_UNKNOWN; pctx = expr__ctx_new(); if (!pctx) @@ -467,7 +143,7 @@ static void generic_metric(struct perf_stat_config *config, pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list); pctx->sctx.runtime = runtime; pctx->sctx.system_wide = config->system_wide; - i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx); + i = prepare_metric(config, mexp, evsel, pctx, aggr_idx); if (i < 0) { expr__ctx_free(pctx); return; @@ -475,13 +151,13 @@ static void generic_metric(struct perf_stat_config *config, if (!metric_events[i]) { if (expr__parse(&ratio, pctx, metric_expr) == 0) { char *unit; - char metric_bf[64]; + char metric_bf[128]; if (metric_threshold && expr__parse(&threshold, pctx, metric_threshold) == 0 && !isnan(threshold)) { - color = fpclassify(threshold) == FP_ZERO - ? PERF_COLOR_GREEN : PERF_COLOR_RED; + thresh = fpclassify(threshold) == FP_ZERO + ? METRIC_THRESHOLD_GOOD : METRIC_THRESHOLD_BAD; } if (metric_unit && metric_name) { @@ -496,24 +172,24 @@ static void generic_metric(struct perf_stat_config *config, scnprintf(metric_bf, sizeof(metric_bf), "%s %s", unit, metric_name); - print_metric(config, ctxp, color, "%8.1f", + print_metric(config, ctxp, thresh, "%8.1f", metric_bf, ratio); } else { - print_metric(config, ctxp, color, "%8.2f", + print_metric(config, ctxp, thresh, "%8.2f", metric_name ? metric_name : - out->force_header ? name : "", + out->force_header ? evsel->name : "", ratio); } } else { - print_metric(config, ctxp, color, /*unit=*/NULL, + print_metric(config, ctxp, thresh, /*fmt=*/NULL, out->force_header ? - (metric_name ? metric_name : name) : "", 0); + (metric_name ?: evsel->name) : "", 0); } } else { - print_metric(config, ctxp, color, /*unit=*/NULL, + print_metric(config, ctxp, thresh, /*fmt=*/NULL, out->force_header ? - (metric_name ? 
metric_name : name) : "", 0); + (metric_name ?: evsel->name) : "", 0); } expr__ctx_free(pctx); @@ -528,7 +204,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx) if (!pctx) return NAN; - if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0) + if (prepare_metric(/*config=*/NULL, mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0) goto out; if (expr__parse(&ratio, pctx, mexp->metric_expr)) @@ -547,7 +223,7 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config, { bool need_full_name = perf_pmus__num_core_pmus() > 1; static const char *last_name; - static const char *last_pmu; + static const struct perf_pmu *last_pmu; char full_name[64]; /* @@ -557,22 +233,20 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config, * event. Only align with other metrics from * different metric events. */ - if (last_name && !strcmp(last_name, name)) { - if (!need_full_name || !strcmp(last_pmu, evsel->pmu_name)) { - out->print_metricgroup_header(config, ctxp, NULL); - return; - } + if (last_name && !strcmp(last_name, name) && last_pmu == evsel->pmu) { + out->print_metricgroup_header(config, ctxp, NULL); + return; } - if (need_full_name) - scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu_name); + if (need_full_name && evsel->pmu) + scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu->name); else scnprintf(full_name, sizeof(full_name), "%s", name); out->print_metricgroup_header(config, ctxp, full_name); last_name = name; - last_pmu = evsel->pmu_name; + last_pmu = evsel->pmu; } /** @@ -591,14 +265,14 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, int aggr_idx, int *num, void *from, - struct perf_stat_output_ctx *out, - struct rblist *metric_events) + struct perf_stat_output_ctx *out) { struct metric_event *me; struct metric_expr *mexp = from; void *ctxp = out->ctx; bool header_printed = false; const char *name = NULL; + struct rblist *metric_events = &evsel->evlist->metric_events; me = metricgroup__lookup(metric_events, evsel, false); if (me == NULL) @@ -621,19 +295,16 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, if (strcmp(name, mexp->default_metricgroup_name)) return (void *)mexp; /* Only print the name of the metricgroup once */ - if (!header_printed) { + if (!header_printed && !evsel->default_show_events) { header_printed = true; perf_stat__print_metricgroup_header(config, evsel, ctxp, name, out); } } - if ((*num)++ > 0) + if ((*num)++ > 0 && out->new_line) out->new_line(config, ctxp); - generic_metric(config, mexp->metric_expr, mexp->metric_threshold, - mexp->metric_events, mexp->metric_refs, evsel->name, - mexp->metric_name, mexp->metric_unit, mexp->runtime, - aggr_idx, out); + generic_metric(config, mexp, evsel, aggr_idx, out); } return NULL; @@ -641,61 +312,23 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, - double avg, int aggr_idx, - struct perf_stat_output_ctx *out, - struct rblist *metric_events) + int aggr_idx, + struct perf_stat_output_ctx *out) { - typedef void (*stat_print_function_t)(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out); - static const stat_print_function_t stat_print_function[STAT_MAX] = { - [STAT_INSTRUCTIONS] = print_instructions, - [STAT_BRANCH_MISS] = print_branch_miss, -
[STAT_L1D_MISS] = print_l1d_miss, - [STAT_L1I_MISS] = print_l1i_miss, - [STAT_DTLB_MISS] = print_dtlb_miss, - [STAT_ITLB_MISS] = print_itlb_miss, - [STAT_LL_MISS] = print_ll_miss, - [STAT_CACHE_MISSES] = print_cache_miss, - [STAT_STALLED_CYCLES_FRONT] = print_stalled_cycles_front, - [STAT_STALLED_CYCLES_BACK] = print_stalled_cycles_back, - [STAT_CYCLES] = print_cycles, - [STAT_NSECS] = print_nsecs, - }; print_metric_t print_metric = out->print_metric; void *ctxp = out->ctx; - int num = 1; + int num = 0; - if (config->iostat_run) { + if (config->iostat_run) iostat_print_metric(config, evsel, out); - } else { - stat_print_function_t fn = stat_print_function[evsel__stat_type(evsel)]; - - if (fn) - fn(config, evsel, aggr_idx, avg, out); - else { - double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS); - - if (nsecs) { - char unit = ' '; - char unit_buf[10] = "/sec"; - double ratio = convert_unit_double(1000000000.0 * avg / nsecs, - &unit); - - if (unit != ' ') - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); - print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); - } else - num = 0; - } - } perf_stat__print_shadow_stats_metricgroup(config, evsel, aggr_idx, - &num, NULL, out, metric_events); + &num, NULL, out); - if (num == 0) - print_metric(config, ctxp, NULL, NULL, NULL, 0); + if (num == 0) { + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, + /*fmt=*/NULL, /*unit=*/NULL, 0); + } } /** @@ -703,7 +336,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, * if it's not running or not the metric event. */ bool perf_stat__skip_metric_event(struct evsel *evsel, - struct rblist *metric_events, u64 ena, u64 run) { if (!evsel->default_metricgroup) @@ -712,5 +344,5 @@ bool perf_stat__skip_metric_event(struct evsel *evsel, if (!ena || !run) return true; - return !metricgroup__lookup(metric_events, evsel, false); + return !metricgroup__lookup(&evsel->evlist->metric_events, evsel, false); } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 967e583392c7..976a06e63252 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -315,7 +315,7 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, if (!counter->per_pkg) return 0; - if (perf_cpu_map__empty(cpus)) + if (perf_cpu_map__is_any_cpu_or_is_empty(cpus)) return 0; if (!mask) { @@ -526,7 +526,7 @@ static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias) struct perf_counts_values *aggr_counts_a = &ps_a->aggr[i].counts; struct perf_counts_values *aggr_counts_b = &ps_b->aggr[i].counts; - /* NB: don't increase aggr.nr for aliases */ + ps_a->aggr[i].nr += ps_b->aggr[i].nr; aggr_counts_a->val += aggr_counts_b->val; aggr_counts_a->ena += aggr_counts_b->ena; @@ -535,26 +535,6 @@ static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias) return 0; } -/* events should have the same name, scale, unit, cgroup but on different PMUs */ -static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b) -{ - if (strcmp(evsel__name(evsel_a), evsel__name(evsel_b))) - return false; - - if (evsel_a->scale != evsel_b->scale) - return false; - - if (evsel_a->cgrp != evsel_b->cgrp) - return false; - - if (strcmp(evsel_a->unit, evsel_b->unit)) - return false; - - if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b)) - return false; - - return !!strcmp(evsel_a->pmu_name, evsel_b->pmu_name); -} static void evsel__merge_aliases(struct evsel *evsel) { @@ -563,10 +543,9 @@ static void evsel__merge_aliases(struct evsel *evsel) alias = 
list_prepare_entry(evsel, &(evlist->core.entries), core.node); list_for_each_entry_continue(alias, &evlist->core.entries, core.node) { - /* Merge the same events on different PMUs. */ - if (evsel__is_alias(evsel, alias)) { + if (alias->first_wildcard_match == evsel) { + /* Merge the same events on different PMUs. */ evsel__merge_aggr_counters(evsel, alias); - alias->merged_stat = true; } } } @@ -579,11 +558,7 @@ static bool evsel__should_merge_hybrid(const struct evsel *evsel, static void evsel__merge_stats(struct evsel *evsel, struct perf_stat_config *config) { - /* this evsel is already merged */ - if (evsel->merged_stat) - return; - - if (evsel->auto_merge_stats || evsel__should_merge_hybrid(evsel, config)) + if (!evsel->pmu || !evsel->pmu->is_core || evsel__should_merge_hybrid(evsel, config)) evsel__merge_aliases(evsel); } @@ -592,7 +567,7 @@ void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *ev { struct evsel *evsel; - if (config->no_merge) + if (config->aggr_mode == AGGR_NONE) return; evlist__for_each_entry(evlist, evsel) @@ -670,7 +645,8 @@ void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *e evsel__process_percore(evsel); } -int perf_event__process_stat_event(struct perf_session *session, +int perf_event__process_stat_event(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { struct perf_counts_values count, *ptr; @@ -729,7 +705,7 @@ size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp) size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp) { - struct perf_stat_config sc; + struct perf_stat_config sc = {}; size_t ret; perf_event__read_stat_config(&sc, &event->stat_config); @@ -741,61 +717,3 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp) return ret; } - -int create_perf_stat_counter(struct evsel *evsel, - struct perf_stat_config *config, - struct target *target, - int cpu_map_idx) -{ - struct perf_event_attr *attr = &evsel->core.attr; - struct evsel *leader = evsel__leader(evsel); - - attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | - PERF_FORMAT_TOTAL_TIME_RUNNING; - - /* - * The event is part of non trivial group, let's enable - * the group read (for leader) and ID retrieval for all - * members. - */ - if (leader->core.nr_members > 1) - attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; - - attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list); - - /* - * Some events get initialized with sample_(period/type) set, - * like tracepoints. Clear it up for counting. - */ - attr->sample_period = 0; - - if (config->identifier) - attr->sample_type = PERF_SAMPLE_IDENTIFIER; - - if (config->all_user) { - attr->exclude_kernel = 1; - attr->exclude_user = 0; - } - - if (config->all_kernel) { - attr->exclude_kernel = 0; - attr->exclude_user = 1; - } - - /* - * Disabling all counters initially, they will be enabled - * either manually by us or by kernel via enable_on_exec - * set later. 
- */ - if (evsel__is_group_leader(evsel)) { - attr->disabled = 1; - - if (target__enable_on_exec(target)) - attr->enable_on_exec = 1; - } - - if (target__has_cpu(target) && !target__has_per_thread(target)) - return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu_map_idx); - - return evsel__open_per_thread(evsel, evsel->core.threads); -} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 325d0fad1842..f986911c9296 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -7,7 +7,6 @@ #include <sys/types.h> #include <sys/resource.h> #include "cpumap.h" -#include "rblist.h" #include "counts.h" struct perf_cpu_map; @@ -48,6 +47,7 @@ enum aggr_mode { AGGR_GLOBAL, AGGR_SOCKET, AGGR_DIE, + AGGR_CLUSTER, AGGR_CACHE, AGGR_CORE, AGGR_THREAD, @@ -56,11 +56,6 @@ enum aggr_mode { AGGR_MAX }; -struct rusage_stats { - struct stats ru_utime_usec_stat; - struct stats ru_stime_usec_stat; -}; - typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu); struct perf_stat_config { @@ -76,7 +71,6 @@ struct perf_stat_config { bool null_run; bool ru_display; bool big_num; - bool no_merge; bool hybrid_merge; bool walltime_run_table; bool all_kernel; @@ -87,6 +81,7 @@ struct perf_stat_config { bool metric_no_group; bool metric_no_merge; bool metric_no_threshold; + bool hardware_aware_grouping; bool stop_read_counter; bool iostat_run; char *user_requested_cpu_list; @@ -99,16 +94,13 @@ struct perf_stat_config { int times; int run_count; int print_free_counters_hint; - int print_mixed_hw_group_error; const char *csv_sep; struct stats *walltime_nsecs_stats; struct rusage ru_data; - struct rusage_stats *ru_stats; struct cpu_aggr_map *aggr_map; aggr_get_id_t aggr_get_id; struct cpu_aggr_map *cpus_aggr_map; u64 *walltime_run; - struct rblist metric_events; int ctl_fd; int ctl_fd_ack; bool ctl_fd_close; @@ -116,8 +108,9 @@ struct perf_stat_config { unsigned int topdown_level; }; +extern struct perf_stat_config stat_config; + void perf_stat__set_big_num(int set); -void perf_stat__set_no_csv_summary(int set); void update_stats(struct stats *stats, u64 val); double avg_stats(struct stats *stats); @@ -133,29 +126,24 @@ static inline void init_stats(struct stats *stats) stats->max = 0; } -static inline void init_rusage_stats(struct rusage_stats *ru_stats) { - init_stats(&ru_stats->ru_utime_usec_stat); - init_stats(&ru_stats->ru_stime_usec_stat); -} - -static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rusage* rusage) { - const u64 us_to_ns = 1000; - const u64 s_to_ns = 1000000000; - update_stats(&ru_stats->ru_utime_usec_stat, - (rusage->ru_utime.tv_usec * us_to_ns + rusage->ru_utime.tv_sec * s_to_ns)); - update_stats(&ru_stats->ru_stime_usec_stat, - (rusage->ru_stime.tv_usec * us_to_ns + rusage->ru_stime.tv_sec * s_to_ns)); -} - struct evsel; struct evlist; -extern struct stats walltime_nsecs_stats; -extern struct rusage_stats ru_stats; +enum metric_threshold_classify { + METRIC_THRESHOLD_UNKNOWN, + METRIC_THRESHOLD_BAD, + METRIC_THRESHOLD_NEARLY_BAD, + METRIC_THRESHOLD_LESS_GOOD, + METRIC_THRESHOLD_GOOD, +}; +const char *metric_threshold_classify__color(enum metric_threshold_classify thresh); typedef void (*print_metric_t)(struct perf_stat_config *config, - void *ctx, const char *color, const char *unit, - const char *fmt, double val); + void *ctx, + enum metric_threshold_classify thresh, + const char *fmt, + const char *unit, + double val); typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx); /* Used to print the 
display name of the Default metricgroup for now. */ @@ -173,19 +161,15 @@ struct perf_stat_output_ctx { void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, - double avg, int aggr_idx, - struct perf_stat_output_ctx *out, - struct rblist *metric_events); -bool perf_stat__skip_metric_event(struct evsel *evsel, - struct rblist *metric_events, - u64 ena, u64 run); + int aggr_idx, + struct perf_stat_output_ctx *out); +bool perf_stat__skip_metric_event(struct evsel *evsel, u64 ena, u64 run); void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, struct evsel *evsel, int aggr_idx, int *num, void *from, - struct perf_stat_output_ctx *out, - struct rblist *metric_events); + struct perf_stat_output_ctx *out); int evlist__alloc_stats(struct perf_stat_config *config, struct evlist *evlist, bool alloc_raw); @@ -209,17 +193,14 @@ union perf_event; struct perf_session; struct target; -int perf_event__process_stat_event(struct perf_session *session, +int perf_event__process_stat_event(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp); size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp); size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp); -int create_perf_stat_counter(struct evsel *evsel, - struct perf_stat_config *config, - struct target *target, - int cpu_map_idx); void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, struct target *_target, struct timespec *ts, int argc, const char **argv); diff --git a/tools/perf/util/stream.c b/tools/perf/util/stream.c index 545e44981a27..3de4a6130853 100644 --- a/tools/perf/util/stream.c +++ b/tools/perf/util/stream.c @@ -52,7 +52,6 @@ static struct evlist_streams *evlist_streams__new(int nr_evsel, goto err; s->nr_streams_max = nr_streams_max; - s->evsel_idx = -1; } els->ev_streams = es; @@ -139,7 +138,7 @@ static int evlist__init_callchain_streams(struct evlist *evlist, hists__output_resort(hists, NULL); init_hot_callchain(hists, &es[i]); - es[i].evsel_idx = pos->core.idx; + es[i].evsel = pos; i++; } @@ -166,12 +165,12 @@ struct evlist_streams *evlist__create_streams(struct evlist *evlist, } struct evsel_streams *evsel_streams__entry(struct evlist_streams *els, - int evsel_idx) + const struct evsel *evsel) { struct evsel_streams *es = els->ev_streams; for (int i = 0; i < els->nr_evsel; i++) { - if (es[i].evsel_idx == evsel_idx) + if (es[i].evsel == evsel) return &es[i]; } diff --git a/tools/perf/util/stream.h b/tools/perf/util/stream.h index bee768874fea..50f7e6e04982 100644 --- a/tools/perf/util/stream.h +++ b/tools/perf/util/stream.h @@ -2,7 +2,9 @@ #ifndef __PERF_STREAM_H #define __PERF_STREAM_H -#include "callchain.h" +struct callchain_node; +struct evlist; +struct evsel; struct stream { struct callchain_node *cnode; @@ -11,9 +13,9 @@ struct stream { struct evsel_streams { struct stream *streams; + const struct evsel *evsel; int nr_streams_max; int nr_streams; - int evsel_idx; u64 streams_hits; }; @@ -22,15 +24,13 @@ struct evlist_streams { int nr_evsel; }; -struct evlist; - void evlist_streams__delete(struct evlist_streams *els); struct evlist_streams *evlist__create_streams(struct evlist *evlist, int nr_streams_max); struct evsel_streams *evsel_streams__entry(struct evlist_streams *els, - int evsel_idx); + const struct evsel *evsel); void evsel_streams__match(struct evsel_streams *es_base, struct evsel_streams *es_pair); 
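The stat.h hunks above change the print_metric_t callback to receive an enum metric_threshold_classify rather than a pre-resolved color string, with metric_threshold_classify__color() available to map a classification to a color. A minimal sketch of a conforming callback, assuming only the declarations visible in these hunks (the FILE-based output context and the my_print_metric name are hypothetical, not part of the patch):

#include <stdio.h>
#include "stat.h"

/* Hypothetical print_metric_t implementation; assumes ctx is a FILE *. */
static void my_print_metric(struct perf_stat_config *config __maybe_unused,
			    void *ctx,
			    enum metric_threshold_classify thresh,
			    const char *fmt, const char *unit, double val)
{
	FILE *fp = ctx;
	/* Assumed contract: returns a color prefix (or NULL) for the classification. */
	const char *color = metric_threshold_classify__color(thresh);

	if (!fmt) {
		/* No metric to print, e.g. the METRIC_THRESHOLD_UNKNOWN placeholder call. */
		fputc('\n', fp);
		return;
	}
	fprintf(fp, "%s", color ?: "");
	fprintf(fp, fmt, val);
	fprintf(fp, " %s\n", unit ?: "");
}

Such a callback would be wired up through struct perf_stat_output_ctx (.print_metric = my_print_metric, .ctx = stderr), matching how perf_stat__print_shadow_stats() above falls back to calling out->print_metric with METRIC_THRESHOLD_UNKNOWN and a NULL fmt when no metric applied.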
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index cf05b0b56c57..c0e927bbadf6 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -254,12 +254,49 @@ char *strpbrk_esc(char *str, const char *stopset) do { ptr = strpbrk(str, stopset); - if (ptr == str || - (ptr == str + 1 && *(ptr - 1) != '\\')) + if (!ptr) { + /* stopset not in str. */ + break; + } + if (ptr == str) { + /* stopset character is first in str. */ + break; + } + if (ptr == str + 1 && str[0] != '\\') { + /* stopset character is second and wasn't preceded by a '\'. */ + break; + } + str = ptr + 1; + } while (ptr[-1] == '\\' && ptr[-2] != '\\'); + + return ptr; +} + +/* Like strpbrk_esc(), but does not break if it is quoted with single/double quotes */ +char *strpbrk_esq(char *str, const char *stopset) +{ + char *_stopset = NULL; + char *ptr; + const char *squote = "'"; + const char *dquote = "\""; + + if (asprintf(&_stopset, "%s%c%c", stopset, *squote, *dquote) < 0) + return NULL; + + do { + ptr = strpbrk_esc(str, _stopset); + if (!ptr) + break; + if (*ptr == *squote) + ptr = strpbrk_esc(ptr + 1, squote); + else if (*ptr == *dquote) + ptr = strpbrk_esc(ptr + 1, dquote); + else break; + str = ptr + 1; - } while (ptr && *(ptr - 1) == '\\' && *(ptr - 2) != '\\'); + } while (ptr); + free(_stopset); return ptr; } @@ -293,6 +330,78 @@ char *strdup_esc(const char *str) return ret; } +/* Remove backslash right before quote and return next quote address. */ +static char *remove_consumed_esc(char *str, int len, int quote) +{ + char *ptr = str, *end = str + len; + + while (*ptr != quote && ptr < end) { + if (*ptr == '\\' && *(ptr + 1) == quote) { + memmove(ptr, ptr + 1, end - (ptr + 1)); + /* now *ptr is `quote`. */ + end--; + } + ptr++; + } + + return *ptr == quote ? ptr : NULL; +} + +/* + * Like strdup_esc, but keep quoted strings as they are (a single backslash + * before a quote is removed). If there is no closing quote, return NULL. + */ +char *strdup_esq(const char *str) +{ + char *d, *ret; + + /* If there is no quote, return normal strdup_esc() */ + d = strpbrk_esc((char *)str, "\"'"); + if (!d) + return strdup_esc(str); + + ret = strdup(str); + if (!ret) + return NULL; + + d = ret; + do { + d = strpbrk(d, "\\\"\'"); + if (!d) + break; + + if (*d == '"' || *d == '\'') { + /* This is non-escaped quote */ + int quote = *d; + int len = strlen(d + 1) + 1; + + /* + * Remove the start quote and remove consumed escape (backslash + * before quote) and remove the end quote. If there is no end + * quote, it is an input error. + */ + memmove(d, d + 1, len); + d = remove_consumed_esc(d, len, quote); + if (!d) + goto error; + memmove(d, d + 1, strlen(d + 1) + 1); + } + if (*d == '\\') { + memmove(d, d + 1, strlen(d + 1) + 1); + if (*d == '\\') { + /* double backslash -- keep the second one. */ + d++; + } + } + } while (*d != '\0'); + + return ret; + +error: + free(ret); + return NULL; +} + unsigned int hex(char c) { if (c >= '0' && c <= '9') @@ -301,3 +410,51 @@ unsigned int hex(char c) return c - 'a' + 10; return c - 'A' + 10; } + +/* + * Replace all occurrences of character 'needle' in string 'haystack' with + * string 'replace' + * + * The new string could be longer so a new string is returned which must be + * freed.
+ */ +char *strreplace_chars(char needle, const char *haystack, const char *replace) +{ + int replace_len = strlen(replace); + char *new_s, *to; + const char *loc = strchr(haystack, needle); + const char *from = haystack; + int num = 0; + + /* Count occurrences */ + while (loc) { + loc = strchr(loc + 1, needle); + num++; + } + + /* Allocate enough space for replacements and reset first location */ + new_s = malloc(strlen(haystack) + (num * (replace_len - 1) + 1)); + if (!new_s) + return NULL; + loc = strchr(haystack, needle); + to = new_s; + + while (loc) { + /* Copy original string up to found char and update positions */ + memcpy(to, from, 1 + loc - from); + to += loc - from; + from = loc + 1; + + /* Copy replacement string and update positions */ + memcpy(to, replace, replace_len); + to += replace_len; + + /* needle next occurrence or end of string */ + loc = strchr(from, needle); + } + + /* Copy any remaining chars + null */ + strcpy(to, from); + + return new_s; +} diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index 56c30fef9682..4c8bff47cfd3 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -37,7 +37,10 @@ char *asprintf__tp_filter_pids(size_t npids, pid_t *pids); char *strpbrk_esc(char *str, const char *stopset); char *strdup_esc(const char *str); +char *strpbrk_esq(char *str, const char *stopset); +char *strdup_esq(const char *str); unsigned int hex(char c); +char *strreplace_chars(char needle, const char *haystack, const char *replace); #endif /* PERF_STRING_H */ diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 5c62d3118c41..b1d259f590e9 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -21,6 +21,7 @@ #include <perf/cpumap.h> #include "env.h" +#include "perf.h" #include "svghelper.h" static u64 first_time, last_time; @@ -331,7 +332,7 @@ static char *cpu_model(void) file = fopen("/proc/cpuinfo", "r"); if (file) { while (fgets(buf, 255, file)) { - if (strstr(buf, "model name")) { + if (strcasestr(buf, "model name")) { strlcpy(cpu_m, &buf[13], 255); break; } @@ -725,26 +726,24 @@ static void scan_core_topology(int *map, struct topology *t, int nr_cpus) static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus) { - int i; - int ret = 0; - struct perf_cpu_map *m; - struct perf_cpu c; + int idx, ret = 0; + struct perf_cpu_map *map; + struct perf_cpu cpu; - m = perf_cpu_map__new(s); - if (!m) + map = perf_cpu_map__new(s); + if (!map) return -1; - for (i = 0; i < perf_cpu_map__nr(m); i++) { - c = perf_cpu_map__cpu(m, i); - if (c.cpu >= nr_cpus) { + perf_cpu_map__for_each_cpu(cpu, idx, map) { + if (cpu.cpu >= nr_cpus) { ret = -1; break; } - __set_bit(c.cpu, cpumask_bits(b)); + __set_bit(cpu.cpu, cpumask_bits(b)); } - perf_cpu_map__put(m); + perf_cpu_map__put(map); return ret; } @@ -754,6 +753,7 @@ int svg_build_topology_map(struct perf_env *env) int i, nr_cpus; struct topology t; char *sib_core, *sib_thr; + int ret = -1; nr_cpus = min(env->nr_cpus_online, MAX_NR_CPUS); @@ -799,11 +799,11 @@ int svg_build_topology_map(struct perf_env *env) scan_core_topology(topology_map, &t, nr_cpus); - return 0; + ret = 0; exit: zfree(&t.sib_core); zfree(&t.sib_thr); - return -1; + return ret; } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 8bd466d1c2bd..957143fbf8a0 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -7,15 +7,13 @@ #include <unistd.h> #include <inttypes.h> +#include "compress.h" #include "dso.h" +#include "libbfd.h" #include 
"map.h" #include "maps.h" #include "symbol.h" #include "symsrc.h" -#include "demangle-cxx.h" -#include "demangle-ocaml.h" -#include "demangle-java.h" -#include "demangle-rust.h" #include "machine.h" #include "vdso.h" #include "debug.h" @@ -23,21 +21,10 @@ #include <linux/ctype.h> #include <linux/kernel.h> #include <linux/zalloc.h> +#include <linux/string.h> #include <symbol/kallsyms.h> #include <internal/lib.h> -#ifdef HAVE_LIBBFD_SUPPORT -#define PACKAGE 'perf' -#include <bfd.h> -#endif - -#if defined(HAVE_LIBBFD_SUPPORT) || defined(HAVE_CPLUS_DEMANGLE_SUPPORT) -#ifndef DMGL_PARAMS -#define DMGL_PARAMS (1 << 0) /* Include function args */ -#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ -#endif -#endif - #ifndef EM_AARCH64 #define EM_AARCH64 183 /* ARM 64 bit */ #endif @@ -173,7 +160,7 @@ static inline bool elf_sec__is_data(const GElf_Shdr *shdr, static bool elf_sec__filter(GElf_Shdr *shdr, Elf_Data *secstrs) { - return elf_sec__is_text(shdr, secstrs) || + return elf_sec__is_text(shdr, secstrs) || elf_sec__is_data(shdr, secstrs); } @@ -277,60 +264,6 @@ static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr) return -1; } -static bool want_demangle(bool is_kernel_sym) -{ - return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; -} - -/* - * Demangle C++ function signature, typically replaced by demangle-cxx.cpp - * version. - */ -__weak char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused, - bool modifiers __maybe_unused) -{ -#ifdef HAVE_LIBBFD_SUPPORT - int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); - - return bfd_demangle(NULL, str, flags); -#elif defined(HAVE_CPLUS_DEMANGLE_SUPPORT) - int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); - - return cplus_demangle(str, flags); -#else - return NULL; -#endif -} - -static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) -{ - char *demangled = NULL; - - /* - * We need to figure out if the object was created from C++ sources - * DWARF DW_compile_unit has this, but we don't always have access - * to it... - */ - if (!want_demangle(dso->kernel || kmodule)) - return demangled; - - demangled = cxx_demangle_sym(elf_name, verbose > 0, verbose > 0); - if (demangled == NULL) { - demangled = ocaml_demangle_sym(elf_name); - if (demangled == NULL) { - demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); - } - } - else if (rust_is_mangled(demangled)) - /* - * Input to Rust demangling is the BFD-demangled - * name which it Rust-demangles in place. 
- */ - rust_demangle_sym(demangled); - - return demangled; -} - struct rel_info { u32 nr_entries; u32 *sorted; @@ -469,7 +402,7 @@ static bool get_plt_sizes(struct dso *dso, GElf_Ehdr *ehdr, GElf_Shdr *shdr_plt, } if (*plt_entry_size) return true; - pr_debug("Missing PLT entry size for %s\n", dso->long_name); + pr_debug("Missing PLT entry size for %s\n", dso__long_name(dso)); return false; } @@ -616,7 +549,7 @@ static bool get_plt_got_name(GElf_Shdr *shdr, size_t i, /* Get the associated symbol */ gelf_getsym(di->dynsym_data, vr->sym_idx, &sym); sym_name = elf_sym__name(&sym, di->dynstr_data); - demangled = demangle_sym(di->dso, 0, sym_name); + demangled = dso__demangle_sym(di->dso, /*kmodule=*/0, sym_name); if (demangled != NULL) sym_name = demangled; @@ -653,7 +586,7 @@ static int dso__synthesize_plt_got_symbols(struct dso *dso, Elf *elf, sym = symbol__new(shdr.sh_offset + i, shdr.sh_entsize, STB_GLOBAL, STT_FUNC, buf); if (!sym) goto out; - symbols__insert(&dso->symbols, sym); + symbols__insert(dso__symbols(dso), sym); } err = 0; out: @@ -707,7 +640,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) plt_sym = symbol__new(shdr_plt.sh_offset, plt_header_size, STB_GLOBAL, STT_FUNC, ".plt"); if (!plt_sym) goto out_elf_end; - symbols__insert(&dso->symbols, plt_sym); + symbols__insert(dso__symbols(dso), plt_sym); /* Only x86 has .plt.got */ if (machine_is_x86(ehdr.e_machine) && @@ -814,7 +747,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) gelf_getsym(syms, get_rel_symidx(&ri, idx), &sym); elf_name = elf_sym__name(&sym, symstrs); - demangled = demangle_sym(dso, 0, elf_name); + demangled = dso__demangle_sym(dso, /*kmodule=*/0, elf_name); if (demangled) elf_name = demangled; if (*elf_name) @@ -829,7 +762,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) goto out_elf_end; plt_offset += plt_entry_size; - symbols__insert(&dso->symbols, f); + symbols__insert(dso__symbols(dso), f); ++nr; } @@ -839,15 +772,10 @@ out_elf_end: if (err == 0) return nr; pr_debug("%s: problems reading %s PLT info.\n", - __func__, dso->long_name); + __func__, dso__long_name(dso)); return 0; } -char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name) -{ - return demangle_sym(dso, kmodule, elf_name); -} - /* * Align offset to 4 bytes as needed for note name and descriptor data. 
*/ @@ -932,43 +860,16 @@ out: return err; } -#ifdef HAVE_LIBBFD_BUILDID_SUPPORT - -static int read_build_id(const char *filename, struct build_id *bid) -{ - size_t size = sizeof(bid->data); - int err = -1; - bfd *abfd; - - abfd = bfd_openr(filename, NULL); - if (!abfd) - return -1; - - if (!bfd_check_format(abfd, bfd_object)) { - pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename); - goto out_close; - } - - if (!abfd->build_id || abfd->build_id->size > size) - goto out_close; - - memcpy(bid->data, abfd->build_id->data, abfd->build_id->size); - memset(bid->data + abfd->build_id->size, 0, size - abfd->build_id->size); - err = bid->size = abfd->build_id->size; - -out_close: - bfd_close(abfd); - return err; -} - -#else // HAVE_LIBBFD_BUILDID_SUPPORT - static int read_build_id(const char *filename, struct build_id *bid) { size_t size = sizeof(bid->data); - int fd, err = -1; + int fd, err; Elf *elf; + err = libbfd__read_build_id(filename, bid); + if (err >= 0) + goto out; + if (size < BUILD_ID_SIZE) goto out; @@ -993,8 +894,6 @@ out: return err; } -#endif // HAVE_LIBBFD_BUILDID_SUPPORT - int filename__read_build_id(const char *filename, struct build_id *bid) { struct kmod_path m = { .name = NULL, }; @@ -1003,6 +902,8 @@ int filename__read_build_id(const char *filename, struct build_id *bid) if (!filename) return -EFAULT; + if (!is_regular_file(filename)) + return -EWOULDBLOCK; err = kmod_path__parse(&m, filename); if (err) @@ -1078,44 +979,6 @@ out: return err; } -#ifdef HAVE_LIBBFD_SUPPORT - -int filename__read_debuglink(const char *filename, char *debuglink, - size_t size) -{ - int err = -1; - asection *section; - bfd *abfd; - - abfd = bfd_openr(filename, NULL); - if (!abfd) - return -1; - - if (!bfd_check_format(abfd, bfd_object)) { - pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename); - goto out_close; - } - - section = bfd_get_section_by_name(abfd, ".gnu_debuglink"); - if (!section) - goto out_close; - - if (section->size > size) - goto out_close; - - if (!bfd_get_section_contents(abfd, section, debuglink, 0, - section->size)) - goto out_close; - - err = 0; - -out_close: - bfd_close(abfd); - return err; -} - -#else - int filename__read_debuglink(const char *filename, char *debuglink, size_t size) { @@ -1127,6 +990,10 @@ int filename__read_debuglink(const char *filename, char *debuglink, Elf_Scn *sec; Elf_Kind ek; + err = libbfd_filename__read_debuglink(filename, debuglink, size); + if (err >= 0) + goto out; + fd = open(filename, O_RDONLY); if (fd < 0) goto out; @@ -1168,35 +1035,6 @@ out: return err; } -#endif - -static int dso__swap_init(struct dso *dso, unsigned char eidata) -{ - static unsigned int const endian = 1; - - dso->needs_swap = DSO_SWAP__NO; - - switch (eidata) { - case ELFDATA2LSB: - /* We are big endian, DSO is little endian. */ - if (*(unsigned char const *)&endian != 1) - dso->needs_swap = DSO_SWAP__YES; - break; - - case ELFDATA2MSB: - /* We are little endian, DSO is big endian. 
*/ - if (*(unsigned char const *)&endian != 0) - dso->needs_swap = DSO_SWAP__YES; - break; - - default: - pr_err("unrecognized DSO data encoding %d\n", eidata); - return -EINVAL; - } - - return 0; -} - bool symsrc__possibly_runtime(struct symsrc *ss) { return ss->dynsym || ss->opdsec; @@ -1225,6 +1063,81 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) ehdr.e_type == ET_DYN; } +static Elf *read_gnu_debugdata(struct dso *dso, Elf *elf, const char *name, int *fd_ret) +{ + Elf *elf_embedded; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Scn *scn; + Elf_Data *scn_data; + FILE *wrapped; + size_t shndx; + char temp_filename[] = "/tmp/perf.gnu_debugdata.elf.XXXXXX"; + int ret, temp_fd; + + if (gelf_getehdr(elf, &ehdr) == NULL) { + pr_debug("%s: cannot read %s ELF file.\n", __func__, name); + *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; + return NULL; + } + + scn = elf_section_by_name(elf, &ehdr, &shdr, ".gnu_debugdata", &shndx); + if (!scn) { + *dso__load_errno(dso) = -ENOENT; + return NULL; + } + + if (shdr.sh_type == SHT_NOBITS) { + pr_debug("%s: .gnu_debugdata of ELF file %s has no data.\n", __func__, name); + *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; + return NULL; + } + + scn_data = elf_rawdata(scn, NULL); + if (!scn_data) { + pr_debug("%s: error reading .gnu_debugdata of %s: %s\n", __func__, + name, elf_errmsg(-1)); + *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; + return NULL; + } + + wrapped = fmemopen(scn_data->d_buf, scn_data->d_size, "r"); + if (!wrapped) { + pr_debug("%s: fmemopen: %s\n", __func__, strerror(errno)); + *dso__load_errno(dso) = -errno; + return NULL; + } + + temp_fd = mkstemp(temp_filename); + if (temp_fd < 0) { + pr_debug("%s: mkstemp: %s\n", __func__, strerror(errno)); + *dso__load_errno(dso) = -errno; + fclose(wrapped); + return NULL; + } + unlink(temp_filename); + + ret = lzma_decompress_stream_to_file(wrapped, temp_fd); + fclose(wrapped); + if (ret < 0) { + *dso__load_errno(dso) = -errno; + close(temp_fd); + return NULL; + } + + elf_embedded = elf_begin(temp_fd, PERF_ELF_C_READ_MMAP, NULL); + if (!elf_embedded) { + pr_debug("%s: error reading .gnu_debugdata of %s: %s\n", __func__, + name, elf_errmsg(-1)); + *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; + close(temp_fd); + return NULL; + } + pr_debug("%s: using .gnu_debugdata of %s\n", __func__, name); + *fd_ret = temp_fd; + return elf_embedded; +} + int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, enum dso_binary_type type) { @@ -1237,11 +1150,11 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, if (fd < 0) return -1; - type = dso->symtab_type; + type = dso__symtab_type(dso); } else { fd = open(name, O_RDONLY); if (fd < 0) { - dso->load_errno = errno; + *dso__load_errno(dso) = errno; return -1; } } @@ -1249,37 +1162,50 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); if (elf == NULL) { pr_debug("%s: cannot read %s ELF file.\n", __func__, name); - dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF; + *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; goto out_close; } + if (type == DSO_BINARY_TYPE__GNU_DEBUGDATA) { + int new_fd; + Elf *embedded = read_gnu_debugdata(dso, elf, name, &new_fd); + + if (!embedded) + goto out_close; + + elf_end(elf); + close(fd); + fd = new_fd; + elf = embedded; + } + if (gelf_getehdr(elf, &ehdr) == NULL) { - dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF; + *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; pr_debug("%s: cannot get elf 
header.\n", __func__); goto out_elf_end; } if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) { - dso->load_errno = DSO_LOAD_ERRNO__INTERNAL_ERROR; + *dso__load_errno(dso) = DSO_LOAD_ERRNO__INTERNAL_ERROR; goto out_elf_end; } /* Always reject images with a mismatched build-id: */ - if (dso->has_build_id && !symbol_conf.ignore_vmlinux_buildid) { + if (dso__has_build_id(dso) && !symbol_conf.ignore_vmlinux_buildid) { u8 build_id[BUILD_ID_SIZE]; struct build_id bid; int size; size = elf_read_build_id(elf, build_id, BUILD_ID_SIZE); if (size <= 0) { - dso->load_errno = DSO_LOAD_ERRNO__CANNOT_READ_BUILDID; + *dso__load_errno(dso) = DSO_LOAD_ERRNO__CANNOT_READ_BUILDID; goto out_elf_end; } build_id__init(&bid, build_id, size); if (!dso__build_id_equal(dso, &bid)) { pr_debug("%s: build id mismatch for %s.\n", __func__, name); - dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID; + *dso__load_errno(dso) = DSO_LOAD_ERRNO__MISMATCHING_BUILDID; goto out_elf_end; } } @@ -1304,14 +1230,14 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, if (ss->opdshdr.sh_type != SHT_PROGBITS) ss->opdsec = NULL; - if (dso->kernel == DSO_SPACE__USER) + if (dso__kernel(dso) == DSO_SPACE__USER) ss->adjust_symbols = true; else ss->adjust_symbols = elf__needs_adjust_symbols(ehdr); ss->name = strdup(name); if (!ss->name) { - dso->load_errno = errno; + *dso__load_errno(dso) = errno; goto out_elf_end; } @@ -1329,6 +1255,58 @@ out_close: return -1; } +static bool is_exe_text(int flags) +{ + return (flags & (SHF_ALLOC | SHF_EXECINSTR)) == (SHF_ALLOC | SHF_EXECINSTR); +} + +/* + * Some executable module sections like .noinstr.text might be laid out with + * .text so they can use the same mapping (memory address to file offset). + * Check if that is the case. Refer to kernel layout_sections(). Return the + * maximum offset. + */ +static u64 max_text_section(Elf *elf, GElf_Ehdr *ehdr) +{ + Elf_Scn *sec = NULL; + GElf_Shdr shdr; + u64 offs = 0; + + /* Doesn't work for some arch */ + if (ehdr->e_machine == EM_PARISC || + ehdr->e_machine == EM_ALPHA) + return 0; + + /* ELF is corrupted/truncated, avoid calling elf_strptr. */ + if (!elf_rawdata(elf_getscn(elf, ehdr->e_shstrndx), NULL)) + return 0; + + while ((sec = elf_nextscn(elf, sec)) != NULL) { + char *sec_name; + + if (!gelf_getshdr(sec, &shdr)) + break; + + if (!is_exe_text(shdr.sh_flags)) + continue; + + /* .init and .exit sections are not placed with .text */ + sec_name = elf_strptr(elf, ehdr->e_shstrndx, shdr.sh_name); + if (!sec_name || + strstarts(sec_name, ".init") || + strstarts(sec_name, ".exit")) + break; + + /* Must be next to previous, assumes .text is first */ + if (offs && PERF_ALIGN(offs, shdr.sh_addralign ?: 1) != shdr.sh_offset) + break; + + offs = shdr.sh_offset + shdr.sh_size; + } + + return offs; +} + /** * ref_reloc_sym_not_found - has kernel relocation symbol been found. 
* @kmap: kernel maps and relocation reference symbol @@ -1366,9 +1344,10 @@ void __weak arch__sym_update(struct symbol *s __maybe_unused, static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, - struct dso **curr_dsop, struct map **curr_mapp, + struct dso **curr_dsop, const char *section_name, - bool adjust_kernel_syms, bool kmodule, bool *remap_kernel) + bool adjust_kernel_syms, bool kmodule, bool *remap_kernel, + u64 max_text_sh_offset) { struct dso *curr_dso = *curr_dsop; struct map *curr_map; @@ -1378,7 +1357,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, if (adjust_kernel_syms) sym->st_value -= shdr->sh_addr - shdr->sh_offset; - if (strcmp(section_name, (curr_dso->short_name + dso->short_name_len)) == 0) + if (strcmp(section_name, (dso__short_name(curr_dso) + dso__short_name_len(dso))) == 0) return 0; if (strcmp(section_name, ".text") == 0) { @@ -1387,13 +1366,12 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * kallsyms and identity maps. Overwrite it to * map to the kernel dso. */ - if (*remap_kernel && dso->kernel && !kmodule) { + if (*remap_kernel && dso__kernel(dso) && !kmodule) { *remap_kernel = false; map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); map__set_end(map, map__start(map) + shdr->sh_size); map__set_pgoff(map, shdr->sh_offset); - map__set_map_ip(map, map__dso_map_ip); - map__set_unmap_ip(map, map__dso_unmap_ip); + map__set_mapping_type(map, MAPPING_TYPE__DSO); /* Ensure maps are correctly ordered */ if (kmaps) { int err; @@ -1417,15 +1395,26 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, map__set_pgoff(map, shdr->sh_offset); } - *curr_mapp = map; - *curr_dsop = dso; + dso__put(*curr_dsop); + *curr_dsop = dso__get(dso); return 0; } if (!kmap) return 0; - snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name); + /* + * perf does not record module section addresses except for .text, but + * some sections can use the same mapping as .text. 
+ */ + if (kmodule && adjust_kernel_syms && is_exe_text(shdr->sh_flags) && + shdr->sh_offset <= max_text_sh_offset) { + dso__put(*curr_dsop); + *curr_dsop = dso__get(dso); + return 0; + } + + snprintf(dso_name, sizeof(dso_name), "%s%s", dso__short_name(dso), section_name); curr_map = maps__find_by_name(kmaps, dso_name); if (curr_map == NULL) { @@ -1437,15 +1426,17 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, curr_dso = dso__new(dso_name); if (curr_dso == NULL) return -1; - curr_dso->kernel = dso->kernel; - curr_dso->long_name = dso->long_name; - curr_dso->long_name_len = dso->long_name_len; + dso__set_kernel(curr_dso, dso__kernel(dso)); + RC_CHK_ACCESS(curr_dso)->long_name = dso__long_name(dso); + RC_CHK_ACCESS(curr_dso)->long_name_len = dso__long_name_len(dso); + dso__set_binary_type(curr_dso, dso__binary_type(dso)); + dso__set_adjust_symbols(curr_dso, dso__adjust_symbols(dso)); curr_map = map__new2(start, curr_dso); - dso__put(curr_dso); - if (curr_map == NULL) + if (curr_map == NULL) { + dso__put(curr_dso); return -1; - - if (curr_dso->kernel) + } + if (dso__kernel(curr_dso)) map__kmap(curr_map)->kmaps = kmaps; if (adjust_kernel_syms) { @@ -1453,25 +1444,23 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, map__set_end(curr_map, map__start(curr_map) + shdr->sh_size); map__set_pgoff(curr_map, shdr->sh_offset); } else { - map__set_map_ip(curr_map, identity__map_ip); - map__set_unmap_ip(curr_map, identity__map_ip); + map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY); } - curr_dso->symtab_type = dso->symtab_type; - if (maps__insert(kmaps, curr_map)) + dso__set_symtab_type(curr_dso, dso__symtab_type(dso)); + if (maps__insert(kmaps, curr_map)) { + dso__put(curr_dso); + map__put(curr_map); return -1; - /* - * Add it before we drop the reference to curr_map, i.e. while - * we still are sure to have a reference to this DSO via - * *curr_map->dso. - */ + } dsos__add(&maps__machine(kmaps)->dsos, curr_dso); - /* kmaps already got it */ - map__put(curr_map); dso__set_loaded(curr_dso); - *curr_mapp = curr_map; + dso__put(*curr_dsop); *curr_dsop = curr_dso; - } else - *curr_dsop = map__dso(curr_map); + } else { + dso__put(*curr_dsop); + *curr_dsop = dso__get(map__dso(curr_map)); + } + map__put(curr_map); return 0; } @@ -1480,13 +1469,11 @@ static int dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, int kmodule, int dynsym) { - struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL; + struct kmap *kmap = dso__kernel(dso) ? map__kmap(map) : NULL; struct maps *kmaps = kmap ? 
map__kmaps(map) : NULL; - struct map *curr_map = map; - struct dso *curr_dso = dso; + struct dso *curr_dso = NULL; Elf_Data *symstrs, *secstrs, *secstrs_run, *secstrs_sym; uint32_t nr_syms; - int err = -1; uint32_t idx; GElf_Ehdr ehdr; GElf_Shdr shdr; @@ -1497,6 +1484,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, Elf *elf; int nr = 0; bool remap_kernel = false, adjust_kernel_syms = false; + u64 max_text_sh_offset = 0; if (kmap && !kmaps) return -1; @@ -1512,8 +1500,10 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, } if (elf_section_by_name(runtime_ss->elf, &runtime_ss->ehdr, &tshdr, - ".text", NULL)) - dso->text_offset = tshdr.sh_addr - tshdr.sh_offset; + ".text", NULL)) { + dso__set_text_offset(dso, tshdr.sh_addr - tshdr.sh_offset); + dso__set_text_end(dso, tshdr.sh_offset + tshdr.sh_size); + } if (runtime_ss->opdsec) opddata = elf_rawdata(runtime_ss->opdsec, NULL); @@ -1571,17 +1561,22 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, * attempted to prelink vdso to its virtual address. */ if (dso__is_vdso(dso)) - map__set_reloc(map, map__start(map) - dso->text_offset); + map__set_reloc(map, map__start(map) - dso__text_offset(dso)); - dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap); + dso__set_adjust_symbols(dso, runtime_ss->adjust_symbols || ref_reloc(kmap)); /* * Initial kernel and module mappings do not map to the dso. * Flag the fixups. */ - if (dso->kernel) { + if (dso__kernel(dso)) { remap_kernel = true; - adjust_kernel_syms = dso->adjust_symbols; + adjust_kernel_syms = dso__adjust_symbols(dso); } + + if (kmodule && adjust_kernel_syms) + max_text_sh_offset = max_text_section(runtime_ss->elf, &runtime_ss->ehdr); + + curr_dso = dso__get(dso); elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { struct symbol *f; const char *elf_name = elf_sym__name(&sym, symstrs); @@ -1602,6 +1597,12 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, continue; } + /* Reject RISCV ELF "mapping symbols" */ + if (ehdr.e_machine == EM_RISCV) { + if (elf_name[0] == '$' && strchr("dx", elf_name[1])) + continue; + } + if (runtime_ss->opdsec && sym.st_shndx == runtime_ss->opdidx) { u32 offset = sym.st_value - syms_ss->opdshdr.sh_addr; u64 *opd = opddata->d_buf + offset; @@ -1669,9 +1670,14 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, (sym.st_value & 1)) --sym.st_value; - if (dso->kernel) { - if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map, - section_name, adjust_kernel_syms, kmodule, &remap_kernel)) + if (dso__kernel(dso)) { + if (dso__process_kernel_symbol(dso, map, &sym, &shdr, + kmaps, kmap, &curr_dso, + section_name, + adjust_kernel_syms, + kmodule, + &remap_kernel, + max_text_sh_offset)) goto out_elf_end; } else if ((used_opd && runtime_ss->adjust_symbols) || (!used_opd && syms_ss->adjust_symbols)) { @@ -1704,7 +1710,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, } } - demangled = demangle_sym(dso, kmodule, elf_name); + demangled = dso__demangle_sym(dso, kmodule, elf_name); if (demangled != NULL) elf_name = demangled; @@ -1717,16 +1723,17 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, arch__sym_update(f, &sym); - __symbols__insert(&curr_dso->symbols, f, dso->kernel); + __symbols__insert(dso__symbols(curr_dso), f, dso__kernel(dso)); nr++; } + dso__put(curr_dso); /* * For misannotated, 
zeroed, ASM function sizes. */ if (nr > 0) { - symbols__fixup_end(&dso->symbols, false); - symbols__fixup_duplicate(&dso->symbols); + symbols__fixup_end(dso__symbols(dso), false); + symbols__fixup_duplicate(dso__symbols(dso)); if (kmap) { /* * We need to fixup this here too because we create new @@ -1735,9 +1742,10 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, maps__fixup_end(kmaps); } } - err = nr; + return nr; out_elf_end: - return err; + dso__put(curr_dso); + return -1; } int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, @@ -1746,16 +1754,16 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, int nr = 0; int err = -1; - dso->symtab_type = syms_ss->type; - dso->is_64_bit = syms_ss->is_64_bit; - dso->rel = syms_ss->ehdr.e_type == ET_REL; + dso__set_symtab_type(dso, syms_ss->type); + dso__set_is_64_bit(dso, syms_ss->is_64_bit); + dso__set_rel(dso, syms_ss->ehdr.e_type == ET_REL); /* * Modules may already have symbols from kallsyms, but those symbols * have the wrong values for the dso maps, so remove them. */ if (kmodule && syms_ss->symtab) - symbols__delete(&dso->symbols); + symbols__delete(dso__symbols(dso)); if (!syms_ss->symtab) { /* @@ -1763,7 +1771,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, * to using kallsyms. The vmlinux runtime symbols aren't * of much use. */ - if (dso->kernel) + if (dso__kernel(dso)) return err; } else { err = dso__load_sym_internal(dso, map, syms_ss, runtime_ss, @@ -1778,10 +1786,23 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, kmodule, 1); if (err < 0) return err; - err += nr; + nr += err; } - return err; + /* + * The .gnu_debugdata is a special situation: it contains a symbol + * table, but the runtime file may also contain dynsym entries which are + * not present there. We need to load both. 
+ */ + if (syms_ss->type == DSO_BINARY_TYPE__GNU_DEBUGDATA && runtime_ss->dynsym) { + err = dso__load_sym_internal(dso, map, runtime_ss, runtime_ss, + kmodule, 1); + if (err < 0) + return err; + nr += err; + } + + return nr; } static int elf_read_maps(Elf *elf, bool exe, mapfn_t mapfn, void *data) diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index a81a14769bd1..c6b17c14a2e9 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -4,7 +4,6 @@ #include <errno.h> #include <unistd.h> -#include <stdio.h> #include <fcntl.h> #include <string.h> #include <stdlib.h> @@ -43,7 +42,7 @@ static int read_build_id(void *note_data, size_t note_len, struct build_id *bid, void *ptr; ptr = note_data; - while (ptr < (note_data + note_len)) { + while ((ptr + sizeof(*nhdr)) < (note_data + note_len)) { const char *name; size_t namesz, descsz; @@ -88,137 +87,118 @@ int filename__read_debuglink(const char *filename __maybe_unused, */ int filename__read_build_id(const char *filename, struct build_id *bid) { - FILE *fp; - int ret = -1; - bool need_swap = false; - u8 e_ident[EI_NIDENT]; - size_t buf_size; - void *buf; - int i; + int fd, ret = -1; + bool need_swap = false, elf32; + union { + struct { + Elf32_Ehdr ehdr32; + Elf32_Phdr *phdr32; + }; + struct { + Elf64_Ehdr ehdr64; + Elf64_Phdr *phdr64; + }; + } hdrs; + void *phdr, *buf = NULL; + ssize_t phdr_size, ehdr_size, buf_size = 0; + + if (!filename) + return -EFAULT; + if (!is_regular_file(filename)) + return -EWOULDBLOCK; - fp = fopen(filename, "r"); - if (fp == NULL) + fd = open(filename, O_RDONLY); + if (fd < 0) return -1; - if (fread(e_ident, sizeof(e_ident), 1, fp) != 1) + if (read(fd, hdrs.ehdr32.e_ident, EI_NIDENT) != EI_NIDENT) goto out; - if (memcmp(e_ident, ELFMAG, SELFMAG) || - e_ident[EI_VERSION] != EV_CURRENT) + if (memcmp(hdrs.ehdr32.e_ident, ELFMAG, SELFMAG) || + hdrs.ehdr32.e_ident[EI_VERSION] != EV_CURRENT) goto out; - need_swap = check_need_swap(e_ident[EI_DATA]); - - /* for simplicity */ - fseek(fp, 0, SEEK_SET); + need_swap = check_need_swap(hdrs.ehdr32.e_ident[EI_DATA]); + elf32 = hdrs.ehdr32.e_ident[EI_CLASS] == ELFCLASS32; + ehdr_size = (elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64)) - EI_NIDENT; - if (e_ident[EI_CLASS] == ELFCLASS32) { - Elf32_Ehdr ehdr; - Elf32_Phdr *phdr; - - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) - goto out; + if (read(fd, + (elf32 ? 
(void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64) + EI_NIDENT, + ehdr_size) != ehdr_size) + goto out; - if (need_swap) { - ehdr.e_phoff = bswap_32(ehdr.e_phoff); - ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); - ehdr.e_phnum = bswap_16(ehdr.e_phnum); + if (need_swap) { + if (elf32) { + hdrs.ehdr32.e_phoff = bswap_32(hdrs.ehdr32.e_phoff); + hdrs.ehdr32.e_phentsize = bswap_16(hdrs.ehdr32.e_phentsize); + hdrs.ehdr32.e_phnum = bswap_16(hdrs.ehdr32.e_phnum); + } else { + hdrs.ehdr64.e_phoff = bswap_64(hdrs.ehdr64.e_phoff); + hdrs.ehdr64.e_phentsize = bswap_16(hdrs.ehdr64.e_phentsize); + hdrs.ehdr64.e_phnum = bswap_16(hdrs.ehdr64.e_phnum); } + } + if ((elf32 && hdrs.ehdr32.e_phentsize != sizeof(Elf32_Phdr)) || + (!elf32 && hdrs.ehdr64.e_phentsize != sizeof(Elf64_Phdr))) + goto out; - buf_size = ehdr.e_phentsize * ehdr.e_phnum; - buf = malloc(buf_size); - if (buf == NULL) - goto out; - - fseek(fp, ehdr.e_phoff, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; - - for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { - void *tmp; - long offset; - - if (need_swap) { - phdr->p_type = bswap_32(phdr->p_type); - phdr->p_offset = bswap_32(phdr->p_offset); - phdr->p_filesz = bswap_32(phdr->p_filesz); - } - - if (phdr->p_type != PT_NOTE) - continue; - - buf_size = phdr->p_filesz; - offset = phdr->p_offset; - tmp = realloc(buf, buf_size); - if (tmp == NULL) - goto out_free; + phdr_size = elf32 ? sizeof(Elf32_Phdr) * hdrs.ehdr32.e_phnum + : sizeof(Elf64_Phdr) * hdrs.ehdr64.e_phnum; + phdr = malloc(phdr_size); + if (phdr == NULL) + goto out; - buf = tmp; - fseek(fp, offset, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; + lseek(fd, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET); + if (read(fd, phdr, phdr_size) != phdr_size) + goto out_free; - ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) - ret = bid->size; - break; - } - } else { - Elf64_Ehdr ehdr; - Elf64_Phdr *phdr; + if (elf32) + hdrs.phdr32 = phdr; + else + hdrs.phdr64 = phdr; - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) - goto out; + for (int i = 0; i < (elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum); i++) { + ssize_t p_filesz; if (need_swap) { - ehdr.e_phoff = bswap_64(ehdr.e_phoff); - ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); - ehdr.e_phnum = bswap_16(ehdr.e_phnum); + if (elf32) { + hdrs.phdr32[i].p_type = bswap_32(hdrs.phdr32[i].p_type); + hdrs.phdr32[i].p_offset = bswap_32(hdrs.phdr32[i].p_offset); + hdrs.phdr32[i].p_filesz = bswap_32(hdrs.phdr32[i].p_filesz); + } else { + hdrs.phdr64[i].p_type = bswap_32(hdrs.phdr64[i].p_type); + hdrs.phdr64[i].p_offset = bswap_64(hdrs.phdr64[i].p_offset); + hdrs.phdr64[i].p_filesz = bswap_64(hdrs.phdr64[i].p_filesz); + } } + if ((elf32 ? hdrs.phdr32[i].p_type : hdrs.phdr64[i].p_type) != PT_NOTE) + continue; - buf_size = ehdr.e_phentsize * ehdr.e_phnum; - buf = malloc(buf_size); - if (buf == NULL) - goto out; - - fseek(fp, ehdr.e_phoff, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; - - for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { + p_filesz = elf32 ?
hdrs.phdr32[i].p_filesz : hdrs.phdr64[i].p_filesz; + if (p_filesz > buf_size) { void *tmp; - long offset; - if (need_swap) { - phdr->p_type = bswap_32(phdr->p_type); - phdr->p_offset = bswap_64(phdr->p_offset); - phdr->p_filesz = bswap_64(phdr->p_filesz); - } - - if (phdr->p_type != PT_NOTE) - continue; - - buf_size = phdr->p_filesz; - offset = phdr->p_offset; + buf_size = p_filesz; tmp = realloc(buf, buf_size); if (tmp == NULL) goto out_free; - buf = tmp; - fseek(fp, offset, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; + } + lseek(fd, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET); + if (read(fd, buf, p_filesz) != p_filesz) + goto out_free; - ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) - ret = bid->size; + ret = read_build_id(buf, p_filesz, bid, need_swap); + if (ret == 0) { + ret = bid->size; break; } } out_free: free(buf); + free(phdr); out: - fclose(fp); + close(fd); return ret; } @@ -271,7 +251,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, out_close: close(fd); out_errno: - dso->load_errno = errno; + RC_CHK_ACCESS(dso)->load_errno = errno; return -1; } @@ -341,12 +321,12 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, struct symsrc *runtime_ss __maybe_unused, int kmodule __maybe_unused) { - struct build_id bid; + struct build_id bid = { .size = 0, }; int ret; ret = fd__is_64_bit(ss->fd); if (ret >= 0) - dso->is_64_bit = ret; + RC_CHK_ACCESS(dso)->is_64_bit = ret; if (filename__read_build_id(ss->name, &bid) > 0) dso__set_build_id(dso, &bid); @@ -379,13 +359,6 @@ void symbol__elf_init(void) { } -char *dso__demangle_sym(struct dso *dso __maybe_unused, - int kmodule __maybe_unused, - const char *elf_name __maybe_unused) -{ - return NULL; -} - bool filename__has_section(const char *filename __maybe_unused, const char *sec __maybe_unused) { return false; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index f849f9ef68e6..814f960fa8f8 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -18,6 +18,12 @@ #include "annotate.h" #include "build-id.h" #include "cap.h" +#include "cpumap.h" +#include "debug.h" +#include "demangle-cxx.h" +#include "demangle-java.h" +#include "demangle-ocaml.h" +#include "demangle-rust-v0.h" #include "dso.h" #include "util.h" // lsdir() #include "debug.h" @@ -27,6 +33,7 @@ #include "symbol.h" #include "map_symbol.h" #include "mem-events.h" +#include "mem-info.h" #include "symsrc.h" #include "strlist.h" #include "intlist.h" @@ -34,6 +41,7 @@ #include "header.h" #include "path.h" #include <linux/ctype.h> +#include <linux/log2.h> #include <linux/zalloc.h> #include <elf.h> @@ -48,11 +56,6 @@ static bool symbol__is_idle(const char *name); int vmlinux_path__nr_entries; char **vmlinux_path; -struct map_list_node { - struct list_head node; - struct map *map; -}; - struct symbol_conf symbol_conf = { .nanosecs = false, .use_modules = true, @@ -68,6 +71,16 @@ struct symbol_conf symbol_conf = { .res_sample = 0, }; +struct map_list_node { + struct list_head node; + struct map *map; +}; + +static struct map_list_node *map_list_node__new(void) +{ + return malloc(sizeof(struct map_list_node)); +} + static enum dso_binary_type binary_type_symtab[] = { DSO_BINARY_TYPE__KALLSYMS, DSO_BINARY_TYPE__GUEST_KALLSYMS, @@ -78,6 +91,7 @@ static enum dso_binary_type binary_type_symtab[] = { DSO_BINARY_TYPE__FEDORA_DEBUGINFO, DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + DSO_BINARY_TYPE__GNU_DEBUGDATA, 
DSO_BINARY_TYPE__SYSTEM_PATH_DSO, DSO_BINARY_TYPE__GUEST_KMODULE, DSO_BINARY_TYPE__GUEST_KMODULE_COMP, @@ -90,15 +104,21 @@ static enum dso_binary_type binary_type_symtab[] = { #define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab) -static struct map_list_node *map_list_node__new(void) +static bool symbol_type__filter(char __symbol_type) { - return malloc(sizeof(struct map_list_node)); -} - -static bool symbol_type__filter(char symbol_type) -{ - symbol_type = toupper(symbol_type); - return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B'; + // Since 'U' == undefined and 'u' == unique global symbol, we can't use toupper there + // 'N' is for debugging symbols, 'n' is a non-data, non-code, non-debug read-only section. + // According to 'man nm'. + // 'N' first seen in: + // ffffffff9b35d130 N __pfx__RNCINvNtNtNtCsbDUBuN8AbD4_4core4iter8adapters3map12map_try_foldjNtCs6vVzKs5jPr6_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ + // a seemingly Rust mangled name + // Ditto for '1': + // root@x1:~# grep ' 1 ' /proc/kallsyms + // ffffffffb098bc00 1 __pfx__RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ + // ffffffffb098bc10 1 _RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ + char symbol_type = toupper(__symbol_type); + return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B' || + __symbol_type == 'u' || __symbol_type == 'l' || __symbol_type == 'N' || __symbol_type == '1'; } static int prefix_underscores_count(const char *str) @@ -153,6 +173,13 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb) else if ((a == 0) && (b > 0)) return SYMBOL_B; + if (syma->type != symb->type) { + if (syma->type == STT_NOTYPE) + return SYMBOL_B; + if (symb->type == STT_NOTYPE) + return SYMBOL_A; + } + /* Prefer a non weak symbol over a weak one */ a = syma->binding == STB_WEAK; b = symb->binding == STB_WEAK; @@ -202,11 +229,10 @@ void symbols__fixup_duplicate(struct rb_root_cached *symbols) curr = rb_entry(nd, struct symbol, rb_node); again: nd = rb_next(&curr->rb_node); - next = rb_entry(nd, struct symbol, rb_node); - if (!nd) break; + next = rb_entry(nd, struct symbol, rb_node); if (curr->start != next->start) continue; @@ -249,14 +275,31 @@ void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms) * segment is very big. Therefore do not fill this gap and do * not assign it to the kernel dso map (kallsyms). * + * Also BPF code can be allocated separately from text segments + * and modules. So the last entry in a module should not fill + * the gap too. 
+ * * In kallsyms, it determines module symbols using '[' character * like in: * ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi] */ if (prev->end == prev->start) { + const char *prev_mod; + const char *curr_mod; + + if (!is_kallsyms) { + prev->end = curr->start; + continue; + } + + prev_mod = strchr(prev->name, '['); + curr_mod = strchr(curr->name, '['); + /* Last kernel/module symbol mapped to end of page */ - if (is_kallsyms && (!strchr(prev->name, '[') != - !strchr(curr->name, '['))) + if (!prev_mod != !curr_mod) + prev->end = roundup(prev->end + 4096, 4096); + /* Last symbol in the previous module */ + else if (prev_mod && strcmp(prev_mod, curr_mod)) prev->end = roundup(prev->end + 4096, 4096); else prev->end = curr->start; @@ -271,29 +314,6 @@ void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms) curr->end = roundup(curr->start, 4096) + 4096; } -void maps__fixup_end(struct maps *maps) -{ - struct map_rb_node *prev = NULL, *curr; - - down_write(maps__lock(maps)); - - maps__for_each_entry(maps, curr) { - if (prev != NULL && !map__end(prev->map)) - map__set_end(prev->map, map__start(curr->map)); - - prev = curr; - } - - /* - * We still haven't the actual symbols, so guess the - * last map final address. - */ - if (curr && !map__end(curr->map)) - map__set_end(curr->map, ~0ULL); - - up_write(maps__lock(maps)); -} - struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name) { size_t namelen = strlen(name) + 1; @@ -539,52 +559,52 @@ static struct symbol *symbols__find_by_name(struct symbol *symbols[], void dso__reset_find_symbol_cache(struct dso *dso) { - dso->last_find_result.addr = 0; - dso->last_find_result.symbol = NULL; + dso__set_last_find_result_addr(dso, 0); + dso__set_last_find_result_symbol(dso, NULL); } void dso__insert_symbol(struct dso *dso, struct symbol *sym) { - __symbols__insert(&dso->symbols, sym, dso->kernel); + __symbols__insert(dso__symbols(dso), sym, dso__kernel(dso)); /* update the symbol cache if necessary */ - if (dso->last_find_result.addr >= sym->start && - (dso->last_find_result.addr < sym->end || + if (dso__last_find_result_addr(dso) >= sym->start && + (dso__last_find_result_addr(dso) < sym->end || sym->start == sym->end)) { - dso->last_find_result.symbol = sym; + dso__set_last_find_result_symbol(dso, sym); } } void dso__delete_symbol(struct dso *dso, struct symbol *sym) { - rb_erase_cached(&sym->rb_node, &dso->symbols); + rb_erase_cached(&sym->rb_node, dso__symbols(dso)); symbol__delete(sym); dso__reset_find_symbol_cache(dso); } struct symbol *dso__find_symbol(struct dso *dso, u64 addr) { - if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) { - dso->last_find_result.addr = addr; - dso->last_find_result.symbol = symbols__find(&dso->symbols, addr); + if (dso__last_find_result_addr(dso) != addr || dso__last_find_result_symbol(dso) == NULL) { + dso__set_last_find_result_addr(dso, addr); + dso__set_last_find_result_symbol(dso, symbols__find(dso__symbols(dso), addr)); } - return dso->last_find_result.symbol; + return dso__last_find_result_symbol(dso); } struct symbol *dso__find_symbol_nocache(struct dso *dso, u64 addr) { - return symbols__find(&dso->symbols, addr); + return symbols__find(dso__symbols(dso), addr); } struct symbol *dso__first_symbol(struct dso *dso) { - return symbols__first(&dso->symbols); + return symbols__first(dso__symbols(dso)); } struct symbol *dso__last_symbol(struct dso *dso) { - return symbols__last(&dso->symbols); + return symbols__last(dso__symbols(dso)); } 
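These symbol.c hunks convert direct &dso->symbols field accesses into the dso__symbols()/dso__set_*() accessor API while keeping the iterator entry points unchanged. For orientation, a minimal sketch that walks a loaded DSO's symbol table through those iterators; the dump helper itself is illustrative, not part of the patch:

#include <inttypes.h>
#include <stdio.h>
#include "dso.h"
#include "symbol.h"

/* Hypothetical debugging helper: print every symbol of a loaded DSO. */
static void dump_dso_symbols(struct dso *dso)
{
	for (struct symbol *sym = dso__first_symbol(dso); sym != NULL;
	     sym = dso__next_symbol(sym)) {
		fprintf(stderr, "%#" PRIx64 "-%#" PRIx64 " %s\n",
			sym->start, sym->end, sym->name);
	}
}

Because dso__first_symbol() and dso__next_symbol() survive the accessor conversion, callers like this keep working whether or not the symbol tree sits behind the reference-count-checked dso wrapper.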
struct symbol *dso__next_symbol(struct symbol *sym) @@ -594,11 +614,11 @@ struct symbol *dso__next_symbol(struct symbol *sym) struct symbol *dso__next_symbol_by_name(struct dso *dso, size_t *idx) { - if (*idx + 1 >= dso->symbol_names_len) + if (*idx + 1 >= dso__symbol_names_len(dso)) return NULL; ++*idx; - return dso->symbol_names[*idx]; + return dso__symbol_names(dso)[*idx]; } /* @@ -606,27 +626,29 @@ struct symbol *dso__next_symbol_by_name(struct dso *dso, size_t *idx) */ struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name, size_t *idx) { - struct symbol *s = symbols__find_by_name(dso->symbol_names, dso->symbol_names_len, - name, SYMBOL_TAG_INCLUDE__NONE, idx); - if (!s) - s = symbols__find_by_name(dso->symbol_names, dso->symbol_names_len, - name, SYMBOL_TAG_INCLUDE__DEFAULT_ONLY, idx); + struct symbol *s = symbols__find_by_name(dso__symbol_names(dso), + dso__symbol_names_len(dso), + name, SYMBOL_TAG_INCLUDE__NONE, idx); + if (!s) { + s = symbols__find_by_name(dso__symbol_names(dso), dso__symbol_names_len(dso), + name, SYMBOL_TAG_INCLUDE__DEFAULT_ONLY, idx); + } return s; } void dso__sort_by_name(struct dso *dso) { - mutex_lock(&dso->lock); + mutex_lock(dso__lock(dso)); if (!dso__sorted_by_name(dso)) { - size_t len; + size_t len = 0; - dso->symbol_names = symbols__sort_by_name(&dso->symbols, &len); - if (dso->symbol_names) { - dso->symbol_names_len = len; + dso__set_symbol_names(dso, symbols__sort_by_name(dso__symbols(dso), &len)); + if (dso__symbol_names(dso)) { + dso__set_symbol_names_len(dso, len); dso__set_sorted_by_name(dso); } } - mutex_unlock(&dso->lock); + mutex_unlock(dso__lock(dso)); } /* @@ -719,6 +741,7 @@ static bool symbol__is_idle(const char *name) "cpu_startup_entry", "idle_cpu", "intel_idle", + "intel_idle_ibrs", "default_idle", "native_safe_halt", "enter_idle", @@ -752,7 +775,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name, { struct symbol *sym; struct dso *dso = arg; - struct rb_root_cached *root = &dso->symbols; + struct rb_root_cached *root = dso__symbols(dso); if (!symbol_type__filter(type)) return 0; @@ -790,11 +813,10 @@ static int dso__load_all_kallsyms(struct dso *dso, const char *filename) static int maps__split_kallsyms_for_kcore(struct maps *kmaps, struct dso *dso) { - struct map *curr_map; struct symbol *pos; int count = 0; - struct rb_root_cached old_root = dso->symbols; - struct rb_root_cached *root = &dso->symbols; + struct rb_root_cached *root = dso__symbols(dso); + struct rb_root_cached old_root = *root; struct rb_node *next = rb_first_cached(root); if (!kmaps) @@ -803,6 +825,7 @@ static int maps__split_kallsyms_for_kcore(struct maps *kmaps, struct dso *dso) *root = RB_ROOT_CACHED; while (next) { + struct map *curr_map; struct dso *curr_map_dso; char *module; @@ -827,12 +850,13 @@ static int maps__split_kallsyms_for_kcore(struct maps *kmaps, struct dso *dso) pos->end = map__end(curr_map); if (pos->end) pos->end -= map__start(curr_map) - map__pgoff(curr_map); - symbols__insert(&curr_map_dso->symbols, pos); + symbols__insert(dso__symbols(curr_map_dso), pos); ++count; + map__put(curr_map); } /* Symbols have been adjusted */ - dso->adjust_symbols = 1; + dso__set_adjust_symbols(dso, true); return count; } @@ -846,10 +870,10 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, struct map *initial_map) { struct machine *machine; - struct map *curr_map = initial_map; + struct map *curr_map = map__get(initial_map); struct symbol *pos; int count = 0, moved = 0; - struct rb_root_cached 
*root = &dso->symbols; + struct rb_root_cached *root = dso__symbols(dso); struct rb_node *next = rb_first_cached(root); int kernel_range = 0; bool x86_64; @@ -876,9 +900,9 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, *module++ = '\0'; curr_map_dso = map__dso(curr_map); - if (strcmp(curr_map_dso->short_name, module)) { - if (RC_CHK_ACCESS(curr_map) != RC_CHK_ACCESS(initial_map) && - dso->kernel == DSO_SPACE__KERNEL_GUEST && + if (strcmp(dso__short_name(curr_map_dso), module)) { + if (!RC_CHK_EQUAL(curr_map, initial_map) && + dso__kernel(dso) == DSO_SPACE__KERNEL_GUEST && machine__is_default_guest(machine)) { /* * We assume all symbols of a module are @@ -890,17 +914,18 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, dso__set_loaded(curr_map_dso); } + map__zput(curr_map); curr_map = maps__find_by_name(kmaps, module); if (curr_map == NULL) { pr_debug("%s/proc/{kallsyms,modules} " "inconsistency while looking " "for \"%s\" module!\n", machine->root_dir, module); - curr_map = initial_map; + curr_map = map__get(initial_map); goto discard_symbol; } curr_map_dso = map__dso(curr_map); - if (curr_map_dso->loaded && + if (dso__loaded(curr_map_dso) && !machine__is_default_guest(machine)) goto discard_symbol; } @@ -920,7 +945,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, * symbols at this point. */ goto discard_symbol; - } else if (curr_map != initial_map) { + } else if (!RC_CHK_EQUAL(curr_map, initial_map)) { char dso_name[PATH_MAX]; struct dso *ndso; @@ -930,25 +955,29 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, pos->end -= delta; } - if (count == 0) { - curr_map = initial_map; + if (map__start(initial_map) <= (pos->start + delta) && + (pos->start + delta) < map__end(initial_map)) { + map__zput(curr_map); + curr_map = map__get(initial_map); goto add_symbol; } - if (dso->kernel == DSO_SPACE__KERNEL_GUEST) + if (dso__kernel(dso) == DSO_SPACE__KERNEL_GUEST) snprintf(dso_name, sizeof(dso_name), "[guest.kernel].%d", - kernel_range++); + kernel_range); else snprintf(dso_name, sizeof(dso_name), "[kernel].%d", - kernel_range++); + kernel_range); ndso = dso__new(dso_name); + map__zput(curr_map); if (ndso == NULL) return -1; - ndso->kernel = dso->kernel; + dso__set_kernel(ndso, dso__kernel(dso)); + dso__set_loaded(ndso); curr_map = map__new2(pos->start, ndso); if (curr_map == NULL) { @@ -956,12 +985,13 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, return -1; } - map__set_map_ip(curr_map, identity__map_ip); - map__set_unmap_ip(curr_map, identity__map_ip); + map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY); if (maps__insert(kmaps, curr_map)) { + map__zput(curr_map); dso__put(ndso); return -1; } + dso__put(ndso); ++kernel_range; } else if (delta) { /* Kernel was relocated at boot time */ @@ -969,11 +999,11 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, pos->end -= delta; } add_symbol: - if (curr_map != initial_map) { + if (!RC_CHK_EQUAL(curr_map, initial_map)) { struct dso *curr_map_dso = map__dso(curr_map); rb_erase_cached(&pos->rb_node, root); - symbols__insert(&curr_map_dso->symbols, pos); + symbols__insert(dso__symbols(curr_map_dso), pos); ++moved; } else ++count; @@ -984,12 +1014,12 @@ discard_symbol: symbol__delete(pos); } - if (curr_map != initial_map && - dso->kernel == DSO_SPACE__KERNEL_GUEST && + if (!RC_CHK_EQUAL(curr_map, initial_map) && + dso__kernel(dso) == 
DSO_SPACE__KERNEL_GUEST && machine__is_default_guest(maps__machine(kmaps))) { dso__set_loaded(map__dso(curr_map)); } - + map__put(curr_map); return count + moved; } @@ -1148,33 +1178,35 @@ out_delete_from: return ret; } +static int do_validate_kcore_modules_cb(struct map *old_map, void *data) +{ + struct rb_root *modules = data; + struct module_info *mi; + struct dso *dso; + + if (!__map__is_kmodule(old_map)) + return 0; + + dso = map__dso(old_map); + /* Module must be in memory at the same address */ + mi = find_module(dso__short_name(dso), modules); + if (!mi || mi->start != map__start(old_map)) + return -EINVAL; + + return 0; +} + static int do_validate_kcore_modules(const char *filename, struct maps *kmaps) { struct rb_root modules = RB_ROOT; - struct map_rb_node *old_node; int err; err = read_proc_modules(filename, &modules); if (err) return err; - maps__for_each_entry(kmaps, old_node) { - struct map *old_map = old_node->map; - struct module_info *mi; - struct dso *dso; + err = maps__for_each_map(kmaps, do_validate_kcore_modules_cb, &modules); - if (!__map__is_kmodule(old_map)) { - continue; - } - dso = map__dso(old_map); - /* Module must be in memory at the same address */ - mi = find_module(dso->short_name, &modules); - if (!mi || mi->start != map__start(old_map)) { - err = -EINVAL; - goto out; - } - } -out: delete_modules(&modules); return err; } @@ -1271,101 +1303,15 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) return 0; } -/* - * Merges map into maps by splitting the new map within the existing map - * regions. - */ -int maps__merge_in(struct maps *kmaps, struct map *new_map) +static bool remove_old_maps(struct map *map, void *data) { - struct map_rb_node *rb_node; - LIST_HEAD(merged); - int err = 0; - - maps__for_each_entry(kmaps, rb_node) { - struct map *old_map = rb_node->map; + const struct map *map_to_save = data; - /* no overload with this one */ - if (map__end(new_map) < map__start(old_map) || - map__start(new_map) >= map__end(old_map)) - continue; - - if (map__start(new_map) < map__start(old_map)) { - /* - * |new...... - * |old.... - */ - if (map__end(new_map) < map__end(old_map)) { - /* - * |new......| -> |new..| - * |old....| -> |old....| - */ - map__set_end(new_map, map__start(old_map)); - } else { - /* - * |new.............| -> |new..| |new..| - * |old....| -> |old....| - */ - struct map_list_node *m = map_list_node__new(); - - if (!m) { - err = -ENOMEM; - goto out; - } - - m->map = map__clone(new_map); - if (!m->map) { - free(m); - err = -ENOMEM; - goto out; - } - - map__set_end(m->map, map__start(old_map)); - list_add_tail(&m->node, &merged); - map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); - map__set_start(new_map, map__end(old_map)); - } - } else { - /* - * |new...... - * |old.... 
- */ - if (map__end(new_map) < map__end(old_map)) { - /* - * |new..| -> x - * |old.........| -> |old.........| - */ - map__put(new_map); - new_map = NULL; - break; - } else { - /* - * |new......| -> |new...| - * |old....| -> |old....| - */ - map__add_pgoff(new_map, map__end(old_map) - map__start(new_map)); - map__set_start(new_map, map__end(old_map)); - } - } - } - -out: - while (!list_empty(&merged)) { - struct map_list_node *old_node; - - old_node = list_entry(merged.next, struct map_list_node, node); - list_del_init(&old_node->node); - if (!err) - err = maps__insert(kmaps, old_node->map); - map__put(old_node->map); - free(old_node); - } - - if (new_map) { - if (!err) - err = maps__insert(kmaps, new_map); - map__put(new_map); - } - return err; + /* + * We need to preserve eBPF maps even if they are covered by kcore, + * because we need to access eBPF dso for source data. + */ + return !RC_CHK_EQUAL(map, map_to_save) && !__map__is_bpf_prog(map); } static int dso__load_kcore(struct dso *dso, struct map *map, @@ -1373,8 +1319,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, { struct maps *kmaps = map__kmaps(map); struct kcore_mapfn_data md; - struct map *replacement_map = NULL; - struct map_rb_node *old_node, *next; + struct map *map_ref, *replacement_map = NULL; struct machine *machine; bool is_64_bit; int err, fd; @@ -1413,7 +1358,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, &is_64_bit); if (err) goto out_err; - dso->is_64_bit = is_64_bit; + dso__set_is_64_bit(dso, is_64_bit); if (list_empty(&md.maps)) { err = -EINVAL; @@ -1421,17 +1366,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, } /* Remove old maps */ - maps__for_each_entry_safe(kmaps, old_node, next) { - struct map *old_map = old_node->map; - - /* - * We need to preserve eBPF maps even if they are - * covered by kcore, because we need to access - * eBPF dso for source data. - */ - if (old_map != map && !__map__is_bpf_prog(old_map)) - maps__remove(kmaps, old_map); - } + maps__remove_maps(kmaps, remove_old_maps, map); machine->trampolines_mapped = false; /* Find the kernel map using the '_stext' symbol */ @@ -1462,6 +1397,24 @@ static int dso__load_kcore(struct dso *dso, struct map *map, if (!replacement_map) replacement_map = list_entry(md.maps.next, struct map_list_node, node)->map; + /* + * Update addresses of vmlinux map. Re-insert it to ensure maps are + * correctly ordered. Do this before using maps__merge_in() for the + * remaining maps so vmlinux gets split if necessary. 
+ */ + map_ref = map__get(map); + maps__remove(kmaps, map_ref); + + map__set_start(map_ref, map__start(replacement_map)); + map__set_end(map_ref, map__end(replacement_map)); + map__set_pgoff(map_ref, map__pgoff(replacement_map)); + map__set_mapping_type(map_ref, map__mapping_type(replacement_map)); + + err = maps__insert(kmaps, map_ref); + map__put(map_ref); + if (err) + goto out_err; + /* Add new maps */ while (!list_empty(&md.maps)) { struct map_list_node *new_node = list_entry(md.maps.next, struct map_list_node, node); @@ -1469,23 +1422,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map, list_del_init(&new_node->node); - if (RC_CHK_ACCESS(new_map) == RC_CHK_ACCESS(replacement_map)) { - struct map *map_ref; - - map__set_start(map, map__start(new_map)); - map__set_end(map, map__end(new_map)); - map__set_pgoff(map, map__pgoff(new_map)); - map__set_map_ip(map, map__map_ip_ptr(new_map)); - map__set_unmap_ip(map, map__unmap_ip_ptr(new_map)); - /* Ensure maps are correctly ordered */ - map_ref = map__get(map); - maps__remove(kmaps, map_ref); - err = maps__insert(kmaps, map_ref); - map__put(map_ref); - map__put(new_map); - if (err) - goto out_err; - } else { + /* skip if replacement_map, already inserted above */ + if (!RC_CHK_EQUAL(new_map, replacement_map)) { /* * Merge kcore map into existing maps, * and ensure that current maps (eBPF) @@ -1496,6 +1434,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, goto out_err; } } + map__zput(new_node->map); free(new_node); } @@ -1516,10 +1455,10 @@ static int dso__load_kcore(struct dso *dso, struct map *map, * Set the data type and long name so that kcore can be read via * dso__data_read_addr(). */ - if (dso->kernel == DSO_SPACE__KERNEL_GUEST) - dso->binary_type = DSO_BINARY_TYPE__GUEST_KCORE; + if (dso__kernel(dso) == DSO_SPACE__KERNEL_GUEST) + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KCORE); else - dso->binary_type = DSO_BINARY_TYPE__KCORE; + dso__set_binary_type(dso, DSO_BINARY_TYPE__KCORE); dso__set_long_name(dso, strdup(kcore_filename), true); close(fd); @@ -1580,13 +1519,13 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename, if (kallsyms__delta(kmap, filename, &delta)) return -1; - symbols__fixup_end(&dso->symbols, true); - symbols__fixup_duplicate(&dso->symbols); + symbols__fixup_end(dso__symbols(dso), true); + symbols__fixup_duplicate(dso__symbols(dso)); - if (dso->kernel == DSO_SPACE__KERNEL_GUEST) - dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS; + if (dso__kernel(dso) == DSO_SPACE__KERNEL_GUEST) + dso__set_symtab_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); else - dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS; + dso__set_symtab_type(dso, DSO_BINARY_TYPE__KALLSYMS); if (!no_kcore && !dso__load_kcore(dso, map, filename)) return maps__split_kallsyms_for_kcore(kmap->kmaps, dso); @@ -1642,7 +1581,7 @@ static int dso__load_perf_map(const char *map_path, struct dso *dso) if (sym == NULL) goto out_delete_line; - symbols__insert(&dso->symbols, sym); + symbols__insert(dso__symbols(dso), sym); nr_syms++; } @@ -1657,134 +1596,6 @@ out_failure: return -1; } -#ifdef HAVE_LIBBFD_SUPPORT -#define PACKAGE 'perf' -#include <bfd.h> - -static int bfd_symbols__cmpvalue(const void *a, const void *b) -{ - const asymbol *as = *(const asymbol **)a, *bs = *(const asymbol **)b; - - if (bfd_asymbol_value(as) != bfd_asymbol_value(bs)) - return bfd_asymbol_value(as) - bfd_asymbol_value(bs); - - return bfd_asymbol_name(as)[0] - bfd_asymbol_name(bs)[0]; -} - -static int bfd2elf_binding(asymbol *symbol) -{ - if 
(symbol->flags & BSF_WEAK) - return STB_WEAK; - if (symbol->flags & BSF_GLOBAL) - return STB_GLOBAL; - if (symbol->flags & BSF_LOCAL) - return STB_LOCAL; - return -1; -} - -int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) -{ - int err = -1; - long symbols_size, symbols_count, i; - asection *section; - asymbol **symbols, *sym; - struct symbol *symbol; - bfd *abfd; - u64 start, len; - - abfd = bfd_openr(debugfile, NULL); - if (!abfd) - return -1; - - if (!bfd_check_format(abfd, bfd_object)) { - pr_debug2("%s: cannot read %s bfd file.\n", __func__, - dso->long_name); - goto out_close; - } - - if (bfd_get_flavour(abfd) == bfd_target_elf_flavour) - goto out_close; - - symbols_size = bfd_get_symtab_upper_bound(abfd); - if (symbols_size == 0) { - bfd_close(abfd); - return 0; - } - - if (symbols_size < 0) - goto out_close; - - symbols = malloc(symbols_size); - if (!symbols) - goto out_close; - - symbols_count = bfd_canonicalize_symtab(abfd, symbols); - if (symbols_count < 0) - goto out_free; - - section = bfd_get_section_by_name(abfd, ".text"); - if (section) { - for (i = 0; i < symbols_count; ++i) { - if (!strcmp(bfd_asymbol_name(symbols[i]), "__ImageBase") || - !strcmp(bfd_asymbol_name(symbols[i]), "__image_base__")) - break; - } - if (i < symbols_count) { - /* PE symbols can only have 4 bytes, so use .text high bits */ - dso->text_offset = section->vma - (u32)section->vma; - dso->text_offset += (u32)bfd_asymbol_value(symbols[i]); - } else { - dso->text_offset = section->vma - section->filepos; - } - } - - qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue); - -#ifdef bfd_get_section -#define bfd_asymbol_section bfd_get_section -#endif - for (i = 0; i < symbols_count; ++i) { - sym = symbols[i]; - section = bfd_asymbol_section(sym); - if (bfd2elf_binding(sym) < 0) - continue; - - while (i + 1 < symbols_count && - bfd_asymbol_section(symbols[i + 1]) == section && - bfd2elf_binding(symbols[i + 1]) < 0) - i++; - - if (i + 1 < symbols_count && - bfd_asymbol_section(symbols[i + 1]) == section) - len = symbols[i + 1]->value - sym->value; - else - len = section->size - sym->value; - - start = bfd_asymbol_value(sym) - dso->text_offset; - symbol = symbol__new(start, len, bfd2elf_binding(sym), STT_FUNC, - bfd_asymbol_name(sym)); - if (!symbol) - goto out_free; - - symbols__insert(&dso->symbols, symbol); - } -#ifdef bfd_get_section -#undef bfd_asymbol_section -#endif - - symbols__fixup_end(&dso->symbols, false); - symbols__fixup_duplicate(&dso->symbols); - dso->adjust_symbols = 1; - - err = 0; -out_free: - free(symbols); -out_close: - bfd_close(abfd); - return err; -} -#endif - static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, enum dso_binary_type type) { @@ -1797,17 +1608,18 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: - return !kmod && dso->kernel == DSO_SPACE__USER; + case DSO_BINARY_TYPE__GNU_DEBUGDATA: + return !kmod && dso__kernel(dso) == DSO_SPACE__USER; case DSO_BINARY_TYPE__KALLSYMS: case DSO_BINARY_TYPE__VMLINUX: case DSO_BINARY_TYPE__KCORE: - return dso->kernel == DSO_SPACE__KERNEL; + return dso__kernel(dso) == DSO_SPACE__KERNEL; case DSO_BINARY_TYPE__GUEST_KALLSYMS: case DSO_BINARY_TYPE__GUEST_VMLINUX: case DSO_BINARY_TYPE__GUEST_KCORE: - return dso->kernel == DSO_SPACE__KERNEL_GUEST; + return dso__kernel(dso) == DSO_SPACE__KERNEL_GUEST; case DSO_BINARY_TYPE__GUEST_KMODULE: 
case DSO_BINARY_TYPE__GUEST_KMODULE_COMP: @@ -1817,7 +1629,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, * kernel modules know their symtab type - it's set when * creating a module dso in machine__addnew_module_map(). */ - return kmod && dso->symtab_type == type; + return kmod && dso__symtab_type(dso) == type; case DSO_BINARY_TYPE__BUILD_ID_CACHE: case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: @@ -1882,21 +1694,22 @@ int dso__load(struct dso *dso, struct map *map) struct symsrc *syms_ss = NULL, *runtime_ss = NULL; bool kmod; bool perfmap; - struct build_id bid; struct nscookie nsc; char newmapname[PATH_MAX]; - const char *map_path = dso->long_name; + const char *map_path = dso__long_name(dso); + + mutex_lock(dso__lock(dso)); + perfmap = is_perf_pid_map_name(map_path); - mutex_lock(&dso->lock); - perfmap = strncmp(dso->name, "/tmp/perf-", 10) == 0; if (perfmap) { - if (dso->nsinfo && (dso__find_perf_map(newmapname, - sizeof(newmapname), &dso->nsinfo) == 0)) { + if (dso__nsinfo(dso) && + (dso__find_perf_map(newmapname, sizeof(newmapname), + dso__nsinfo_ptr(dso)) == 0)) { map_path = newmapname; } } - nsinfo__mountns_enter(dso->nsinfo, &nsc); + nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); /* check again under the dso->lock */ if (dso__loaded(dso)) { @@ -1904,15 +1717,12 @@ int dso__load(struct dso *dso, struct map *map) goto out; } - kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || - dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP || - dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE || - dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; + kmod = dso__is_kmod(dso); - if (dso->kernel && !kmod) { - if (dso->kernel == DSO_SPACE__KERNEL) + if (dso__kernel(dso) && !kmod) { + if (dso__kernel(dso) == DSO_SPACE__KERNEL) ret = dso__load_kernel_sym(dso, map); - else if (dso->kernel == DSO_SPACE__KERNEL_GUEST) + else if (dso__kernel(dso) == DSO_SPACE__KERNEL_GUEST) ret = dso__load_guest_kernel_sym(dso, map); machine = maps__machine(map__kmaps(map)); @@ -1921,12 +1731,13 @@ int dso__load(struct dso *dso, struct map *map) goto out; } - dso->adjust_symbols = 0; + dso__set_adjust_symbols(dso, false); if (perfmap) { ret = dso__load_perf_map(map_path, dso); - dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT : - DSO_BINARY_TYPE__NOT_FOUND; + dso__set_symtab_type(dso, ret > 0 + ? DSO_BINARY_TYPE__JAVA_JIT + : DSO_BINARY_TYPE__NOT_FOUND); goto out; } @@ -1939,11 +1750,12 @@ int dso__load(struct dso *dso, struct map *map) /* * Read the build id if possible. This is required for - * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work + * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work. 
*/ - if (!dso->has_build_id && - is_regular_file(dso->long_name)) { - __symbol__join_symfs(name, PATH_MAX, dso->long_name); + if (!dso__has_build_id(dso)) { + struct build_id bid = { .size = 0, }; + + __symbol__join_symfs(name, PATH_MAX, dso__long_name(dso)); if (filename__read_build_id(name, &bid) > 0) dso__set_build_id(dso, &bid); } @@ -1977,7 +1789,7 @@ int dso__load(struct dso *dso, struct map *map) nsinfo__mountns_exit(&nsc); is_reg = is_regular_file(name); - if (!is_reg && errno == ENOENT && dso->nsinfo) { + if (!is_reg && errno == ENOENT && dso__nsinfo(dso)) { char *new_name = dso__filename_with_chroot(dso, name); if (new_name) { is_reg = is_regular_file(new_name); @@ -1994,7 +1806,7 @@ int dso__load(struct dso *dso, struct map *map) sirc = symsrc__init(ss, dso, name, symtab_type); if (nsexit) - nsinfo__mountns_enter(dso->nsinfo, &nsc); + nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); if (bfdrc == 0) { ret = 0; @@ -2007,8 +1819,8 @@ int dso__load(struct dso *dso, struct map *map) if (!syms_ss && symsrc__has_symtab(ss)) { syms_ss = ss; next_slot = true; - if (!dso->symsrc_filename) - dso->symsrc_filename = strdup(name); + if (!dso__symsrc_filename(dso)) + dso__set_symsrc_filename(dso, strdup(name)); } if (!runtime_ss && symsrc__possibly_runtime(ss)) { @@ -2019,6 +1831,9 @@ int dso__load(struct dso *dso, struct map *map) if (next_slot) { ss_pos++; + if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) + dso__set_binary_type(dso, symtab_type); + if (syms_ss && runtime_ss) break; } else { @@ -2055,134 +1870,20 @@ int dso__load(struct dso *dso, struct map *map) symsrc__destroy(&ss_[ss_pos - 1]); out_free: free(name); - if (ret < 0 && strstr(dso->name, " (deleted)") != NULL) + if (ret < 0 && strstr(dso__name(dso), " (deleted)") != NULL) ret = 0; out: dso__set_loaded(dso); - mutex_unlock(&dso->lock); + mutex_unlock(dso__lock(dso)); nsinfo__mountns_exit(&nsc); return ret; } -static int map__strcmp(const void *a, const void *b) -{ - const struct map *map_a = *(const struct map **)a; - const struct map *map_b = *(const struct map **)b; - const struct dso *dso_a = map__dso(map_a); - const struct dso *dso_b = map__dso(map_b); - int ret = strcmp(dso_a->short_name, dso_b->short_name); - - if (ret == 0 && map_a != map_b) { - /* - * Ensure distinct but name equal maps have an order in part to - * aid reference counting. 
- */ - ret = (int)map__start(map_a) - (int)map__start(map_b); - if (ret == 0) - ret = (int)((intptr_t)map_a - (intptr_t)map_b); - } - - return ret; -} - -static int map__strcmp_name(const void *name, const void *b) -{ - const struct dso *dso = map__dso(*(const struct map **)b); - - return strcmp(name, dso->short_name); -} - -void __maps__sort_by_name(struct maps *maps) -{ - qsort(maps__maps_by_name(maps), maps__nr_maps(maps), sizeof(struct map *), map__strcmp); -} - -static int map__groups__sort_by_name_from_rbtree(struct maps *maps) -{ - struct map_rb_node *rb_node; - struct map **maps_by_name = realloc(maps__maps_by_name(maps), - maps__nr_maps(maps) * sizeof(struct map *)); - int i = 0; - - if (maps_by_name == NULL) - return -1; - - up_read(maps__lock(maps)); - down_write(maps__lock(maps)); - - RC_CHK_ACCESS(maps)->maps_by_name = maps_by_name; - RC_CHK_ACCESS(maps)->nr_maps_allocated = maps__nr_maps(maps); - - maps__for_each_entry(maps, rb_node) - maps_by_name[i++] = map__get(rb_node->map); - - __maps__sort_by_name(maps); - - up_write(maps__lock(maps)); - down_read(maps__lock(maps)); - - return 0; -} - -static struct map *__maps__find_by_name(struct maps *maps, const char *name) -{ - struct map **mapp; - - if (maps__maps_by_name(maps) == NULL && - map__groups__sort_by_name_from_rbtree(maps)) - return NULL; - - mapp = bsearch(name, maps__maps_by_name(maps), maps__nr_maps(maps), - sizeof(*mapp), map__strcmp_name); - if (mapp) - return *mapp; - return NULL; -} - -struct map *maps__find_by_name(struct maps *maps, const char *name) -{ - struct map_rb_node *rb_node; - struct map *map; - - down_read(maps__lock(maps)); - - - if (RC_CHK_ACCESS(maps)->last_search_by_name) { - const struct dso *dso = map__dso(RC_CHK_ACCESS(maps)->last_search_by_name); - - if (strcmp(dso->short_name, name) == 0) { - map = RC_CHK_ACCESS(maps)->last_search_by_name; - goto out_unlock; - } - } - /* - * If we have maps->maps_by_name, then the name isn't in the rbtree, - * as maps->maps_by_name mirrors the rbtree when lookups by name are - * made. - */ - map = __maps__find_by_name(maps, name); - if (map || maps__maps_by_name(maps) != NULL) - goto out_unlock; - - /* Fallback to traversing the rbtree... */ - maps__for_each_entry(maps, rb_node) { - struct dso *dso; - - map = rb_node->map; - dso = map__dso(map); - if (strcmp(dso->short_name, name) == 0) { - RC_CHK_ACCESS(maps)->last_search_by_name = map; - goto out_unlock; - } - } - map = NULL; - -out_unlock: - up_read(maps__lock(maps)); - return map; -} - +/* + * Always takes ownership of vmlinux when vmlinux_allocated == true, even if + * it returns an error. + */ int dso__load_vmlinux(struct dso *dso, struct map *map, const char *vmlinux, bool vmlinux_allocated) { @@ -2196,23 +1897,31 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, else symbol__join_symfs(symfs_vmlinux, vmlinux); - if (dso->kernel == DSO_SPACE__KERNEL_GUEST) + if (dso__kernel(dso) == DSO_SPACE__KERNEL_GUEST) symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX; else symtab_type = DSO_BINARY_TYPE__VMLINUX; - if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type)) + if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type)) { + if (vmlinux_allocated) + free((char *) vmlinux); return -1; + } + + /* + * dso__load_sym() may copy 'dso' which will result in the copies having + * an incorrect long name unless we set it here first. 
+ */ + dso__set_long_name(dso, vmlinux, vmlinux_allocated); + if (dso__kernel(dso) == DSO_SPACE__KERNEL_GUEST) + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_VMLINUX); + else + dso__set_binary_type(dso, DSO_BINARY_TYPE__VMLINUX); err = dso__load_sym(dso, map, &ss, &ss, 0); symsrc__destroy(&ss); if (err > 0) { - if (dso->kernel == DSO_SPACE__KERNEL_GUEST) - dso->binary_type = DSO_BINARY_TYPE__GUEST_VMLINUX; - else - dso->binary_type = DSO_BINARY_TYPE__VMLINUX; - dso__set_long_name(dso, vmlinux, vmlinux_allocated); dso__set_loaded(dso); pr_debug("Using %s for symbols\n", symfs_vmlinux); } @@ -2240,7 +1949,6 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map) err = dso__load_vmlinux(dso, map, filename, true); if (err > 0) goto out; - free(filename); } out: return err; @@ -2295,12 +2003,13 @@ static bool filename__readable(const char *file) static char *dso__find_kallsyms(struct dso *dso, struct map *map) { - struct build_id bid; + struct build_id bid = { .size = 0, }; char sbuild_id[SBUILD_ID_SIZE]; bool is_host = false; char path[PATH_MAX]; + struct maps *kmaps = map__kmaps(map); - if (!dso->has_build_id) { + if (!dso__has_build_id(dso)) { /* * Last resort, if we don't have a build-id and couldn't find * any vmlinux file, try the running kernel kallsyms table. @@ -2325,7 +2034,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) goto proc_kallsyms; } - build_id__sprintf(&dso->bid, sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); /* Find kallsyms in build-id cache with kcore */ scnprintf(path, sizeof(path), "%s/%s/%s", @@ -2335,8 +2044,13 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) return strdup(path); /* Use current /proc/kallsyms if possible */ - if (is_host) { proc_kallsyms: + if (kmaps) { + struct machine *machine = maps__machine(kmaps); + + scnprintf(path, sizeof(path), "%s/proc/kallsyms", machine->root_dir); + return strdup(path); + } else if (is_host) { return strdup("/proc/kallsyms"); } @@ -2392,7 +2106,6 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) err = dso__load_vmlinux(dso, map, filename, true); if (err > 0) return err; - free(filename); } if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) { @@ -2418,7 +2131,7 @@ do_kallsyms: free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { - dso->binary_type = DSO_BINARY_TYPE__KALLSYMS; + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); map__fixup_start(map); map__fixup_end(map); @@ -2461,7 +2174,7 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { - dso->binary_type = DSO_BINARY_TYPE__GUEST_KALLSYMS; + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); dso__set_long_name(dso, machine->mmap_name, false); map__fixup_start(map); map__fixup_end(map); @@ -2621,14 +2334,14 @@ static bool symbol__read_kptr_restrict(void) { bool value = false; FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); + bool used_root; + bool cap_syslog = perf_cap__capable(CAP_SYSLOG, &used_root); if (fp != NULL) { char line[8]; if (fgets(line, sizeof(line), fp) != NULL) - value = perf_cap__capable(CAP_SYSLOG) ? - (atoi(line) >= 2) : - (atoi(line) != 0); + value = cap_syslog ? 
(atoi(line) >= 2) : (atoi(line) != 0); fclose(fp); } @@ -2636,7 +2349,7 @@ static bool symbol__read_kptr_restrict(void) /* Per kernel/kallsyms.c: * we also restrict when perf_event_paranoid > 1 w/o CAP_SYSLOG */ - if (perf_event_paranoid() > 1 && !perf_cap__capable(CAP_SYSLOG)) + if (perf_event_paranoid() > 1 && !cap_syslog) value = true; return value; @@ -2657,6 +2370,36 @@ int symbol__annotation_init(void) return 0; } +static int setup_parallelism_bitmap(void) +{ + struct perf_cpu_map *map; + struct perf_cpu cpu; + int i, err = -1; + + if (symbol_conf.parallelism_list_str == NULL) + return 0; + + map = perf_cpu_map__new(symbol_conf.parallelism_list_str); + if (map == NULL) { + pr_err("failed to parse parallelism filter list\n"); + return -1; + } + + bitmap_fill(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1); + perf_cpu_map__for_each_cpu(cpu, i, map) { + if (cpu.cpu <= 0 || cpu.cpu > MAX_NR_CPUS) { + pr_err("Requested parallelism level %d is invalid.\n", cpu.cpu); + goto out_delete_map; + } + __clear_bit(cpu.cpu, symbol_conf.parallelism_filter); + } + + err = 0; +out_delete_map: + perf_cpu_map__put(map); + return err; +} + int symbol__init(struct perf_env *env) { const char *symfs; @@ -2676,6 +2419,9 @@ int symbol__init(struct perf_env *env) return -1; } + if (setup_parallelism_bitmap()) + return -1; + if (setup_list(&symbol_conf.dso_list, symbol_conf.dso_list_str, "dso") < 0) return -1; @@ -2775,28 +2521,6 @@ int symbol__config_symfs(const struct option *opt __maybe_unused, return 0; } -struct mem_info *mem_info__get(struct mem_info *mi) -{ - if (mi) - refcount_inc(&mi->refcnt); - return mi; -} - -void mem_info__put(struct mem_info *mi) -{ - if (mi && refcount_dec_and_test(&mi->refcnt)) - free(mi); -} - -struct mem_info *mem_info__new(void) -{ - struct mem_info *mi = zalloc(sizeof(*mi)); - - if (mi) - refcount_set(&mi->refcnt, 1); - return mi; -} - /* * Checks that user supplied symbol kernel files are accessible because * the default mechanism for accessing elf files fails silently. i.e. if @@ -2818,3 +2542,79 @@ int symbol__validate_sym_arguments(void) } return 0; } + +static bool want_demangle(bool is_kernel_sym) +{ + return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; +} + +/* + * Demangle C++ function signature, typically replaced by demangle-cxx.cpp + * version. + */ +#ifndef HAVE_CXA_DEMANGLE_SUPPORT +char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused, + bool modifiers __maybe_unused) +{ +#ifdef HAVE_LIBBFD_SUPPORT + int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); + + return bfd_demangle(NULL, str, flags); +#elif defined(HAVE_CPLUS_DEMANGLE_SUPPORT) + int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); + + return cplus_demangle(str, flags); +#else + return NULL; +#endif +} +#endif /* !HAVE_CXA_DEMANGLE_SUPPORT */ + +char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name) +{ + struct demangle rust_demangle = { + .style = DemangleStyleUnknown, + }; + char *demangled = NULL; + + /* + * We need to figure out if the object was created from C++ sources + * DWARF DW_compile_unit has this, but we don't always have access + * to it... + */ + if (!want_demangle((dso && dso__kernel(dso)) || kmodule)) + return demangled; + + rust_demangle_demangle(elf_name, &rust_demangle); + if (rust_demangle_is_known(&rust_demangle)) { + /* A rust mangled name. 
*/ + if (rust_demangle.mangled_len == 0) + return demangled; + + for (size_t buf_len = roundup_pow_of_two(rust_demangle.mangled_len * 2); + buf_len < 1024 * 1024; buf_len += 32) { + char *tmp = realloc(demangled, buf_len); + + if (!tmp) { + /* Failure to grow output buffer, return what is there. */ + return demangled; + } + demangled = tmp; + if (rust_demangle_display_demangle(&rust_demangle, demangled, buf_len, + /*alternate=*/true) == OverflowOk) + return demangled; + } + /* Buffer exceeded sensible bounds, return what is there. */ + return demangled; + } + + demangled = cxx_demangle_sym(elf_name, verbose > 0, verbose > 0); + if (demangled) + return demangled; + + demangled = ocaml_demangle_sym(elf_name); + if (demangled) + return demangled; + + return java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); +} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index af87c46b3f89..3fb5d146d9b1 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -189,7 +189,6 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym, void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym); void symbols__fixup_duplicate(struct rb_root_cached *symbols); void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms); -void maps__fixup_end(struct maps *maps); typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data, @@ -269,18 +268,6 @@ enum { SDT_NOTE_IDX_REFCTR, }; -struct mem_info *mem_info__new(void); -struct mem_info *mem_info__get(struct mem_info *mi); -void mem_info__put(struct mem_info *mi); - -static inline void __mem_info__zput(struct mem_info **mi) -{ - mem_info__put(*mi); - *mi = NULL; -} - -#define mem_info__zput(mi) __mem_info__zput(&mi) - int symbol__validate_sym_arguments(void); #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index 0b589570d1d0..7a80d2c14d9b 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -3,6 +3,8 @@ #define __PERF_SYMBOL_CONF 1 #include <stdbool.h> +#include <linux/bitmap.h> +#include "perf.h" struct strlist; struct intlist; @@ -41,8 +43,15 @@ struct symbol_conf { report_individual_block, inline_name, disable_add2line_warn, - buildid_mmap2, - guest_code; + no_buildid_mmap2, + guest_code, + lazy_load_kernel_maps, + keep_exited_threads, + annotate_data_member, + annotate_data_sample, + skip_empty, + enable_latency, + prefer_latency; const char *vmlinux_name, *kallsyms_name, *source_prefix, @@ -57,9 +66,10 @@ struct symbol_conf { *pid_list_str, *tid_list_str, *sym_list_str, + *parallelism_list_str, *col_width_list_str, *bt_stop_list_str; - char *addr2line_path; + const char *addr2line_path; unsigned long time_quantum; struct strlist *dso_list, *comm_list, @@ -77,6 +87,7 @@ struct symbol_conf { int pad_output_len_dso; int group_sort_idx; int addr_range; + DECLARE_BITMAP(parallelism_filter, MAX_NR_CPUS + 1); }; extern struct symbol_conf symbol_conf; diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c index 088f4abf230f..53e1af4ed9ac 100644 --- a/tools/perf/util/symbol_fprintf.c +++ b/tools/perf/util/symbol_fprintf.c @@ -64,8 +64,8 @@ size_t dso__fprintf_symbols_by_name(struct dso *dso, { size_t ret = 0; - for (size_t i = 0; i < dso->symbol_names_len; i++) { - struct symbol *pos = dso->symbol_names[i]; + for (size_t i = 0; i < dso__symbol_names_len(dso); i++) { + struct symbol *pos = dso__symbol_names(dso)[i]; ret += 
fprintf(fp, "%s\n", pos->name); } diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 45714a2785fd..2ba9fa25e00a 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-only +// SPDX-License-Identifier: GPL-2.0-only #include "util/cgroup.h" #include "util/data.h" @@ -38,6 +38,7 @@ #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ #include <api/fs/fs.h> #include <api/io.h> +#include <api/io_dir.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> @@ -47,7 +48,7 @@ unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT; -int perf_tool__process_synth_event(struct perf_tool *tool, +int perf_tool__process_synth_event(const struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process) @@ -187,7 +188,7 @@ static int perf_event__prepare_comm(union perf_event *event, pid_t pid, pid_t ti return 0; } -pid_t perf_event__synthesize_comm(struct perf_tool *tool, +pid_t perf_event__synthesize_comm(const struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine) @@ -218,7 +219,7 @@ static void perf_event__get_ns_link_info(pid_t pid, const char *ns, } } -int perf_event__synthesize_namespaces(struct perf_tool *tool, +int perf_event__synthesize_namespaces(const struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, @@ -257,7 +258,7 @@ int perf_event__synthesize_namespaces(struct perf_tool *tool, return 0; } -static int perf_event__synthesize_fork(struct perf_tool *tool, +static int perf_event__synthesize_fork(const struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, pid_t ppid, perf_event__handler_t process, @@ -367,11 +368,11 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, struct machine *machine, bool is_kernel) { - struct build_id bid; + struct build_id bid = { .size = 0, }; struct nsinfo *nsi; struct nscookie nc; struct dso *dso = NULL; - struct dso_id id; + struct dso_id dso_id = dso_id_empty; int rc; if (is_kernel) { @@ -379,14 +380,20 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, goto out; } - id.maj = event->maj; - id.min = event->min; - id.ino = event->ino; - id.ino_generation = event->ino_generation; + if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { + build_id__init(&dso_id.build_id, event->build_id, event->build_id_size); + } else { + dso_id.maj = event->maj; + dso_id.min = event->min; + dso_id.ino = event->ino; + dso_id.ino_generation = event->ino_generation; + dso_id.mmap2_valid = true; + dso_id.mmap2_ino_generation_valid = true; + } - dso = dsos__findnew_id(&machine->dsos, event->filename, &id); - if (dso && dso->has_build_id) { - bid = dso->bid; + dso = dsos__findnew_id(&machine->dsos, event->filename, &dso_id); + if (dso && dso__has_build_id(dso)) { + bid = *dso__bid(dso); rc = 0; goto out; } @@ -407,7 +414,7 @@ out: event->__reserved_1 = 0; event->__reserved_2 = 0; - if (dso && !dso->has_build_id) + if (dso && !dso__has_build_id(dso)) dso__set_build_id(dso, &bid); } else { if (event->filename[0] == '/') { @@ -418,7 +425,7 @@ out: dso__put(dso); } -int perf_event__synthesize_mmap_events(struct perf_tool *tool, +int perf_event__synthesize_mmap_events(const struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, @@ 
-525,7 +532,7 @@ out: event->mmap2.pid = tgid; event->mmap2.tid = pid; - if (symbol_conf.buildid_mmap2) + if (!symbol_conf.no_buildid_mmap2) perf_record_mmap2__read_build_id(&event->mmap2, machine, false); if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { @@ -542,7 +549,7 @@ out: } #ifdef HAVE_FILE_HANDLE -static int perf_event__synthesize_cgroup(struct perf_tool *tool, +static int perf_event__synthesize_cgroup(const struct perf_tool *tool, union perf_event *event, char *path, size_t mount_len, perf_event__handler_t process, @@ -582,7 +589,7 @@ static int perf_event__synthesize_cgroup(struct perf_tool *tool, return 0; } -static int perf_event__walk_cgroup_tree(struct perf_tool *tool, +static int perf_event__walk_cgroup_tree(const struct perf_tool *tool, union perf_event *event, char *path, size_t mount_len, perf_event__handler_t process, @@ -630,7 +637,7 @@ static int perf_event__walk_cgroup_tree(struct perf_tool *tool, return ret; } -int perf_event__synthesize_cgroups(struct perf_tool *tool, +int perf_event__synthesize_cgroups(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { @@ -657,7 +664,7 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool, return 0; } #else -int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused, +int perf_event__synthesize_cgroups(const struct perf_tool *tool __maybe_unused, perf_event__handler_t process __maybe_unused, struct machine *machine __maybe_unused) { @@ -665,18 +672,74 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused, } #endif -int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, +struct perf_event__synthesize_modules_maps_cb_args { + const struct perf_tool *tool; + perf_event__handler_t process; + struct machine *machine; + union perf_event *event; +}; + +static int perf_event__synthesize_modules_maps_cb(struct map *map, void *data) +{ + struct perf_event__synthesize_modules_maps_cb_args *args = data; + union perf_event *event = args->event; + struct dso *dso; + size_t size; + + if (!__map__is_kmodule(map)) + return 0; + + dso = map__dso(map); + if (!symbol_conf.no_buildid_mmap2) { + size = PERF_ALIGN(dso__long_name_len(dso) + 1, sizeof(u64)); + event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size)); + memset(event->mmap2.filename + size, 0, args->machine->id_hdr_size); + event->mmap2.header.size += args->machine->id_hdr_size; + event->mmap2.start = map__start(map); + event->mmap2.len = map__size(map); + event->mmap2.pid = args->machine->pid; + + memcpy(event->mmap2.filename, dso__long_name(dso), dso__long_name_len(dso) + 1); + + perf_record_mmap2__read_build_id(&event->mmap2, args->machine, false); + } else { + size = PERF_ALIGN(dso__long_name_len(dso) + 1, sizeof(u64)); + event->mmap.header.type = PERF_RECORD_MMAP; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size)); + memset(event->mmap.filename + size, 0, args->machine->id_hdr_size); + event->mmap.header.size += args->machine->id_hdr_size; + event->mmap.start = map__start(map); + event->mmap.len = map__size(map); + event->mmap.pid = args->machine->pid; + + memcpy(event->mmap.filename, dso__long_name(dso), dso__long_name_len(dso) + 1); + } + + if (perf_tool__process_synth_event(args->tool, event, args->machine, args->process) != 0) + return -1; + + return 0; +} + +int perf_event__synthesize_modules(const struct perf_tool *tool, 
perf_event__handler_t process, struct machine *machine) { - int rc = 0; - struct map_rb_node *pos; + int rc; struct maps *maps = machine__kernel_maps(machine); - union perf_event *event; - size_t size = symbol_conf.buildid_mmap2 ? - sizeof(event->mmap2) : sizeof(event->mmap); + struct perf_event__synthesize_modules_maps_cb_args args = { + .tool = tool, + .process = process, + .machine = machine, + }; + size_t size = symbol_conf.no_buildid_mmap2 + ? sizeof(args.event->mmap) + : sizeof(args.event->mmap2); - event = zalloc(size + machine->id_hdr_size); - if (event == NULL) { + args.event = zalloc(size + machine->id_hdr_size); + if (args.event == NULL) { pr_debug("Not enough memory synthesizing mmap event " "for kernel modules\n"); return -1; @@ -687,53 +750,13 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t * __perf_event_mmap */ if (machine__is_host(machine)) - event->header.misc = PERF_RECORD_MISC_KERNEL; + args.event->header.misc = PERF_RECORD_MISC_KERNEL; else - event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - - maps__for_each_entry(maps, pos) { - struct map *map = pos->map; - struct dso *dso; - - if (!__map__is_kmodule(map)) - continue; - - dso = map__dso(map); - if (symbol_conf.buildid_mmap2) { - size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); - event->mmap2.header.type = PERF_RECORD_MMAP2; - event->mmap2.header.size = (sizeof(event->mmap2) - - (sizeof(event->mmap2.filename) - size)); - memset(event->mmap2.filename + size, 0, machine->id_hdr_size); - event->mmap2.header.size += machine->id_hdr_size; - event->mmap2.start = map__start(map); - event->mmap2.len = map__size(map); - event->mmap2.pid = machine->pid; + args.event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - memcpy(event->mmap2.filename, dso->long_name, dso->long_name_len + 1); + rc = maps__for_each_map(maps, perf_event__synthesize_modules_maps_cb, &args); - perf_record_mmap2__read_build_id(&event->mmap2, machine, false); - } else { - size = PERF_ALIGN(dso->long_name_len + 1, sizeof(u64)); - event->mmap.header.type = PERF_RECORD_MMAP; - event->mmap.header.size = (sizeof(event->mmap) - - (sizeof(event->mmap.filename) - size)); - memset(event->mmap.filename + size, 0, machine->id_hdr_size); - event->mmap.header.size += machine->id_hdr_size; - event->mmap.start = map__start(map); - event->mmap.len = map__size(map); - event->mmap.pid = machine->pid; - - memcpy(event->mmap.filename, dso->long_name, dso->long_name_len + 1); - } - - if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { - rc = -1; - break; - } - } - - free(event); + free(args.event); return rc; } @@ -747,14 +770,14 @@ static int __event__synthesize_thread(union perf_event *comm_event, union perf_event *fork_event, union perf_event *namespaces_event, pid_t pid, int full, perf_event__handler_t process, - struct perf_tool *tool, struct machine *machine, + const struct perf_tool *tool, struct machine *machine, bool needs_mmap, bool mmap_data) { char filename[PATH_MAX]; - struct dirent **dirent; + struct io_dir iod; + struct io_dirent64 *dent; pid_t tgid, ppid; int rc = 0; - int i, n; /* special case: only send one comm event using passed in pid */ if (!full) { @@ -786,16 +809,19 @@ static int __event__synthesize_thread(union perf_event *comm_event, snprintf(filename, sizeof(filename), "%s/proc/%d/task", machine->root_dir, pid); - n = scandir(filename, &dirent, filter_task, NULL); - if (n < 0) - return n; + io_dir__init(&iod, open(filename, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (iod.dirfd < 0) + 
return -1; - for (i = 0; i < n; i++) { + while ((dent = io_dir__readdir(&iod)) != NULL) { char *end; pid_t _pid; bool kernel_thread = false; - _pid = strtol(dirent[i]->d_name, &end, 10); + if (!isdigit(dent->d_name[0])) + continue; + + _pid = strtol(dent->d_name, &end, 10); if (*end) continue; @@ -829,14 +855,12 @@ static int __event__synthesize_thread(union perf_event *comm_event, } } - for (i = 0; i < n; i++) - zfree(&dirent[i]); - free(dirent); + close(iod.dirfd); return rc; } -int perf_event__synthesize_thread_map(struct perf_tool *tool, +int perf_event__synthesize_thread_map(const struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, @@ -913,7 +937,7 @@ out: return err; } -static int __perf_event__synthesize_threads(struct perf_tool *tool, +static int __perf_event__synthesize_threads(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, @@ -977,7 +1001,7 @@ out: } struct synthesize_threads_arg { - struct perf_tool *tool; + const struct perf_tool *tool; perf_event__handler_t process; struct machine *machine; bool needs_mmap; @@ -999,7 +1023,7 @@ static void *synthesize_threads_worker(void *arg) return NULL; } -int perf_event__synthesize_threads(struct perf_tool *tool, +int perf_event__synthesize_threads(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, @@ -1039,11 +1063,11 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (thread_nr > n) thread_nr = n; - synthesize_threads = calloc(sizeof(pthread_t), thread_nr); + synthesize_threads = calloc(thread_nr, sizeof(pthread_t)); if (synthesize_threads == NULL) goto free_dirent; - args = calloc(sizeof(*args), thread_nr); + args = calloc(thread_nr, sizeof(*args)); if (args == NULL) goto free_threads; @@ -1088,20 +1112,20 @@ free_dirent: return err; } -int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused, +int __weak perf_event__synthesize_extra_kmaps(const struct perf_tool *tool __maybe_unused, perf_event__handler_t process __maybe_unused, struct machine *machine __maybe_unused) { return 0; } -static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, +static int __perf_event__synthesize_kernel_mmap(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { union perf_event *event; - size_t size = symbol_conf.buildid_mmap2 ? - sizeof(event->mmap2) : sizeof(event->mmap); + size_t size = symbol_conf.no_buildid_mmap2 ? 
+ sizeof(event->mmap) : sizeof(event->mmap2); struct map *map = machine__kernel_map(machine); struct kmap *kmap; int err; @@ -1135,7 +1159,7 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; } - if (symbol_conf.buildid_mmap2) { + if (!symbol_conf.no_buildid_mmap2) { size = snprintf(event->mmap2.filename, sizeof(event->mmap2.filename), "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; size = PERF_ALIGN(size, sizeof(u64)); @@ -1167,7 +1191,7 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, return err; } -int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, +int perf_event__synthesize_kernel_mmap(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { @@ -1180,7 +1204,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, return perf_event__synthesize_extra_kmaps(tool, process, machine); } -int perf_event__synthesize_thread_map2(struct perf_tool *tool, +int perf_event__synthesize_thread_map2(const struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine) @@ -1330,7 +1354,7 @@ static struct perf_record_cpu_map *cpu_map_event__new(const struct perf_cpu_map } -int perf_event__synthesize_cpu_map(struct perf_tool *tool, +int perf_event__synthesize_cpu_map(const struct perf_tool *tool, const struct perf_cpu_map *map, perf_event__handler_t process, struct machine *machine) @@ -1348,7 +1372,7 @@ int perf_event__synthesize_cpu_map(struct perf_tool *tool, return err; } -int perf_event__synthesize_stat_config(struct perf_tool *tool, +int perf_event__synthesize_stat_config(const struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine) @@ -1387,7 +1411,7 @@ int perf_event__synthesize_stat_config(struct perf_tool *tool, return err; } -int perf_event__synthesize_stat(struct perf_tool *tool, +int perf_event__synthesize_stat(const struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, @@ -1409,7 +1433,7 @@ int perf_event__synthesize_stat(struct perf_tool *tool, return process(tool, (union perf_event *) &event, NULL, machine); } -int perf_event__synthesize_stat_round(struct perf_tool *tool, +int perf_event__synthesize_stat_round(const struct perf_tool *tool, u64 evtime, u64 type, perf_event__handler_t process, struct machine *machine) @@ -1492,9 +1516,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, } if (type & PERF_SAMPLE_REGS_USER) { - if (sample->user_regs.abi) { + if (sample->user_regs && sample->user_regs->abi) { result += sizeof(u64); - sz = hweight64(sample->user_regs.mask) * sizeof(u64); + sz = hweight64(sample->user_regs->mask) * sizeof(u64); result += sz; } else { result += sizeof(u64); @@ -1520,9 +1544,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, result += sizeof(u64); if (type & PERF_SAMPLE_REGS_INTR) { - if (sample->intr_regs.abi) { + if (sample->intr_regs && sample->intr_regs->abi) { result += sizeof(u64); - sz = hweight64(sample->intr_regs.mask) * sizeof(u64); + sz = hweight64(sample->intr_regs->mask) * sizeof(u64); result += sz; } else { result += sizeof(u64); @@ -1549,10 +1573,16 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, return result; } -void __weak arch_perf_synthesize_sample_weight(const struct perf_sample *data, +static 
void perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type __maybe_unused) { *array = data->weight; + + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { + *array &= 0xffffffff; + *array |= ((u64)data->ins_lat << 32); + *array |= ((u64)data->weight3 << 48); + } } static __u64 *copy_read_group_values(__u64 *array, __u64 read_format, @@ -1670,12 +1700,16 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo } if (type & PERF_SAMPLE_RAW) { - u.val32[0] = sample->raw_size; - *array = u.val64; - array = (void *)array + sizeof(u32); + u32 *array32 = (void *)array; + + *array32 = sample->raw_size; + array32++; - memcpy(array, sample->raw_data, sample->raw_size); - array = (void *)array + sample->raw_size; + memcpy(array32, sample->raw_data, sample->raw_size); + array = (void *)(array32 + (sample->raw_size / sizeof(u32))); + + /* make sure the array is 64-bit aligned */ + BUG_ON(((long)array) % sizeof(u64)); } if (type & PERF_SAMPLE_BRANCH_STACK) { @@ -1687,10 +1721,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo } if (type & PERF_SAMPLE_REGS_USER) { - if (sample->user_regs.abi) { - *array++ = sample->user_regs.abi; - sz = hweight64(sample->user_regs.mask) * sizeof(u64); - memcpy(array, sample->user_regs.regs, sz); + if (sample->user_regs && sample->user_regs->abi) { + *array++ = sample->user_regs->abi; + sz = hweight64(sample->user_regs->mask) * sizeof(u64); + memcpy(array, sample->user_regs->regs, sz); array = (void *)array + sz; } else { *array++ = 0; @@ -1708,7 +1742,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo } if (type & PERF_SAMPLE_WEIGHT_TYPE) { - arch_perf_synthesize_sample_weight(sample, array, type); + perf_synthesize_sample_weight(sample, array, type); array++; } @@ -1723,10 +1757,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo } if (type & PERF_SAMPLE_REGS_INTR) { - if (sample->intr_regs.abi) { - *array++ = sample->intr_regs.abi; - sz = hweight64(sample->intr_regs.mask) * sizeof(u64); - memcpy(array, sample->intr_regs.regs, sz); + if (sample->intr_regs && sample->intr_regs->abi) { + *array++ = sample->intr_regs->abi; + sz = hweight64(sample->intr_regs->mask) * sizeof(u64); + memcpy(array, sample->intr_regs->regs, sz); array = (void *)array + sz; } else { *array++ = 0; @@ -1810,7 +1844,7 @@ int perf_event__synthesize_id_sample(__u64 *array, u64 type, const struct perf_s return (void *)array - (void *)start; } -int __perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, +int __perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine, size_t from) { union perf_event *ev; @@ -1902,13 +1936,13 @@ out_err: return err; } -int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, +int perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine) { return __perf_event__synthesize_id_index(tool, process, evlist, machine, 0); } -int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, +int __machine__synthesize_threads(struct machine *machine, const struct perf_tool *tool, struct target *target, struct perf_thread_map *threads, perf_event__handler_t process, bool needs_mmap, bool data_mmap, unsigned int nr_threads_synthesize) @@ -1969,7 +2003,7 @@ static struct 
perf_record_event_update *event_update_event__new(size_t size, u64 return ev; } -int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel, +int perf_event__synthesize_event_update_unit(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process) { size_t size = strlen(evsel->unit); @@ -1986,7 +2020,7 @@ int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evse return err; } -int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel, +int perf_event__synthesize_event_update_scale(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process) { struct perf_record_event_update *ev; @@ -2003,7 +2037,7 @@ int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evs return err; } -int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel, +int perf_event__synthesize_event_update_name(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process) { struct perf_record_event_update *ev; @@ -2020,10 +2054,10 @@ int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evse return err; } -int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, +int perf_event__synthesize_event_update_cpus(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process) { - struct synthesize_cpu_map_data syn_data = { .map = evsel->core.own_cpus }; + struct synthesize_cpu_map_data syn_data = { .map = evsel->core.pmu_cpus }; struct perf_record_event_update *ev; int err; @@ -2043,7 +2077,7 @@ int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evse return err; } -int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, +int perf_event__synthesize_attrs(const struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process) { struct evsel *evsel; @@ -2071,7 +2105,7 @@ static bool has_scale(struct evsel *evsel) return evsel->scale != 1; } -int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list, +int perf_event__synthesize_extra_attr(const struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe) { struct evsel *evsel; @@ -2104,7 +2138,7 @@ int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evs } } - if (evsel->core.own_cpus) { + if (evsel->core.pmu_cpus) { err = perf_event__synthesize_event_update_cpus(tool, evsel, process); if (err < 0) { pr_err("Couldn't synthesize evsel cpus.\n"); @@ -2127,7 +2161,7 @@ int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evs return 0; } -int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, +int perf_event__synthesize_attr(const struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process) { union perf_event *ev; @@ -2145,7 +2179,7 @@ int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr * return -ENOMEM; ev->attr.attr = *attr; - memcpy(ev->attr.id, id, ids * sizeof(u64)); + memcpy(perf_record_header_attr_id(ev), id, ids * sizeof(u64)); ev->attr.header.type = PERF_RECORD_HEADER_ATTR; ev->attr.header.size = (u16)size; @@ -2161,7 +2195,7 @@ int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr * } #ifdef HAVE_LIBTRACEEVENT -int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, +int 
perf_event__synthesize_tracing_data(const struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process) { union perf_event ev; @@ -2184,7 +2218,7 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct e if (!tdata) return -1; - memset(&ev, 0, sizeof(ev)); + memset(&ev, 0, sizeof(ev.tracing_data)); ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA; size = tdata->size; @@ -2209,31 +2243,112 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct e } #endif -int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, - perf_event__handler_t process, struct machine *machine) +int perf_event__synthesize_build_id(const struct perf_tool *tool, + struct perf_sample *sample, + struct machine *machine, + perf_event__handler_t process, + const struct evsel *evsel, + __u16 misc, + const struct build_id *bid, + const char *filename) { union perf_event ev; size_t len; - if (!pos->hit) - return 0; + len = sizeof(ev.build_id) + strlen(filename) + 1; + len = PERF_ALIGN(len, sizeof(u64)); - memset(&ev, 0, sizeof(ev)); + memset(&ev, 0, len); - len = pos->long_name_len + 1; - len = PERF_ALIGN(len, NAME_ALIGN); - ev.build_id.size = min(pos->bid.size, sizeof(pos->bid.data)); - memcpy(&ev.build_id.build_id, pos->bid.data, ev.build_id.size); + ev.build_id.size = bid->size; + if (ev.build_id.size > sizeof(ev.build_id.build_id)) + ev.build_id.size = sizeof(ev.build_id.build_id); + memcpy(ev.build_id.build_id, bid->data, ev.build_id.size); ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; ev.build_id.header.misc = misc | PERF_RECORD_MISC_BUILD_ID_SIZE; ev.build_id.pid = machine->pid; - ev.build_id.header.size = sizeof(ev.build_id) + len; - memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len); + ev.build_id.header.size = len; + strcpy(ev.build_id.filename, filename); + + if (evsel) { + void *array = &ev; + int ret; + + array += ev.header.size; + ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample); + if (ret < 0) + return ret; + + if (ret & 7) { + pr_err("Bad id sample size %d\n", ret); + return -EINVAL; + } + + ev.header.size += ret; + } + + return process(tool, &ev, sample, machine); +} + +int perf_event__synthesize_mmap2_build_id(const struct perf_tool *tool, + struct perf_sample *sample, + struct machine *machine, + perf_event__handler_t process, + const struct evsel *evsel, + __u16 misc, + __u32 pid, __u32 tid, + __u64 start, __u64 len, __u64 pgoff, + const struct build_id *bid, + __u32 prot, __u32 flags, + const char *filename) +{ + union perf_event ev; + size_t ev_len; + void *array; + int ret; + + ev_len = sizeof(ev.mmap2) - sizeof(ev.mmap2.filename) + strlen(filename) + 1; + ev_len = PERF_ALIGN(ev_len, sizeof(u64)); + + memset(&ev, 0, ev_len); + + ev.mmap2.header.type = PERF_RECORD_MMAP2; + ev.mmap2.header.misc = misc | PERF_RECORD_MISC_MMAP_BUILD_ID; + ev.mmap2.header.size = ev_len; + + ev.mmap2.pid = pid; + ev.mmap2.tid = tid; + ev.mmap2.start = start; + ev.mmap2.len = len; + ev.mmap2.pgoff = pgoff; + + ev.mmap2.build_id_size = bid->size; + if (ev.mmap2.build_id_size > sizeof(ev.mmap2.build_id)) + ev.mmap2.build_id_size = sizeof(ev.mmap2.build_id); + memcpy(ev.mmap2.build_id, bid->data, ev.mmap2.build_id_size); + + ev.mmap2.prot = prot; + ev.mmap2.flags = flags; + + memcpy(ev.mmap2.filename, filename, min(strlen(filename), sizeof(ev.mmap2.filename))); + + array = &ev; + array += ev.header.size; + ret = perf_event__synthesize_id_sample(array, 
evsel->core.attr.sample_type, sample); + if (ret < 0) + return ret; + + if (ret & 7) { + pr_err("Bad id sample size %d\n", ret); + return -EINVAL; + } + + ev.header.size += ret; - return process(tool, &ev, NULL, machine); + return process(tool, &ev, sample, machine); } -int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, +int perf_event__synthesize_stat_events(struct perf_stat_config *config, const struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs) { int err; @@ -2270,7 +2385,7 @@ int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct p extern const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE]; -int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, +int perf_event__synthesize_features(const struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process) { struct perf_header *header = &session->header; @@ -2333,7 +2448,7 @@ int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session return ret; } -int perf_event__synthesize_for_pipe(struct perf_tool *tool, +int perf_event__synthesize_for_pipe(const struct perf_tool *tool, struct perf_session *session, struct perf_data *data, perf_event__handler_t process) diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index 53737d1619a4..f8588b6cf11a 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -9,6 +9,7 @@ #include <perf/cpumap.h> struct auxtrace_record; +struct build_id; struct dso; struct evlist; struct evsel; @@ -40,45 +41,65 @@ enum perf_record_synth { int parse_synth_opt(char *str); -typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event, +typedef int (*perf_event__handler_t)(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process); -int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); -int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_cpu_map(struct perf_tool *tool, const struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); -int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); -int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); -int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); -int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe); -int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process); -int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct 
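Both synthesizers above size the on-stack union perf_event to just the fixed part of the record plus the bytes of filename actually used, then round up to a u64 multiple before the optional id sample is appended. A standalone sketch of that length computation, with the alignment written out as the usual power-of-two round-up (a stand-in for the tools' PERF_ALIGN macro) and a cut-down stand-in for struct perf_record_mmap2:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Usual power-of-two round-up; stands in for PERF_ALIGN(x, sizeof(u64)). */
#define ALIGN_U64(x) (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))

/* Cut-down stand-in for struct perf_record_mmap2: fixed part plus filename. */
struct fake_mmap2 {
	uint64_t fixed_part[9];		/* header, pid/tid, start/len/pgoff, ... */
	char filename[4096];
};

int main(void)
{
	const char *filename = "/usr/lib/libc.so.6";
	/* Count only the fixed part and the string actually used, then align. */
	size_t ev_len = offsetof(struct fake_mmap2, filename) + strlen(filename) + 1;

	ev_len = ALIGN_U64(ev_len);
	printf("event length: %zu bytes, 8-byte aligned: %d\n",
	       ev_len, ev_len % 8 == 0);
	return 0;
}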
machine *machine); -int __perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine, size_t from); +int perf_event__synthesize_attrs(const struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process); +int perf_event__synthesize_attr(const struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); +int perf_event__synthesize_build_id(const struct perf_tool *tool, + struct perf_sample *sample, + struct machine *machine, + perf_event__handler_t process, + const struct evsel *evsel, + __u16 misc, + const struct build_id *bid, + const char *filename); +int perf_event__synthesize_mmap2_build_id(const struct perf_tool *tool, + struct perf_sample *sample, + struct machine *machine, + perf_event__handler_t process, + const struct evsel *evsel, + __u16 misc, + __u32 pid, __u32 tid, + __u64 start, __u64 len, __u64 pgoff, + const struct build_id *bid, + __u32 prot, __u32 flags, + const char *filename); +int perf_event__synthesize_cpu_map(const struct perf_tool *tool, const struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_event_update_cpus(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int perf_event__synthesize_event_update_name(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int perf_event__synthesize_event_update_scale(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int perf_event__synthesize_event_update_unit(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int perf_event__synthesize_extra_attr(const struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe); +int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_features(const struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process); +int perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine); +int __perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine, size_t from); int perf_event__synthesize_id_sample(__u64 *array, u64 type, const struct perf_sample *sample); -int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data); -int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_cgroups(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_kernel_mmap(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_mmap_events(const struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, 
bool mmap_data); +int perf_event__synthesize_modules(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_namespaces(const struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_cgroups(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample); -int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs); -int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_stat(struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data); -int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize); -int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process); -int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); -pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine); - -int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process); +int perf_event__synthesize_stat_config(const struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_stat_events(struct perf_stat_config *config, const struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs); +int perf_event__synthesize_stat_round(const struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_stat(const struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_thread_map2(const struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_thread_map(const struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data); +int perf_event__synthesize_threads(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize); +int 
perf_event__synthesize_tracing_data(const struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process); +int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +pid_t perf_event__synthesize_comm(const struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine); +void perf_event__synthesize_final_bpf_metadata(struct perf_session *session, + perf_event__handler_t process); + +int perf_tool__process_synth_event(const struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process); size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format); -int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, +int __machine__synthesize_threads(struct machine *machine, const struct perf_tool *tool, struct target *target, struct perf_thread_map *threads, perf_event__handler_t process, bool needs_mmap, bool data_mmap, unsigned int nr_threads_synthesize); @@ -86,24 +107,9 @@ int machine__synthesize_threads(struct machine *machine, struct target *target, struct perf_thread_map *threads, bool needs_mmap, bool data_mmap, unsigned int nr_threads_synthesize); -#ifdef HAVE_AUXTRACE_SUPPORT -int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct perf_tool *tool, +int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, const struct perf_tool *tool, struct perf_session *session, perf_event__handler_t process); -#else // HAVE_AUXTRACE_SUPPORT - -#include <errno.h> - -static inline int -perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused, - struct perf_tool *tool __maybe_unused, - struct perf_session *session __maybe_unused, - perf_event__handler_t process __maybe_unused) -{ - return -EINVAL; -} -#endif // HAVE_AUXTRACE_SUPPORT - #ifdef HAVE_LIBBPF_SUPPORT int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process, struct machine *machine, struct record_opts *opts); @@ -117,7 +123,7 @@ static inline int perf_event__synthesize_bpf_events(struct perf_session *session } #endif // HAVE_LIBBPF_SUPPORT -int perf_event__synthesize_for_pipe(struct perf_tool *tool, +int perf_event__synthesize_for_pipe(const struct perf_tool *tool, struct perf_session *session, struct perf_data *data, perf_event__handler_t process); diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 63be7b58761d..67a8ec10e9e4 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -7,178 +7,127 @@ #include "syscalltbl.h" #include <stdlib.h> +#include <asm/bitsperlong.h> #include <linux/compiler.h> +#include <linux/kernel.h> #include <linux/zalloc.h> -#ifdef HAVE_SYSCALL_TABLE_SUPPORT #include <string.h> #include "string2.h" -#if defined(__x86_64__) -#include <asm/syscalls_64.c> -const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID; -static const char *const *syscalltbl_native = syscalltbl_x86_64; -#elif defined(__s390x__) -#include <asm/syscalls_64.c> -const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID; -static const char *const *syscalltbl_native = syscalltbl_s390_64; -#elif defined(__powerpc64__) -#include <asm/syscalls_64.c> -const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID; -static const char *const *syscalltbl_native = syscalltbl_powerpc_64; -#elif defined(__powerpc__) -#include 
<asm/syscalls_32.c> -const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID; -static const char *const *syscalltbl_native = syscalltbl_powerpc_32; -#elif defined(__aarch64__) -#include <asm/syscalls.c> -const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID; -static const char *const *syscalltbl_native = syscalltbl_arm64; -#elif defined(__mips__) -#include <asm/syscalls_n64.c> -const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID; -static const char *const *syscalltbl_native = syscalltbl_mips_n64; -#elif defined(__loongarch__) -#include <asm/syscalls.c> -const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID; -static const char *const *syscalltbl_native = syscalltbl_loongarch; -#endif - -struct syscall { - int id; - const char *name; -}; +#include "trace/beauty/generated/syscalltbl.c" -static int syscallcmpname(const void *vkey, const void *ventry) +static const struct syscalltbl *find_table(int e_machine) { - const char *key = vkey; - const struct syscall *entry = ventry; + static const struct syscalltbl *last_table; + static int last_table_machine = EM_NONE; - return strcmp(key, entry->name); -} + /* Tables only exist for EM_SPARC. */ + if (e_machine == EM_SPARCV9) + e_machine = EM_SPARC; -static int syscallcmp(const void *va, const void *vb) -{ - const struct syscall *a = va, *b = vb; + if (last_table_machine == e_machine && last_table != NULL) + return last_table; - return strcmp(a->name, b->name); -} + for (size_t i = 0; i < ARRAY_SIZE(syscalltbls); i++) { + const struct syscalltbl *entry = &syscalltbls[i]; -static int syscalltbl__init_native(struct syscalltbl *tbl) -{ - int nr_entries = 0, i, j; - struct syscall *entries; - - for (i = 0; i <= syscalltbl_native_max_id; ++i) - if (syscalltbl_native[i]) - ++nr_entries; + if (entry->e_machine != e_machine && entry->e_machine != EM_NONE) + continue; - entries = tbl->syscalls.entries = malloc(sizeof(struct syscall) * nr_entries); - if (tbl->syscalls.entries == NULL) - return -1; - - for (i = 0, j = 0; i <= syscalltbl_native_max_id; ++i) { - if (syscalltbl_native[i]) { - entries[j].name = syscalltbl_native[i]; - entries[j].id = i; - ++j; - } + last_table = entry; + last_table_machine = e_machine; + return entry; } - - qsort(tbl->syscalls.entries, nr_entries, sizeof(struct syscall), syscallcmp); - tbl->syscalls.nr_entries = nr_entries; - tbl->syscalls.max_id = syscalltbl_native_max_id; - return 0; + return NULL; } -struct syscalltbl *syscalltbl__new(void) +const char *syscalltbl__name(int e_machine, int id) { - struct syscalltbl *tbl = malloc(sizeof(*tbl)); - if (tbl) { - if (syscalltbl__init_native(tbl)) { - free(tbl); - return NULL; - } + const struct syscalltbl *table = find_table(e_machine); + + if (e_machine == EM_MIPS && id > 1000) { + /* + * MIPS may encode the N32/64/O32 type in the high part of + * syscall number. Mask this off if present. See the values of + * __NR_N32_Linux, __NR_64_Linux, __NR_O32_Linux and __NR_Linux. + */ + id = id % 1000; } - return tbl; + if (table && id >= 0 && id < table->num_to_name_len) + return table->num_to_name[id]; + return NULL; } -void syscalltbl__delete(struct syscalltbl *tbl) -{ - zfree(&tbl->syscalls.entries); - free(tbl); -} +struct syscall_cmp_key { + const char *name; + const char *const *tbl; +}; -const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id) +static int syscallcmpname(const void *vkey, const void *ventry) { - return id <= syscalltbl_native_max_id ? 
syscalltbl_native[id]: NULL; + const struct syscall_cmp_key *key = vkey; + const uint16_t *entry = ventry; + + return strcmp(key->name, key->tbl[*entry]); } -int syscalltbl__id(struct syscalltbl *tbl, const char *name) +int syscalltbl__id(int e_machine, const char *name) { - struct syscall *sc = bsearch(name, tbl->syscalls.entries, - tbl->syscalls.nr_entries, sizeof(*sc), - syscallcmpname); + const struct syscalltbl *table = find_table(e_machine); + struct syscall_cmp_key key; + const uint16_t *id; + + if (!table) + return -1; - return sc ? sc->id : -1; + key.name = name; + key.tbl = table->num_to_name; + id = bsearch(&key, table->sorted_names, table->sorted_names_len, + sizeof(table->sorted_names[0]), syscallcmpname); + + return id ? *id : -1; } -int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx) +int syscalltbl__num_idx(int e_machine) { - int i; - struct syscall *syscalls = tbl->syscalls.entries; + const struct syscalltbl *table = find_table(e_machine); - for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) { - if (strglobmatch(syscalls[i].name, syscall_glob)) { - *idx = i; - return syscalls[i].id; - } - } + if (!table) + return 0; - return -1; + return table->sorted_names_len; } -int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx) +int syscalltbl__id_at_idx(int e_machine, int idx) { - *idx = -1; - return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); -} - -#else /* HAVE_SYSCALL_TABLE_SUPPORT */ + const struct syscalltbl *table = find_table(e_machine); -#include <libaudit.h> + if (!table) + return -1; -struct syscalltbl *syscalltbl__new(void) -{ - struct syscalltbl *tbl = zalloc(sizeof(*tbl)); - if (tbl) - tbl->audit_machine = audit_detect_machine(); - return tbl; + assert(idx >= 0 && idx < table->sorted_names_len); + return table->sorted_names[idx]; } -void syscalltbl__delete(struct syscalltbl *tbl) +int syscalltbl__strglobmatch_next(int e_machine, const char *syscall_glob, int *idx) { - free(tbl); -} + const struct syscalltbl *table = find_table(e_machine); -const char *syscalltbl__name(const struct syscalltbl *tbl, int id) -{ - return audit_syscall_to_name(id, tbl->audit_machine); -} + for (int i = *idx + 1; table && i < table->sorted_names_len; ++i) { + const char *name = table->num_to_name[table->sorted_names[i]]; -int syscalltbl__id(struct syscalltbl *tbl, const char *name) -{ - return audit_name_to_syscall(name, tbl->audit_machine); -} + if (strglobmatch(name, syscall_glob)) { + *idx = i; + return table->sorted_names[i]; + } + } -int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused, - const char *syscall_glob __maybe_unused, int *idx __maybe_unused) -{ return -1; } -int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx) +int syscalltbl__strglobmatch_first(int e_machine, const char *syscall_glob, int *idx) { - return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); + *idx = -1; + return syscalltbl__strglobmatch_next(e_machine, syscall_glob, idx); } -#endif /* HAVE_SYSCALL_TABLE_SUPPORT */ diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h index a41d2ca9e4ae..2bb628eff367 100644 --- a/tools/perf/util/syscalltbl.h +++ b/tools/perf/util/syscalltbl.h @@ -2,22 +2,12 @@ #ifndef __PERF_SYSCALLTBL_H #define __PERF_SYSCALLTBL_H -struct syscalltbl { - int audit_machine; - struct { - int max_id; - int nr_entries; - void *entries; - } syscalls; -}; +const char *syscalltbl__name(int e_machine, int id); +int 
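The rewritten syscalltbl.c above leans on two generated arrays: num_to_name[], indexed by syscall number, and sorted_names[], syscall numbers ordered so that the names they reference sort ascending, which is what lets syscalltbl__id() be a plain bsearch() with an indirect comparator. A toy, self-contained version of that lookup (the four syscalls and their numbers are made up):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Toy stand-ins for the generated syscalltbl tables. */
static const char *const num_to_name[] = { "read", "write", "open", "close" };
/* Syscall numbers ordered by the names they map to: close, open, read, write. */
static const uint16_t sorted_names[] = { 3, 2, 0, 1 };

struct syscall_cmp_key {
	const char *name;
	const char *const *tbl;
};

static int syscallcmpname(const void *vkey, const void *ventry)
{
	const struct syscall_cmp_key *key = vkey;
	const uint16_t *entry = ventry;

	return strcmp(key->name, key->tbl[*entry]);
}

int main(void)
{
	struct syscall_cmp_key key = { .name = "open", .tbl = num_to_name };
	const uint16_t *id = bsearch(&key, sorted_names, 4,
				     sizeof(sorted_names[0]), syscallcmpname);

	printf("open -> %d\n", id ? *id : -1);	/* prints: open -> 2 */
	return 0;
}

The indirection keeps a single string table serving both directions: number-to-name is an array index, name-to-number is a binary search over the permutation.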
syscalltbl__id(int e_machine, const char *name); +int syscalltbl__num_idx(int e_machine); +int syscalltbl__id_at_idx(int e_machine, int idx); -struct syscalltbl *syscalltbl__new(void); -void syscalltbl__delete(struct syscalltbl *tbl); - -const char *syscalltbl__name(const struct syscalltbl *tbl, int id); -int syscalltbl__id(struct syscalltbl *tbl, const char *name); - -int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx); -int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx); +int syscalltbl__strglobmatch_first(int e_machine, const char *syscall_glob, int *idx); +int syscalltbl__strglobmatch_next(int e_machine, const char *syscall_glob, int *idx); #endif /* __PERF_SYSCALLTBL_H */ diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index 0f383418e3df..8cf71bea295a 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -28,20 +28,6 @@ enum target_errno target__validate(struct target *target) ret = TARGET_ERRNO__PID_OVERRIDE_CPU; } - /* UID and PID are mutually exclusive */ - if (target->tid && target->uid_str) { - target->uid_str = NULL; - if (ret == TARGET_ERRNO__SUCCESS) - ret = TARGET_ERRNO__PID_OVERRIDE_UID; - } - - /* UID and CPU are mutually exclusive */ - if (target->uid_str && target->cpu_list) { - target->cpu_list = NULL; - if (ret == TARGET_ERRNO__SUCCESS) - ret = TARGET_ERRNO__UID_OVERRIDE_CPU; - } - /* PID and SYSTEM are mutually exclusive */ if (target->tid && target->system_wide) { target->system_wide = false; @@ -49,13 +35,6 @@ enum target_errno target__validate(struct target *target) ret = TARGET_ERRNO__PID_OVERRIDE_SYSTEM; } - /* UID and SYSTEM are mutually exclusive */ - if (target->uid_str && target->system_wide) { - target->system_wide = false; - if (ret == TARGET_ERRNO__SUCCESS) - ret = TARGET_ERRNO__UID_OVERRIDE_SYSTEM; - } - /* BPF and CPU are mutually exclusive */ if (target->bpf_str && target->cpu_list) { target->cpu_list = NULL; @@ -70,13 +49,6 @@ enum target_errno target__validate(struct target *target) ret = TARGET_ERRNO__BPF_OVERRIDE_PID; } - /* BPF and UID are mutually exclusive */ - if (target->bpf_str && target->uid_str) { - target->uid_str = NULL; - if (ret == TARGET_ERRNO__SUCCESS) - ret = TARGET_ERRNO__BPF_OVERRIDE_UID; - } - /* BPF and THREADS are mutually exclusive */ if (target->bpf_str && target->per_thread) { target->per_thread = false; @@ -94,15 +66,13 @@ enum target_errno target__validate(struct target *target) return ret; } -enum target_errno target__parse_uid(struct target *target) +uid_t parse_uid(const char *str) { struct passwd pwd, *result; char buf[1024]; - const char *str = target->uid_str; - target->uid = UINT_MAX; if (str == NULL) - return TARGET_ERRNO__SUCCESS; + return UINT_MAX; /* Try user name first */ getpwnam_r(str, &pwd, buf, sizeof(buf), &result); @@ -115,16 +85,15 @@ enum target_errno target__parse_uid(struct target *target) int uid = strtol(str, &endptr, 10); if (*endptr != '\0') - return TARGET_ERRNO__INVALID_UID; + return UINT_MAX; getpwuid_r(uid, &pwd, buf, sizeof(buf), &result); if (result == NULL) - return TARGET_ERRNO__USER_NOT_FOUND; + return UINT_MAX; } - target->uid = result->pw_uid; - return TARGET_ERRNO__SUCCESS; + return result->pw_uid; } /* @@ -132,20 +101,14 @@ enum target_errno target__parse_uid(struct target *target) */ static const char *target__error_str[] = { "PID/TID switch overriding CPU", - "PID/TID switch overriding UID", - "UID switch overriding CPU", "PID/TID switch overriding SYSTEM", - "UID 
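parse_uid() above collapses the old TARGET_ERRNO__INVALID_UID/USER_NOT_FOUND reporting into a single UINT_MAX sentinel: the string is tried as a user name via getpwnam_r() and, failing that, as a numeric UID via getpwuid_r(). A standalone sketch of the same fallback (lookup_uid is a hypothetical name used here for illustration):

#include <limits.h>
#include <pwd.h>
#include <stdio.h>
#include <stdlib.h>

static unsigned int lookup_uid(const char *str)
{
	struct passwd pwd, *result = NULL;
	char buf[1024];

	/* Try user name first. */
	getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
	if (result == NULL) {
		/* Not a name; maybe a numeric UID. */
		char *endptr;
		long uid = strtol(str, &endptr, 10);

		if (*endptr != '\0')
			return UINT_MAX;
		getpwuid_r((uid_t)uid, &pwd, buf, sizeof(buf), &result);
		if (result == NULL)
			return UINT_MAX;
	}
	return result->pw_uid;
}

int main(int argc, char **argv)
{
	unsigned int uid = lookup_uid(argc > 1 ? argv[1] : "root");

	if (uid == UINT_MAX)
		fprintf(stderr, "no such user\n");
	else
		printf("uid = %u\n", uid);
	return 0;
}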
switch overriding SYSTEM", "SYSTEM/CPU switch overriding PER-THREAD", "BPF switch overriding CPU", "BPF switch overriding PID/TID", - "BPF switch overriding UID", "BPF switch overriding THREAD", - "Invalid User: %s", - "Problems obtaining information for user %s", }; -int target__strerror(struct target *target, int errnum, +int target__strerror(struct target *target __maybe_unused, int errnum, char *buf, size_t buflen) { int idx; @@ -170,11 +133,6 @@ int target__strerror(struct target *target, int errnum, snprintf(buf, buflen, "%s", msg); break; - case TARGET_ERRNO__INVALID_UID: - case TARGET_ERRNO__USER_NOT_FOUND: - snprintf(buf, buflen, msg, target->uid_str); - break; - default: /* cannot reach here */ break; diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index d582cae8e105..84ebb9c940c6 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -9,14 +9,13 @@ struct target { const char *pid; const char *tid; const char *cpu_list; - const char *uid_str; const char *bpf_str; - uid_t uid; bool system_wide; bool uses_mmap; bool default_per_cpu; bool per_thread; bool use_bpf; + bool inherit; int initial_delay; const char *attr_map; }; @@ -35,31 +34,24 @@ enum target_errno { /* for target__validate() */ TARGET_ERRNO__PID_OVERRIDE_CPU = __TARGET_ERRNO__START, - TARGET_ERRNO__PID_OVERRIDE_UID, - TARGET_ERRNO__UID_OVERRIDE_CPU, TARGET_ERRNO__PID_OVERRIDE_SYSTEM, - TARGET_ERRNO__UID_OVERRIDE_SYSTEM, TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD, TARGET_ERRNO__BPF_OVERRIDE_CPU, TARGET_ERRNO__BPF_OVERRIDE_PID, - TARGET_ERRNO__BPF_OVERRIDE_UID, TARGET_ERRNO__BPF_OVERRIDE_THREAD, - /* for target__parse_uid() */ - TARGET_ERRNO__INVALID_UID, - TARGET_ERRNO__USER_NOT_FOUND, - __TARGET_ERRNO__END, }; enum target_errno target__validate(struct target *target); -enum target_errno target__parse_uid(struct target *target); + +uid_t parse_uid(const char *str); int target__strerror(struct target *target, int errnum, char *buf, size_t buflen); static inline bool target__has_task(struct target *target) { - return target->tid || target->pid || target->uid_str; + return target->tid || target->pid; } static inline bool target__has_cpu(struct target *target) diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 0b166404c5c3..aa9c58bbf9d3 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#include <elf.h> #include <errno.h> +#include <fcntl.h> #include <stdlib.h> #include <stdio.h> #include <string.h> @@ -16,6 +18,7 @@ #include "symbol.h" #include "unwind.h" #include "callchain.h" +#include "dwarf-regs.h" #include <api/fs/fs.h> @@ -26,7 +29,7 @@ int thread__init_maps(struct thread *thread, struct machine *machine) if (pid == thread__tid(thread) || pid == -1) { thread__set_maps(thread, maps__new(machine)); } else { - struct thread *leader = __machine__findnew_thread(machine, pid, pid); + struct thread *leader = machine__findnew_thread(machine, pid, pid); if (leader) { thread__set_maps(thread, maps__get(thread__maps(leader))); @@ -38,31 +41,29 @@ int thread__init_maps(struct thread *thread, struct machine *machine) } struct thread *thread__new(pid_t pid, pid_t tid) + NO_THREAD_SAFETY_ANALYSIS /* Allocation/creation is inherently single threaded. 
*/ { - char *comm_str; - struct comm *comm; RC_STRUCT(thread) *_thread = zalloc(sizeof(*_thread)); struct thread *thread; if (ADD_RC_CHK(thread, _thread) != NULL) { + struct comm *comm; + char comm_str[32]; + thread__set_pid(thread, pid); thread__set_tid(thread, tid); thread__set_ppid(thread, -1); thread__set_cpu(thread, -1); thread__set_guest_cpu(thread, -1); + thread__set_e_machine(thread, EM_NONE); thread__set_lbr_stitch_enable(thread, false); INIT_LIST_HEAD(thread__namespaces_list(thread)); INIT_LIST_HEAD(thread__comm_list(thread)); init_rwsem(thread__namespaces_lock(thread)); init_rwsem(thread__comm_lock(thread)); - comm_str = malloc(32); - if (!comm_str) - goto err_thread; - - snprintf(comm_str, 32, ":%d", tid); + snprintf(comm_str, sizeof(comm_str), ":%d", tid); comm = comm__new(comm_str, 0, false); - free(comm_str); if (!comm) goto err_thread; @@ -76,10 +77,19 @@ struct thread *thread__new(pid_t pid, pid_t tid) return thread; err_thread: - free(thread); + thread__delete(thread); return NULL; } +static void (*thread__priv_destructor)(void *priv); + +void thread__set_priv_destructor(void (*destructor)(void *priv)) +{ + assert(thread__priv_destructor == NULL); + + thread__priv_destructor = destructor; +} + void thread__delete(struct thread *thread) { struct namespaces *namespaces, *tmp_namespaces; @@ -112,6 +122,10 @@ void thread__delete(struct thread *thread) exit_rwsem(thread__namespaces_lock(thread)); exit_rwsem(thread__comm_lock(thread)); thread__free_stitch_list(thread); + + if (thread__priv_destructor) + thread__priv_destructor(thread__priv(thread)); + RC_CHK_FREE(thread); } @@ -187,7 +201,8 @@ int thread__set_namespaces(struct thread *thread, u64 timestamp, return ret; } -struct comm *thread__comm(struct thread *thread) +static struct comm *__thread__comm(struct thread *thread) + SHARED_LOCKS_REQUIRED(thread__comm_lock(thread)) { if (list_empty(thread__comm_list(thread))) return NULL; @@ -195,16 +210,30 @@ struct comm *thread__comm(struct thread *thread) return list_first_entry(thread__comm_list(thread), struct comm, list); } +struct comm *thread__comm(struct thread *thread) +{ + struct comm *res = NULL; + + down_read(thread__comm_lock(thread)); + res = __thread__comm(thread); + up_read(thread__comm_lock(thread)); + return res; +} + struct comm *thread__exec_comm(struct thread *thread) { struct comm *comm, *last = NULL, *second_last = NULL; + down_read(thread__comm_lock(thread)); list_for_each_entry(comm, thread__comm_list(thread), list) { - if (comm->exec) + if (comm->exec) { + up_read(thread__comm_lock(thread)); return comm; + } second_last = last; last = comm; } + up_read(thread__comm_lock(thread)); /* * 'last' with no start time might be the parent's comm of a synthesized @@ -220,8 +249,9 @@ struct comm *thread__exec_comm(struct thread *thread) static int ____thread__set_comm(struct thread *thread, const char *str, u64 timestamp, bool exec) + EXCLUSIVE_LOCKS_REQUIRED(thread__comm_lock(thread)) { - struct comm *new, *curr = thread__comm(thread); + struct comm *new, *curr = __thread__comm(thread); /* Override the default :tid entry */ if (!thread__comm_set(thread)) { @@ -272,8 +302,9 @@ int thread__set_comm_from_proc(struct thread *thread) } static const char *__thread__comm_str(struct thread *thread) + SHARED_LOCKS_REQUIRED(thread__comm_lock(thread)) { - const struct comm *comm = thread__comm(thread); + const struct comm *comm = __thread__comm(thread); if (!comm) return NULL; @@ -332,38 +363,36 @@ int thread__insert_map(struct thread *thread, struct map *map) if (ret) return 
ret; - maps__fixup_overlappings(thread__maps(thread), map, stderr); - return maps__insert(thread__maps(thread), map); + return maps__fixup_overlap_and_insert(thread__maps(thread), map); } -static int __thread__prepare_access(struct thread *thread) +struct thread__prepare_access_maps_cb_args { + int err; + struct maps *maps; +}; + +static int thread__prepare_access_maps_cb(struct map *map, void *data) { bool initialized = false; - int err = 0; - struct maps *maps = thread__maps(thread); - struct map_rb_node *rb_node; + struct thread__prepare_access_maps_cb_args *args = data; - down_read(maps__lock(maps)); - - maps__for_each_entry(maps, rb_node) { - err = unwind__prepare_access(thread__maps(thread), rb_node->map, &initialized); - if (err || initialized) - break; - } + args->err = unwind__prepare_access(args->maps, map, &initialized); - up_read(maps__lock(maps)); - - return err; + return (args->err || initialized) ? 1 : 0; } static int thread__prepare_access(struct thread *thread) { - int err = 0; + struct thread__prepare_access_maps_cb_args args = { + .err = 0, + }; - if (dwarf_callchain_users) - err = __thread__prepare_access(thread); + if (dwarf_callchain_users) { + args.maps = thread__maps(thread); + maps__for_each_map(thread__maps(thread), thread__prepare_access_maps_cb, &args); + } - return err; + return args.err; } static int thread__clone_maps(struct thread *thread, struct thread *parent, bool do_maps_clone) @@ -372,14 +401,14 @@ static int thread__clone_maps(struct thread *thread, struct thread *parent, bool if (thread__pid(thread) == thread__pid(parent)) return thread__prepare_access(thread); - if (thread__maps(thread) == thread__maps(parent)) { + if (maps__equal(thread__maps(thread), thread__maps(parent))) { pr_debug("broken map groups on thread %d/%d parent %d/%d\n", thread__pid(thread), thread__tid(thread), thread__pid(parent), thread__tid(parent)); return 0; } /* But this one is new process, copy maps. */ - return do_maps_clone ? maps__clone(thread, thread__maps(parent)) : 0; + return do_maps_clone ? 
maps__copy_from(thread__maps(thread), thread__maps(parent)) : 0; } int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone) @@ -399,7 +428,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bo } void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, - struct addr_location *al) + bool symbols, struct addr_location *al) { size_t i; const u8 cpumodes[] = { @@ -410,12 +439,93 @@ void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, }; for (i = 0; i < ARRAY_SIZE(cpumodes); i++) { - thread__find_symbol(thread, cpumodes[i], addr, al); + if (symbols) + thread__find_symbol(thread, cpumodes[i], addr, al); + else + thread__find_map(thread, cpumodes[i], addr, al); + if (al->map) break; } } +static uint16_t read_proc_e_machine_for_pid(pid_t pid) +{ + char path[6 /* "/proc/" */ + 11 /* max length of pid */ + 5 /* "/exe\0" */]; + int fd; + uint16_t e_machine = EM_NONE; + + snprintf(path, sizeof(path), "/proc/%d/exe", pid); + fd = open(path, O_RDONLY); + if (fd >= 0) { + _Static_assert(offsetof(Elf32_Ehdr, e_machine) == 18, "Unexpected offset"); + _Static_assert(offsetof(Elf64_Ehdr, e_machine) == 18, "Unexpected offset"); + if (pread(fd, &e_machine, sizeof(e_machine), 18) != sizeof(e_machine)) + e_machine = EM_NONE; + close(fd); + } + return e_machine; +} + +static int thread__e_machine_callback(struct map *map, void *machine) +{ + struct dso *dso = map__dso(map); + + _Static_assert(0 == EM_NONE, "Unexpected EM_NONE"); + if (!dso) + return EM_NONE; + + return dso__e_machine(dso, machine); +} + +uint16_t thread__e_machine(struct thread *thread, struct machine *machine) +{ + pid_t tid, pid; + uint16_t e_machine = RC_CHK_ACCESS(thread)->e_machine; + + if (e_machine != EM_NONE) + return e_machine; + + tid = thread__tid(thread); + pid = thread__pid(thread); + if (pid != tid) { + struct thread *parent = machine__findnew_thread(machine, pid, pid); + + if (parent) { + e_machine = thread__e_machine(parent, machine); + thread__put(parent); + thread__set_e_machine(thread, e_machine); + return e_machine; + } + /* Something went wrong, fallback. */ + } + /* Reading on the PID thread. First try to find from the maps. */ + e_machine = maps__for_each_map(thread__maps(thread), + thread__e_machine_callback, + machine); + if (e_machine == EM_NONE) { + /* Maps failed, perhaps we're live with map events disabled. */ + bool is_live = machine->machines == NULL; + + if (!is_live) { + /* Check if the session has a data file. */ + struct perf_session *session = container_of(machine->machines, + struct perf_session, + machines); + + is_live = !!session->data; + } + /* Read from /proc/pid/exe if live. 
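read_proc_e_machine_for_pid() above works because e_machine sits at byte offset 18 in both the 32-bit and 64-bit ELF headers, so a two-byte pread() of /proc/<pid>/exe suffices without parsing the rest of the header. A self-contained version that inspects the current process:

#include <elf.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint16_t e_machine = EM_NONE;
	int fd = open("/proc/self/exe", O_RDONLY);

	if (fd >= 0) {
		/* e_machine is at offset 18 for both ELF classes. */
		_Static_assert(offsetof(Elf64_Ehdr, e_machine) == 18, "Unexpected offset");
		if (pread(fd, &e_machine, sizeof(e_machine), 18) != sizeof(e_machine))
			e_machine = EM_NONE;
		close(fd);
	}
	printf("e_machine = %u (EM_X86_64 is %u)\n",
	       (unsigned)e_machine, (unsigned)EM_X86_64);
	return 0;
}

This only holds for little-endian readers of same-endian binaries; perf's dso__e_machine() path covers the general case.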
*/ + if (is_live) + e_machine = read_proc_e_machine_for_pid(pid); + } + if (e_machine != EM_NONE) + thread__set_e_machine(thread, e_machine); + else + e_machine = EM_HOST; + return e_machine; +} + struct thread *thread__main_thread(struct machine *machine, struct thread *thread) { if (thread__pid(thread) == thread__tid(thread)) @@ -446,14 +556,14 @@ int thread__memcpy(struct thread *thread, struct machine *machine, dso = map__dso(al.map); - if (!dso || dso->data.status == DSO_DATA_STATUS_ERROR || map__load(al.map) < 0) { + if (!dso || dso__data(dso)->status == DSO_DATA_STATUS_ERROR || map__load(al.map) < 0) { addr_location__exit(&al); return -1; } offset = map__map_ip(al.map, ip); if (is64bit) - *is64bit = dso->is_64_bit; + *is64bit = dso__is_64_bit(dso); addr_location__exit(&al); @@ -469,6 +579,7 @@ void thread__free_stitch_list(struct thread *thread) return; list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) { + map_symbol__exit(&pos->cursor.ms); list_del_init(&pos->node); free(pos); } @@ -478,6 +589,9 @@ void thread__free_stitch_list(struct thread *thread) free(pos); } + for (unsigned int i = 0 ; i < lbr_stitch->prev_lbr_cursor_size; i++) + map_symbol__exit(&lbr_stitch->prev_lbr_cursor[i].ms); + zfree(&lbr_stitch->prev_lbr_cursor); free(thread__lbr_stitch(thread)); thread__set_lbr_stitch(thread, NULL); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 9068a21ce0fa..310eaea344bb 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -3,7 +3,6 @@ #define __PERF_THREAD_H #include <linux/refcount.h> -#include <linux/rbtree.h> #include <linux/list.h> #include <stdio.h> #include <unistd.h> @@ -13,7 +12,6 @@ #include <strlist.h> #include <intlist.h> #include "rwsem.h" -#include "event.h" #include "callchain.h" #include <internal/rc_check.h> @@ -28,21 +26,26 @@ struct lbr_stitch { struct list_head free_lists; struct perf_sample prev_sample; struct callchain_cursor_node *prev_lbr_cursor; -}; - -struct thread_rb_node { - struct rb_node rb_node; - struct thread *thread; + unsigned int prev_lbr_cursor_size; }; DECLARE_RC_STRUCT(thread) { + /** @maps: mmaps associated with this thread. */ struct maps *maps; pid_t pid_; /* Not all tools update this */ + /** @tid: thread ID number unique to a machine. */ pid_t tid; + /** @ppid: parent process of the process this thread belongs to. */ pid_t ppid; int cpu; int guest_cpu; /* For QEMU thread */ refcount_t refcnt; + /** + * @exited: Has the thread had an exit event. Such threads are usually + * removed from the machine's threads but some events/tools require + * access to dead threads. + */ + bool exited; bool comm_set; int comm_len; struct list_head namespaces_list; @@ -57,7 +60,11 @@ DECLARE_RC_STRUCT(thread) { struct srccode_state srccode_state; bool filter; int filter_entry_depth; - + /** + * @e_machine: The ELF EM_* associated with the thread. EM_NONE if not + * computed. 
+ */ + uint16_t e_machine; /* LBR call stack stitch */ bool lbr_stitch_enable; struct lbr_stitch *lbr_stitch; @@ -71,6 +78,8 @@ struct thread *thread__new(pid_t pid, pid_t tid); int thread__init_maps(struct thread *thread, struct machine *machine); void thread__delete(struct thread *thread); +void thread__set_priv_destructor(void (*destructor)(void *priv)); + struct thread *thread__get(struct thread *thread); void thread__put(struct thread *thread); @@ -117,7 +126,7 @@ struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al); void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, - struct addr_location *al); + bool symbols, struct addr_location *al); int thread__memcpy(struct thread *thread, struct machine *machine, void *buf, u64 ip, int len, bool *is64bit); @@ -187,6 +196,11 @@ static inline refcount_t *thread__refcnt(struct thread *thread) return &RC_CHK_ACCESS(thread)->refcnt; } +static inline void thread__set_exited(struct thread *thread, bool exited) +{ + RC_CHK_ACCESS(thread)->exited = exited; +} + static inline bool thread__comm_set(const struct thread *thread) { return RC_CHK_ACCESS(thread)->comm_set; @@ -222,14 +236,15 @@ static inline struct rw_semaphore *thread__namespaces_lock(struct thread *thread return &RC_CHK_ACCESS(thread)->namespaces_lock; } -static inline struct list_head *thread__comm_list(struct thread *thread) +static inline struct rw_semaphore *thread__comm_lock(struct thread *thread) { - return &RC_CHK_ACCESS(thread)->comm_list; + return &RC_CHK_ACCESS(thread)->comm_lock; } -static inline struct rw_semaphore *thread__comm_lock(struct thread *thread) +static inline struct list_head *thread__comm_list(struct thread *thread) + SHARED_LOCKS_REQUIRED(thread__comm_lock(thread)) { - return &RC_CHK_ACCESS(thread)->comm_lock; + return &RC_CHK_ACCESS(thread)->comm_list; } static inline u64 thread__db_id(const struct thread *thread) @@ -292,6 +307,14 @@ static inline void thread__set_filter_entry_depth(struct thread *thread, int dep RC_CHK_ACCESS(thread)->filter_entry_depth = depth; } +uint16_t thread__e_machine(struct thread *thread, struct machine *machine); + +static inline void thread__set_e_machine(struct thread *thread, uint16_t e_machine) +{ + RC_CHK_ACCESS(thread)->e_machine = e_machine; +} + + static inline bool thread__lbr_stitch_enable(const struct thread *thread) { return RC_CHK_ACCESS(thread)->lbr_stitch_enable; diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index e848579e61a8..ca193c1374ed 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -72,7 +72,7 @@ struct perf_thread_map *thread_map__new_by_tid(pid_t tid) return threads; } -static struct perf_thread_map *__thread_map__new_all_cpus(uid_t uid) +static struct perf_thread_map *thread_map__new_all_cpus(void) { DIR *proc; int max_threads = 32, items, i; @@ -98,20 +98,12 @@ static struct perf_thread_map *__thread_map__new_all_cpus(uid_t uid) if (*end) /* only interested in proper numerical dirents */ continue; - snprintf(path, sizeof(path), "/proc/%s", dirent->d_name); - - if (uid != UINT_MAX) { - struct stat st; - - if (stat(path, &st) != 0 || st.st_uid != uid) - continue; - } - snprintf(path, sizeof(path), "/proc/%d/task", pid); items = scandir(path, &namelist, filter, NULL); - if (items <= 0) - goto out_free_closedir; - + if (items <= 0) { + pr_debug("scandir for %d returned empty, skipping\n", pid); + continue; + } while (threads->nr + items >= max_threads) { max_threads *= 2; grow = 
true; @@ -152,30 +144,15 @@ out_free_namelist: for (i = 0; i < items; i++) zfree(&namelist[i]); free(namelist); - -out_free_closedir: zfree(&threads); goto out_closedir; } -struct perf_thread_map *thread_map__new_all_cpus(void) -{ - return __thread_map__new_all_cpus(UINT_MAX); -} - -struct perf_thread_map *thread_map__new_by_uid(uid_t uid) -{ - return __thread_map__new_all_cpus(uid); -} - -struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) +struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid) { if (pid != -1) return thread_map__new_by_pid(pid); - if (tid == -1 && uid != UINT_MAX) - return thread_map__new_by_uid(uid); - return thread_map__new_by_tid(tid); } @@ -280,25 +257,21 @@ struct perf_thread_map *thread_map__new_by_tid_str(const char *tid_str) threads->nr = ntasks; } out: + strlist__delete(slist); if (threads) refcount_set(&threads->refcnt, 1); return threads; out_free_threads: zfree(&threads); - strlist__delete(slist); goto out; } -struct perf_thread_map *thread_map__new_str(const char *pid, const char *tid, - uid_t uid, bool all_threads) +struct perf_thread_map *thread_map__new_str(const char *pid, const char *tid, bool all_threads) { if (pid) return thread_map__new_by_pid_str(pid); - if (!tid && uid != UINT_MAX) - return thread_map__new_by_uid(uid); - if (all_threads) return thread_map__new_all_cpus(); diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 00ec05fc1656..fc16d87f32fb 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -11,13 +11,11 @@ struct perf_record_thread_map; struct perf_thread_map *thread_map__new_dummy(void); struct perf_thread_map *thread_map__new_by_pid(pid_t pid); struct perf_thread_map *thread_map__new_by_tid(pid_t tid); -struct perf_thread_map *thread_map__new_by_uid(uid_t uid); -struct perf_thread_map *thread_map__new_all_cpus(void); -struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); +struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid); struct perf_thread_map *thread_map__new_event(struct perf_record_thread_map *event); struct perf_thread_map *thread_map__new_str(const char *pid, - const char *tid, uid_t uid, bool all_threads); + const char *tid, bool all_threads); struct perf_thread_map *thread_map__new_by_tid_str(const char *tid_str); diff --git a/tools/perf/util/threads.c b/tools/perf/util/threads.c new file mode 100644 index 000000000000..6ca0b178fb6c --- /dev/null +++ b/tools/perf/util/threads.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "threads.h" +#include "machine.h" +#include "thread.h" + +static struct threads_table_entry *threads__table(struct threads *threads, pid_t tid) +{ + /* Cast it to handle tid == -1 */ + return &threads->table[(unsigned int)tid % THREADS__TABLE_SIZE]; +} + +static size_t key_hash(long key, void *ctx __maybe_unused) +{ + /* The table lookup removes low bit entropy, but this is just ignored here. 
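The "Cast it to handle tid == -1" comment in threads__table() above is load-bearing: with signed arithmetic, tid == -1 would index table[-1], while the unsigned cast sends it to the top shard. A quick demonstration of the shard selection:

#include <stdio.h>

#define THREADS__TABLE_BITS 3
#define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS)

int main(void)
{
	int tids[] = { -1, 0, 7, 8, 4101 };

	for (unsigned long i = 0; i < sizeof(tids) / sizeof(tids[0]); i++) {
		int tid = tids[i];

		/* Same expression as threads__table(): unsigned cast first. */
		printf("tid %5d -> shard %u\n", tid,
		       (unsigned int)tid % THREADS__TABLE_SIZE);
	}
	return 0;
}

On a two's-complement machine -1 becomes 0xffffffff and lands in shard 7, a valid bucket rather than an out-of-bounds read.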
*/ + return key; +} + +static bool key_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + +void threads__init(struct threads *threads) +{ + for (int i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads_table_entry *table = &threads->table[i]; + + hashmap__init(&table->shard, key_hash, key_equal, NULL); + init_rwsem(&table->lock); + table->last_match = NULL; + } +} + +void threads__exit(struct threads *threads) +{ + threads__remove_all_threads(threads); + for (int i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads_table_entry *table = &threads->table[i]; + + hashmap__clear(&table->shard); + exit_rwsem(&table->lock); + } +} + +size_t threads__nr(struct threads *threads) +{ + size_t nr = 0; + + for (int i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads_table_entry *table = &threads->table[i]; + + down_read(&table->lock); + nr += hashmap__size(&table->shard); + up_read(&table->lock); + } + return nr; +} + +/* + * Front-end cache - TID lookups come in blocks, + * so most of the time we dont have to look up + * the full rbtree: + */ +static struct thread *__threads_table_entry__get_last_match(struct threads_table_entry *table, + pid_t tid) +{ + struct thread *th, *res = NULL; + + th = table->last_match; + if (th != NULL) { + if (thread__tid(th) == tid) + res = thread__get(th); + } + return res; +} + +static void __threads_table_entry__set_last_match(struct threads_table_entry *table, + struct thread *th) +{ + thread__put(table->last_match); + table->last_match = thread__get(th); +} + +static void threads_table_entry__set_last_match(struct threads_table_entry *table, + struct thread *th) +{ + down_write(&table->lock); + __threads_table_entry__set_last_match(table, th); + up_write(&table->lock); +} + +struct thread *threads__find(struct threads *threads, pid_t tid) +{ + struct threads_table_entry *table = threads__table(threads, tid); + struct thread *res; + + down_read(&table->lock); + res = __threads_table_entry__get_last_match(table, tid); + if (!res) { + if (hashmap__find(&table->shard, tid, &res)) + res = thread__get(res); + } + up_read(&table->lock); + if (res) + threads_table_entry__set_last_match(table, res); + return res; +} + +struct thread *threads__findnew(struct threads *threads, pid_t pid, pid_t tid, bool *created) +{ + struct threads_table_entry *table = threads__table(threads, tid); + struct thread *res = NULL; + + *created = false; + down_write(&table->lock); + res = thread__new(pid, tid); + if (res) { + if (hashmap__add(&table->shard, tid, res)) { + /* Add failed. Assume a race so find other entry. 
*/ + thread__put(res); + res = NULL; + if (hashmap__find(&table->shard, tid, &res)) + res = thread__get(res); + } else { + res = thread__get(res); + *created = true; + } + if (res) + __threads_table_entry__set_last_match(table, res); + } + up_write(&table->lock); + return res; +} + +void threads__remove_all_threads(struct threads *threads) +{ + for (int i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads_table_entry *table = &threads->table[i]; + struct hashmap_entry *cur, *tmp; + size_t bkt; + + down_write(&table->lock); + __threads_table_entry__set_last_match(table, NULL); + hashmap__for_each_entry_safe(&table->shard, cur, tmp, bkt) { + struct thread *old_value; + + hashmap__delete(&table->shard, cur->key, /*old_key=*/NULL, &old_value); + thread__put(old_value); + } + up_write(&table->lock); + } +} + +void threads__remove(struct threads *threads, struct thread *thread) +{ + struct threads_table_entry *table = threads__table(threads, thread__tid(thread)); + struct thread *old_value; + + down_write(&table->lock); + if (table->last_match && RC_CHK_EQUAL(table->last_match, thread)) + __threads_table_entry__set_last_match(table, NULL); + + hashmap__delete(&table->shard, thread__tid(thread), /*old_key=*/NULL, &old_value); + thread__put(old_value); + up_write(&table->lock); +} + +int threads__for_each_thread(struct threads *threads, + int (*fn)(struct thread *thread, void *data), + void *data) +{ + for (int i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads_table_entry *table = &threads->table[i]; + struct hashmap_entry *cur; + size_t bkt; + + down_read(&table->lock); + hashmap__for_each_entry(&table->shard, cur, bkt) { + int rc = fn((struct thread *)cur->pvalue, data); + + if (rc != 0) { + up_read(&table->lock); + return rc; + } + } + up_read(&table->lock); + } + return 0; + +} diff --git a/tools/perf/util/threads.h b/tools/perf/util/threads.h new file mode 100644 index 000000000000..da68d2223f18 --- /dev/null +++ b/tools/perf/util/threads.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_THREADS_H +#define __PERF_THREADS_H + +#include "hashmap.h" +#include "rwsem.h" + +struct thread; + +#define THREADS__TABLE_BITS 3 +#define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS) + +struct threads_table_entry { + /* Key is tid, value is struct thread. */ + struct hashmap shard; + struct rw_semaphore lock; + struct thread *last_match; +}; + +struct threads { + struct threads_table_entry table[THREADS__TABLE_SIZE]; +}; + +void threads__init(struct threads *threads); +void threads__exit(struct threads *threads); +size_t threads__nr(struct threads *threads); +struct thread *threads__find(struct threads *threads, pid_t tid); +struct thread *threads__findnew(struct threads *threads, pid_t pid, pid_t tid, bool *created); +void threads__remove_all_threads(struct threads *threads); +void threads__remove(struct threads *threads, struct thread *thread); +int threads__for_each_thread(struct threads *threads, + int (*fn)(struct thread *thread, void *data), + void *data); + +#endif /* __PERF_THREADS_H */ diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 302443921681..1b91ccd4d523 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -20,7 +20,7 @@ int parse_nsec_time(const char *str, u64 *ptime) u64 time_sec, time_nsec; char *end; - time_sec = strtoul(str, &end, 10); + time_sec = strtoull(str, &end, 10); if (*end != '.' 
&& *end != '\0') return -1; @@ -38,7 +38,7 @@ int parse_nsec_time(const char *str, u64 *ptime) for (i = strlen(nsec_buf); i < 9; i++) nsec_buf[i] = '0'; - time_nsec = strtoul(nsec_buf, &end, 10); + time_nsec = strtoull(nsec_buf, &end, 10); if (*end != '\0') return -1; } else diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c new file mode 100644 index 000000000000..27ba5849c74a --- /dev/null +++ b/tools/perf/util/tool.c @@ -0,0 +1,499 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "data.h" +#include "debug.h" +#include "event.h" +#include "header.h" +#include "session.h" +#include "stat.h" +#include "tool.h" +#include "tsc.h" +#include <linux/compiler.h> +#include <sys/mman.h> +#include <stddef.h> +#include <unistd.h> + +#ifdef HAVE_ZSTD_SUPPORT +static int perf_session__process_compressed_event(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event, u64 file_offset, + const char *file_path) +{ + void *src; + size_t decomp_size, src_size; + u64 decomp_last_rem = 0; + size_t mmap_len, decomp_len = perf_session__env(session)->comp_mmap_len; + struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last; + + if (decomp_last) { + decomp_last_rem = decomp_last->size - decomp_last->head; + decomp_len += decomp_last_rem; + } + + mmap_len = sizeof(struct decomp) + decomp_len; + decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (decomp == MAP_FAILED) { + pr_err("Couldn't allocate memory for decompression\n"); + return -1; + } + + decomp->file_pos = file_offset; + decomp->file_path = file_path; + decomp->mmap_len = mmap_len; + decomp->head = 0; + + if (decomp_last_rem) { + memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem); + decomp->size = decomp_last_rem; + } + + if (event->header.type == PERF_RECORD_COMPRESSED) { + src = (void *)event + sizeof(struct perf_record_compressed); + src_size = event->pack.header.size - sizeof(struct perf_record_compressed); + } else if (event->header.type == PERF_RECORD_COMPRESSED2) { + src = (void *)event + sizeof(struct perf_record_compressed2); + src_size = event->pack2.data_size; + } else { + return -1; + } + + decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size, + &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem); + if (!decomp_size) { + munmap(decomp, mmap_len); + pr_err("Couldn't decompress data\n"); + return -1; + } + + decomp->size += decomp_size; + + if (session->active_decomp->decomp == NULL) + session->active_decomp->decomp = decomp; + else + session->active_decomp->decomp_last->next = decomp; + + session->active_decomp->decomp_last = decomp; + + pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size); + + return 0; +} +#endif + +static int process_event_synth_tracing_data_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_event_synth_attr_stub(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct evlist **pevlist __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_event_synth_event_update_stub(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct evlist **pevlist __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_event_update(event, stdout); + + dump_printf(": unhandled!\n"); 
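+	/*
+	 * Like the other stubs here, report the record as unhandled (visible
+	 * only under dump_trace) but return success, so a tool that ignores a
+	 * record type does not abort the whole session.
+	 */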
+ return 0; +} + +int process_event_sample_stub(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct evsel *evsel __maybe_unused, + struct machine *machine __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_event_stub(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_finished_round_stub(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct ordered_events *oe __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min(n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 process_event_auxtrace_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event) +{ + dump_printf(": unhandled!\n"); + if (perf_data__is_pipe(session->data)) + skipn(perf_data__fd(session->data), event->auxtrace.size); + return event->auxtrace.size; +} + +static int process_event_op2_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + + +static +int process_event_thread_map_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_thread_map(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static +int process_event_cpu_map_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_cpu_map(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static +int process_event_stat_config_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_stat_config(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_stat_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *perf_session __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_stat(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_stat_round_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *perf_session __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_stat_round(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_event_time_conv_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *perf_session __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_time_conv(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static int perf_session__process_compressed_event_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused, + u64 file_offset __maybe_unused, + const char *file_path __maybe_unused) 
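+/* Fallback when zstd support is compiled out; perf_tool__compressed_is_stub() below identifies it by address. */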
+{ + dump_printf(": unhandled!\n"); + return 0; +} + +static int perf_event__process_bpf_metadata_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *perf_session __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_bpf_metadata(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +void perf_tool__init(struct perf_tool *tool, bool ordered_events) +{ + tool->ordered_events = ordered_events; + tool->ordering_requires_timestamps = false; + tool->namespace_events = false; + tool->cgroup_events = false; + tool->no_warn = false; + tool->show_feat_hdr = SHOW_FEAT_NO_HEADER; + tool->merge_deferred_callchains = true; + + tool->sample = process_event_sample_stub; + tool->mmap = process_event_stub; + tool->mmap2 = process_event_stub; + tool->comm = process_event_stub; + tool->namespaces = process_event_stub; + tool->cgroup = process_event_stub; + tool->fork = process_event_stub; + tool->exit = process_event_stub; + tool->lost = perf_event__process_lost; + tool->lost_samples = perf_event__process_lost_samples; + tool->aux = perf_event__process_aux; + tool->itrace_start = perf_event__process_itrace_start; + tool->context_switch = perf_event__process_switch; + tool->ksymbol = perf_event__process_ksymbol; + tool->bpf = perf_event__process_bpf; + tool->text_poke = perf_event__process_text_poke; + tool->aux_output_hw_id = perf_event__process_aux_output_hw_id; + tool->read = process_event_sample_stub; + tool->throttle = process_event_stub; + tool->unthrottle = process_event_stub; + tool->callchain_deferred = process_event_sample_stub; + tool->attr = process_event_synth_attr_stub; + tool->event_update = process_event_synth_event_update_stub; + tool->tracing_data = process_event_synth_tracing_data_stub; + tool->build_id = process_event_op2_stub; + + if (ordered_events) + tool->finished_round = perf_event__process_finished_round; + else + tool->finished_round = process_finished_round_stub; + + tool->id_index = process_event_op2_stub; + tool->auxtrace_info = process_event_op2_stub; + tool->auxtrace = process_event_auxtrace_stub; + tool->auxtrace_error = process_event_op2_stub; + tool->thread_map = process_event_thread_map_stub; + tool->cpu_map = process_event_cpu_map_stub; + tool->stat_config = process_event_stat_config_stub; + tool->stat = process_stat_stub; + tool->stat_round = process_stat_round_stub; + tool->time_conv = process_event_time_conv_stub; + tool->feature = process_event_op2_stub; +#ifdef HAVE_ZSTD_SUPPORT + tool->compressed = perf_session__process_compressed_event; +#else + tool->compressed = perf_session__process_compressed_event_stub; +#endif + tool->finished_init = process_event_op2_stub; + tool->bpf_metadata = perf_event__process_bpf_metadata_stub; +} + +bool perf_tool__compressed_is_stub(const struct perf_tool *tool) +{ + return tool->compressed == perf_session__process_compressed_event_stub; +} + +#define CREATE_DELEGATE_SAMPLE(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + union perf_event *event, \ + struct perf_sample *sample, \ + struct evsel *evsel, \ + struct machine *machine) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, event, sample, evsel, machine); \ + } +CREATE_DELEGATE_SAMPLE(read); +CREATE_DELEGATE_SAMPLE(sample); +CREATE_DELEGATE_SAMPLE(callchain_deferred); + +#define CREATE_DELEGATE_ATTR(name) \ + static int delegate_ ## name(const struct perf_tool 
*tool, \ + union perf_event *event, \ + struct evlist **pevlist) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, event, pevlist); \ + } +CREATE_DELEGATE_ATTR(attr); +CREATE_DELEGATE_ATTR(event_update); + +#define CREATE_DELEGATE_OE(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + union perf_event *event, \ + struct ordered_events *oe) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, event, oe); \ + } +CREATE_DELEGATE_OE(finished_round); + +#define CREATE_DELEGATE_OP(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + union perf_event *event, \ + struct perf_sample *sample, \ + struct machine *machine) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, event, sample, machine); \ + } +CREATE_DELEGATE_OP(aux); +CREATE_DELEGATE_OP(aux_output_hw_id); +CREATE_DELEGATE_OP(bpf); +CREATE_DELEGATE_OP(cgroup); +CREATE_DELEGATE_OP(comm); +CREATE_DELEGATE_OP(context_switch); +CREATE_DELEGATE_OP(exit); +CREATE_DELEGATE_OP(fork); +CREATE_DELEGATE_OP(itrace_start); +CREATE_DELEGATE_OP(ksymbol); +CREATE_DELEGATE_OP(lost); +CREATE_DELEGATE_OP(lost_samples); +CREATE_DELEGATE_OP(mmap); +CREATE_DELEGATE_OP(mmap2); +CREATE_DELEGATE_OP(namespaces); +CREATE_DELEGATE_OP(text_poke); +CREATE_DELEGATE_OP(throttle); +CREATE_DELEGATE_OP(unthrottle); + +#define CREATE_DELEGATE_OP2(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + struct perf_session *session, \ + union perf_event *event) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, session, event); \ + } +CREATE_DELEGATE_OP2(auxtrace_error); +CREATE_DELEGATE_OP2(auxtrace_info); +CREATE_DELEGATE_OP2(bpf_metadata); +CREATE_DELEGATE_OP2(build_id); +CREATE_DELEGATE_OP2(cpu_map); +CREATE_DELEGATE_OP2(feature); +CREATE_DELEGATE_OP2(finished_init); +CREATE_DELEGATE_OP2(id_index); +CREATE_DELEGATE_OP2(stat); +CREATE_DELEGATE_OP2(stat_config); +CREATE_DELEGATE_OP2(stat_round); +CREATE_DELEGATE_OP2(thread_map); +CREATE_DELEGATE_OP2(time_conv); +CREATE_DELEGATE_OP2(tracing_data); + +#define CREATE_DELEGATE_OP3(name) \ + static s64 delegate_ ## name(const struct perf_tool *tool, \ + struct perf_session *session, \ + union perf_event *event) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, session, event); \ + } +CREATE_DELEGATE_OP3(auxtrace); + +#define CREATE_DELEGATE_OP4(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + struct perf_session *session, \ + union perf_event *event, \ + u64 data, \ + const char *str) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, session, event, data, str); \ + } +CREATE_DELEGATE_OP4(compressed); + +void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate) +{ + tool->delegate = delegate; + + tool->tool.ordered_events = delegate->ordered_events; + 
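+	/*
+	 * Every delegate_* handler recovers the wrapping struct delegate_tool
+	 * with container_of() and forwards the call to the wrapped tool.
+	 * Hypothetical usage sketch (my_sample is not part of this patch):
+	 *
+	 *   struct delegate_tool dt;
+	 *
+	 *   delegate_tool__init(&dt, &real_tool);
+	 *   dt.tool.sample = my_sample;  // intercept samples only
+	 *   ... pass &dt.tool wherever a const struct perf_tool * is expected,
+	 *   all other events still reach real_tool ...
+	 */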
tool->tool.ordering_requires_timestamps = delegate->ordering_requires_timestamps; + tool->tool.namespace_events = delegate->namespace_events; + tool->tool.cgroup_events = delegate->cgroup_events; + tool->tool.no_warn = delegate->no_warn; + tool->tool.show_feat_hdr = delegate->show_feat_hdr; + tool->tool.merge_deferred_callchains = delegate->merge_deferred_callchains; + + tool->tool.sample = delegate_sample; + tool->tool.read = delegate_read; + + tool->tool.mmap = delegate_mmap; + tool->tool.mmap2 = delegate_mmap2; + tool->tool.comm = delegate_comm; + tool->tool.namespaces = delegate_namespaces; + tool->tool.cgroup = delegate_cgroup; + tool->tool.fork = delegate_fork; + tool->tool.exit = delegate_exit; + tool->tool.lost = delegate_lost; + tool->tool.lost_samples = delegate_lost_samples; + tool->tool.aux = delegate_aux; + tool->tool.itrace_start = delegate_itrace_start; + tool->tool.aux_output_hw_id = delegate_aux_output_hw_id; + tool->tool.context_switch = delegate_context_switch; + tool->tool.throttle = delegate_throttle; + tool->tool.unthrottle = delegate_unthrottle; + tool->tool.ksymbol = delegate_ksymbol; + tool->tool.bpf = delegate_bpf; + tool->tool.text_poke = delegate_text_poke; + tool->tool.callchain_deferred = delegate_callchain_deferred; + + tool->tool.attr = delegate_attr; + tool->tool.event_update = delegate_event_update; + + tool->tool.tracing_data = delegate_tracing_data; + + tool->tool.finished_round = delegate_finished_round; + + tool->tool.build_id = delegate_build_id; + tool->tool.id_index = delegate_id_index; + tool->tool.auxtrace_info = delegate_auxtrace_info; + tool->tool.auxtrace_error = delegate_auxtrace_error; + tool->tool.time_conv = delegate_time_conv; + tool->tool.thread_map = delegate_thread_map; + tool->tool.cpu_map = delegate_cpu_map; + tool->tool.stat_config = delegate_stat_config; + tool->tool.stat = delegate_stat; + tool->tool.stat_round = delegate_stat_round; + tool->tool.feature = delegate_feature; + tool->tool.finished_init = delegate_finished_init; + tool->tool.bpf_metadata = delegate_bpf_metadata; + tool->tool.compressed = delegate_compressed; + tool->tool.auxtrace = delegate_auxtrace; +} diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index c957fb849ac6..e96b69d25a5b 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -15,23 +15,25 @@ struct perf_tool; struct machine; struct ordered_events; -typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event, +typedef int (*event_sample)(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct evsel *evsel, struct machine *machine); -typedef int (*event_op)(struct perf_tool *tool, union perf_event *event, +typedef int (*event_op)(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -typedef int (*event_attr_op)(struct perf_tool *tool, +typedef int (*event_attr_op)(const struct perf_tool *tool, union perf_event *event, struct evlist **pevlist); -typedef int (*event_op2)(struct perf_session *session, union perf_event *event); -typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event); -typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data, - const char *str); +typedef int (*event_op2)(const struct perf_tool *tool, struct perf_session *session, + union perf_event *event); +typedef s64 (*event_op3)(const struct perf_tool *tool, struct perf_session *session, + union perf_event *event); +typedef int (*event_op4)(const struct 
perf_tool *tool, struct perf_session *session, + union perf_event *event, u64 data, const char *str); -typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event, +typedef int (*event_oe)(const struct perf_tool *tool, union perf_event *event, struct ordered_events *oe); enum show_feature_header { @@ -42,7 +44,8 @@ enum show_feature_header { struct perf_tool { event_sample sample, - read; + read, + callchain_deferred; event_op mmap, mmap2, comm, @@ -77,7 +80,8 @@ struct perf_tool { stat, stat_round, feature, - finished_init; + finished_init, + bpf_metadata; event_op4 compressed; event_op3 auxtrace; bool ordered_events; @@ -85,7 +89,28 @@ struct perf_tool { bool namespace_events; bool cgroup_events; bool no_warn; + bool dont_split_sample_group; + bool merge_deferred_callchains; enum show_feature_header show_feat_hdr; }; +void perf_tool__init(struct perf_tool *tool, bool ordered_events); + +bool perf_tool__compressed_is_stub(const struct perf_tool *tool); + +int process_event_sample_stub(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel, + struct machine *machine); + +struct delegate_tool { + /** @tool: The actual tool that calls the delegate. */ + struct perf_tool tool; + /** @delegate: The tool that is delegated to. */ + struct perf_tool *delegate; +}; + +void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate); + #endif /* __PERF_TOOL_H */ diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c new file mode 100644 index 000000000000..37c4eae0bef1 --- /dev/null +++ b/tools/perf/util/tool_pmu.c @@ -0,0 +1,598 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "cgroup.h" +#include "counts.h" +#include "cputopo.h" +#include "debug.h" +#include "evsel.h" +#include "pmu.h" +#include "print-events.h" +#include "smt.h" +#include "stat.h" +#include "time-utils.h" +#include "tool_pmu.h" +#include "tsc.h" +#include <api/fs/fs.h> +#include <api/io.h> +#include <internal/threadmap.h> +#include <perf/cpumap.h> +#include <perf/threadmap.h> +#include <fcntl.h> +#include <strings.h> + +static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = { + NULL, + "duration_time", + "user_time", + "system_time", + "has_pmem", + "num_cores", + "num_cpus", + "num_cpus_online", + "num_dies", + "num_packages", + "slots", + "smt_on", + "system_tsc_freq", + "core_wide", + "target_cpu", +}; + +bool tool_pmu__skip_event(const char *name __maybe_unused) +{ +#if !defined(__aarch64__) + /* The slots event should only appear on arm64. */ + if (strcasecmp(name, "slots") == 0) + return true; +#endif +#if !defined(__i386__) && !defined(__x86_64__) + /* The system_tsc_freq event should only appear on x86. 
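+ * tool_pmu__num_skip_events() below must be kept in sync with the
+ * #if conditions used here.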
*/ + if (strcasecmp(name, "system_tsc_freq") == 0) + return true; +#endif + return false; +} + +int tool_pmu__num_skip_events(void) +{ + int num = 0; + +#if !defined(__aarch64__) + num++; +#endif +#if !defined(__i386__) && !defined(__x86_64__) + num++; +#endif + return num; +} + +const char *tool_pmu__event_to_str(enum tool_pmu_event ev) +{ + if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) && + !tool_pmu__skip_event(tool_pmu__event_names[ev])) + return tool_pmu__event_names[ev]; + + return NULL; +} + +enum tool_pmu_event tool_pmu__str_to_event(const char *str) +{ + int i; + + if (tool_pmu__skip_event(str)) + return TOOL_PMU__EVENT_NONE; + + tool_pmu__for_each_event(i) { + if (!strcasecmp(str, tool_pmu__event_names[i])) + return i; + } + return TOOL_PMU__EVENT_NONE; +} + +bool perf_pmu__is_tool(const struct perf_pmu *pmu) +{ + return pmu && pmu->type == PERF_PMU_TYPE_TOOL; +} + +bool evsel__is_tool(const struct evsel *evsel) +{ + return perf_pmu__is_tool(evsel->pmu); +} + +enum tool_pmu_event evsel__tool_event(const struct evsel *evsel) +{ + if (!evsel__is_tool(evsel)) + return TOOL_PMU__EVENT_NONE; + + return (enum tool_pmu_event)evsel->core.attr.config; +} + +const char *evsel__tool_pmu_event_name(const struct evsel *evsel) +{ + return tool_pmu__event_to_str(evsel->core.attr.config); +} + +struct perf_cpu_map *tool_pmu__cpus(struct perf_event_attr *attr) +{ + static struct perf_cpu_map *cpu0_map; + enum tool_pmu_event event = (enum tool_pmu_event)attr->config; + + if (event <= TOOL_PMU__EVENT_NONE || event >= TOOL_PMU__EVENT_MAX) { + pr_err("Invalid tool PMU event config %llx\n", attr->config); + return NULL; + } + if (event == TOOL_PMU__EVENT_USER_TIME || event == TOOL_PMU__EVENT_SYSTEM_TIME) + return cpu_map__online(); + + if (!cpu0_map) + cpu0_map = perf_cpu_map__new_int(0); + return perf_cpu_map__get(cpu0_map); +} + +static bool read_until_char(struct io *io, char e) +{ + int c; + + do { + c = io__get_char(io); + if (c == -1) + return false; + } while (c != e); + return true; +} + +static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val) +{ + char buf[256]; + struct io io; + int i; + + io__init(&io, fd, buf, sizeof(buf)); + + /* Skip lines to relevant CPU. */ + for (i = -1; i < cpu.cpu; i++) { + if (!read_until_char(&io, '\n')) + return -EINVAL; + } + /* Skip to "cpu". */ + if (io__get_char(&io) != 'c') return -EINVAL; + if (io__get_char(&io) != 'p') return -EINVAL; + if (io__get_char(&io) != 'u') return -EINVAL; + + /* Skip N of cpuN. */ + if (!read_until_char(&io, ' ')) + return -EINVAL; + + i = 1; + while (true) { + if (io__get_dec(&io, val) != ' ') + break; + if (field == i) + return 0; + i++; + } + return -EINVAL; +} + +static int read_pid_stat_field(int fd, int field, __u64 *val) +{ + char buf[256]; + struct io io; + int c, i; + + io__init(&io, fd, buf, sizeof(buf)); + if (io__get_dec(&io, val) != ' ') + return -EINVAL; + if (field == 1) + return 0; + + /* Skip comm. */ + if (io__get_char(&io) != '(' || !read_until_char(&io, ')')) + return -EINVAL; + if (field == 2) + return -EINVAL; /* String can't be returned. */ + + /* Skip state */ + if (io__get_char(&io) != ' ' || io__get_char(&io) == -1) + return -EINVAL; + if (field == 3) + return -EINVAL; /* String can't be returned. 
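+	 * read_pid_stat_field() only extracts numeric fields; field 2 is
+	 * the comm string and field 3 the single-character state.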
*/ + + /* Loop over numeric fields*/ + if (io__get_char(&io) != ' ') + return -EINVAL; + + i = 4; + while (true) { + c = io__get_dec(&io, val); + if (c == -1) + return -EINVAL; + if (c == -2) { + /* Assume a -ve was read */ + c = io__get_dec(&io, val); + *val *= -1; + } + if (c != ' ') + return -EINVAL; + if (field == i) + return 0; + i++; + } + return -EINVAL; +} + +int evsel__tool_pmu_prepare_open(struct evsel *evsel, + struct perf_cpu_map *cpus, + int nthreads) +{ + if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME || + evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) && + !evsel->start_times) { + evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus), + nthreads, + sizeof(__u64)); + if (!evsel->start_times) + return -ENOMEM; + } + return 0; +} + +#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) + +int evsel__tool_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx) +{ + enum tool_pmu_event ev = evsel__tool_event(evsel); + int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno; + + if (ev == TOOL_PMU__EVENT_NUM_CPUS) + return 0; + + if (ev == TOOL_PMU__EVENT_DURATION_TIME) { + if (evsel->core.attr.sample_period) /* no sampling */ + return -EINVAL; + evsel->start_time = rdclock(); + return 0; + } + + if (evsel->cgrp) + pid = evsel->cgrp->fd; + + nthreads = perf_thread_map__nr(threads); + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { + for (thread = 0; thread < nthreads; thread++) { + if (!evsel->cgrp && !evsel->core.system_wide) + pid = perf_thread_map__pid(threads, thread); + + if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) { + bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME; + __u64 *start_time = NULL; + int fd; + + if (evsel->core.attr.sample_period) { + /* no sampling */ + err = -EINVAL; + goto out_close; + } + if (pid > -1) { + char buf[64]; + + snprintf(buf, sizeof(buf), "/proc/%d/stat", pid); + fd = open(buf, O_RDONLY); + evsel->pid_stat = true; + } else { + fd = open("/proc/stat", O_RDONLY); + } + FD(evsel, idx, thread) = fd; + if (fd < 0) { + err = -errno; + goto out_close; + } + start_time = xyarray__entry(evsel->start_times, idx, thread); + if (pid > -1) { + err = read_pid_stat_field(fd, system ? 15 : 14, + start_time); + } else { + struct perf_cpu cpu; + + cpu = perf_cpu_map__cpu(evsel->core.cpus, idx); + err = read_stat_field(fd, cpu, system ? 
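+				/* /proc/stat "cpuN" line: field 1 is user time, field 3 is system time, in clock ticks */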
3 : 1, + start_time); + } + if (err) + goto out_close; + } + + } + } + return 0; +out_close: + if (err) + threads->err_thread = thread; + + old_errno = errno; + do { + while (--thread >= 0) { + if (FD(evsel, idx, thread) >= 0) + close(FD(evsel, idx, thread)); + FD(evsel, idx, thread) = -1; + } + thread = nthreads; + } while (--idx >= 0); + errno = old_errno; + return err; +} + +#if !defined(__i386__) && !defined(__x86_64__) +u64 arch_get_tsc_freq(void) +{ + return 0; +} +#endif + +#if !defined(__aarch64__) +u64 tool_pmu__cpu_slots_per_cycle(void) +{ + return 0; +} +#endif + +static bool has_pmem(void) +{ + static bool has_pmem, cached; + const char *sysfs = sysfs__mountpoint(); + char path[PATH_MAX]; + + if (!cached) { + snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs); + has_pmem = access(path, F_OK) == 0; + cached = true; + } + return has_pmem; +} + +bool tool_pmu__read_event(enum tool_pmu_event ev, + struct evsel *evsel, + bool system_wide, + const char *user_requested_cpu_list, + u64 *result) +{ + const struct cpu_topology *topology; + + switch (ev) { + case TOOL_PMU__EVENT_HAS_PMEM: + *result = has_pmem() ? 1 : 0; + return true; + + case TOOL_PMU__EVENT_NUM_CORES: + topology = online_topology(); + *result = topology->core_cpus_lists; + return true; + + case TOOL_PMU__EVENT_NUM_CPUS: + if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) { + /* No evsel to be specific to. */ + *result = cpu__max_present_cpu().cpu; + } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) { + /* Evsel just has specific CPUs. */ + *result = perf_cpu_map__nr(evsel->core.cpus); + } else { + /* + * "Any CPU" event that can be scheduled on any CPU in + * the PMU's cpumask. The PMU cpumask should be saved in + * pmu_cpus. If not present fall back to max. + */ + if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) + *result = perf_cpu_map__nr(evsel->core.pmu_cpus); + else + *result = cpu__max_present_cpu().cpu; + } + return true; + + case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: { + struct perf_cpu_map *online = cpu_map__online(); + + if (!online) + return false; + + if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) { + /* No evsel to be specific to. */ + *result = perf_cpu_map__nr(online); + } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) { + /* Evsel just has specific CPUs. */ + struct perf_cpu_map *tmp = + perf_cpu_map__intersect(online, evsel->core.cpus); + + *result = perf_cpu_map__nr(tmp); + perf_cpu_map__put(tmp); + } else { + /* + * "Any CPU" event that can be scheduled on any CPU in + * the PMU's cpumask. The PMU cpumask should be saved in + * pmu_cpus, if not present then just the online cpu + * mask. + */ + if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) { + struct perf_cpu_map *tmp = + perf_cpu_map__intersect(online, evsel->core.pmu_cpus); + + *result = perf_cpu_map__nr(tmp); + perf_cpu_map__put(tmp); + } else { + *result = perf_cpu_map__nr(online); + } + } + perf_cpu_map__put(online); + return true; + } + case TOOL_PMU__EVENT_NUM_DIES: + topology = online_topology(); + *result = topology->die_cpus_lists; + return true; + + case TOOL_PMU__EVENT_NUM_PACKAGES: + topology = online_topology(); + *result = topology->package_cpus_lists; + return true; + + case TOOL_PMU__EVENT_SLOTS: + *result = tool_pmu__cpu_slots_per_cycle(); + return *result ? true : false; + + case TOOL_PMU__EVENT_SMT_ON: + *result = smt_on() ? 
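+			/* 1 when SMT (hyperthreading) siblings are online */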
1 : 0; + return true; + + case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: + *result = arch_get_tsc_freq(); + return true; + + case TOOL_PMU__EVENT_CORE_WIDE: + *result = core_wide(system_wide, user_requested_cpu_list) ? 1 : 0; + return true; + + case TOOL_PMU__EVENT_TARGET_CPU: + *result = system_wide || (user_requested_cpu_list != NULL) ? 1 : 0; + return true; + + case TOOL_PMU__EVENT_NONE: + case TOOL_PMU__EVENT_DURATION_TIME: + case TOOL_PMU__EVENT_USER_TIME: + case TOOL_PMU__EVENT_SYSTEM_TIME: + case TOOL_PMU__EVENT_MAX: + default: + return false; + } +} + +static void perf_counts__update(struct perf_counts_values *count, + const struct perf_counts_values *old_count, + bool raw, u64 val) +{ + /* + * The values of enabled and running must make a ratio of 100%. The + * exact values don't matter as long as they are non-zero to avoid + * issues with evsel__count_has_error. + */ + if (old_count) { + count->val = raw ? val : old_count->val + val; + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + count->lost = old_count->lost; + } else { + count->val = val; + count->run++; + count->ena++; + count->lost = 0; + } +} + +int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) +{ + __u64 *start_time, cur_time, delta_start; + int err = 0; + struct perf_counts_values *count, *old_count = NULL; + bool adjust = false; + enum tool_pmu_event ev = evsel__tool_event(evsel); + + count = perf_counts(evsel->counts, cpu_map_idx, thread); + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + + switch (ev) { + case TOOL_PMU__EVENT_HAS_PMEM: + case TOOL_PMU__EVENT_NUM_CORES: + case TOOL_PMU__EVENT_NUM_CPUS: + case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: + case TOOL_PMU__EVENT_NUM_DIES: + case TOOL_PMU__EVENT_NUM_PACKAGES: + case TOOL_PMU__EVENT_SLOTS: + case TOOL_PMU__EVENT_SMT_ON: + case TOOL_PMU__EVENT_CORE_WIDE: + case TOOL_PMU__EVENT_TARGET_CPU: + case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: { + u64 val = 0; + + if (cpu_map_idx == 0 && thread == 0) { + if (!tool_pmu__read_event(ev, evsel, + stat_config.system_wide, + stat_config.user_requested_cpu_list, + &val)) { + count->lost++; + val = 0; + } + } + perf_counts__update(count, old_count, /*raw=*/false, val); + return 0; + } + case TOOL_PMU__EVENT_DURATION_TIME: + /* + * Pretend duration_time is only on the first CPU and thread, or + * else aggregation will scale duration_time by the number of + * CPUs/threads. + */ + start_time = &evsel->start_time; + if (cpu_map_idx == 0 && thread == 0) + cur_time = rdclock(); + else + cur_time = *start_time; + break; + case TOOL_PMU__EVENT_USER_TIME: + case TOOL_PMU__EVENT_SYSTEM_TIME: { + bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME; + int fd = FD(evsel, cpu_map_idx, thread); + + start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread); + lseek(fd, SEEK_SET, 0); + if (evsel->pid_stat) { + /* The event exists solely on 1 CPU. */ + if (cpu_map_idx == 0) + err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time); + else + cur_time = 0; + } else { + /* The event is for all threads. */ + if (thread == 0) { + struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, + cpu_map_idx); + + err = read_stat_field(fd, cpu, system ? 
3 : 1, &cur_time); + } else { + cur_time = 0; + } + } + adjust = true; + break; + } + case TOOL_PMU__EVENT_NONE: + case TOOL_PMU__EVENT_MAX: + default: + err = -EINVAL; + } + if (err) + return err; + + delta_start = cur_time - *start_time; + if (adjust) { + __u64 ticks_per_sec = sysconf(_SC_CLK_TCK); + + delta_start *= 1e9 / ticks_per_sec; + } + perf_counts__update(count, old_count, /*raw=*/true, delta_start); + return 0; +} + +struct perf_pmu *tool_pmu__new(void) +{ + struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu)); + + if (!tool) + return NULL; + + if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) { + perf_pmu__delete(tool); + return NULL; + } + tool->events_table = find_core_events_table("common", "common"); + return tool; +} diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h new file mode 100644 index 000000000000..ea343d1983d3 --- /dev/null +++ b/tools/perf/util/tool_pmu.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TOOL_PMU_H +#define __TOOL_PMU_H + +#include "pmu.h" + +struct evsel; +struct perf_thread_map; +struct print_callbacks; + +enum tool_pmu_event { + TOOL_PMU__EVENT_NONE = 0, + TOOL_PMU__EVENT_DURATION_TIME, + TOOL_PMU__EVENT_USER_TIME, + TOOL_PMU__EVENT_SYSTEM_TIME, + TOOL_PMU__EVENT_HAS_PMEM, + TOOL_PMU__EVENT_NUM_CORES, + TOOL_PMU__EVENT_NUM_CPUS, + TOOL_PMU__EVENT_NUM_CPUS_ONLINE, + TOOL_PMU__EVENT_NUM_DIES, + TOOL_PMU__EVENT_NUM_PACKAGES, + TOOL_PMU__EVENT_SLOTS, + TOOL_PMU__EVENT_SMT_ON, + TOOL_PMU__EVENT_SYSTEM_TSC_FREQ, + TOOL_PMU__EVENT_CORE_WIDE, + TOOL_PMU__EVENT_TARGET_CPU, + + TOOL_PMU__EVENT_MAX, +}; + +#define tool_pmu__for_each_event(ev) \ + for ((ev) = TOOL_PMU__EVENT_DURATION_TIME; (ev) < TOOL_PMU__EVENT_MAX; ev++) + +const char *tool_pmu__event_to_str(enum tool_pmu_event ev); +enum tool_pmu_event tool_pmu__str_to_event(const char *str); +bool tool_pmu__skip_event(const char *name); +int tool_pmu__num_skip_events(void); + +bool tool_pmu__read_event(enum tool_pmu_event ev, + struct evsel *evsel, + bool system_wide, + const char *user_requested_cpu_list, + u64 *result); + + +u64 tool_pmu__cpu_slots_per_cycle(void); + +bool perf_pmu__is_tool(const struct perf_pmu *pmu); +struct perf_cpu_map *tool_pmu__cpus(struct perf_event_attr *attr); + +bool evsel__is_tool(const struct evsel *evsel); +enum tool_pmu_event evsel__tool_event(const struct evsel *evsel); +const char *evsel__tool_pmu_event_name(const struct evsel *evsel); +int evsel__tool_pmu_prepare_open(struct evsel *evsel, + struct perf_cpu_map *cpus, + int nthreads); +int evsel__tool_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx); +int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread); + +struct perf_pmu *tool_pmu__new(void); + +#endif /* __TOOL_PMU_H */ diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index be7157de0451..b06e10a116bb 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -28,6 +28,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) struct record_opts *opts = &top->record_opts; struct target *target = &opts->target; size_t ret = 0; + int nr_cpus; if (top->samples) { samples_per_sec = top->samples / top->delay_secs; @@ -87,25 +88,23 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) else if (target->tid) ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s", target->tid); - else if (target->uid_str != NULL) + else if (top->uid_str != NULL) ret += SNPRINTF(bf + ret, 
size - ret, " (uid: %s", - target->uid_str); + top->uid_str); else ret += SNPRINTF(bf + ret, size - ret, " (all"); + nr_cpus = perf_cpu_map__nr(top->evlist->core.user_requested_cpus); if (target->cpu_list) ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1 - ? "s" : "", + nr_cpus > 1 ? "s" : "", target->cpu_list); else { if (target->tid) ret += SNPRINTF(bf + ret, size - ret, ")"); else ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", - perf_cpu_map__nr(top->evlist->core.user_requested_cpus), - perf_cpu_map__nr(top->evlist->core.user_requested_cpus) > 1 - ? "s" : ""); + nr_cpus, nr_cpus > 1 ? "s" : ""); } perf_top__reset_sample_counters(top); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index a8b0d79bd96c..04ff926846be 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -21,7 +21,6 @@ struct perf_top { struct perf_tool tool; struct evlist *evlist, *sb_evlist; struct record_opts record_opts; - struct annotation_options annotation_opts; struct evswitch evswitch; /* * Symbols will be added here in perf_event__process_sample and will @@ -49,6 +48,7 @@ struct perf_top { const char *sym_filter; float min_percent; unsigned int nr_threads_synthesize; + const char *uid_str; struct { struct ordered_events *in; diff --git a/tools/perf/util/tp_pmu.c b/tools/perf/util/tp_pmu.c new file mode 100644 index 000000000000..eddb9807131a --- /dev/null +++ b/tools/perf/util/tp_pmu.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "tp_pmu.h" +#include "pmus.h" +#include <api/fs/fs.h> +#include <api/fs/tracing_path.h> +#include <api/io_dir.h> +#include <linux/kernel.h> +#include <errno.h> +#include <string.h> + +int tp_pmu__id(const char *sys, const char *name) +{ + char *tp_dir = get_events_file(sys); + char path[PATH_MAX]; + int id, err; + + if (!tp_dir) + return -1; + + scnprintf(path, PATH_MAX, "%s/%s/id", tp_dir, name); + put_events_file(tp_dir); + err = filename__read_int(path, &id); + if (err) + return err; + + return id; +} + + +int tp_pmu__for_each_tp_event(const char *sys, void *state, tp_event_callback cb) +{ + char *evt_path; + struct io_dirent64 *evt_ent; + struct io_dir evt_dir; + int ret = 0; + + evt_path = get_events_file(sys); + if (!evt_path) + return -errno; + + io_dir__init(&evt_dir, open(evt_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (evt_dir.dirfd < 0) { + ret = -errno; + put_events_file(evt_path); + return ret; + } + put_events_file(evt_path); + + while (!ret && (evt_ent = io_dir__readdir(&evt_dir))) { + if (!strcmp(evt_ent->d_name, ".") + || !strcmp(evt_ent->d_name, "..") + || !strcmp(evt_ent->d_name, "enable") + || !strcmp(evt_ent->d_name, "filter")) + continue; + + ret = cb(state, sys, evt_ent->d_name); + if (ret) + break; + } + close(evt_dir.dirfd); + return ret; +} + +int tp_pmu__for_each_tp_sys(void *state, tp_sys_callback cb) +{ + struct io_dirent64 *events_ent; + struct io_dir events_dir; + int ret = 0; + char *events_dir_path = get_tracing_file("events"); + + if (!events_dir_path) + return -errno; + + io_dir__init(&events_dir, open(events_dir_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (events_dir.dirfd < 0) { + ret = -errno; + put_events_file(events_dir_path); + return ret; + } + put_events_file(events_dir_path); + + while (!ret && (events_ent = io_dir__readdir(&events_dir))) { + if (!strcmp(events_ent->d_name, ".") || + !strcmp(events_ent->d_name, "..") || + !strcmp(events_ent->d_name, "enable") || + !strcmp(events_ent->d_name, 
"header_event") || + !strcmp(events_ent->d_name, "header_page")) + continue; + + ret = cb(state, events_ent->d_name); + } + close(events_dir.dirfd); + return ret; +} + +bool perf_pmu__is_tracepoint(const struct perf_pmu *pmu) +{ + return pmu->type == PERF_TYPE_TRACEPOINT; +} + +struct for_each_event_args { + void *state; + pmu_event_callback cb; + const struct perf_pmu *pmu; +}; + +static int for_each_event_cb(void *state, const char *sys_name, const char *evt_name) +{ + struct for_each_event_args *args = state; + char name[2 * FILENAME_MAX + 2]; + /* 16 possible hex digits and 22 other characters and \0. */ + char encoding[16 + 22]; + char *format = NULL; + size_t format_size; + struct pmu_event_info info = { + .pmu = args->pmu, + .pmu_name = args->pmu->name, + .event_type_desc = "Tracepoint event", + }; + char *tp_dir = get_events_file(sys_name); + char path[PATH_MAX]; + int id, err; + + if (!tp_dir) + return -1; + + scnprintf(path, sizeof(path), "%s/%s/id", tp_dir, evt_name); + err = filename__read_int(path, &id); + if (err == 0) { + snprintf(encoding, sizeof(encoding), "tracepoint/config=0x%x/", id); + info.encoding_desc = encoding; + } + + scnprintf(path, sizeof(path), "%s/%s/format", tp_dir, evt_name); + put_events_file(tp_dir); + err = filename__read_str(path, &format, &format_size); + if (err == 0) { + info.long_desc = format; + for (size_t i = 0 ; i < format_size; i++) { + /* Swap tabs to spaces due to some rendering issues. */ + if (format[i] == '\t') + format[i] = ' '; + } + } + snprintf(name, sizeof(name), "%s:%s", sys_name, evt_name); + info.name = name; + err = args->cb(args->state, &info); + free(format); + return err; +} + +static int for_each_event_sys_cb(void *state, const char *sys_name) +{ + return tp_pmu__for_each_tp_event(sys_name, state, for_each_event_cb); +} + +int tp_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb) +{ + struct for_each_event_args args = { + .state = state, + .cb = cb, + .pmu = pmu, + }; + + return tp_pmu__for_each_tp_sys(&args, for_each_event_sys_cb); +} + +static int num_events_cb(void *state, const char *sys_name __maybe_unused, + const char *evt_name __maybe_unused) +{ + size_t *count = state; + + (*count)++; + return 0; +} + +static int num_events_sys_cb(void *state, const char *sys_name) +{ + return tp_pmu__for_each_tp_event(sys_name, state, num_events_cb); +} + +size_t tp_pmu__num_events(struct perf_pmu *pmu __maybe_unused) +{ + size_t count = 0; + + tp_pmu__for_each_tp_sys(&count, num_events_sys_cb); + return count; +} + +bool tp_pmu__have_event(struct perf_pmu *pmu __maybe_unused, const char *name) +{ + char *dup_name, *colon; + int id; + + colon = strchr(name, ':'); + if (colon == NULL) + return false; + + dup_name = strdup(name); + if (!dup_name) + return false; + + colon = dup_name + (colon - name); + *colon = '\0'; + id = tp_pmu__id(dup_name, colon + 1); + free(dup_name); + return id >= 0; +} diff --git a/tools/perf/util/tp_pmu.h b/tools/perf/util/tp_pmu.h new file mode 100644 index 000000000000..30456bd6943d --- /dev/null +++ b/tools/perf/util/tp_pmu.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __TP_PMU_H +#define __TP_PMU_H + +#include "pmu.h" + +typedef int (*tp_sys_callback)(void *state, const char *sys_name); +typedef int (*tp_event_callback)(void *state, const char *sys_name, const char *evt_name); + +int tp_pmu__id(const char *sys, const char *name); +int tp_pmu__for_each_tp_event(const char *sys, void *state, tp_event_callback cb); +int 
tp_pmu__for_each_tp_sys(void *state, tp_sys_callback cb); + +bool perf_pmu__is_tracepoint(const struct perf_pmu *pmu); +int tp_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb); +size_t tp_pmu__num_events(struct perf_pmu *pmu); +bool tp_pmu__have_event(struct perf_pmu *pmu, const char *name); + +#endif /* __TP_PMU_H */ diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 319ccf09a435..c8755679281e 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -313,7 +313,8 @@ static int record_event_files(struct tracepoint_path *tps) } err = 0; out: - closedir(dir); + if (dir) + closedir(dir); put_tracing_file(path); return err; diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 2d3c2576bab7..9c015fc2bcfb 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -12,7 +12,7 @@ #include <linux/ctype.h> #include <linux/kernel.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> static int get_common_field(struct scripting_context *context, int *offset, int *size, const char *type) @@ -99,7 +99,7 @@ unsigned long long read_size(struct tep_event *event, void *ptr, int size) return tep_read_number(event->tep, ptr, size); } -void event_format__fprintf(struct tep_event *event, +void event_format__fprintf(const struct tep_event *event, int cpu, void *data, int size, FILE *fp) { struct tep_record record; @@ -116,10 +116,117 @@ void event_format__fprintf(struct tep_event *event, trace_seq_destroy(&s); } -void event_format__print(struct tep_event *event, - int cpu, void *data, int size) +/* + * prev_state is of size long, which is 32 bits on 32 bit architectures. + * As it needs to have the same bits for both 32 bit and 64 bit architectures + * we can just assume that the flags we care about will all be within + * the 32 bits. 
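+ * convert_sym() below places the first letter of each single-bit
+ * flag's name at that bit's index, producing the compact task-state
+ * letter string reported for the sched_switch prev_state field.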
+ */ +#define MAX_STATE_BITS 32 + +static const char *convert_sym(struct tep_print_flag_sym *sym) +{ + static char save_states[MAX_STATE_BITS + 1]; + + memset(save_states, 0, sizeof(save_states)); + + /* This is the flags for the prev_state_field, now make them into a string */ + for (; sym; sym = sym->next) { + long bitmask = strtoul(sym->value, NULL, 0); + int i; + + for (i = 0; !(bitmask & 1); i++) + bitmask >>= 1; + + if (i >= MAX_STATE_BITS) + continue; + + save_states[i] = sym->str[0]; + } + + return save_states; +} + +static struct tep_print_arg_field * +find_arg_field(struct tep_format_field *prev_state_field, struct tep_print_arg *arg) +{ + struct tep_print_arg_field *field; + + if (!arg) + return NULL; + + if (arg->type == TEP_PRINT_FIELD) + return &arg->field; + + if (arg->type == TEP_PRINT_OP) { + field = find_arg_field(prev_state_field, arg->op.left); + if (field && field->field == prev_state_field) + return field; + field = find_arg_field(prev_state_field, arg->op.right); + if (field && field->field == prev_state_field) + return field; + } + return NULL; +} + +static struct tep_print_flag_sym * +test_flags(struct tep_format_field *prev_state_field, struct tep_print_arg *arg) { - return event_format__fprintf(event, cpu, data, size, stdout); + struct tep_print_arg_field *field; + + field = find_arg_field(prev_state_field, arg->flags.field); + if (!field) + return NULL; + + return arg->flags.flags; +} + +static struct tep_print_flag_sym * +search_op(struct tep_format_field *prev_state_field, struct tep_print_arg *arg) +{ + struct tep_print_flag_sym *sym = NULL; + + if (!arg) + return NULL; + + if (arg->type == TEP_PRINT_OP) { + sym = search_op(prev_state_field, arg->op.left); + if (sym) + return sym; + + sym = search_op(prev_state_field, arg->op.right); + if (sym) + return sym; + } else if (arg->type == TEP_PRINT_FLAGS) { + sym = test_flags(prev_state_field, arg); + } + + return sym; +} + +const char *parse_task_states(struct tep_format_field *state_field) +{ + struct tep_print_flag_sym *sym; + struct tep_print_arg *arg; + struct tep_event *event; + + event = state_field->event; + + /* + * Look at the event format fields, and search for where + * the prev_state is parsed via the format flags. + */ + for (arg = event->print_fmt.args; arg; arg = arg->next) { + /* + * Currently, the __print_flags() for the prev_state + * is embedded in operations, so they too must be + * searched. 
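+	 * (sched_switch typically masks prev_state with '&' operations
+	 * before __print_flags(), so search_op() recurses through both
+	 * sides of every operation.)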
+ */ + sym = search_op(state_field, arg); + if (sym) + return convert_sym(sym); + } + return NULL; } void parse_ftrace_printk(struct tep_handle *pevent, diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 1162c49b8082..ecbbb93f0185 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -11,7 +11,7 @@ #include <sys/stat.h> #include <sys/wait.h> #include <sys/mman.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <fcntl.h> #include <unistd.h> #include <errno.h> diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index bd0000300c77..72abb28b7b5a 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -10,17 +10,97 @@ #include <string.h> #include <errno.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif +#include "archinsn.h" #include "debug.h" +#include "event.h" #include "trace-event.h" #include "evsel.h" +#include <linux/perf_event.h> #include <linux/zalloc.h> #include "util/sample.h" +unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; + struct scripting_context *scripting_context; +struct script_spec { + struct list_head node; + struct scripting_ops *ops; + char spec[]; +}; + +static LIST_HEAD(script_specs); + +static struct script_spec *script_spec__new(const char *spec, + struct scripting_ops *ops) +{ + struct script_spec *s = malloc(sizeof(*s) + strlen(spec) + 1); + + if (s != NULL) { + strcpy(s->spec, spec); + s->ops = ops; + } + + return s; +} + +static void script_spec__add(struct script_spec *s) +{ + list_add_tail(&s->node, &script_specs); +} + +static struct script_spec *script_spec__find(const char *spec) +{ + struct script_spec *s; + + list_for_each_entry(s, &script_specs, node) + if (strcasecmp(s->spec, spec) == 0) + return s; + return NULL; +} + +static int script_spec_register(const char *spec, struct scripting_ops *ops) +{ + struct script_spec *s; + + s = script_spec__find(spec); + if (s) + return -1; + + s = script_spec__new(spec, ops); + if (!s) + return -1; + + script_spec__add(s); + return 0; +} + +struct scripting_ops *script_spec__lookup(const char *spec) +{ + struct script_spec *s = script_spec__find(spec); + + if (!s) + return NULL; + + return s->ops; +} + +int script_spec__for_each(int (*cb)(struct scripting_ops *ops, const char *spec)) +{ + struct script_spec *s; + int ret = 0; + + list_for_each_entry(s, &script_specs, node) { + ret = cb(s->ops, s->spec); + if (ret) + break; + } + return ret; +} + void scripting_context__update(struct scripting_context *c, union perf_event *event, struct perf_sample *sample, @@ -28,12 +108,14 @@ void scripting_context__update(struct scripting_context *c, struct addr_location *al, struct addr_location *addr_al) { - c->event_data = sample->raw_data; - c->pevent = NULL; #ifdef HAVE_LIBTRACEEVENT - if (evsel->tp_format) - c->pevent = evsel->tp_format->tep; + const struct tep_event *tp_format = evsel__tp_format(evsel); + + c->pevent = tp_format ? 
tp_format->tep : NULL; +#else + c->pevent = NULL; #endif + c->event_data = sample->raw_data; c->event = event; c->sample = sample; c->evsel = evsel; @@ -191,3 +273,154 @@ void setup_perl_scripting(void) } #endif #endif + +#if !defined(__i386__) && !defined(__x86_64__) +void arch_fetch_insn(struct perf_sample *sample __maybe_unused, + struct thread *thread __maybe_unused, + struct machine *machine __maybe_unused) +{ +} +#endif + +void script_fetch_insn(struct perf_sample *sample, struct thread *thread, + struct machine *machine, bool native_arch) +{ + if (sample->insn_len == 0 && native_arch) + arch_fetch_insn(sample, thread, machine); +} + +static const struct { + u32 flags; + const char *name; +} sample_flags[] = { + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "jcc"}, + {PERF_IP_FLAG_BRANCH, "jmp"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT, "int"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT, "iret"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET, "syscall"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET, "sysret"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, "async"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_INTERRUPT, + "hw int"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "tx abrt"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "tr strt"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vmentry"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vmexit"}, + {0, NULL} +}; + +static const struct { + u32 flags; + const char *name; +} branch_events[] = { + {PERF_IP_FLAG_BRANCH_MISS, "miss"}, + {PERF_IP_FLAG_NOT_TAKEN, "not_taken"}, + {0, NULL} +}; + +static int sample_flags_to_name(u32 flags, char *str, size_t size) +{ + int i; + const char *prefix; + int pos = 0, ret, ev_idx = 0; + u32 xf = flags & PERF_ADDITIONAL_STATE_MASK; + u32 types, events; + char xs[16] = { 0 }; + + /* Clear additional state bits */ + flags &= ~PERF_ADDITIONAL_STATE_MASK; + + if (flags & PERF_IP_FLAG_TRACE_BEGIN) + prefix = "tr strt "; + else if (flags & PERF_IP_FLAG_TRACE_END) + prefix = "tr end "; + else + prefix = ""; + + ret = snprintf(str + pos, size - pos, "%s", prefix); + if (ret < 0) + return ret; + pos += ret; + + flags &= ~(PERF_IP_FLAG_TRACE_BEGIN | PERF_IP_FLAG_TRACE_END); + + types = flags & ~PERF_IP_FLAG_BRANCH_EVENT_MASK; + for (i = 0; sample_flags[i].name; i++) { + if (sample_flags[i].flags != types) + continue; + + ret = snprintf(str + pos, size - pos, "%s", sample_flags[i].name); + if (ret < 0) + return ret; + pos += ret; + break; + } + + events = flags & PERF_IP_FLAG_BRANCH_EVENT_MASK; + for (i = 0; branch_events[i].name; i++) { + if (!(branch_events[i].flags & events)) + continue; + + ret = snprintf(str + pos, size - pos, !ev_idx ? "/%s" : ",%s", + branch_events[i].name); + if (ret < 0) + return ret; + pos += ret; + ev_idx++; + } + + /* Add an end character '/' for events */ + if (ev_idx) { + ret = snprintf(str + pos, size - pos, "/"); + if (ret < 0) + return ret; + pos += ret; + } + + if (!xf) + return pos; + + snprintf(xs, sizeof(xs), "(%s%s%s)", + flags & PERF_IP_FLAG_IN_TX ? "x" : "", + flags & PERF_IP_FLAG_INTR_DISABLE ? "D" : "", + flags & PERF_IP_FLAG_INTR_TOGGLE ? 
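+		 /* x = in transaction, D = interrupts disabled, t = interrupt state toggled */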
"t" : ""); + + /* Right align the string if its length is less than the limit */ + if ((pos + strlen(xs)) < SAMPLE_FLAGS_STR_ALIGNED_SIZE) + ret = snprintf(str + pos, size - pos, "%*s", + (int)(SAMPLE_FLAGS_STR_ALIGNED_SIZE - ret), xs); + else + ret = snprintf(str + pos, size - pos, " %s", xs); + if (ret < 0) + return ret; + + return pos + ret; +} + +int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz) +{ + const char *chars = PERF_IP_FLAG_CHARS; + const size_t n = strlen(PERF_IP_FLAG_CHARS); + size_t i, pos = 0; + int ret; + + ret = sample_flags_to_name(flags, str, sz); + if (ret > 0) + return ret; + + for (i = 0; i < n; i++, flags >>= 1) { + if ((flags & 1) && pos < sz) + str[pos++] = chars[i]; + } + for (; i < 32; i++, flags >>= 1) { + if ((flags & 1) && pos < sz) + str[pos++] = '?'; + } + if (pos < sz) + str[pos] = 0; + + return pos; +} diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 8ad75b31e09b..6a8c66c64b70 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -8,7 +8,7 @@ #include <fcntl.h> #include <linux/kernel.h> #include <linux/err.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <api/fs/tracing_path.h> #include <api/fs/fs.h> #include "trace-event.h" diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index a69ee29419f3..71e680bc3d4b 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -15,6 +15,7 @@ struct perf_tool; struct thread; struct tep_plugin_list; struct evsel; +struct tep_format_field; struct trace_event { struct tep_handle *pevent; @@ -38,12 +39,9 @@ trace_event__tp_format(const char *sys, const char *name); struct tep_event *trace_event__tp_format_id(int id); -void event_format__fprintf(struct tep_event *event, +void event_format__fprintf(const struct tep_event *event, int cpu, void *data, int size, FILE *fp); -void event_format__print(struct tep_event *event, - int cpu, void *data, int size); - int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size); int parse_event_file(struct tep_handle *pevent, char *buf, unsigned long size, char *sys); @@ -51,6 +49,8 @@ int parse_event_file(struct tep_handle *pevent, unsigned long long raw_field_value(struct tep_event *event, const char *name, void *data); +const char *parse_task_states(struct tep_format_field *state_field); + void parse_proc_kallsyms(struct tep_handle *pevent, char *file, unsigned int size); void parse_ftrace_printk(struct tep_handle *pevent, char *file, unsigned int size); void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int size); @@ -113,10 +113,11 @@ struct scripting_ops { extern unsigned int scripting_max_stack; -int script_spec_register(const char *spec, struct scripting_ops *ops); +struct scripting_ops *script_spec__lookup(const char *spec); +int script_spec__for_each(int (*cb)(struct scripting_ops *ops, const char *spec)); void script_fetch_insn(struct perf_sample *sample, struct thread *thread, - struct machine *machine); + struct machine *machine, bool native_arch); void setup_perl_scripting(void); void setup_python_scripting(void); @@ -144,10 +145,12 @@ int common_flags(struct scripting_context *context); int common_lock_depth(struct scripting_context *context); #define SAMPLE_FLAGS_BUF_SIZE 64 +#define SAMPLE_FLAGS_STR_ALIGNED_SIZE 21 + int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz); #if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) 
diff --git a/tools/perf/util/trace.h b/tools/perf/util/trace.h
new file mode 100644
index 000000000000..fbbcfe6f44fe
--- /dev/null
+++ b/tools/perf/util/trace.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef UTIL_TRACE_H
+#define UTIL_TRACE_H
+
+#include <stdio.h>  /* for FILE */
+
+enum trace_summary_mode {
+	SUMMARY__NONE = 0,
+	SUMMARY__BY_TOTAL,
+	SUMMARY__BY_THREAD,
+	SUMMARY__BY_CGROUP,
+};
+
+#ifdef HAVE_BPF_SKEL
+
+int trace_prepare_bpf_summary(enum trace_summary_mode mode);
+void trace_start_bpf_summary(void);
+void trace_end_bpf_summary(void);
+int trace_print_bpf_summary(FILE *fp, int max_summary);
+void trace_cleanup_bpf_summary(void);
+
+#else /* !HAVE_BPF_SKEL */
+
+static inline int trace_prepare_bpf_summary(enum trace_summary_mode mode __maybe_unused)
+{
+	return -1;
+}
+static inline void trace_start_bpf_summary(void) {}
+static inline void trace_end_bpf_summary(void) {}
+static inline int trace_print_bpf_summary(FILE *fp __maybe_unused, int max_summary __maybe_unused)
+{
+	return 0;
+}
+static inline void trace_cleanup_bpf_summary(void) {}
+
+#endif /* HAVE_BPF_SKEL */
+
+#endif /* UTIL_TRACE_H */
diff --git a/tools/perf/util/trace_augment.h b/tools/perf/util/trace_augment.h
new file mode 100644
index 000000000000..4f729bc67753
--- /dev/null
+++ b/tools/perf/util/trace_augment.h
@@ -0,0 +1,66 @@
+#ifndef TRACE_AUGMENT_H
+#define TRACE_AUGMENT_H
+
+#include <linux/compiler.h>
+
+struct bpf_program;
+struct evlist;
+
+#ifdef HAVE_BPF_SKEL
+
+int augmented_syscalls__prepare(void);
+int augmented_syscalls__create_bpf_output(struct evlist *evlist);
+void augmented_syscalls__setup_bpf_output(void);
+int augmented_syscalls__set_filter_pids(unsigned int nr, pid_t *pids);
+int augmented_syscalls__get_map_fds(int *enter_fd, int *exit_fd, int *beauty_fd);
+struct bpf_program *augmented_syscalls__find_by_title(const char *name);
+struct bpf_program *augmented_syscalls__unaugmented(void);
+void augmented_syscalls__cleanup(void);
+
+#else /* !HAVE_BPF_SKEL */
+
+static inline int augmented_syscalls__prepare(void)
+{
+	return -1;
+}
+
+static inline int augmented_syscalls__create_bpf_output(struct evlist *evlist __maybe_unused)
+{
+	return -1;
+}
+
+static inline void augmented_syscalls__setup_bpf_output(void)
+{
+}
+
+static inline int augmented_syscalls__set_filter_pids(unsigned int nr __maybe_unused,
+						      pid_t *pids __maybe_unused)
+{
+	return 0;
+}
+
+static inline int augmented_syscalls__get_map_fds(int *enter_fd __maybe_unused,
+						  int *exit_fd __maybe_unused,
+						  int *beauty_fd __maybe_unused)
+{
+	return -1;
+}
+
+static inline struct bpf_program *
+augmented_syscalls__find_by_title(const char *name __maybe_unused)
+{
+	return NULL;
+}
+
+static inline struct bpf_program *augmented_syscalls__unaugmented(void)
+{
+	return NULL;
+}
+
+static inline void augmented_syscalls__cleanup(void)
+{
+}
+
+#endif /* HAVE_BPF_SKEL */
+
+#endif
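
Both new headers above follow the same HAVE_BPF_SKEL pattern: real prototypes when BPF skeletons are built in, inline no-op stubs otherwise, so call sites need no conditional compilation. A sketch of what that buys a caller, assuming only the functions declared in trace.h; the run_summary() wrapper is hypothetical.

	#include <stdio.h>
	#include "util/trace.h"

	static void run_summary(FILE *fp, int max_summary)
	{
		/* With !HAVE_BPF_SKEL the prepare stub returns -1 and the
		 * whole sequence degrades to a clean no-op. */
		if (trace_prepare_bpf_summary(SUMMARY__BY_THREAD) < 0)
			return;
		trace_start_bpf_summary();
		/* ... traced workload runs here ... */
		trace_end_bpf_summary();
		trace_print_bpf_summary(fp, max_summary);
		trace_cleanup_bpf_summary();
	}
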
diff --git a/tools/perf/util/tracepoint.c b/tools/perf/util/tracepoint.c
index 92dd8b455b90..95377ed5d87b 100644
--- a/tools/perf/util/tracepoint.c
+++ b/tools/perf/util/tracepoint.c
@@ -4,10 +4,12 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <sys/param.h>
 #include <unistd.h>
 
 #include <api/fs/tracing_path.h>
+#include "fncache.h"
 
 int tp_event_has_id(const char *dir_path, struct dirent *evt_dir)
 {
@@ -26,39 +28,25 @@ int tp_event_has_id(const char *dir_path, struct dirent *evt_dir)
 /*
  * Check whether event is in <debugfs_mount_point>/tracing/events
  */
-int is_valid_tracepoint(const char *event_string)
+bool is_valid_tracepoint(const char *event_string)
 {
-	DIR *sys_dir, *evt_dir;
-	struct dirent *sys_dirent, *evt_dirent;
-	char evt_path[MAXPATHLEN];
-	char *dir_path;
-
-	sys_dir = tracing_events__opendir();
-	if (!sys_dir)
-		return 0;
-
-	for_each_subsystem(sys_dir, sys_dirent) {
-		dir_path = get_events_file(sys_dirent->d_name);
-		if (!dir_path)
-			continue;
-		evt_dir = opendir(dir_path);
-		if (!evt_dir)
-			goto next;
-
-		for_each_event(dir_path, evt_dir, evt_dirent) {
-			snprintf(evt_path, MAXPATHLEN, "%s:%s",
-				 sys_dirent->d_name, evt_dirent->d_name);
-			if (!strcmp(evt_path, event_string)) {
-				closedir(evt_dir);
-				put_events_file(dir_path);
-				closedir(sys_dir);
-				return 1;
-			}
-		}
-		closedir(evt_dir);
-next:
-		put_events_file(dir_path);
-	}
-	closedir(sys_dir);
-	return 0;
+	char *dst, *path = malloc(strlen(event_string) + 4); /* Space for "/id\0". */
+	bool have_file = false; /* Conservatively return false if memory allocation failed. */
+	const char *src;
+
+	if (!path)
+		return false;
+
+	/* Copy event_string replacing the ':' with '/'. */
+	for (src = event_string, dst = path; *src; src++, dst++)
+		*dst = (*src == ':') ? '/' : *src;
+	/* Add "/id\0". */
+	memcpy(dst, "/id", 4);
+
+	dst = get_events_file(path);
+	if (dst)
+		have_file = file_available(dst);
+	free(dst);
+	free(path);
+	return have_file;
 }
diff --git a/tools/perf/util/tracepoint.h b/tools/perf/util/tracepoint.h
index c4a110fe87d7..65ccb01fc312 100644
--- a/tools/perf/util/tracepoint.h
+++ b/tools/perf/util/tracepoint.h
@@ -4,6 +4,7 @@
 
 #include <dirent.h>
 #include <string.h>
+#include <stdbool.h>
 
 int tp_event_has_id(const char *dir_path, struct dirent *evt_dir);
 
@@ -20,6 +21,6 @@ int tp_event_has_id(const char *dir_path, struct dirent *evt_dir);
 	     (strcmp(sys_dirent->d_name, ".")) &&	\
 	     (strcmp(sys_dirent->d_name, "..")))
 
-int is_valid_tracepoint(const char *event_string);
+bool is_valid_tracepoint(const char *event_string);
 
 #endif /* __PERF_TRACEPOINT_H */
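
The is_valid_tracepoint() rewrite above replaces a walk over every subsystem directory with a single existence check: "sys:event" is rewritten to "sys/event/id" and fncache's file_available() asks whether that file exists under the tracing events directory. A sketch under the assumption that tracefs is mounted in the usual place; the example path is illustrative, since the real mount point is resolved by get_events_file().

	#include <stdbool.h>
	#include "tracepoint.h"

	static bool example(void)
	{
		/* Internally this boils down to one stat() of something like
		 * /sys/kernel/tracing/events/sched/sched_switch/id */
		return is_valid_tracepoint("sched:sched_switch");
	}
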
Time Zero %" PRI_lu64 "\n", tc->time_zero); /* diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index 88fd1c4c1cb8..57ce8449647f 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -25,7 +25,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc); u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc); u64 rdtsc(void); -double arch_get_tsc_freq(void); +u64 arch_get_tsc_freq(void); size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c index 32c39cfe209b..4c6a86e1cb54 100644 --- a/tools/perf/util/units.c +++ b/tools/perf/util/units.c @@ -64,7 +64,7 @@ unsigned long convert_unit(unsigned long value, char *unit) int unit_number__scnprintf(char *buf, size_t size, u64 n) { - char unit[4] = "BKMG"; + char unit[] = "BKMG"; int i = 0; while (((n / 1024) > 1) && (i < 3)) { diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 2a96df4c8d42..ae70fb56a057 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -17,6 +17,7 @@ #include "event.h" #include "perf_regs.h" #include "callchain.h" +#include "util/env.h" static char *debuginfo_path; @@ -28,8 +29,8 @@ static int __find_debuginfo(Dwfl_Module *mod __maybe_unused, void **userdata, const struct dso *dso = *userdata; assert(dso); - if (dso->symsrc_filename && strcmp (file_name, dso->symsrc_filename)) - *debuginfo_file_name = strdup(dso->symsrc_filename); + if (dso__symsrc_filename(dso) && strcmp(file_name, dso__symsrc_filename(dso))) + *debuginfo_file_name = strdup(dso__symsrc_filename(dso)); return -1; } @@ -45,6 +46,7 @@ static int __report_module(struct addr_location *al, u64 ip, { Dwfl_Module *mod; struct dso *dso = NULL; + Dwarf_Addr base; /* * Some callers will use al->sym, so we can't just use the * cheaper thread__find_map() here. @@ -57,28 +59,43 @@ static int __report_module(struct addr_location *al, u64 ip, if (!dso) return 0; + /* + * The generated JIT DSO files only map the code segment without + * ELF headers. Since JIT codes used to be packed in a memory + * segment, calculating the base address using pgoff falls into + * a different code in another DSO. So just use the map->start + * directly to pick the correct one. + */ + if (!strncmp(dso__long_name(dso), "/tmp/jitted-", 12)) + base = map__start(al->map); + else + base = map__start(al->map) - map__pgoff(al->map); + mod = dwfl_addrmodule(ui->dwfl, ip); if (mod) { Dwarf_Addr s; dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); - if (s != map__start(al->map) - map__pgoff(al->map)) - mod = 0; + if (s != base) + mod = NULL; } if (!mod) { char filename[PATH_MAX]; - __symbol__join_symfs(filename, sizeof(filename), dso->long_name); - mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, - map__start(al->map) - map__pgoff(al->map), false); + __symbol__join_symfs(filename, sizeof(filename), dso__long_name(dso)); + /* Don't hang up on device files like /dev/dri/renderD128. 
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 2a96df4c8d42..ae70fb56a057 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -17,6 +17,7 @@
 #include "event.h"
 #include "perf_regs.h"
 #include "callchain.h"
+#include "util/env.h"
 
 static char *debuginfo_path;
 
@@ -28,8 +29,8 @@ static int __find_debuginfo(Dwfl_Module *mod __maybe_unused, void **userdata,
 	const struct dso *dso = *userdata;
 
 	assert(dso);
-	if (dso->symsrc_filename && strcmp (file_name, dso->symsrc_filename))
-		*debuginfo_file_name = strdup(dso->symsrc_filename);
+	if (dso__symsrc_filename(dso) && strcmp(file_name, dso__symsrc_filename(dso)))
+		*debuginfo_file_name = strdup(dso__symsrc_filename(dso));
 	return -1;
 }
 
@@ -45,6 +46,7 @@ static int __report_module(struct addr_location *al, u64 ip,
 {
 	Dwfl_Module *mod;
 	struct dso *dso = NULL;
+	Dwarf_Addr base;
 	/*
 	 * Some callers will use al->sym, so we can't just use the
 	 * cheaper thread__find_map() here.
@@ -57,28 +59,43 @@ static int __report_module(struct addr_location *al, u64 ip,
 	if (!dso)
 		return 0;
 
+	/*
+	 * The generated JIT DSO files only map the code segment without
+	 * ELF headers.  Since JIT codes used to be packed in a memory
+	 * segment, calculating the base address using pgoff falls into
+	 * a different code in another DSO.  So just use the map->start
+	 * directly to pick the correct one.
+	 */
+	if (!strncmp(dso__long_name(dso), "/tmp/jitted-", 12))
+		base = map__start(al->map);
+	else
+		base = map__start(al->map) - map__pgoff(al->map);
+
 	mod = dwfl_addrmodule(ui->dwfl, ip);
 	if (mod) {
 		Dwarf_Addr s;
 
 		dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
-		if (s != map__start(al->map) - map__pgoff(al->map))
-			mod = 0;
+		if (s != base)
+			mod = NULL;
 	}
 
 	if (!mod) {
 		char filename[PATH_MAX];
 
-		__symbol__join_symfs(filename, sizeof(filename), dso->long_name);
-		mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1,
-				      map__start(al->map) - map__pgoff(al->map), false);
+		__symbol__join_symfs(filename, sizeof(filename), dso__long_name(dso));
+		/* Don't hang up on device files like /dev/dri/renderD128. */
+		if (is_regular_file(filename)) {
+			mod = dwfl_report_elf(ui->dwfl, dso__short_name(dso), filename, -1,
+					      base, false);
+		}
 	}
 
 	if (!mod) {
 		char filename[PATH_MAX];
 
 		if (dso__build_id_filename(dso, filename, sizeof(filename), false))
-			mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1,
-					      map__start(al->map) - map__pgoff(al->map), false);
+			mod = dwfl_report_elf(ui->dwfl, dso__short_name(dso), filename, -1,
+					      base, false);
 	}
 
 	if (mod) {
@@ -170,12 +187,17 @@ static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *
 			void *arg)
 {
 	struct unwind_info *ui = arg;
+	const char *arch = perf_env__arch(ui->machine->env);
 	struct stack_dump *stack = &ui->sample->user_stack;
 	u64 start, end;
 	int offset;
 	int ret;
 
-	ret = perf_reg_value(&start, &ui->sample->user_regs, PERF_REG_SP);
+	if (!ui->sample->user_regs)
+		return false;
+
+	ret = perf_reg_value(&start, ui->sample->user_regs,
+			     perf_arch_reg_sp(arch));
 	if (ret)
 		return false;
 
@@ -247,16 +269,17 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 	struct unwind_info *ui, ui_buf = {
 		.sample		= data,
 		.thread		= thread,
-		.machine	= RC_CHK_ACCESS(thread__maps(thread))->machine,
+		.machine	= maps__machine((thread__maps(thread))),
 		.cb		= cb,
 		.arg		= arg,
 		.max_stack	= max_stack,
 		.best_effort    = best_effort
 	};
+	const char *arch = perf_env__arch(ui_buf.machine->env);
 	Dwarf_Word ip;
 	int err = -EINVAL, i;
 
-	if (!data->user_regs.regs)
+	if (!data->user_regs || !data->user_regs->regs)
 		return -EINVAL;
 
 	ui = zalloc(sizeof(ui_buf) + sizeof(ui_buf.entries[0]) * max_stack);
@@ -269,7 +292,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 	if (!ui->dwfl)
 		goto out;
 
-	err = perf_reg_value(&ip, &data->user_regs, PERF_REG_IP);
+	err = perf_reg_value(&ip, data->user_regs, perf_arch_reg_ip(arch));
 	if (err)
 		goto out;
 
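
A worked example of the base-address selection in __report_module() above, with made-up numbers: for an ordinary ELF DSO mapped at 0x7f0000 with page offset 0x2000, the module base is 0x7f0000 - 0x2000 = 0x6fe000; for a /tmp/jitted-* DSO, which maps only its code segment, that subtraction would land inside some other DSO's range, so the map start 0x7f0000 itself is used as the base.
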
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index ebfde537b99b..0b037e7389a0 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -302,40 +302,53 @@ static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
 	return 0;
 }
 
+struct read_unwind_spec_eh_frame_maps_cb_args {
+	struct dso *dso;
+	u64 base_addr;
+};
+
+static int read_unwind_spec_eh_frame_maps_cb(struct map *map, void *data)
+{
+
+	struct read_unwind_spec_eh_frame_maps_cb_args *args = data;
+
+	if (map__dso(map) == args->dso && map__start(map) - map__pgoff(map) < args->base_addr)
+		args->base_addr = map__start(map) - map__pgoff(map);
+
+	return 0;
+}
+
+
 static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui,
 				     u64 *table_data, u64 *segbase,
 				     u64 *fde_count)
 {
-	struct map_rb_node *map_node;
-	u64 base_addr = UINT64_MAX;
+	struct read_unwind_spec_eh_frame_maps_cb_args args = {
+		.dso = dso,
+		.base_addr = UINT64_MAX,
+	};
 	int ret, fd;
 
-	if (dso->data.eh_frame_hdr_offset == 0) {
-		fd = dso__data_get_fd(dso, ui->machine);
-		if (fd < 0)
+	if (dso__data(dso)->eh_frame_hdr_offset == 0) {
+		if (!dso__data_get_fd(dso, ui->machine, &fd))
 			return -EINVAL;
 
 		/* Check the .eh_frame section for unwinding info */
 		ret = elf_section_address_and_offset(fd, ".eh_frame_hdr",
-						     &dso->data.eh_frame_hdr_addr,
-						     &dso->data.eh_frame_hdr_offset);
-		dso->data.elf_base_addr = elf_base_address(fd);
+						     &dso__data(dso)->eh_frame_hdr_addr,
+						     &dso__data(dso)->eh_frame_hdr_offset);
+		dso__data(dso)->elf_base_addr = elf_base_address(fd);
 		dso__data_put_fd(dso);
-		if (ret || dso->data.eh_frame_hdr_offset == 0)
+		if (ret || dso__data(dso)->eh_frame_hdr_offset == 0)
 			return -EINVAL;
 	}
 
-	maps__for_each_entry(thread__maps(ui->thread), map_node) {
-		struct map *map = map_node->map;
-		u64 start = map__start(map);
+	maps__for_each_map(thread__maps(ui->thread), read_unwind_spec_eh_frame_maps_cb, &args);
 
-		if (map__dso(map) == dso && start < base_addr)
-			base_addr = start;
-	}
-	base_addr -= dso->data.elf_base_addr;
+	args.base_addr -= dso__data(dso)->elf_base_addr;
 	/* Address of .eh_frame_hdr */
-	*segbase = base_addr + dso->data.eh_frame_hdr_addr;
-	ret = unwind_spec_ehframe(dso, ui->machine, dso->data.eh_frame_hdr_offset,
+	*segbase = args.base_addr + dso__data(dso)->eh_frame_hdr_addr;
+	ret = unwind_spec_ehframe(dso, ui->machine, dso__data(dso)->eh_frame_hdr_offset,
 				  table_data, fde_count);
 	if (ret)
 		return ret;
@@ -349,7 +362,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
 					struct machine *machine, u64 *offset)
 {
 	int fd;
-	u64 ofs = dso->data.debug_frame_offset;
+	u64 ofs = dso__data(dso)->debug_frame_offset;
 
 	/* debug_frame can reside in:
 	 *  - dso
@@ -358,14 +371,13 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
 	 *    has to be pointed by symsrc_filename
 	 */
 	if (ofs == 0) {
-		fd = dso__data_get_fd(dso, machine);
-		if (fd >= 0) {
+		if (dso__data_get_fd(dso, machine, &fd)) {
 			ofs = elf_section_offset(fd, ".debug_frame");
 			dso__data_put_fd(dso);
 		}
 
 		if (ofs <= 0) {
-			fd = open(dso->symsrc_filename, O_RDONLY);
+			fd = open(dso__symsrc_filename(dso), O_RDONLY);
 			if (fd >= 0) {
 				ofs = elf_section_offset(fd, ".debug_frame");
 				close(fd);
@@ -376,6 +388,11 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
 			char *debuglink = malloc(PATH_MAX);
 			int ret = 0;
 
+			if (debuglink == NULL) {
+				pr_err("unwind: Can't read unwind spec debug frame.\n");
+				return -ENOMEM;
+			}
+
 			ret = dso__read_binary_type_filename(
 				dso, DSO_BINARY_TYPE__DEBUGLINK,
 				machine->root_dir, debuglink, PATH_MAX);
@@ -388,21 +405,21 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
 			}
 		}
 		if (ofs > 0) {
-			if (dso->symsrc_filename != NULL) {
+			if (dso__symsrc_filename(dso) != NULL) {
 				pr_warning(
 					"%s: overwrite symsrc(%s,%s)\n",
 					__func__,
-					dso->symsrc_filename,
+					dso__symsrc_filename(dso),
 					debuglink);
-				zfree(&dso->symsrc_filename);
+				dso__free_symsrc_filename(dso);
 			}
-			dso->symsrc_filename = debuglink;
+			dso__set_symsrc_filename(dso, debuglink);
 		} else {
 			free(debuglink);
 		}
 	}
 
-	dso->data.debug_frame_offset = ofs;
+	dso__data(dso)->debug_frame_offset = ofs;
 	}
 
 	*offset = ofs;
@@ -446,7 +463,7 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
 		return -EINVAL;
 	}
 
-	pr_debug("unwind: find_proc_info dso %s\n", dso->name);
+	pr_debug("unwind: find_proc_info dso %s\n", dso__name(dso));
 
 	/* Check the .eh_frame section for unwinding info */
 	if (!read_unwind_spec_eh_frame(dso, ui, &table_data, &segbase, &fde_count)) {
@@ -466,16 +483,18 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
 
 	/* Check the .debug_frame section for unwinding info */
 	if (ret < 0 && !read_unwind_spec_debug_frame(dso, ui->machine, &segbase)) {
-		int fd = dso__data_get_fd(dso, ui->machine);
-		int is_exec = elf_is_exec(fd, dso->name);
+		int fd;
 		u64 start = map__start(map);
-		unw_word_t base = is_exec ? 0 : start;
+		unw_word_t base = start;
 		const char *symfile;
 
-		if (fd >= 0)
+		if (dso__data_get_fd(dso, ui->machine, &fd)) {
+			if (elf_is_exec(fd, dso__name(dso)))
+				base = 0;
 			dso__data_put_fd(dso);
+		}
 
-		symfile = dso->symsrc_filename ?: dso->name;
+		symfile = dso__symsrc_filename(dso) ?: dso__name(dso);
 
 		memset(&di, 0, sizeof(di));
 		if (dwarf_find_debug_frame(0, &di, ip, base, symfile, start, map__end(map)))
@@ -553,19 +572,20 @@ static int access_mem(unw_addr_space_t __maybe_unused as,
 			  int __write, void *arg)
 {
 	struct unwind_info *ui = arg;
+	const char *arch = perf_env__arch(ui->machine->env);
 	struct stack_dump *stack = &ui->sample->user_stack;
 	u64 start, end;
 	int offset;
 	int ret;
 
 	/* Don't support write, probably not needed. */
-	if (__write || !stack || !ui->sample->user_regs.regs) {
+	if (__write || !stack || !ui->sample->user_regs || !ui->sample->user_regs->regs) {
 		*valp = 0;
 		return 0;
 	}
 
-	ret = perf_reg_value(&start, &ui->sample->user_regs,
-			     LIBUNWIND__ARCH_REG_SP);
+	ret = perf_reg_value(&start, perf_sample__user_regs(ui->sample),
+			     perf_arch_reg_sp(arch));
 	if (ret)
 		return ret;
 
@@ -608,7 +628,7 @@ static int access_reg(unw_addr_space_t __maybe_unused as,
 		return 0;
 	}
 
-	if (!ui->sample->user_regs.regs) {
+	if (!ui->sample->user_regs || !ui->sample->user_regs->regs) {
 		*valp = 0;
 		return 0;
 	}
@@ -617,7 +637,7 @@ static int access_reg(unw_addr_space_t __maybe_unused as,
 	if (id < 0)
 		return -EINVAL;
 
-	ret = perf_reg_value(&val, &ui->sample->user_regs, id);
+	ret = perf_reg_value(&val, perf_sample__user_regs(ui->sample), id);
 	if (ret) {
 		if (!ui->best_effort)
 			pr_err("unwind: can't read reg %d\n", regnum);
@@ -691,7 +711,7 @@ static int _unwind__prepare_access(struct maps *maps)
 {
 	void *addr_space = unw_create_addr_space(&accessors, 0);
 
-	RC_CHK_ACCESS(maps)->addr_space = addr_space;
+	maps__set_addr_space(maps, addr_space);
 	if (!addr_space) {
 		pr_err("unwind: Can't create unwind address space.\n");
 		return -ENOMEM;
 	}
@@ -714,14 +734,15 @@ static void _unwind__finish_access(struct maps *maps)
 static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
 		       void *arg, int max_stack)
 {
+	const char *arch = perf_env__arch(ui->machine->env);
 	u64 val;
 	unw_word_t ips[max_stack];
 	unw_addr_space_t addr_space;
 	unw_cursor_t c;
 	int ret, i = 0;
 
-	ret = perf_reg_value(&val, &ui->sample->user_regs,
-			     LIBUNWIND__ARCH_REG_IP);
+	ret = perf_reg_value(&val, perf_sample__user_regs(ui->sample),
+			     perf_arch_reg_ip(arch));
 	if (ret)
 		return ret;
 
@@ -787,7 +808,7 @@ static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 		.best_effort    = best_effort
 	};
 
-	if (!data->user_regs.regs)
+	if (!data->user_regs || !data->user_regs->regs)
 		return -EINVAL;
 
 	if (max_stack <= 0)
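
The unwind-libunwind-local.c change above is one instance of a broader conversion from the open-coded maps__for_each_entry() loop to the maps__for_each_map() callback style, where state travels through an args struct and a nonzero return stops iteration early. A generic sketch of the shape; the names my_args and find_min_base are invented for illustration.

	struct my_args {
		struct dso *dso;
		u64 min_base;
	};

	static int find_min_base(struct map *map, void *data)
	{
		struct my_args *args = data;
		u64 base = map__start(map) - map__pgoff(map);

		if (map__dso(map) == args->dso && base < args->min_base)
			args->min_base = base;
		return 0;	/* returning nonzero would stop the walk */
	}

	/* usage: maps__for_each_map(maps, find_min_base, &args); */
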
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index 76cd63de80a8..cb8be6acfb6f 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -12,11 +12,6 @@ struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops;
 struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops;
 struct unwind_libunwind_ops __weak *arm64_unwind_libunwind_ops;
 
-static void unwind__register_ops(struct maps *maps, struct unwind_libunwind_ops *ops)
-{
-	RC_CHK_ACCESS(maps)->unwind_libunwind_ops = ops;
-}
-
 int unwind__prepare_access(struct maps *maps, struct map *map, bool *initialized)
 {
 	const char *arch;
@@ -30,7 +25,7 @@ int unwind__prepare_access(struct maps *maps, struct map *map, bool *initialized
 		return 0;
 
 	if (maps__addr_space(maps)) {
-		pr_debug("unwind: thread map already set, dso=%s\n", dso->name);
+		pr_debug("unwind: thread map already set, dso=%s\n", dso__name(dso));
 		if (initialized)
 			*initialized = true;
 		return 0;
@@ -60,7 +55,7 @@ int unwind__prepare_access(struct maps *maps, struct map *map, bool *initialized
 		return 0;
 	}
 out_register:
-	unwind__register_ops(maps, ops);
+	maps__set_unwind_libunwind_ops(maps, ops);
 
 	err = maps__unwind_libunwind_ops(maps)->prepare_access(maps);
 	if (initialized)
diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h
index b2a03fa5289b..9f7164c6d9aa 100644
--- a/tools/perf/util/unwind.h
+++ b/tools/perf/util/unwind.h
@@ -42,14 +42,6 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 #define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arch_reg_id(regnum)
 #endif
 
-#ifndef LIBUNWIND__ARCH_REG_SP
-#define LIBUNWIND__ARCH_REG_SP PERF_REG_SP
-#endif
-
-#ifndef LIBUNWIND__ARCH_REG_IP
-#define LIBUNWIND__ARCH_REG_IP PERF_REG_IP
-#endif
-
 int LIBUNWIND__ARCH_REG_ID(int regnum);
 int unwind__prepare_access(struct maps *maps, struct map *map, bool *initialized);
 void unwind__flush_access(struct maps *maps);
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index c1fd9ba6d697..0f031eb80b4c 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -78,17 +78,23 @@ bool sysctl__nmi_watchdog_enabled(void)
 
 bool test_attr__enabled;
 
+bool exclude_GH_default;
+
 bool perf_host  = true;
 bool perf_guest = false;
 
 void event_attr_init(struct perf_event_attr *attr)
 {
+	/* to capture ABI version */
+	attr->size = sizeof(*attr);
+
+	if (!exclude_GH_default)
+		return;
+
 	if (!perf_host)
 		attr->exclude_host = 1;
 	if (!perf_guest)
 		attr->exclude_guest = 1;
-	/* to capture ABI version */
-	attr->size = sizeof(*attr);
 }
 
 int mkdir_p(char *path, mode_t mode)
@@ -325,94 +331,15 @@ int perf_event_paranoid(void)
 
 bool perf_event_paranoid_check(int max_level)
 {
-	return perf_cap__capable(CAP_SYS_ADMIN) ||
-		perf_cap__capable(CAP_PERFMON) ||
-		perf_event_paranoid() <= max_level;
-}
-
-static int
-fetch_ubuntu_kernel_version(unsigned int *puint)
-{
-	ssize_t len;
-	size_t line_len = 0;
-	char *ptr, *line = NULL;
-	int version, patchlevel, sublevel, err;
-	FILE *vsig;
-
-	if (!puint)
-		return 0;
+	bool used_root;
 
-	vsig = fopen("/proc/version_signature", "r");
-	if (!vsig) {
-		pr_debug("Open /proc/version_signature failed: %s\n",
-			 strerror(errno));
-		return -1;
-	}
-
-	len = getline(&line, &line_len, vsig);
-	fclose(vsig);
-	err = -1;
-	if (len <= 0) {
-		pr_debug("Reading from /proc/version_signature failed: %s\n",
-			 strerror(errno));
-		goto errout;
-	}
-
-	ptr = strrchr(line, ' ');
-	if (!ptr) {
-		pr_debug("Parsing /proc/version_signature failed: %s\n", line);
-		goto errout;
-	}
-
-	err = sscanf(ptr + 1, "%d.%d.%d",
-		     &version, &patchlevel, &sublevel);
-	if (err != 3) {
-		pr_debug("Unable to get kernel version from /proc/version_signature '%s'\n",
-			 line);
-		goto errout;
-	}
-
-	*puint = (version << 16) + (patchlevel << 8) + sublevel;
-	err = 0;
-errout:
-	free(line);
-	return err;
-}
-
-int
-fetch_kernel_version(unsigned int *puint, char *str,
-		     size_t str_size)
-{
-	struct utsname utsname;
-	int version, patchlevel, sublevel, err;
-	bool int_ver_ready = false;
-
-	if (access("/proc/version_signature", R_OK) == 0)
-		if (!fetch_ubuntu_kernel_version(puint))
-			int_ver_ready = true;
-
-	if (uname(&utsname))
-		return -1;
-
-	if (str && str_size) {
-		strncpy(str, utsname.release, str_size);
-		str[str_size - 1] = '\0';
-	}
-
-	if (!puint || int_ver_ready)
-		return 0;
-
-	err = sscanf(utsname.release, "%d.%d.%d",
-		     &version, &patchlevel, &sublevel);
+	if (perf_cap__capable(CAP_SYS_ADMIN, &used_root))
+		return true;
 
-	if (err != 3) {
-		pr_debug("Unable to get kernel version from uname '%s'\n",
-			 utsname.release);
-		return -1;
-	}
+	if (!used_root && perf_cap__capable(CAP_PERFMON, &used_root))
+		return true;
 
-	*puint = (version << 16) + (patchlevel << 8) + sublevel;
-	return 0;
+	return perf_event_paranoid() <= max_level;
 }
 
 int perf_tip(char **strp, const char *dirpath)
@@ -552,3 +479,22 @@ int sched_getcpu(void)
 	return -1;
 }
 #endif
+
+#ifndef HAVE_SCANDIRAT_SUPPORT
+int scandirat(int dirfd, const char *dirp,
+	      struct dirent ***namelist,
+	      int (*filter)(const struct dirent *),
+	      int (*compar)(const struct dirent **, const struct dirent **))
+{
+	char path[PATH_MAX];
+	int err, fd = openat(dirfd, dirp, O_PATH);
+
+	if (fd < 0)
+		return fd;
+
+	snprintf(path, sizeof(path), "/proc/%d/fd/%d", getpid(), fd);
+	err = scandir(path, namelist, filter, compar);
+	close(fd);
+	return err;
+}
+#endif
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 7c8915d92dca..3423778e39a5 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -6,6 +6,7 @@
 /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
 #define _DEFAULT_SOURCE 1
 
+#include <dirent.h>
 #include <fcntl.h>
 #include <stdbool.h>
 #include <stddef.h>
@@ -20,6 +21,9 @@ extern const char perf_more_info_string[];
 
 extern const char *input_name;
 
+/* This will control if perf_{host,guest} will set attr.exclude_{host,guest}. */
+extern bool exclude_GH_default;
+
 extern bool perf_host;
 extern bool perf_guest;
 
@@ -42,20 +46,19 @@ int sysctl__max_stack(void);
 
 bool sysctl__nmi_watchdog_enabled(void);
 
-int fetch_kernel_version(unsigned int *puint,
-			 char *str, size_t str_sz);
-#define KVER_VERSION(x)		(((x) >> 16) & 0xff)
-#define KVER_PATCHLEVEL(x)	(((x) >> 8) & 0xff)
-#define KVER_SUBLEVEL(x)	((x) & 0xff)
-#define KVER_FMT	"%d.%d.%d"
-#define KVER_PARAM(x)	KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x)
-
 int perf_tip(char **strp, const char *dirpath);
 
 #ifndef HAVE_SCHED_GETCPU_SUPPORT
 int sched_getcpu(void);
 #endif
 
+#ifndef HAVE_SCANDIRAT_SUPPORT
+int scandirat(int dirfd, const char *dirp,
+	      struct dirent ***namelist,
+	      int (*filter)(const struct dirent *),
+	      int (*compar)(const struct dirent **, const struct dirent **));
+#endif
+
 extern bool perf_singlethreaded;
 
 void perf_set_singlethreaded(void);
@@ -73,13 +76,6 @@ char *perf_exe(char *buf, int len);
 #endif
 #endif
 
-extern bool test_attr__enabled;
-void test_attr__ready(void);
-void test_attr__init(void);
-struct perf_event_attr;
-void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
-		     int fd, int group_fd, unsigned long flags);
-
 struct perf_debuginfod {
 	const char *urls;
 	bool set;
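
On the scandirat() fallback added above: C libraries without scandirat(3) get an emulation that re-opens the directory through the /proc/<pid>/fd/<fd> symlink and then defers to plain scandir(). A hypothetical caller, assuming POSIX headers only; the function name list_dir is illustrative.

	#include <dirent.h>
	#include <stdlib.h>

	static int list_dir(int dirfd, const char *name)
	{
		struct dirent **entries;
		int i, n = scandirat(dirfd, name, &entries, NULL, alphasort);

		if (n < 0)
			return n;	/* openat() or scandir() failed */
		for (i = 0; i < n; i++)
			free(entries[i]);
		free(entries);
		return n;
	}
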
diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c
index b9823f414f10..ec72d29f3d58 100644
--- a/tools/perf/util/values.c
+++ b/tools/perf/util/values.c
@@ -8,6 +8,7 @@
 
 #include "values.h"
 #include "debug.h"
+#include "evsel.h"
 
 int perf_read_values_init(struct perf_read_values *values)
 {
@@ -22,21 +23,17 @@ int perf_read_values_init(struct perf_read_values *values)
 	values->threads = 0;
 
 	values->counters_max = 16;
-	values->counterrawid = malloc(values->counters_max * sizeof(*values->counterrawid));
-	values->countername = malloc(values->counters_max * sizeof(*values->countername));
-	if (!values->counterrawid || !values->countername) {
-		pr_debug("failed to allocate read_values counters arrays");
+	values->counters = malloc(values->counters_max * sizeof(*values->counters));
+	if (!values->counters) {
+		pr_debug("failed to allocate read_values counters array");
 		goto out_free_counter;
 	}
-	values->counters = 0;
+	values->num_counters = 0;
 
 	return 0;
 
 out_free_counter:
-	zfree(&values->counterrawid);
-	zfree(&values->countername);
+	zfree(&values->counters);
 out_free_pid:
 	zfree(&values->pid);
 	zfree(&values->tid);
@@ -56,10 +53,7 @@ void perf_read_values_destroy(struct perf_read_values *values)
 	zfree(&values->value);
 	zfree(&values->pid);
 	zfree(&values->tid);
-	zfree(&values->counterrawid);
-	for (i = 0; i < values->counters; i++)
-		zfree(&values->countername[i]);
-	zfree(&values->countername);
+	zfree(&values->counters);
 }
 
 static int perf_read_values__enlarge_threads(struct perf_read_values *values)
@@ -116,81 +110,71 @@ static int perf_read_values__findnew_thread(struct perf_read_values *values,
 
 static int perf_read_values__enlarge_counters(struct perf_read_values *values)
 {
-	char **countername;
-	int i, counters_max = values->counters_max * 2;
-	u64 *counterrawid = realloc(values->counterrawid, counters_max * sizeof(*values->counterrawid));
+	int counters_max = values->counters_max * 2;
+	struct evsel **new_counters = realloc(values->counters,
+					      counters_max * sizeof(*values->counters));
 
-	if (!counterrawid) {
-		pr_debug("failed to enlarge read_values rawid array");
+	if (!new_counters) {
+		pr_debug("failed to enlarge read_values counters array");
 		goto out_enomem;
 	}
 
-	countername = realloc(values->countername, counters_max * sizeof(*values->countername));
-	if (!countername) {
-		pr_debug("failed to enlarge read_values rawid array");
-		goto out_free_rawid;
-	}
-
-	for (i = 0; i < values->threads; i++) {
+	for (int i = 0; i < values->threads; i++) {
 		u64 *value = realloc(values->value[i], counters_max * sizeof(**values->value));
-		int j;
 
 		if (!value) {
 			pr_debug("failed to enlarge read_values ->values array");
-			goto out_free_name;
+			goto out_free_counters;
 		}
 
-		for (j = values->counters_max; j < counters_max; j++)
+		for (int j = values->counters_max; j < counters_max; j++)
 			value[j] = 0;
 		values->value[i] = value;
 	}
 
 	values->counters_max = counters_max;
-	values->counterrawid = counterrawid;
-	values->countername = countername;
+	values->counters = new_counters;
 
 	return 0;
 
-out_free_name:
-	free(countername);
-out_free_rawid:
-	free(counterrawid);
+out_free_counters:
	free(new_counters);
 out_enomem:
 	return -ENOMEM;
 }
 
 static int perf_read_values__findnew_counter(struct perf_read_values *values,
-					     u64 rawid, const char *name)
+					     struct evsel *evsel)
 {
 	int i;
 
-	for (i = 0; i < values->counters; i++)
-		if (values->counterrawid[i] == rawid)
+	for (i = 0; i < values->num_counters; i++)
+		if (values->counters[i] == evsel)
 			return i;
 
-	if (values->counters == values->counters_max) {
-		i = perf_read_values__enlarge_counters(values);
-		if (i)
-			return i;
+	if (values->num_counters == values->counters_max) {
+		int err = perf_read_values__enlarge_counters(values);
+
+		if (err)
+			return err;
 	}
 
-	i = values->counters++;
-	values->counterrawid[i] = rawid;
-	values->countername[i] = strdup(name);
+	i = values->num_counters++;
+	values->counters[i] = evsel;
 
 	return i;
 }
 
 int perf_read_values_add_value(struct perf_read_values *values,
 				u32 pid, u32 tid,
-				u64 rawid, const char *name, u64 value)
+				struct evsel *evsel, u64 value)
 {
 	int tindex, cindex;
 
 	tindex = perf_read_values__findnew_thread(values, pid, tid);
 	if (tindex < 0)
 		return tindex;
-	cindex = perf_read_values__findnew_counter(values, rawid, name);
+	cindex = perf_read_values__findnew_counter(values, evsel);
 	if (cindex < 0)
 		return cindex;
 
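
After the change above, a counter is identified by its struct evsel pointer: deduplication in perf_read_values__findnew_counter() is pointer equality, and names are obtained from evsel__name() only at display time. An illustrative fragment of the new call contract (perf-internal API, shown in isolation):

	/* record one value for (pid, tid) under the counter's evsel */
	int err = perf_read_values_add_value(&values, pid, tid, evsel, val);

	if (err < 0)
		return err;	/* e.g. -ENOMEM on allocation failure */
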
@@ -205,15 +189,15 @@ static void perf_read_values__display_pretty(FILE *fp,
 	int pidwidth, tidwidth;
 	int *counterwidth;
 
-	counterwidth = malloc(values->counters * sizeof(*counterwidth));
+	counterwidth = malloc(values->num_counters * sizeof(*counterwidth));
 	if (!counterwidth) {
 		fprintf(fp, "INTERNAL ERROR: Failed to allocate counterwidth array\n");
 		return;
 	}
 	tidwidth = 3;
 	pidwidth = 3;
-	for (j = 0; j < values->counters; j++)
-		counterwidth[j] = strlen(values->countername[j]);
+	for (j = 0; j < values->num_counters; j++)
+		counterwidth[j] = strlen(evsel__name(values->counters[j]));
 
 	for (i = 0; i < values->threads; i++) {
 		int width;
@@ -223,7 +207,7 @@ static void perf_read_values__display_pretty(FILE *fp,
 		width = snprintf(NULL, 0, "%d", values->tid[i]);
 		if (width > tidwidth)
 			tidwidth = width;
-		for (j = 0; j < values->counters; j++) {
+		for (j = 0; j < values->num_counters; j++) {
 			width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]);
 			if (width > counterwidth[j])
 				counterwidth[j] = width;
@@ -231,14 +215,14 @@ static void perf_read_values__display_pretty(FILE *fp,
 	}
 
 	fprintf(fp, "# %*s %*s", pidwidth, "PID", tidwidth, "TID");
-	for (j = 0; j < values->counters; j++)
-		fprintf(fp, " %*s", counterwidth[j], values->countername[j]);
+	for (j = 0; j < values->num_counters; j++)
+		fprintf(fp, " %*s", counterwidth[j], evsel__name(values->counters[j]));
 	fprintf(fp, "\n");
 
 	for (i = 0; i < values->threads; i++) {
 		fprintf(fp, " %*d %*d", pidwidth, values->pid[i],
 			tidwidth, values->tid[i]);
-		for (j = 0; j < values->counters; j++)
+		for (j = 0; j < values->num_counters; j++)
 			fprintf(fp, " %*" PRIu64,
 				counterwidth[j], values->value[i][j]);
 		fprintf(fp, "\n");
@@ -266,16 +250,16 @@ static void perf_read_values__display_raw(FILE *fp,
 		if (width > tidwidth)
 			tidwidth = width;
 	}
-	for (j = 0; j < values->counters; j++) {
-		width = strlen(values->countername[j]);
+	for (j = 0; j < values->num_counters; j++) {
+		width = strlen(evsel__name(values->counters[j]));
 		if (width > namewidth)
 			namewidth = width;
-		width = snprintf(NULL, 0, "%" PRIx64, values->counterrawid[j]);
+		width = snprintf(NULL, 0, "%x", values->counters[j]->core.idx);
 		if (width > rawwidth)
 			rawwidth = width;
 	}
 	for (i = 0; i < values->threads; i++) {
-		for (j = 0; j < values->counters; j++) {
+		for (j = 0; j < values->num_counters; j++) {
 			width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]);
 			if (width > countwidth)
 				countwidth = width;
@@ -287,12 +271,12 @@ static void perf_read_values__display_raw(FILE *fp,
 		namewidth, "Name", rawwidth, "Raw",
 		countwidth, "Count");
 
 	for (i = 0; i < values->threads; i++)
-		for (j = 0; j < values->counters; j++)
-			fprintf(fp, " %*d %*d %*s %*" PRIx64 " %*" PRIu64,
+		for (j = 0; j < values->num_counters; j++)
+			fprintf(fp, " %*d %*d %*s %*x %*" PRIu64,
 			pidwidth, values->pid[i],
 			tidwidth, values->tid[i],
-			namewidth, values->countername[j],
-			rawwidth, values->counterrawid[j],
+			namewidth, evsel__name(values->counters[j]),
+			rawwidth, values->counters[j]->core.idx,
 			countwidth, values->value[i][j]);
 }
diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h
index 8c41f22f42cf..bbca33daca19 100644
--- a/tools/perf/util/values.h
+++ b/tools/perf/util/values.h
@@ -2,16 +2,18 @@
 #ifndef __PERF_VALUES_H
 #define __PERF_VALUES_H
 
+#include <stdio.h>
 #include <linux/types.h>
 
+struct evsel;
+
 struct perf_read_values {
 	int threads;
 	int threads_max;
 	u32 *pid, *tid;
-	int counters;
+	int num_counters;
 	int counters_max;
-	u64 *counterrawid;
-	char **countername;
+	struct evsel **counters;
 	u64 **value;
 };
 
@@ -20,7 +22,7 @@ void perf_read_values_destroy(struct perf_read_values *values);
 
 int perf_read_values_add_value(struct perf_read_values *values,
 				u32 pid, u32 tid,
-				u64 rawid, const char *name, u64 value);
+				struct evsel *evsel, u64 value);
 
 void perf_read_values_display(FILE *fp,
 			      struct perf_read_values *values,
 			      int raw);
 
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index ae3eee69b659..c12f5d8c4bf6 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -133,30 +133,39 @@ static struct dso *__machine__addnew_vdso(struct machine *machine, const char *s
 	if (dso != NULL) {
 		__dsos__add(&machine->dsos, dso);
 		dso__set_long_name(dso, long_name, false);
-		/* Put dso here because __dsos_add already got it */
-		dso__put(dso);
 	}
 
 	return dso;
 }
 
+struct machine__thread_dso_type_maps_cb_args {
+	struct machine *machine;
+	enum dso_type dso_type;
+};
+
+static int machine__thread_dso_type_maps_cb(struct map *map, void *data)
+{
+	struct machine__thread_dso_type_maps_cb_args *args = data;
+	struct dso *dso = map__dso(map);
+
+	if (!dso || dso__long_name(dso)[0] != '/')
+		return 0;
+
+	args->dso_type = dso__type(dso, args->machine);
+	return (args->dso_type != DSO__TYPE_UNKNOWN) ? 1 : 0;
+}
+
 static enum dso_type machine__thread_dso_type(struct machine *machine,
 					      struct thread *thread)
 {
-	enum dso_type dso_type = DSO__TYPE_UNKNOWN;
-	struct map_rb_node *rb_node;
-
-	maps__for_each_entry(thread__maps(thread), rb_node) {
-		struct dso *dso = map__dso(rb_node->map);
+	struct machine__thread_dso_type_maps_cb_args args = {
+		.machine = machine,
+		.dso_type = DSO__TYPE_UNKNOWN,
+	};
 
-		if (!dso || dso->long_name[0] != '/')
-			continue;
-		dso_type = dso__type(dso, machine);
-		if (dso_type != DSO__TYPE_UNKNOWN)
-			break;
-	}
+	maps__for_each_map(thread__maps(thread), machine__thread_dso_type_maps_cb, &args);
 
-	return dso_type;
+	return args.dso_type;
 }
 
 #if BITS_PER_LONG == 64
@@ -241,17 +250,15 @@ static struct dso *__machine__findnew_compat(struct machine *machine,
 	const char *file_name;
 	struct dso *dso;
 
-	dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true);
+	dso = dsos__find(&machine->dsos, vdso_file->dso_name, true);
 	if (dso)
-		goto out;
+		return dso;
 
 	file_name = vdso__get_compat_file(vdso_file);
 	if (!file_name)
-		goto out;
+		return NULL;
 
-	dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
-out:
-	return dso;
+	return __machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
 }
 
 static int __machine__findnew_vdso_compat(struct machine *machine,
@@ -297,21 +304,23 @@ static struct dso *machine__find_vdso(struct machine *machine,
 	dso_type = machine__thread_dso_type(machine, thread);
 	switch (dso_type) {
 	case DSO__TYPE_32BIT:
-		dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO32, true);
+		dso = dsos__find(&machine->dsos, DSO__NAME_VDSO32, true);
 		if (!dso) {
-			dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO,
-					   true);
-			if (dso && dso_type != dso__type(dso, machine))
+			dso = dsos__find(&machine->dsos, DSO__NAME_VDSO,
+					 true);
+			if (dso && dso_type != dso__type(dso, machine)) {
+				dso__put(dso);
 				dso = NULL;
+			}
 		}
 		break;
 	case DSO__TYPE_X32BIT:
-		dso = __dsos__find(&machine->dsos, DSO__NAME_VDSOX32, true);
+		dso = dsos__find(&machine->dsos, DSO__NAME_VDSOX32, true);
 		break;
 	case DSO__TYPE_64BIT:
 	case DSO__TYPE_UNKNOWN:
 	default:
-		dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+		dso = dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
 		break;
 	}
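
On the vdso.c changes above: dsos__find() now hands back a reference the caller owns, which is why machine__find_vdso() adds a dso__put() when it rejects a mismatched DSO and why __machine__addnew_vdso() no longer drops its reference after __dsos__add(). In outline (usable() is a hypothetical predicate standing in for the type check):

	struct dso *dso = dsos__find(&machine->dsos, DSO__NAME_VDSO, true);

	if (dso && !usable(dso)) {
		dso__put(dso);	/* drop the reference dsos__find() gave us */
		dso = NULL;
	}
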
@@ -323,42 +332,38 @@ struct dso *machine__findnew_vdso(struct machine *machine,
 {
 	struct vdso_info *vdso_info;
 	struct dso *dso = NULL;
+	char *file;
 
-	down_write(&machine->dsos.lock);
 	if (!machine->vdso_info)
 		machine->vdso_info = vdso_info__new();
 
 	vdso_info = machine->vdso_info;
 	if (!vdso_info)
-		goto out_unlock;
+		return NULL;
 
 	dso = machine__find_vdso(machine, thread);
 	if (dso)
-		goto out_unlock;
+		return dso;
 
 #if BITS_PER_LONG == 64
 	if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso))
-		goto out_unlock;
+		return dso;
 #endif
 
-	dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
-	if (!dso) {
-		char *file;
+	dso = dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+	if (dso)
+		return dso;
 
-		file = get_file(&vdso_info->vdso);
-		if (file)
-			dso = __machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
-	}
+	file = get_file(&vdso_info->vdso);
+	if (!file)
+		return NULL;
 
-out_unlock:
-	dso__get(dso);
-	up_write(&machine->dsos.lock);
-	return dso;
+	return __machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
 }
 
 bool dso__is_vdso(struct dso *dso)
 {
-	return !strcmp(dso->short_name, DSO__NAME_VDSO) ||
-	       !strcmp(dso->short_name, DSO__NAME_VDSO32) ||
-	       !strcmp(dso->short_name, DSO__NAME_VDSOX32);
+	return !strcmp(dso__short_name(dso), DSO__NAME_VDSO) ||
+	       !strcmp(dso__short_name(dso), DSO__NAME_VDSO32) ||
+	       !strcmp(dso__short_name(dso), DSO__NAME_VDSOX32);
 }
diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c
index 78d2297c1b67..1f7c06523059 100644
--- a/tools/perf/util/zlib.c
+++ b/tools/perf/util/zlib.c
@@ -88,7 +88,7 @@ bool gzip_is_compressed(const char *input)
 	ssize_t rc;
 
 	if (fd < 0)
-		return -1;
+		return false;
 
 	rc = read(fd, buf, sizeof(buf));
 	close(fd);
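
Why the zlib.c fix above matters: gzip_is_compressed() is declared bool, and in C any nonzero value converts to true, so returning -1 on a failed open reported the file as compressed; returning false is the correct failure value.

	bool f(void) { return -1; }	/* f() yields true - the old bug */
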
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
index 48dd2b018c47..57027e0ac7b6 100644
--- a/tools/perf/util/zstd.c
+++ b/tools/perf/util/zstd.c
@@ -7,35 +7,9 @@
 
 int zstd_init(struct zstd_data *data, int level)
 {
-	size_t ret;
-
-	data->dstream = ZSTD_createDStream();
-	if (data->dstream == NULL) {
-		pr_err("Couldn't create decompression stream.\n");
-		return -1;
-	}
-
-	ret = ZSTD_initDStream(data->dstream);
-	if (ZSTD_isError(ret)) {
-		pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret));
-		return -1;
-	}
-
-	if (!level)
-		return 0;
-
-	data->cstream = ZSTD_createCStream();
-	if (data->cstream == NULL) {
-		pr_err("Couldn't create compression stream.\n");
-		return -1;
-	}
-
-	ret = ZSTD_initCStream(data->cstream, level);
-	if (ZSTD_isError(ret)) {
-		pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret));
-		return -1;
-	}
-
+	data->comp_level = level;
+	data->dstream = NULL;
+	data->cstream = NULL;
 	return 0;
 }
 
@@ -54,7 +28,7 @@ int zstd_fini(struct zstd_data *data)
 	return 0;
 }
 
-size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
+ssize_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
 				       void *src, size_t src_size, size_t max_record_size,
 				       size_t process_header(void *record, size_t increment))
 {
@@ -63,6 +37,21 @@ size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t
 	ZSTD_outBuffer output;
 	void *record;
 
+	if (!data->cstream) {
+		data->cstream = ZSTD_createCStream();
+		if (data->cstream == NULL) {
+			pr_err("Couldn't create compression stream.\n");
+			return -1;
+		}
+
+		ret = ZSTD_initCStream(data->cstream, data->comp_level);
+		if (ZSTD_isError(ret)) {
+			pr_err("Failed to initialize compression stream: %s\n",
+			       ZSTD_getErrorName(ret));
+			return -1;
+		}
+	}
+
 	while (input.pos < input.size) {
 		record = dst;
 		size = process_header(record, 0);
@@ -96,6 +85,20 @@ size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size
 	ZSTD_inBuffer input = { src, src_size, 0 };
 	ZSTD_outBuffer output = { dst, dst_size, 0 };
 
+	if (!data->dstream) {
+		data->dstream = ZSTD_createDStream();
+		if (data->dstream == NULL) {
+			pr_err("Couldn't create decompression stream.\n");
+			return 0;
+		}
+
+		ret = ZSTD_initDStream(data->dstream);
+		if (ZSTD_isError(ret)) {
+			pr_err("Failed to initialize decompression stream: %s\n",
+			       ZSTD_getErrorName(ret));
+			return 0;
+		}
+	}
 	while (input.pos < input.size) {
 		ret = ZSTD_decompressStream(data->dstream, &output, &input);
 		if (ZSTD_isError(ret)) {
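
On the zstd.c rework above: zstd_init() now only records the requested compression level, and the compression and decompression streams are created lazily on first use, so sessions that never touch compressed data allocate nothing. An illustrative fragment of the resulting lifecycle, using only the functions shown in this diff:

	struct zstd_data zd;

	zstd_init(&zd, 3);	/* records level only; no ZSTD_create*Stream() yet */
	/* the first zstd_compress_stream_to_records() or
	 * zstd_decompress_stream() call creates and initializes
	 * the needed stream on demand */
	zstd_fini(&zd);		/* frees whichever streams were created */
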
