Diffstat (limited to 'tools/perf/util')
297 files changed, 29551 insertions, 11651 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index da64efd8718f..1c2a43e1dc68 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,181 +1,201 @@
 include $(srctree)/tools/scripts/Makefile.include
 include $(srctree)/tools/scripts/utilities.mak
-perf-y += arm64-frame-pointer-unwind-support.o
-perf-y += addr_location.o
-perf-y += annotate.o
-perf-y += block-info.o
-perf-y += block-range.o
-perf-y += build-id.o
-perf-y += cacheline.o
-perf-y += config.o
-perf-y += copyfile.o
-perf-y += ctype.o
-perf-y += db-export.o
-perf-y += disasm.o
-perf-y += env.o
-perf-y += event.o
-perf-y += evlist.o
-perf-y += sideband_evlist.o
-perf-y += evsel.o
-perf-y += evsel_fprintf.o
-perf-y += perf_event_attr_fprintf.o
-perf-y += evswitch.o
-perf-y += find_bit.o
-perf-y += get_current_dir_name.o
-perf-y += levenshtein.o
-perf-y += mmap.o
-perf-y += memswap.o
-perf-y += parse-events.o
-perf-y += print-events.o
-perf-y += tracepoint.o
-perf-y += perf_regs.o
-perf-y += perf-regs-arch/
-perf-y += path.o
-perf-y += print_binary.o
-perf-y += print_insn.o
-perf-y += rlimit.o
-perf-y += argv_split.o
-perf-y += rbtree.o
-perf-y += libstring.o
-perf-y += bitmap.o
-perf-y += hweight.o
-perf-y += smt.o
-perf-y += strbuf.o
-perf-y += string.o
-perf-y += strlist.o
-perf-y += strfilter.o
-perf-y += top.o
-perf-y += usage.o
-perf-y += dso.o
-perf-y += dsos.o
-perf-y += symbol.o
-perf-y += symbol_fprintf.o
-perf-y += map_symbol.o
-perf-y += color.o
-perf-y += color_config.o
-perf-y += metricgroup.o
-perf-y += header.o
-perf-y += callchain.o
-perf-y += values.o
-perf-y += debug.o
-perf-y += fncache.o
-perf-y += machine.o
-perf-y += map.o
-perf-y += maps.o
-perf-y += pstack.o
-perf-y += session.o
-perf-y += sample-raw.o
-perf-y += s390-sample-raw.o
-perf-y += amd-sample-raw.o
-perf-$(CONFIG_TRACE) += syscalltbl.o
-perf-y += ordered-events.o
-perf-y += namespaces.o
-perf-y += comm.o
-perf-y += thread.o
-perf-y += threads.o
-perf-y += thread_map.o
-perf-y += parse-events-flex.o
-perf-y += parse-events-bison.o
-perf-y += pmu.o
-perf-y += pmus.o
-perf-y += pmu-flex.o
-perf-y += pmu-bison.o
-perf-y += svghelper.o
-perf-$(CONFIG_LIBTRACEEVENT) += trace-event-info.o
-perf-y += trace-event-scripting.o
-perf-$(CONFIG_LIBTRACEEVENT) += trace-event.o
-perf-$(CONFIG_LIBTRACEEVENT) += trace-event-parse.o
-perf-$(CONFIG_LIBTRACEEVENT) += trace-event-read.o
-perf-y += sort.o
-perf-y += hist.o
-perf-y += util.o
-perf-y += cpumap.o
-perf-y += affinity.o
-perf-y += cputopo.o
-perf-y += cgroup.o
-perf-y += target.o
-perf-y += rblist.o
-perf-y += intlist.o
-perf-y += vdso.o
-perf-y += counts.o
-perf-y += stat.o
-perf-y += stat-shadow.o
-perf-y += stat-display.o
-perf-y += perf_api_probe.o
-perf-y += record.o
-perf-y += srcline.o
-perf-y += srccode.o
-perf-y += synthetic-events.o
-perf-y += data.o
-perf-y += tsc.o
-perf-y += cloexec.o
-perf-y += call-path.o
-perf-y += rwsem.o
-perf-y += thread-stack.o
-perf-y += spark.o
-perf-y += topdown.o
-perf-y += iostat.o
-perf-y += stream.o
-perf-$(CONFIG_AUXTRACE) += auxtrace.o
-perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
-perf-$(CONFIG_AUXTRACE) += intel-pt.o
-perf-$(CONFIG_AUXTRACE) += intel-bts.o
-perf-$(CONFIG_AUXTRACE) += arm-spe.o
-perf-$(CONFIG_AUXTRACE) += arm-spe-decoder/
-perf-$(CONFIG_AUXTRACE) += hisi-ptt.o
-perf-$(CONFIG_AUXTRACE) += hisi-ptt-decoder/
-perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
+perf-util-y += arm64-frame-pointer-unwind-support.o
+perf-util-y += addr2line.o
+perf-util-y += addr_location.o
+perf-util-y += annotate.o
+perf-util-y += block-info.o
+perf-util-y += block-range.o
+perf-util-y += build-id.o
+perf-util-y += cacheline.o
+perf-util-y += capstone.o
+perf-util-y += config.o
+perf-util-y += copyfile.o
+perf-util-y += ctype.o
+perf-util-y += db-export.o
+perf-util-y += disasm.o
+perf-util-y += env.o
+perf-util-y += event.o
+perf-util-y += evlist.o
+perf-util-y += sideband_evlist.o
+perf-util-y += evsel.o
+perf-util-y += evsel_fprintf.o
+perf-util-y += perf_event_attr_fprintf.o
+perf-util-y += evswitch.o
+perf-util-y += find_bit.o
+perf-util-y += levenshtein.o
+perf-util-$(CONFIG_LIBBFD) += libbfd.o
+perf-util-y += llvm.o
+perf-util-y += mmap.o
+perf-util-y += memswap.o
+perf-util-y += parse-events.o
+perf-util-y += print-events.o
+perf-util-y += tracepoint.o
+perf-util-y += perf_regs.o
+perf-util-y += perf-regs-arch/
+perf-util-y += path.o
+perf-util-y += print_binary.o
+perf-util-y += print_insn.o
+perf-util-y += rlimit.o
+perf-util-y += argv_split.o
+perf-util-y += rbtree.o
+perf-util-y += libstring.o
+perf-util-y += bitmap.o
+perf-util-y += hweight.o
+perf-util-y += sha1.o
+perf-util-y += smt.o
+perf-util-y += strbuf.o
+perf-util-y += string.o
+perf-util-y += strlist.o
+perf-util-y += strfilter.o
+perf-util-y += top.o
+perf-util-y += usage.o
+perf-util-y += dso.o
+perf-util-y += dsos.o
+perf-util-y += symbol.o
+perf-util-y += symbol_fprintf.o
+perf-util-y += map_symbol.o
+perf-util-y += color.o
+perf-util-y += color_config.o
+perf-util-y += metricgroup.o
+perf-util-y += header.o
+perf-util-y += callchain.o
+perf-util-y += values.o
+perf-util-y += debug.o
+perf-util-y += fncache.o
+perf-util-y += machine.o
+perf-util-y += map.o
+perf-util-y += maps.o
+perf-util-y += pstack.o
+perf-util-y += session.o
+perf-util-y += tool.o
+perf-util-y += sample.o
+perf-util-y += sample-raw.o
+perf-util-y += s390-sample-raw.o
+perf-util-y += amd-sample-raw.o
+perf-util-$(CONFIG_TRACE) += syscalltbl.o
+perf-util-y += ordered-events.o
+perf-util-y += namespaces.o
+perf-util-y += comm.o
+perf-util-y += thread.o
+perf-util-y += threads.o
+perf-util-y += thread_map.o
+perf-util-y += parse-events-flex.o
+perf-util-y += parse-events-bison.o
+perf-util-y += pmu.o
+perf-util-y += pmus.o
+perf-util-y += pmu-flex.o
+perf-util-y += pmu-bison.o
+perf-util-y += drm_pmu.o
+perf-util-y += hwmon_pmu.o
+perf-util-y += tool_pmu.o
+perf-util-y += tp_pmu.o
+perf-util-y += svghelper.o
+perf-util-y += trace-event-info.o
+perf-util-y += trace-event-scripting.o
+perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event.o
+perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-parse.o
+perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-read.o
+perf-util-y += sort.o
+perf-util-y += hist.o
+perf-util-y += util.o
+perf-util-y += cpumap.o
+perf-util-y += affinity.o
+perf-util-y += cputopo.o
+perf-util-y += cgroup.o
+perf-util-y += target.o
+perf-util-y += rblist.o
+perf-util-y += intlist.o
+perf-util-y += vdso.o
+perf-util-y += counts.o
+perf-util-y += stat.o
+perf-util-y += stat-shadow.o
+perf-util-y += stat-display.o
+perf-util-y += perf_api_probe.o
+perf-util-y += record.o
+perf-util-y += srcline.o
+perf-util-y += srccode.o
+perf-util-y += synthetic-events.o
+perf-util-y += data.o
+perf-util-y += tsc.o
+perf-util-y += cloexec.o
+perf-util-y += call-path.o
+perf-util-y += rwsem.o
+perf-util-y += thread-stack.o
+perf-util-y += spark.o
+perf-util-y += topdown.o
+perf-util-y += iostat.o
+perf-util-y += stream.o
+perf-util-y += kvm-stat.o
+perf-util-y += lock-contention.o
+perf-util-y += auxtrace.o
+perf-util-y += intel-pt-decoder/
+perf-util-y += intel-pt.o
+perf-util-y += intel-bts.o
+perf-util-y += arm-spe.o
+perf-util-y += arm-spe-decoder/
+perf-util-y += hisi-ptt.o
+perf-util-y += hisi-ptt-decoder/
+perf-util-y += s390-cpumsf.o
+perf-util-y += powerpc-vpadtl.o
 ifdef CONFIG_LIBOPENCSD
-perf-$(CONFIG_AUXTRACE) += cs-etm.o
-perf-$(CONFIG_AUXTRACE) += cs-etm-decoder/
+perf-util-y += cs-etm.o
+perf-util-y += cs-etm-decoder/
+endif
+perf-util-y += cs-etm-base.o
+
+perf-util-y += parse-branch-options.o
+perf-util-y += dump-insn.o
+perf-util-y += parse-regs-options.o
+perf-util-y += parse-sublevel-options.o
+perf-util-y += term.o
+perf-util-y += help-unknown-cmd.o
+perf-util-y += dlfilter.o
+perf-util-y += mem-events.o
+perf-util-y += mem-info.o
+perf-util-y += vsprintf.o
+perf-util-y += units.o
+perf-util-y += time-utils.o
+perf-util-y += expr-flex.o
+perf-util-y += expr-bison.o
+perf-util-y += expr.o
+perf-util-y += branch.o
+perf-util-y += mem2node.o
+perf-util-y += clockid.o
+perf-util-y += list_sort.o
+perf-util-y += mutex.o
+perf-util-y += sharded_mutex.o
+perf-util-y += intel-tpebs.o
+
+perf-util-$(CONFIG_LIBBPF) += bpf_map.o
+perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
+perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
+perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
+perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
+perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter.o
+perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-flex.o
+perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-bison.o
+perf-util-$(CONFIG_PERF_BPF_SKEL) += btf.o
+
+ifeq ($(CONFIG_TRACE),y)
+  perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-trace-summary.o
+  perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_trace_augment.o
 endif
-perf-$(CONFIG_AUXTRACE) += cs-etm-base.o
-
-perf-y += parse-branch-options.o
-perf-y += dump-insn.o
-perf-y += parse-regs-options.o
-perf-y += parse-sublevel-options.o
-perf-y += term.o
-perf-y += help-unknown-cmd.o
-perf-y += dlfilter.o
-perf-y += mem-events.o
-perf-y += mem-info.o
-perf-y += vsprintf.o
-perf-y += units.o
-perf-y += time-utils.o
-perf-y += expr-flex.o
-perf-y += expr-bison.o
-perf-y += expr.o
-perf-y += branch.o
-perf-y += mem2node.o
-perf-y += clockid.o
-perf-y += list_sort.o
-perf-y += mutex.o
-perf-y += sharded_mutex.o
-
-perf-$(CONFIG_LIBBPF) += bpf_map.o
-perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
-perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
-perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
-perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
-perf-$(CONFIG_PERF_BPF_SKEL) += bpf-filter.o
-perf-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-flex.o
-perf-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-bison.o
 ifeq ($(CONFIG_LIBTRACEEVENT),y)
-  perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o
+  perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o
 endif
 ifeq ($(CONFIG_LIBTRACEEVENT),y)
-  perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o
-  perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork_top.o
+  perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o
+  perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork_top.o
 endif
 
-perf-$(CONFIG_LIBELF) += symbol-elf.o
-perf-$(CONFIG_LIBELF) += probe-file.o
-perf-$(CONFIG_LIBELF) += probe-event.o
+perf-util-$(CONFIG_LIBELF) += symbol-elf.o
+perf-util-$(CONFIG_LIBELF) += probe-file.o
+perf-util-$(CONFIG_LIBELF) += probe-event.o
 
 ifdef CONFIG_LIBBPF_DYNAMIC
   hashmap := 1
@@ -185,60 +205,67 @@ ifndef CONFIG_LIBBPF
 endif
 
 ifdef hashmap
-perf-y += hashmap.o
+perf-util-y += hashmap.o
 endif
 
 ifndef CONFIG_LIBELF
-perf-y += symbol-minimal.o
+perf-util-y += symbol-minimal.o
 endif
 
 ifndef CONFIG_SETNS
-perf-y += setns.o
+perf-util-y += setns.o
 endif
 
-perf-$(CONFIG_DWARF) += probe-finder.o
-perf-$(CONFIG_DWARF) += dwarf-aux.o
-perf-$(CONFIG_DWARF) += dwarf-regs.o
-perf-$(CONFIG_DWARF) += debuginfo.o
-perf-$(CONFIG_DWARF) += annotate-data.o
-
-perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
-perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
-perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
-perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
-perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o
+perf-util-$(CONFIG_LIBDW) += probe-finder.o
+perf-util-$(CONFIG_LIBDW) += dwarf-aux.o
+perf-util-$(CONFIG_LIBDW) += dwarf-regs.o
+perf-util-$(CONFIG_LIBDW) += dwarf-regs-csky.o
+perf-util-$(CONFIG_LIBDW) += dwarf-regs-powerpc.o
+perf-util-$(CONFIG_LIBDW) += dwarf-regs-x86.o
+perf-util-$(CONFIG_LIBDW) += debuginfo.o
+perf-util-$(CONFIG_LIBDW) += annotate-data.o
+
+perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
+perf-util-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+perf-util-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
+perf-util-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o
 
 ifeq ($(CONFIG_LIBTRACEEVENT),y)
-  perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
+  perf-util-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
 endif
-perf-y += data-convert-json.o
+perf-util-y += data-convert-json.o
 
-perf-y += scripting-engines/
+perf-util-y += scripting-engines/
 
-perf-$(CONFIG_ZLIB) += zlib.o
-perf-$(CONFIG_LZMA) += lzma.o
-perf-$(CONFIG_ZSTD) += zstd.o
+perf-util-$(CONFIG_ZLIB) += zlib.o
+perf-util-$(CONFIG_LZMA) += lzma.o
+perf-util-$(CONFIG_ZSTD) += zstd.o
 
-perf-$(CONFIG_LIBCAP) += cap.o
+perf-util-y += cap.o
 
-perf-$(CONFIG_CXX_DEMANGLE) += demangle-cxx.o
-perf-y += demangle-ocaml.o
-perf-y += demangle-java.o
-perf-y += demangle-rust.o
+perf-util-$(CONFIG_CXX_DEMANGLE) += demangle-cxx.o
+perf-util-y += demangle-ocaml.o
+perf-util-y += demangle-java.o
+perf-util-y += demangle-rust-v0.o
+perf-util-$(CONFIG_LIBLLVM) += llvm-c-helpers.o
+
+CFLAGS_demangle-rust-v0.o += -Wno-shadow -Wno-declaration-after-statement \
+	-Wno-switch-default -Wno-switch-enum -Wno-missing-field-initializers
 
 ifdef CONFIG_JITDUMP
-perf-$(CONFIG_LIBELF) += jitdump.o
-perf-$(CONFIG_LIBELF) += genelf.o
-perf-$(CONFIG_DWARF) += genelf_debug.o
+perf-util-$(CONFIG_LIBELF) += jitdump.o
+perf-util-$(CONFIG_LIBELF) += genelf.o
+perf-util-$(CONFIG_LIBDW) += genelf_debug.o
 endif
 
-perf-y += perf-hooks.o
+perf-util-y += perf-hooks.o
 
-perf-$(CONFIG_LIBBPF) += bpf-event.o
-perf-$(CONFIG_LIBBPF) += bpf-utils.o
+perf-util-$(CONFIG_LIBBPF) += bpf-event.o
+perf-util-$(CONFIG_LIBBPF) += bpf-utils.o
 
-perf-$(CONFIG_LIBPFM4) += pfm.o
+perf-util-$(CONFIG_LIBPFM4) += pfm.o
 
 CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
@@ -275,12 +302,12 @@ $(OUTPUT)util/pmu-bison.c $(OUTPUT)util/pmu-bison.h: util/pmu.y
 	$(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
 		-o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
 
-$(OUTPUT)util/bpf-filter-flex.c $(OUTPUT)util/bpf-filter-flex.h: util/bpf-filter.l $(OUTPUT)util/bpf-filter-bison.c
+$(OUTPUT)util/bpf-filter-flex.c $(OUTPUT)util/bpf-filter-flex.h: util/bpf-filter.l $(OUTPUT)util/bpf-filter-bison.c util/bpf-filter.h util/bpf_skel/sample-filter.h
 	$(call rule_mkdir)
 	$(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/bpf-filter-flex.c \
 		--header-file=$(OUTPUT)util/bpf-filter-flex.h $(PARSER_DEBUG_FLEX) $<
 
-$(OUTPUT)util/bpf-filter-bison.c $(OUTPUT)util/bpf-filter-bison.h: util/bpf-filter.y
+$(OUTPUT)util/bpf-filter-bison.c $(OUTPUT)util/bpf-filter-bison.h: util/bpf-filter.y util/bpf-filter.h util/bpf_skel/sample-filter.h
 	$(call rule_mkdir)
 	$(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
 		-o $(OUTPUT)util/bpf-filter-bison.c -p perf_bpf_filter_
@@ -393,14 +420,39 @@ $(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE
 
 ifdef SHELLCHECK
   SHELL_TESTS := generate-cmdlist.sh
-  TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log)
+  SHELL_TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log)
 else
   SHELL_TESTS :=
-  TEST_LOGS :=
+  SHELL_TEST_LOGS :=
 endif
 
 $(OUTPUT)%.shellcheck_log: %
 	$(call rule_mkdir)
-	$(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false)
+	$(Q)$(call echo-cmd,test)$(SHELLCHECK) "$<" > $@ || (cat $@ && rm $@ && false)
+
+perf-util-y += $(SHELL_TEST_LOGS)
+
+PY_TESTS := setup.py
+ifdef MYPY
+  MYPY_TEST_LOGS := $(PY_TESTS:%=%.mypy_log)
+else
+  MYPY_TEST_LOGS :=
+endif
+
+$(OUTPUT)%.mypy_log: %
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,test)mypy "$<" > $@ || (cat $@ && rm $@ && false)
+
+perf-util-y += $(MYPY_TEST_LOGS)
+
+ifdef PYLINT
+  PYLINT_TEST_LOGS := $(PY_TESTS:%=%.pylint_log)
+else
+  PYLINT_TEST_LOGS :=
+endif
+
+$(OUTPUT)%.pylint_log: %
+	$(call rule_mkdir)
+	$(Q)$(call echo-cmd,test)pylint "$<" > $@ || (cat $@ && rm $@ && false)
 
-perf-y += $(TEST_LOGS)
+perf-util-y += $(PYLINT_TEST_LOGS)
diff --git a/tools/perf/util/addr2line.c b/tools/perf/util/addr2line.c
new file mode 100644
index 000000000000..f2d94a3272d7
--- /dev/null
+++ b/tools/perf/util/addr2line.c
@@ -0,0 +1,439 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "addr2line.h"
+#include "debug.h"
+#include "dso.h"
+#include "string2.h"
+#include "srcline.h"
+#include "symbol.h"
+#include "symbol_conf.h"
+
+#include <api/io.h>
+#include <linux/zalloc.h>
+#include <subcmd/run-command.h>
+
+#include <inttypes.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MAX_INLINE_NEST 1024
+
+/* If addr2line doesn't return data for 1 second then timeout. */
+int addr2line_timeout_ms = 1 * 1000;
+
+static int filename_split(char *filename, unsigned int *line_nr)
+{
+	char *sep;
+
+	sep = strchr(filename, '\n');
+	if (sep)
+		*sep = '\0';
+
+	if (!strcmp(filename, "??:0"))
+		return 0;
+
+	sep = strchr(filename, ':');
+	if (sep) {
+		*sep++ = '\0';
+		*line_nr = strtoul(sep, NULL, 0);
+		return 1;
+	}
+	pr_debug("addr2line missing ':' in filename split\n");
+	return 0;
+}
+
+static void addr2line_subprocess_cleanup(struct child_process *a2l)
+{
+	if (a2l->pid != -1) {
+		kill(a2l->pid, SIGKILL);
+		finish_command(a2l); /* ignore result, we don't care */
+		a2l->pid = -1;
+		close(a2l->in);
+		close(a2l->out);
+	}
+
+	free(a2l);
+}
+
+static struct child_process *addr2line_subprocess_init(const char *addr2line_path,
+						       const char *binary_path)
+{
+	const char *argv[] = {
+		addr2line_path ?: "addr2line",
+		"-e", binary_path,
+		"-a", "-i", "-f", NULL
+	};
+	struct child_process *a2l = zalloc(sizeof(*a2l));
+	int start_command_status = 0;
+
+	if (a2l == NULL) {
+		pr_err("Failed to allocate memory for addr2line");
+		return NULL;
+	}
+
+	a2l->pid = -1;
+	a2l->in = -1;
+	a2l->out = -1;
+	a2l->no_stderr = 1;
+
+	a2l->argv = argv;
+	start_command_status = start_command(a2l);
+	a2l->argv = NULL; /* it's not used after start_command; avoid dangling pointers */
+
+	if (start_command_status != 0) {
+		pr_warning("could not start addr2line (%s) for %s: start_command return code %d\n",
+			   addr2line_path, binary_path, start_command_status);
+		addr2line_subprocess_cleanup(a2l);
+		return NULL;
+	}
+
+	return a2l;
+}
+
+enum a2l_style {
+	BROKEN,
+	GNU_BINUTILS,
+	LLVM,
+};
+
+static enum a2l_style addr2line_configure(struct child_process *a2l, const char *dso_name)
+{
+	static bool cached;
+	static enum a2l_style style;
+
+	if (!cached) {
+		char buf[128];
+		struct io io;
+		int ch;
+		int lines;
+
+		if (write(a2l->in, ",\n", 2) != 2)
+			return BROKEN;
+
+		io__init(&io, a2l->out, buf, sizeof(buf));
+		ch = io__get_char(&io);
+		if (ch == ',') {
+			style = LLVM;
+			cached = true;
+			lines = 1;
+			pr_debug3("Detected LLVM addr2line style\n");
+		} else if (ch == '0') {
+			style = GNU_BINUTILS;
+			cached = true;
+			lines = 3;
+			pr_debug3("Detected binutils addr2line style\n");
+		} else {
+			if (!symbol_conf.disable_add2line_warn) {
+				char *output = NULL;
+				size_t output_len;
+
+				io__getline(&io, &output, &output_len);
+				pr_warning("%s %s: addr2line configuration failed\n",
+					   __func__, dso_name);
+				pr_warning("\t%c%s", ch, output);
+			}
+			pr_debug("Unknown/broken addr2line style\n");
+			return BROKEN;
+		}
+		while (lines) {
+			ch = io__get_char(&io);
+			if (ch <= 0)
+				break;
+			if (ch == '\n')
+				lines--;
+		}
+		/* Ignore SIGPIPE in the event addr2line exits. */
+		signal(SIGPIPE, SIG_IGN);
+	}
+	return style;
+}
+
+static int read_addr2line_record(struct io *io,
+				 enum a2l_style style,
+				 const char *dso_name,
+				 u64 addr,
+				 bool first,
+				 char **function,
+				 char **filename,
+				 unsigned int *line_nr)
+{
+	/*
+	 * Returns:
+	 * -1 ==> error
+	 * 0 ==> sentinel (or other ill-formed) record read
+	 * 1 ==> a genuine record read
+	 */
+	char *line = NULL;
+	size_t line_len = 0;
+	unsigned int dummy_line_nr = 0;
+	int ret = -1;
+
+	if (function != NULL)
+		zfree(function);
+
+	if (filename != NULL)
+		zfree(filename);
+
+	if (line_nr != NULL)
+		*line_nr = 0;
+
+	/*
+	 * Read the first line. Without an error this will be:
+	 * - for the first line an address like 0x1234,
+	 * - the binutils sentinel 0x0000000000000000,
+	 * - the llvm-addr2line the sentinel ',' character,
+	 * - the function name line for an inlined function.
+	 */
+	if (io__getline(io, &line, &line_len) < 0 || !line_len)
+		goto error;
+
+	pr_debug3("%s %s: addr2line read address for sentinel: %s", __func__, dso_name, line);
+	if (style == LLVM && line_len == 2 && line[0] == ',') {
+		/* Found the llvm-addr2line sentinel character. */
+		zfree(&line);
+		return 0;
+	} else if (style == GNU_BINUTILS && (!first || addr != 0)) {
+		int zero_count = 0, non_zero_count = 0;
+		/*
+		 * Check for binutils sentinel ignoring it for the case the
+		 * requested address is 0.
+		 */
+
+		/* A given address should always start 0x. */
+		if (line_len >= 2 || line[0] != '0' || line[1] != 'x') {
+			for (size_t i = 2; i < line_len; i++) {
+				if (line[i] == '0')
+					zero_count++;
+				else if (line[i] != '\n')
+					non_zero_count++;
+			}
+			if (!non_zero_count) {
+				int ch;
+
+				if (first && !zero_count) {
+					/* Line was erroneous just '0x'. */
+					goto error;
+				}
+				/*
+				 * Line was 0x0..0, the sentinel for binutils. Remove
+				 * the function and filename lines.
+				 */
+				zfree(&line);
+				do {
+					ch = io__get_char(io);
+				} while (ch > 0 && ch != '\n');
+				do {
+					ch = io__get_char(io);
+				} while (ch > 0 && ch != '\n');
+				return 0;
+			}
+		}
+	}
+	/* Read the second function name line (if inline data then this is the first line). */
+	if (first && (io__getline(io, &line, &line_len) < 0 || !line_len))
+		goto error;
+
+	pr_debug3("%s %s: addr2line read line: %s", __func__, dso_name, line);
+	if (function != NULL)
+		*function = strdup(strim(line));
+
+	zfree(&line);
+	line_len = 0;
+
+	/* Read the third filename and line number line. */
+	if (io__getline(io, &line, &line_len) < 0 || !line_len)
+		goto error;
+
+	pr_debug3("%s %s: addr2line filename:number : %s", __func__, dso_name, line);
+	if (filename_split(line, line_nr == NULL ? &dummy_line_nr : line_nr) == 0 &&
+	    style == GNU_BINUTILS) {
+		ret = 0;
+		goto error;
+	}
+
+	if (filename != NULL)
+		*filename = strdup(line);
+
+	zfree(&line);
+	line_len = 0;
+
+	return 1;
+
+error:
+	free(line);
+	if (function != NULL)
+		zfree(function);
+	if (filename != NULL)
+		zfree(filename);
+	return ret;
+}
+
+static int inline_list__append_record(struct dso *dso,
+				      struct inline_node *node,
+				      struct symbol *sym,
+				      const char *function,
+				      const char *filename,
+				      unsigned int line_nr)
+{
+	struct symbol *inline_sym = new_inline_sym(dso, sym, function);
+
+	return inline_list__append(inline_sym, srcline_from_fileline(filename, line_nr), node);
+}
+
+int cmd__addr2line(const char *dso_name, u64 addr,
+		   char **file, unsigned int *line_nr,
+		   struct dso *dso,
+		   bool unwind_inlines,
+		   struct inline_node *node,
+		   struct symbol *sym __maybe_unused)
+{
+	struct child_process *a2l = dso__a2l(dso);
+	char *record_function = NULL;
+	char *record_filename = NULL;
+	unsigned int record_line_nr = 0;
+	int record_status = -1;
+	int ret = 0;
+	size_t inline_count = 0;
+	int len;
+	char buf[128];
+	ssize_t written;
+	struct io io = { .eof = false };
+	enum a2l_style a2l_style;
+
+	if (!a2l) {
+		if (!filename__has_section(dso_name, ".debug_line"))
+			goto out;
+
+		dso__set_a2l(dso,
+			     addr2line_subprocess_init(symbol_conf.addr2line_path, dso_name));
+		a2l = dso__a2l(dso);
+	}
+
+	if (a2l == NULL) {
+		if (!symbol_conf.disable_add2line_warn)
+			pr_warning("%s %s: addr2line_subprocess_init failed\n", __func__, dso_name);
+		goto out;
+	}
+	a2l_style = addr2line_configure(a2l, dso_name);
+	if (a2l_style == BROKEN)
+		goto out;
+
+	/*
+	 * Send our request and then *deliberately* send something that can't be
+	 * interpreted as a valid address to ask addr2line about (namely,
+	 * ","). This causes addr2line to first write out the answer to our
+	 * request, in an unbounded/unknown number of records, and then to write
+	 * out the lines "0x0...0", "??" and "??:0", for GNU binutils, or ","
+	 * for llvm-addr2line, so that we can detect when it has finished giving
+	 * us anything useful.
+	 */
+	len = snprintf(buf, sizeof(buf), "%016"PRIx64"\n,\n", addr);
+	written = len > 0 ? write(a2l->in, buf, len) : -1;
+	if (written != len) {
+		if (!symbol_conf.disable_add2line_warn)
+			pr_warning("%s %s: could not send request\n", __func__, dso_name);
+		goto out;
+	}
+	io__init(&io, a2l->out, buf, sizeof(buf));
+	io.timeout_ms = addr2line_timeout_ms;
+	switch (read_addr2line_record(&io, a2l_style, dso_name, addr, /*first=*/true,
+				      &record_function, &record_filename, &record_line_nr)) {
+	case -1:
+		if (!symbol_conf.disable_add2line_warn)
+			pr_warning("%s %s: could not read first record\n", __func__, dso_name);
+		goto out;
+	case 0:
+		/*
+		 * The first record was invalid, so return failure, but first
+		 * read another record, since we sent a sentinel ',' for the
+		 * sake of detected the last inlined function. Treat this as the
+		 * first of a record as the ',' generates a new start with GNU
+		 * binutils, also force a non-zero address as we're no longer
+		 * reading that record.
+		 */
+		switch (read_addr2line_record(&io, a2l_style, dso_name,
+					      /*addr=*/1, /*first=*/true,
+					      NULL, NULL, NULL)) {
+		case -1:
+			if (!symbol_conf.disable_add2line_warn)
+				pr_warning("%s %s: could not read sentinel record\n",
+					   __func__, dso_name);
+			break;
+		case 0:
+			/* The sentinel as expected. */
+			break;
+		default:
+			if (!symbol_conf.disable_add2line_warn)
+				pr_warning("%s %s: unexpected record instead of sentinel",
+					   __func__, dso_name);
+			break;
+		}
+		goto out;
+	default:
+		/* First record as expected. */
+		break;
+	}
+
+	if (file) {
+		*file = strdup(record_filename);
+		ret = 1;
+	}
+	if (line_nr)
+		*line_nr = record_line_nr;
+
+	if (unwind_inlines) {
+		if (node && inline_list__append_record(dso, node, sym,
+						       record_function,
+						       record_filename,
+						       record_line_nr)) {
+			ret = 0;
+			goto out;
+		}
+	}
+
+	/*
+	 * We have to read the records even if we don't care about the inline
+	 * info. This isn't the first record and force the address to non-zero
+	 * as we're reading records beyond the first.
+	 */
+	while ((record_status = read_addr2line_record(&io,
+						      a2l_style,
+						      dso_name,
+						      /*addr=*/1,
+						      /*first=*/false,
+						      &record_function,
+						      &record_filename,
+						      &record_line_nr)) == 1) {
+		if (unwind_inlines && node && inline_count++ < MAX_INLINE_NEST) {
+			if (inline_list__append_record(dso, node, sym,
+						       record_function,
+						       record_filename,
+						       record_line_nr)) {
+				ret = 0;
+				goto out;
+			}
+			ret = 1; /* found at least one inline frame */
+		}
+	}
+
+out:
+	free(record_function);
+	free(record_filename);
+	if (io.eof) {
+		dso__set_a2l(dso, NULL);
+		addr2line_subprocess_cleanup(a2l);
+	}
+	return ret;
+}
+
+void dso__free_a2l(struct dso *dso)
+{
+	struct child_process *a2l = dso__a2l(dso);
+
+	if (!a2l)
+		return;
+
+	addr2line_subprocess_cleanup(a2l);
+
+	dso__set_a2l(dso, NULL);
+}
diff --git a/tools/perf/util/addr2line.h b/tools/perf/util/addr2line.h
new file mode 100644
index 000000000000..d35a47ba8dab
--- /dev/null
+++ b/tools/perf/util/addr2line.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ADDR2LINE_H
+#define __PERF_ADDR2LINE_H
+
+#include <linux/types.h>
+
+struct dso;
+struct inline_node;
+struct symbol;
+
+extern int addr2line_timeout_ms;
+
+int cmd__addr2line(const char *dso_name, u64 addr,
+		   char **file, unsigned int *line_nr,
+		   struct dso *dso,
+		   bool unwind_inlines,
+		   struct inline_node *node,
+		   struct symbol *sym);
+
+#endif /* __PERF_ADDR2LINE_H */
diff --git a/tools/perf/util/addr_location.c b/tools/perf/util/addr_location.c
index 51825ef8c0ab..007a2f5df9a6 100644
--- a/tools/perf/util/addr_location.c
+++ b/tools/perf/util/addr_location.c
@@ -17,6 +17,7 @@ void addr_location__init(struct addr_location *al)
 	al->cpumode = 0;
 	al->cpu = 0;
 	al->socket = 0;
+	al->parallelism = 1;
 }
 
 /*
diff --git a/tools/perf/util/addr_location.h b/tools/perf/util/addr_location.h
index d8ac0428dff2..64b551025216 100644
--- a/tools/perf/util/addr_location.h
+++ b/tools/perf/util/addr_location.h
@@ -17,10 +17,14 @@ struct addr_location {
 	const char *srcline;
 	u64 addr;
 	char level;
-	u8 filtered;
 	u8 cpumode;
+	u16 filtered;
 	s32 cpu;
 	s32 socket;
+	/* Same as machine.parallelism but within [1, nr_cpus]. */
+	int parallelism;
+	/* See he_stat.latency. */
+	u64 latency;
 };
 
 void addr_location__init(struct addr_location *al);
diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c
index 38dc4524b7e8..4fe851334296 100644
--- a/tools/perf/util/affinity.c
+++ b/tools/perf/util/affinity.c
@@ -5,6 +5,7 @@
 #include <stdlib.h>
 #include <linux/bitmap.h>
 #include <linux/zalloc.h>
+#include <perf/cpumap.h>
 #include "perf.h"
 #include "cpumap.h"
 #include "affinity.h"
@@ -83,3 +84,20 @@ void affinity__cleanup(struct affinity *a)
 	if (a != NULL)
 		__affinity__cleanup(a);
 }
+
+void cpu_map__set_affinity(const struct perf_cpu_map *cpumap)
+{
+	int cpu_set_size = get_cpu_set_size();
+	unsigned long *cpuset = bitmap_zalloc(cpu_set_size * 8);
+	struct perf_cpu cpu;
+	int idx;
+
+	if (!cpuset)
+		return;
+
+	perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpumap)
+		__set_bit(cpu.cpu, cpuset);
+
+	sched_setaffinity(0, cpu_set_size, (cpu_set_t *)cpuset);
+	zfree(&cpuset);
+}
diff --git a/tools/perf/util/affinity.h b/tools/perf/util/affinity.h
index 0ad6a18ef20c..7341194b2298 100644
--- a/tools/perf/util/affinity.h
+++ b/tools/perf/util/affinity.h
@@ -4,6 +4,7 @@
 
 #include <stdbool.h>
 
+struct perf_cpu_map;
 struct affinity {
 	unsigned long *orig_cpus;
 	unsigned long *sched_cpus;
@@ -13,5 +14,6 @@ struct affinity {
 void affinity__cleanup(struct affinity *a);
 void affinity__set(struct affinity *a, int cpu);
 int affinity__setup(struct affinity *a);
+void cpu_map__set_affinity(const struct perf_cpu_map *cpumap);
 
 #endif // PERF_AFFINITY_H
diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c
index 9d0ce88e90e4..b084dee76b1a 100644
--- a/tools/perf/util/amd-sample-raw.c
+++ b/tools/perf/util/amd-sample-raw.c
@@ -9,7 +9,7 @@
 #include <inttypes.h>
 #include <linux/string.h>
 
-#include "../../arch/x86/include/asm/amd-ibs.h"
+#include "../../arch/x86/include/asm/amd/ibs.h"
 
 #include "debug.h"
 #include "session.h"
@@ -19,6 +19,8 @@
 static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
 static bool zen4_ibs_extensions;
+static bool ldlat_cap;
+static bool dtlb_pgsize_cap;
 
 static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
 {
@@ -78,14 +80,20 @@ static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg)
 static void pr_ibs_op_ctl(union ibs_op_ctl reg)
 {
 	char l3_miss_only[sizeof(" L3MissOnly _")] = "";
+	char ldlat[sizeof(" LdLatThrsh __ LdLatEn _")] = "";
 
 	if (zen4_ibs_extensions)
 		snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only);
 
-	printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d\n",
+	if (ldlat_cap) {
+		snprintf(ldlat, sizeof(ldlat), " LdLatThrsh %2d LdLatEn %d",
+			 reg.ldlat_thrsh, reg.ldlat_en);
+	}
+
+	printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d%s\n",
	       reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only,
 	       reg.op_en, reg.op_val, reg.cnt_ctl,
-	       reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt);
+	       reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt, ldlat);
 }
 
 static void pr_ibs_op_data(union ibs_op_data reg)
@@ -154,9 +162,20 @@ static void pr_ibs_op_data2(union ibs_op_data2 reg)
 
 static void pr_ibs_op_data3(union ibs_op_data3 reg)
 {
-	char l2_miss_str[sizeof(" L2Miss _")] = "";
-	char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = "";
+	static const char * const dc_page_sizes[] = {
+		" 4K",
+		" 2M",
+		" 1G",
+		" ??",
+	};
 	char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = "";
+	char dc_l1_l2tlb_miss_str[sizeof(" DcL1TlbMiss _ DcL2TlbMiss _")] = "";
+	char dc_l1tlb_hit_str[sizeof(" DcL1TlbHit2M _ DcL1TlbHit1G _")] = "";
+	char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = "";
+	char dc_l2tlb_hit_2m_str[sizeof(" DcL2TlbHit2M _")] = "";
+	char dc_l2tlb_hit_1g_str[sizeof(" DcL2TlbHit1G _")] = "";
+	char dc_page_size_str[sizeof(" DcPageSize ____")] = "";
+	char l2_miss_str[sizeof(" L2Miss _")] = "";
 
 	/*
 	 * Erratum #1293
@@ -172,16 +191,40 @@ static void pr_ibs_op_data3(union ibs_op_data3 reg)
 		snprintf(op_mem_width_str, sizeof(op_mem_width_str),
 			 " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1));
 
-	printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d "
-	       "DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d "
-	       "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d "
-	       "DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n",
-	       reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss,
-	       reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss,
-	       reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op,
-	       reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid,
-	       reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str,
-	       op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat);
+	if (dtlb_pgsize_cap) {
+		if (reg.dc_phy_addr_valid) {
+			int idx = (reg.dc_l1tlb_hit_1g << 1) | reg.dc_l1tlb_hit_2m;
+
+			snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str),
+				 " DcL1TlbMiss %d DcL2TlbMiss %d",
+				 reg.dc_l1tlb_miss, reg.dc_l2tlb_miss);
+			snprintf(dc_page_size_str, sizeof(dc_page_size_str),
+				 " DcPageSize %4s", dc_page_sizes[idx]);
+		}
+	} else {
+		snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str),
+			 " DcL1TlbMiss %d DcL2TlbMiss %d",
+			 reg.dc_l1tlb_miss, reg.dc_l2tlb_miss);
+		snprintf(dc_l1tlb_hit_str, sizeof(dc_l1tlb_hit_str),
+			 " DcL1TlbHit2M %d DcL1TlbHit1G %d",
+			 reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g);
+		snprintf(dc_l2tlb_hit_2m_str, sizeof(dc_l2tlb_hit_2m_str),
+			 " DcL2TlbHit2M %d", reg.dc_l2tlb_hit_2m);
+		snprintf(dc_l2tlb_hit_1g_str, sizeof(dc_l2tlb_hit_1g_str),
+			 " DcL2TlbHit1G %d", reg.dc_l2_tlb_hit_1g);
+	}
+
+	printf("ibs_op_data3:\t%016llx LdOp %d StOp %d%s%s%s DcMiss %d DcMisAcc %d "
+	       "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d "
+	       "DcLinAddrValid %d DcPhyAddrValid %d%s%s SwPf %d%s%s "
+	       "DcMissLat %5d TlbRefillLat %5d\n",
+	       reg.val, reg.ld_op, reg.st_op, dc_l1_l2tlb_miss_str,
+	       dtlb_pgsize_cap ? dc_page_size_str : dc_l1tlb_hit_str,
+	       dc_l2tlb_hit_2m_str, reg.dc_miss, reg.dc_mis_acc, reg.dc_wc_mem_acc,
+	       reg.dc_uc_mem_acc, reg.dc_locked_op, reg.dc_miss_no_mab_alloc,
+	       reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, dc_l2tlb_hit_1g_str,
+	       l2_miss_str, reg.sw_pf, op_mem_width_str, op_dc_miss_open_mem_reqs_str,
+	       reg.dc_miss_lat, reg.tlb_refill_lat);
 }
 
 /*
@@ -311,7 +354,7 @@ static void parse_cpuid(struct perf_env *env)
  */
 bool evlist__has_amd_ibs(struct evlist *evlist)
 {
-	struct perf_env *env = evlist->env;
+	struct perf_env *env = perf_session__env(evlist->session);
 	int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env);
 	const char *pmu_mapping = perf_env__pmu_mappings(env);
 	char name[sizeof("ibs_fetch")];
@@ -331,6 +374,12 @@ bool evlist__has_amd_ibs(struct evlist *evlist)
 	if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions"))
 		zen4_ibs_extensions = 1;
 
+	if (perf_env__find_pmu_cap(env, "ibs_op", "ldlat"))
+		ldlat_cap = 1;
+
+	if (perf_env__find_pmu_cap(env, "ibs_op", "dtlb_pgsize"))
+		dtlb_pgsize_cap = 1;
+
 	if (ibs_fetch_type || ibs_op_type) {
 		if (!cpu_family)
 			parse_cpuid(env);
diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index 965da6c0b542..07cf9c334be0 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -4,7 +4,7 @@
  *
  * Written by Namhyung Kim <namhyung@kernel.org>
  */
-
+#include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <inttypes.h>
@@ -31,15 +31,6 @@
 static void delete_var_types(struct die_var_type *var_types);
 
-enum type_state_kind {
-	TSR_KIND_INVALID = 0,
-	TSR_KIND_TYPE,
-	TSR_KIND_PERCPU_BASE,
-	TSR_KIND_CONST,
-	TSR_KIND_POINTER,
-	TSR_KIND_CANARY,
-};
-
 #define pr_debug_dtp(fmt, ...)					\
 do {								\
 	if (debug_type_profile)					\
@@ -48,7 +39,7 @@ do {								\
 		pr_debug3(fmt, ##__VA_ARGS__);			\
 } while (0)
 
-static void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind)
+void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind)
 {
 	struct strbuf sb;
 	char *str;
@@ -67,6 +58,10 @@ static void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind)
 	case TSR_KIND_CONST:
 		pr_info(" constant\n");
 		return;
+	case TSR_KIND_PERCPU_POINTER:
+		pr_info(" percpu pointer");
+		/* it also prints the type info */
+		break;
 	case TSR_KIND_POINTER:
 		pr_info(" pointer");
 		/* it also prints the type info */
@@ -104,7 +99,7 @@ static void pr_debug_location(Dwarf_Die *die, u64 pc, int reg)
 		return;
 
 	while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) {
-		if (reg != DWARF_REG_PC && end < pc)
+		if (reg != DWARF_REG_PC && end <= pc)
 			continue;
 		if (reg != DWARF_REG_PC && start > pc)
 			break;
@@ -140,49 +135,27 @@ static void pr_debug_location(Dwarf_Die *die, u64 pc, int reg)
 	}
 }
 
-/*
- * Type information in a register, valid when @ok is true.
- * The @caller_saved registers are invalidated after a function call.
- */
-struct type_state_reg {
-	Dwarf_Die type;
-	u32 imm_value;
-	bool ok;
-	bool caller_saved;
-	u8 kind;
-};
+static void pr_debug_scope(Dwarf_Die *scope_die)
+{
+	int tag;
 
-/* Type information in a stack location, dynamically allocated */
-struct type_state_stack {
-	struct list_head list;
-	Dwarf_Die type;
-	int offset;
-	int size;
-	bool compound;
-	u8 kind;
-};
+	if (!debug_type_profile && verbose < 3)
+		return;
 
-/* FIXME: This should be arch-dependent */
-#define TYPE_STATE_MAX_REGS  16
+	pr_info("(die:%lx) ", (long)dwarf_dieoffset(scope_die));
 
-/*
- * State table to maintain type info in each register and stack location.
- * It'll be updated when new variable is allocated or type info is moved
- * to a new location (register or stack). As it'd be used with the
- * shortest path of basic blocks, it only maintains a single table.
- */
-struct type_state {
-	/* state of general purpose registers */
-	struct type_state_reg regs[TYPE_STATE_MAX_REGS];
-	/* state of stack location */
-	struct list_head stack_vars;
-	/* return value register */
-	int ret_reg;
-	/* stack pointer register */
-	int stack_reg;
-};
+	tag = dwarf_tag(scope_die);
+	if (tag == DW_TAG_subprogram)
+		pr_info("[function] %s\n", dwarf_diename(scope_die));
+	else if (tag == DW_TAG_inlined_subroutine)
+		pr_info("[inlined] %s\n", dwarf_diename(scope_die));
+	else if (tag == DW_TAG_lexical_block)
+		pr_info("[block]\n");
+	else
+		pr_info("[unknown] tag=%x\n", tag);
+}
 
-static bool has_reg_type(struct type_state *state, int reg)
+bool has_reg_type(struct type_state *state, int reg)
 {
 	return (unsigned)reg < ARRAY_SIZE(state->regs);
 }
@@ -253,7 +226,7 @@ static int __add_member_cb(Dwarf_Die *die, void *arg)
 	struct annotated_member *parent = arg;
 	struct annotated_member *member;
 	Dwarf_Die member_type, die_mem;
-	Dwarf_Word size, loc;
+	Dwarf_Word size, loc, bit_size = 0;
 	Dwarf_Attribute attr;
 	struct strbuf sb;
 	int tag;
@@ -268,29 +241,56 @@ static int __add_member_cb(Dwarf_Die *die, void *arg)
 	strbuf_init(&sb, 32);
 	die_get_typename(die, &sb);
 
-	die_get_real_type(die, &member_type);
-	if (dwarf_aggregate_size(&member_type, &size) < 0)
+	__die_get_real_type(die, &member_type);
+	if (dwarf_tag(&member_type) == DW_TAG_typedef)
+		die_get_real_type(&member_type, &die_mem);
+	else
+		die_mem = member_type;
+
+	if (dwarf_aggregate_size(&die_mem, &size) < 0)
 		size = 0;
 
-	if (!dwarf_attr_integrate(die, DW_AT_data_member_location, &attr))
-		loc = 0;
-	else
+	if (dwarf_attr_integrate(die, DW_AT_data_member_location, &attr))
 		dwarf_formudata(&attr, &loc);
+	else {
+		/* bitfield member */
+		if (dwarf_attr_integrate(die, DW_AT_data_bit_offset, &attr) &&
+		    dwarf_formudata(&attr, &loc) == 0)
+			loc /= 8;
+		else
+			loc = 0;
+
+		if (dwarf_attr_integrate(die, DW_AT_bit_size, &attr) &&
+		    dwarf_formudata(&attr, &bit_size) == 0)
+			size = (bit_size + 7) / 8;
+	}
 
 	member->type_name = strbuf_detach(&sb, NULL);
 	/* member->var_name can be NULL */
-	if (dwarf_diename(die))
-		member->var_name = strdup(dwarf_diename(die));
+	if (dwarf_diename(die)) {
+		if (bit_size) {
+			if (asprintf(&member->var_name, "%s:%ld",
+				     dwarf_diename(die), (long)bit_size) < 0)
+				member->var_name = NULL;
+		} else {
+			member->var_name = strdup(dwarf_diename(die));
+		}
+
+		if (member->var_name == NULL) {
+			free(member);
+			return DIE_FIND_CB_END;
+		}
+	}
 	member->size = size;
 	member->offset = loc + parent->offset;
 	INIT_LIST_HEAD(&member->children);
 	list_add_tail(&member->node, &parent->children);
 
-	tag = dwarf_tag(&member_type);
+	tag = dwarf_tag(&die_mem);
 	switch (tag) {
 	case DW_TAG_structure_type:
 	case DW_TAG_union_type:
-		die_find_child(&member_type, __add_member_cb, member, &die_mem);
+		die_find_child(&die_mem, __add_member_cb, member, &die_mem);
 		break;
 	default:
 		break;
@@ -318,6 +318,40 @@ static void delete_members(struct annotated_member *member)
 	}
 }
 
+static int fill_member_name(char *buf, size_t sz, struct annotated_member *m,
+			    int offset, bool first)
+{
+	struct annotated_member *child;
+
+	if (list_empty(&m->children))
+		return 0;
+
+	list_for_each_entry(child, &m->children, node) {
+		int len;
+
+		if (offset < child->offset || offset >= child->offset + child->size)
+			continue;
+
+		/* It can have anonymous struct/union members */
+		if (child->var_name) {
+			len = scnprintf(buf, sz, "%s%s",
+					first ? "" : ".", child->var_name);
+			first = false;
+		} else {
+			len = 0;
+		}
+
+		return fill_member_name(buf + len, sz - len, child, offset, first) + len;
+	}
+	return 0;
+}
+
+int annotated_data_type__get_member_name(struct annotated_data_type *adt,
+					 char *buf, size_t sz, int member_offset)
+{
+	return fill_member_name(buf, sz, &adt->self, member_offset, /*first=*/true);
+}
+
 static struct annotated_data_type *dso__findnew_data_type(struct dso *dso,
 							  Dwarf_Die *type_die)
 {
@@ -332,6 +366,10 @@ static struct annotated_data_type *dso__findnew_data_type(struct dso *dso,
 	if (die_get_typename_from_type(type_die, &sb) < 0)
 		strbuf_add(&sb, "(unknown type)", 14);
 	type_name = strbuf_detach(&sb, NULL);
+
+	if (dwarf_tag(type_die) == DW_TAG_typedef)
+		die_get_real_type(type_die, type_die);
+
 	dwarf_aggregate_size(type_die, &size);
 
 	/* Check existing nodes in dso->data_types tree */
@@ -387,61 +425,142 @@ static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die)
 	return false;
 }
 
+enum type_match_result {
+	PERF_TMR_UNKNOWN = 0,
+	PERF_TMR_OK,
+	PERF_TMR_NO_TYPE,
+	PERF_TMR_NO_POINTER,
+	PERF_TMR_NO_SIZE,
+	PERF_TMR_BAD_OFFSET,
+	PERF_TMR_BAIL_OUT,
+};
+
+static const char *match_result_str(enum type_match_result tmr)
+{
+	switch (tmr) {
+	case PERF_TMR_OK:
+		return "Good!";
+	case PERF_TMR_NO_TYPE:
+		return "no type information";
+	case PERF_TMR_NO_POINTER:
+		return "no/void pointer";
+	case PERF_TMR_NO_SIZE:
+		return "type size is unknown";
+	case PERF_TMR_BAD_OFFSET:
+		return "offset bigger than size";
+	case PERF_TMR_UNKNOWN:
+	case PERF_TMR_BAIL_OUT:
+	default:
+		return "invalid state";
+	}
+}
+
+static bool is_pointer_type(Dwarf_Die *type_die)
+{
+	int tag = dwarf_tag(type_die);
+
+	return tag == DW_TAG_pointer_type || tag == DW_TAG_array_type;
+}
+
+static bool is_compound_type(Dwarf_Die *type_die)
+{
+	int tag = dwarf_tag(type_die);
+
+	return tag == DW_TAG_structure_type || tag == DW_TAG_union_type;
+}
+
+/* returns if Type B has better information than Type A */
+static bool is_better_type(Dwarf_Die *type_a, Dwarf_Die *type_b)
+{
+	Dwarf_Word size_a, size_b;
+	Dwarf_Die die_a, die_b;
+
+	/* pointer type is preferred */
+	if (is_pointer_type(type_a) != is_pointer_type(type_b))
+		return is_pointer_type(type_b);
+
+	if (is_pointer_type(type_b)) {
+		/*
+		 * We want to compare the target type, but 'void *' can fail to
+		 * get the target type.
+		 */
+		if (die_get_real_type(type_a, &die_a) == NULL)
+			return true;
+		if (die_get_real_type(type_b, &die_b) == NULL)
+			return false;
+
+		type_a = &die_a;
+		type_b = &die_b;
+	}
+
+	/* bigger type is preferred */
+	if (dwarf_aggregate_size(type_a, &size_a) < 0 ||
+	    dwarf_aggregate_size(type_b, &size_b) < 0)
+		return false;
+
+	if (size_a != size_b)
+		return size_a < size_b;
+
+	/* struct or union is preferred */
+	if (is_compound_type(type_a) != is_compound_type(type_b))
+		return is_compound_type(type_b);
+
+	/* typedef is preferred */
+	if (dwarf_tag(type_b) == DW_TAG_typedef)
+		return true;
+
+	return false;
+}
+
 /* The type info will be saved in @type_die */
-static int check_variable(struct data_loc_info *dloc, Dwarf_Die *var_die,
-			  Dwarf_Die *type_die, int reg, int offset, bool is_fbreg)
+static enum type_match_result check_variable(struct data_loc_info *dloc,
+					     Dwarf_Die *var_die,
+					     Dwarf_Die *type_die, int reg,
+					     int offset, bool is_fbreg)
 {
 	Dwarf_Word size;
-	bool is_pointer = true;
+	bool needs_pointer = true;
+	Dwarf_Die sized_type;
 
 	if (reg == DWARF_REG_PC)
-		is_pointer = false;
+		needs_pointer = false;
 	else if (reg == dloc->fbreg || is_fbreg)
-		is_pointer = false;
+		needs_pointer = false;
 	else if (arch__is(dloc->arch, "x86") && reg == X86_REG_SP)
-		is_pointer = false;
+		needs_pointer = false;
 
 	/* Get the type of the variable */
-	if (die_get_real_type(var_die, type_die) == NULL) {
-		pr_debug_dtp("variable has no type\n");
-		ann_data_stat.no_typeinfo++;
-		return -1;
-	}
+	if (__die_get_real_type(var_die, type_die) == NULL)
+		return PERF_TMR_NO_TYPE;
 
 	/*
 	 * Usually it expects a pointer type for a memory access.
 	 * Convert to a real type it points to.  But global variables
 	 * and local variables are accessed directly without a pointer.
 	 */
-	if (is_pointer) {
-		if ((dwarf_tag(type_die) != DW_TAG_pointer_type &&
-		     dwarf_tag(type_die) != DW_TAG_array_type) ||
-		    die_get_real_type(type_die, type_die) == NULL) {
-			pr_debug_dtp("no pointer or no type\n");
-			ann_data_stat.no_typeinfo++;
-			return -1;
-		}
+	if (needs_pointer) {
+		if (!is_pointer_type(type_die) ||
+		    __die_get_real_type(type_die, type_die) == NULL)
+			return PERF_TMR_NO_POINTER;
 	}
 
+	if (dwarf_tag(type_die) == DW_TAG_typedef)
+		die_get_real_type(type_die, &sized_type);
+	else
+		sized_type = *type_die;
+
 	/* Get the size of the actual type */
-	if (dwarf_aggregate_size(type_die, &size) < 0) {
-		pr_debug_dtp("type size is unknown\n");
-		ann_data_stat.invalid_size++;
-		return -1;
-	}
+	if (dwarf_aggregate_size(&sized_type, &size) < 0)
		return PERF_TMR_NO_SIZE;
 
 	/* Minimal sanity check */
-	if ((unsigned)offset >= size) {
-		pr_debug_dtp("offset: %d is bigger than size: %"PRIu64"\n",
-			     offset, size);
-		ann_data_stat.bad_offset++;
-		return -1;
-	}
+	if ((unsigned)offset >= size)
+		return PERF_TMR_BAD_OFFSET;
 
-	return 0;
+	return PERF_TMR_OK;
 }
 
-static struct type_state_stack *find_stack_state(struct type_state *state,
+struct type_state_stack *find_stack_state(struct type_state *state,
 					  int offset)
 {
 	struct type_state_stack *stack;
@@ -457,26 +576,36 @@ static struct type_state_stack *find_stack_state(struct type_state *state,
 	return NULL;
 }
 
-static void set_stack_state(struct type_state_stack *stack, int offset, u8 kind,
-			    Dwarf_Die *type_die)
+void set_stack_state(struct type_state_stack *stack, int offset, u8 kind,
+		     Dwarf_Die *type_die, int ptr_offset)
 {
 	int tag;
 	Dwarf_Word size;
 
-	if (dwarf_aggregate_size(type_die, &size) < 0)
+	if (kind == TSR_KIND_POINTER) {
+		/* TODO: arch-dependent pointer size */
+		size = sizeof(void *);
+	}
+	else if (dwarf_aggregate_size(type_die, &size) < 0)
 		size = 0;
 
-	tag = dwarf_tag(type_die);
-
 	stack->type = *type_die;
 	stack->size = size;
 	stack->offset = offset;
+	stack->ptr_offset = ptr_offset;
 	stack->kind = kind;
 
+	if (kind == TSR_KIND_POINTER) {
+		stack->compound = false;
+		return;
+	}
+
+	tag = dwarf_tag(type_die);
+
 	switch (tag) {
 	case DW_TAG_structure_type:
 	case DW_TAG_union_type:
-		stack->compound = (kind != TSR_KIND_POINTER);
+		stack->compound = (kind != TSR_KIND_PERCPU_POINTER);
 		break;
 	default:
 		stack->compound = false;
@@ -484,20 +613,21 @@ static void set_stack_state(struct type_state_stack *stack, int offset, u8 kind,
 	}
 }
 
-static struct type_state_stack *findnew_stack_state(struct type_state *state,
+struct type_state_stack *findnew_stack_state(struct type_state *state,
 						    int offset, u8 kind,
-						    Dwarf_Die *type_die)
+						    Dwarf_Die *type_die,
+						    int ptr_offset)
 {
 	struct type_state_stack *stack = find_stack_state(state, offset);
 
 	if (stack) {
-		set_stack_state(stack, offset, kind, type_die);
+		set_stack_state(stack, offset, kind, type_die, ptr_offset);
 		return stack;
 	}
 
 	stack = malloc(sizeof(*stack));
 	if (stack) {
-		set_stack_state(stack, offset, kind, type_die);
+		set_stack_state(stack, offset, kind, type_die, ptr_offset);
 		list_add(&stack->list, &state->stack_vars);
 	}
 	return stack;
@@ -588,7 +718,7 @@ void global_var_type__tree_delete(struct rb_root *root)
 	}
 }
 
-static bool get_global_var_info(struct data_loc_info *dloc, u64 addr,
+bool get_global_var_info(struct data_loc_info *dloc, u64 addr,
 			 const char **var_name, int *var_offset)
 {
 	struct addr_location al;
@@ -662,7 +792,7 @@ static void global_var__collect(struct data_loc_info *dloc)
 	}
 }
 
-static bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
+bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
 			 u64 ip, u64 var_addr, int *var_offset,
 			 Dwarf_Die *type_die)
 {
@@ -688,7 +818,7 @@ static bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
 	/* Try to get the variable by address first */
 	if (die_find_variable_by_addr(cu_die, var_addr, &var_die, &offset) &&
 	    check_variable(dloc, &var_die, type_die, DWARF_REG_PC, offset,
-			   /*is_fbreg=*/false) == 0) {
+			   /*is_fbreg=*/false) == PERF_TMR_OK) {
 		var_name = dwarf_diename(&var_die);
 		*var_offset = offset;
 		goto ok;
@@ -702,7 +832,7 @@ static bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc,
 	/* Try to get the name of global variable */
 	if (die_find_variable_at(cu_die, var_name, pc, &var_die) &&
 	    check_variable(dloc, &var_die, type_die, DWARF_REG_PC, *var_offset,
-			   /*is_fbreg=*/false) == 0)
+			   /*is_fbreg=*/false) == PERF_TMR_OK)
 		goto ok;
 
 	return false;
@@ -713,6 +843,11 @@ ok:
 	return true;
 }
 
+static bool die_is_same(Dwarf_Die *die_a, Dwarf_Die *die_b)
+{
+	return (die_a->cu == die_b->cu) && (die_a->addr == die_b->addr);
+}
+
 /**
  * update_var_state - Update type state using given variables
  * @state: type state table
@@ -744,408 +879,99 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo
 		if (!dwarf_offdie(dloc->di->dbg, var->die_off, &mem_die))
 			continue;
 
-		if (var->reg == DWARF_REG_FB) {
-			findnew_stack_state(state, var->offset, TSR_KIND_TYPE,
-					    &mem_die);
-
-			pr_debug_dtp("var [%"PRIx64"] -%#x(stack)",
-				     insn_offset, -var->offset);
-			pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
-		} else if (var->reg == fbreg) {
-			findnew_stack_state(state, var->offset - fb_offset,
-					    TSR_KIND_TYPE, &mem_die);
-
-			pr_debug_dtp("var [%"PRIx64"] -%#x(stack)",
-				     insn_offset, -var->offset + fb_offset);
-			pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
-		} else if (has_reg_type(state, var->reg) && var->offset == 0) {
-			struct type_state_reg *reg;
-
-			reg = &state->regs[var->reg];
-			reg->type = mem_die;
-			reg->kind = TSR_KIND_TYPE;
-			reg->ok = true;
-
-			pr_debug_dtp("var [%"PRIx64"] reg%d",
-				     insn_offset, var->reg);
-			pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
-		}
-	}
-}
-
-static void update_insn_state_x86(struct type_state *state,
-				  struct data_loc_info *dloc, Dwarf_Die *cu_die,
-				  struct disasm_line *dl)
-{
-	struct annotated_insn_loc loc;
-	struct annotated_op_loc *src = &loc.ops[INSN_OP_SOURCE];
-	struct annotated_op_loc *dst = &loc.ops[INSN_OP_TARGET];
-	struct type_state_reg *tsr;
-	Dwarf_Die type_die;
-	u32 insn_offset = dl->al.offset;
-	int fbreg = dloc->fbreg;
-	int fboff = 0;
-
-	if (annotate_get_insn_location(dloc->arch, dl, &loc) < 0)
-		return;
-
-	if (ins__is_call(&dl->ins)) {
-		struct symbol *func = dl->ops.target.sym;
-
-		if (func == NULL)
-			return;
-
-		/* __fentry__ will preserve all registers */
-		if (!strcmp(func->name, "__fentry__"))
-			return;
+		if (var->reg == DWARF_REG_FB || var->reg == fbreg || var->reg == state->stack_reg) {
+			int offset = var->offset;
+			struct type_state_stack *stack;
 
-		pr_debug_dtp("call [%x] %s\n", insn_offset, func->name);
+			/* If the reg location holds the pointer value, dereference the type */
+			if (!var->is_reg_var_addr && is_pointer_type(&mem_die) &&
+			    __die_get_real_type(&mem_die, &mem_die) == NULL)
+				continue;
 
-		/* Otherwise invalidate caller-saved registers after call */
-		for (unsigned i = 0; i < ARRAY_SIZE(state->regs); i++) {
-			if (state->regs[i].caller_saved)
-				state->regs[i].ok = false;
-		}
+			if (var->reg != DWARF_REG_FB)
+				offset -= fb_offset;
 
-		/* Update register with the return type (if any) */
-		if (die_find_func_rettype(cu_die, func->name, &type_die)) {
-			tsr = &state->regs[state->ret_reg];
-			tsr->type = type_die;
-			tsr->kind = TSR_KIND_TYPE;
-			tsr->ok = true;
+			stack = find_stack_state(state, offset);
+			if (stack && stack->kind == TSR_KIND_TYPE &&
+			    !is_better_type(&stack->type, &mem_die))
+				continue;
 
-			pr_debug_dtp("call [%x] return -> reg%d",
-				     insn_offset, state->ret_reg);
-			pr_debug_type_name(&type_die, tsr->kind);
-		}
-		return;
-	}
+			findnew_stack_state(state, offset, TSR_KIND_TYPE,
					    &mem_die, /*ptr_offset=*/0);
 
-	if (!strncmp(dl->ins.name, "add", 3)) {
-		u64 imm_value = -1ULL;
-		int offset;
-		const char *var_name = NULL;
-		struct map_symbol *ms = dloc->ms;
-		u64 ip = ms->sym->start + dl->al.offset;
-
-		if (!has_reg_type(state, dst->reg1))
-			return;
-
-		tsr = &state->regs[dst->reg1];
-
-		if (src->imm)
-			imm_value = src->offset;
-		else if (has_reg_type(state, src->reg1) &&
-			 state->regs[src->reg1].kind == TSR_KIND_CONST)
-			imm_value = state->regs[src->reg1].imm_value;
-		else if (src->reg1 == DWARF_REG_PC) {
-			u64 var_addr = annotate_calc_pcrel(dloc->ms, ip,
-							   src->offset, dl);
-
-			if (get_global_var_info(dloc, var_addr,
-						&var_name, &offset) &&
-			    !strcmp(var_name, "this_cpu_off") &&
-			    tsr->kind == TSR_KIND_CONST) {
-				tsr->kind = TSR_KIND_PERCPU_BASE;
-				imm_value = tsr->imm_value;
+			if (var->reg == state->stack_reg) {
+				pr_debug_dtp("var [%"PRIx64"] %#x(reg%d)",
+					     insn_offset, offset, state->stack_reg);
+			} else {
+				pr_debug_dtp("var [%"PRIx64"] -%#x(stack)",
+					     insn_offset, -offset);
 			}
-		}
-		else
-			return;
-
-		if (tsr->kind != TSR_KIND_PERCPU_BASE)
-			return;
-
-		if (get_global_var_type(cu_die, dloc, ip, imm_value, &offset,
-					&type_die) && offset == 0) {
-			/*
-			 * This is not a pointer type, but it should be treated
-			 * as a pointer.
-			 */
-			tsr->type = type_die;
-			tsr->kind = TSR_KIND_POINTER;
-			tsr->ok = true;
-
-			pr_debug_dtp("add [%x] percpu %#"PRIx64" -> reg%d",
-				     insn_offset, imm_value, dst->reg1);
-			pr_debug_type_name(&tsr->type, tsr->kind);
-		}
-		return;
-	}
+			pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
+		} else if (has_reg_type(state, var->reg)) {
+			struct type_state_reg *reg;
+			Dwarf_Die orig_type;
 
-	if (strncmp(dl->ins.name, "mov", 3))
-		return;
+			reg = &state->regs[var->reg];
 
-	if (dloc->fb_cfa) {
-		u64 ip = dloc->ms->sym->start + dl->al.offset;
-		u64 pc = map__rip_2objdump(dloc->ms->map, ip);
+			if (reg->ok && reg->kind == TSR_KIND_TYPE &&
+			    (!is_better_type(&reg->type, &mem_die) || var->is_reg_var_addr))
+				continue;
 
-		if (die_get_cfa(dloc->di->dbg, pc, &fbreg, &fboff) < 0)
-			fbreg = -1;
-	}
+			/* Handle address registers with TSR_KIND_POINTER */
+			if (var->is_reg_var_addr) {
+				if (reg->ok && reg->kind == TSR_KIND_POINTER &&
+				    !is_better_type(&reg->type, &mem_die))
+					continue;
 
-	/* Case 1. register to register or segment:offset to register transfers */
-	if (!src->mem_ref && !dst->mem_ref) {
-		if (!has_reg_type(state, dst->reg1))
-			return;
+				reg->offset = -var->offset;
+				reg->type = mem_die;
+				reg->kind = TSR_KIND_POINTER;
+				reg->ok = true;
 
-		tsr = &state->regs[dst->reg1];
-		if (dso__kernel(map__dso(dloc->ms->map)) &&
-		    src->segment == INSN_SEG_X86_GS && src->imm) {
-			u64 ip = dloc->ms->sym->start + dl->al.offset;
-			u64 var_addr;
-			int offset;
+				pr_debug_dtp("var [%"PRIx64"] reg%d addr offset %x",
+					     insn_offset, var->reg, var->offset);
+				pr_debug_type_name(&mem_die, TSR_KIND_POINTER);
+				continue;
+			}
+			orig_type = reg->type;
 
-			/*
-			 * In kernel, %gs points to a per-cpu region for the
-			 * current CPU. Access with a constant offset should
-			 * be treated as a global variable access.
-			 */
-			var_addr = src->offset;
-
-			if (var_addr == 40) {
-				tsr->kind = TSR_KIND_CANARY;
-				tsr->ok = true;
-
-				pr_debug_dtp("mov [%x] stack canary -> reg%d\n",
-					     insn_offset, dst->reg1);
-				return;
-			}
-
-			if (!get_global_var_type(cu_die, dloc, ip, var_addr,
-						 &offset, &type_die) ||
-			    !die_get_member_type(&type_die, offset, &type_die)) {
-				tsr->ok = false;
-				return;
-			}
-
-			tsr->type = type_die;
-			tsr->kind = TSR_KIND_TYPE;
-			tsr->ok = true;
-
-			pr_debug_dtp("mov [%x] this-cpu addr=%#"PRIx64" -> reg%d",
-				     insn_offset, var_addr, dst->reg1);
-			pr_debug_type_name(&tsr->type, tsr->kind);
-			return;
-		}
-
-		if (src->imm) {
-			tsr->kind = TSR_KIND_CONST;
-			tsr->imm_value = src->offset;
-			tsr->ok = true;
-
-			pr_debug_dtp("mov [%x] imm=%#x -> reg%d\n",
-				     insn_offset, tsr->imm_value, dst->reg1);
-			return;
-		}
-
-		if (!has_reg_type(state, src->reg1) ||
-		    !state->regs[src->reg1].ok) {
-			tsr->ok = false;
-			return;
-		}
-
-		tsr->type = state->regs[src->reg1].type;
-		tsr->kind = state->regs[src->reg1].kind;
-		tsr->ok = true;
-
-		pr_debug_dtp("mov [%x] reg%d -> reg%d",
-			     insn_offset, src->reg1, dst->reg1);
-		pr_debug_type_name(&tsr->type, tsr->kind);
-	}
-	/* Case 2. memory to register transers */
-	if (src->mem_ref && !dst->mem_ref) {
-		int sreg = src->reg1;
-
-		if (!has_reg_type(state, dst->reg1))
-			return;
-
-		tsr = &state->regs[dst->reg1];
-
-retry:
-		/* Check stack variables with offset */
-		if (sreg == fbreg) {
-			struct type_state_stack *stack;
-			int offset = src->offset - fboff;
-
-			stack = find_stack_state(state, offset);
-			if (stack == NULL) {
-				tsr->ok = false;
-				return;
-			} else if (!stack->compound) {
-				tsr->type = stack->type;
-				tsr->kind = stack->kind;
-				tsr->ok = true;
-			} else if (die_get_member_type(&stack->type,
-						       offset - stack->offset,
-						       &type_die)) {
-				tsr->type = type_die;
-				tsr->kind = TSR_KIND_TYPE;
-				tsr->ok = true;
-			} else {
-				tsr->ok = false;
-				return;
-			}
-
-			pr_debug_dtp("mov [%x] -%#x(stack) -> reg%d",
-				     insn_offset, -offset, dst->reg1);
-			pr_debug_type_name(&tsr->type, tsr->kind);
-		}
-		/* And then dereference the pointer if it has one */
-		else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
-			 state->regs[sreg].kind == TSR_KIND_TYPE &&
-			 die_deref_ptr_type(&state->regs[sreg].type,
-					    src->offset, &type_die)) {
-			tsr->type = type_die;
-			tsr->kind = TSR_KIND_TYPE;
-			tsr->ok = true;
-
-			pr_debug_dtp("mov [%x] %#x(reg%d) -> reg%d",
-				     insn_offset, src->offset, sreg, dst->reg1);
-			pr_debug_type_name(&tsr->type, tsr->kind);
-		}
-		/* Or check if it's a global variable */
-		else if (sreg == DWARF_REG_PC) {
-			struct map_symbol *ms = dloc->ms;
-			u64 ip = ms->sym->start + dl->al.offset;
-			u64 addr;
-			int offset;
-
-			addr = annotate_calc_pcrel(ms, ip, src->offset, dl);
-
-			if (!get_global_var_type(cu_die, dloc, ip, addr, &offset,
-						 &type_die) ||
-			    !die_get_member_type(&type_die, offset, &type_die)) {
-				tsr->ok = false;
-				return;
-			}
-
-			tsr->type = type_die;
-			tsr->kind = TSR_KIND_TYPE;
-			tsr->ok = true;
+			reg->offset = -var->offset;
+			reg->type = mem_die;
+			reg->kind = TSR_KIND_TYPE;
+			reg->ok = true;
 
-			pr_debug_dtp("mov [%x] global addr=%"PRIx64" -> reg%d",
-				     insn_offset, addr, dst->reg1);
-			pr_debug_type_name(&type_die, tsr->kind);
-		}
-		/* And check percpu access with base register */
-		else if (has_reg_type(state, sreg) &&
-			 state->regs[sreg].kind == TSR_KIND_PERCPU_BASE) {
-			u64 ip = dloc->ms->sym->start + dl->al.offset;
-			u64 var_addr = src->offset;
-			int offset;
-
-			if (src->multi_regs) {
-				int reg2 = (sreg == src->reg1) ? src->reg2 : src->reg1;
-
-				if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
-				    state->regs[reg2].kind == TSR_KIND_CONST)
-					var_addr += state->regs[reg2].imm_value;
-			}
+			pr_debug_dtp("var [%"PRIx64"] reg%d offset %x",
+				     insn_offset, var->reg, var->offset);
+			pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
 
 			/*
-			 * In kernel, %gs points to a per-cpu region for the
-			 * current CPU. Access with a constant offset should
-			 * be treated as a global variable access.
+			 * If this register is directly copied from another and it gets a
+			 * better type, also update the type of the source register. This
+			 * is usually the case of container_of() macro with offset of 0.
 			 */
-			if (get_global_var_type(cu_die, dloc, ip, var_addr,
-						&offset, &type_die) &&
-			    die_get_member_type(&type_die, offset, &type_die)) {
-				tsr->type = type_die;
-				tsr->kind = TSR_KIND_TYPE;
-				tsr->ok = true;
-
-				if (src->multi_regs) {
-					pr_debug_dtp("mov [%x] percpu %#x(reg%d,reg%d) -> reg%d",
-						     insn_offset, src->offset, src->reg1,
-						     src->reg2, dst->reg1);
-				} else {
-					pr_debug_dtp("mov [%x] percpu %#x(reg%d) -> reg%d",
-						     insn_offset, src->offset, sreg, dst->reg1);
-				}
-				pr_debug_type_name(&tsr->type, tsr->kind);
-			} else {
-				tsr->ok = false;
-			}
-		}
-		/* And then dereference the calculated pointer if it has one */
-		else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
-			 state->regs[sreg].kind == TSR_KIND_POINTER &&
-			 die_get_member_type(&state->regs[sreg].type,
-					     src->offset, &type_die)) {
-			tsr->type = type_die;
-			tsr->kind = TSR_KIND_TYPE;
-			tsr->ok = true;
-
-			pr_debug_dtp("mov [%x] pointer %#x(reg%d) -> reg%d",
-				     insn_offset, src->offset, sreg, dst->reg1);
-			pr_debug_type_name(&tsr->type, tsr->kind);
-		}
-		/* Or try another register if any */
-		else if (src->multi_regs && sreg == src->reg1 &&
-			 src->reg1 != src->reg2) {
-			sreg = src->reg2;
-			goto retry;
-		}
-		else {
-			int offset;
-			const char *var_name = NULL;
-
-			/* it might be per-cpu variable (in kernel) access */
-			if (src->offset < 0) {
-				if (get_global_var_info(dloc, (s64)src->offset,
-							&var_name, &offset) &&
-				    !strcmp(var_name, "__per_cpu_offset")) {
-					tsr->kind = TSR_KIND_PERCPU_BASE;
+			if (has_reg_type(state, reg->copied_from)) {
+				struct type_state_reg *copy_reg;
 
-					pr_debug_dtp("mov [%x] percpu base reg%d\n",
-						     insn_offset, dst->reg1);
+				copy_reg = &state->regs[reg->copied_from];
-				}
-			}
 
+				/* TODO: check if type is compatible or embedded */
+				if (!copy_reg->ok || (copy_reg->kind != TSR_KIND_TYPE) ||
+				    !die_is_same(&copy_reg->type, &orig_type) ||
+				    !is_better_type(&copy_reg->type, &mem_die))
+					continue;
-			tsr->ok = false;
-		}
-	}
+				copy_reg->type = mem_die;
-	/* Case 3. register to memory transfers */
-	if (!src->mem_ref && dst->mem_ref) {
-		if (!has_reg_type(state, src->reg1) ||
-		    !state->regs[src->reg1].ok)
-			return;
 
+				pr_debug_dtp("var [%"PRIx64"] copyback reg%d",
+					     insn_offset, reg->copied_from);
+				pr_debug_type_name(&mem_die, TSR_KIND_TYPE);
-		/* Check stack variables with offset */
-		if (dst->reg1 == fbreg) {
-			struct type_state_stack *stack;
-			int offset = dst->offset - fboff;
+			}
-
-			tsr = &state->regs[src->reg1];
+		}
-
-			stack = find_stack_state(state, offset);
-			if (stack) {
-				/*
-				 * The source register is likely to hold a type
-				 * of member if it's a compound type. Do not
-				 * update the stack variable type since we can
-				 * get the member type later by using the
-				 * die_get_member_type().
-				 */
-				if (!stack->compound)
-					set_stack_state(stack, offset, tsr->kind,
-							&tsr->type);
-			} else {
-				findnew_stack_state(state, offset, tsr->kind,
-						    &tsr->type);
 			}
-
-			pr_debug_dtp("mov [%x] reg%d -> -%#x(stack)",
-				     insn_offset, src->reg1, -offset);
-			pr_debug_type_name(&tsr->type, tsr->kind);
 		}
-		/*
-		 * Ignore other transfers since it'd set a value in a struct
-		 * and won't change the type.
-		 */
 	}
-	/* Case 4.
memory to memory transfers (not handled for now) */ } /** @@ -1166,8 +992,8 @@ retry: static void update_insn_state(struct type_state *state, struct data_loc_info *dloc, Dwarf_Die *cu_die, struct disasm_line *dl) { - if (arch__is(dloc->arch, "x86")) - update_insn_state_x86(state, dloc, cu_die, dl); + if (dloc->arch->update_insn_state) + dloc->arch->update_insn_state(state, dloc, cu_die, dl); } /* @@ -1254,114 +1080,135 @@ static void setup_stack_canary(struct data_loc_info *dloc) /* * It's at the target address, check if it has a matching type. - * It returns 1 if found, 0 if not or -1 if not found but no need to - * repeat the search. The last case is for per-cpu variables which + * It returns PERF_TMR_BAIL_OUT when it looks up per-cpu variables which * are similar to global variables and no additional info is needed. */ -static int check_matching_type(struct type_state *state, - struct data_loc_info *dloc, - Dwarf_Die *cu_die, Dwarf_Die *type_die) +static enum type_match_result check_matching_type(struct type_state *state, + struct data_loc_info *dloc, + Dwarf_Die *cu_die, + struct disasm_line *dl, + Dwarf_Die *type_die) { Dwarf_Word size; - u32 insn_offset = dloc->ip - dloc->ms->sym->start; + u32 insn_offset = dl->al.offset; int reg = dloc->op->reg1; + int offset = dloc->op->offset; + const char *offset_sign = ""; + bool retry = true; - pr_debug_dtp("chk [%x] reg%d offset=%#x ok=%d kind=%d", - insn_offset, reg, dloc->op->offset, + if (offset < 0) { + offset = -offset; + offset_sign = "-"; + } + +again: + pr_debug_dtp("chk [%x] reg%d offset=%s%#x ok=%d kind=%d ", + insn_offset, reg, offset_sign, offset, state->regs[reg].ok, state->regs[reg].kind); - if (state->regs[reg].ok && state->regs[reg].kind == TSR_KIND_TYPE) { - int tag = dwarf_tag(&state->regs[reg].type); + if (!state->regs[reg].ok) + goto check_non_register; + + if (state->regs[reg].kind == TSR_KIND_TYPE) { + Dwarf_Die sized_type; + struct strbuf sb; + + strbuf_init(&sb, 32); + die_get_typename_from_type(&state->regs[reg].type, &sb); + pr_debug_dtp("(%s)", sb.buf); + strbuf_release(&sb); /* * Normal registers should hold a pointer (or array) to * dereference a memory location. 
*/ - if (tag != DW_TAG_pointer_type && tag != DW_TAG_array_type) { + if (!is_pointer_type(&state->regs[reg].type)) { if (dloc->op->offset < 0 && reg != state->stack_reg) goto check_kernel; - pr_debug_dtp("\n"); - return -1; + return PERF_TMR_NO_POINTER; } - pr_debug_dtp("\n"); - /* Remove the pointer and get the target type */ - if (die_get_real_type(&state->regs[reg].type, type_die) == NULL) - return -1; + if (__die_get_real_type(&state->regs[reg].type, type_die) == NULL) + return PERF_TMR_NO_POINTER; - dloc->type_offset = dloc->op->offset; + dloc->type_offset = dloc->op->offset + state->regs[reg].offset; + + if (dwarf_tag(type_die) == DW_TAG_typedef) + die_get_real_type(type_die, &sized_type); + else + sized_type = *type_die; /* Get the size of the actual type */ - if (dwarf_aggregate_size(type_die, &size) < 0 || + if (dwarf_aggregate_size(&sized_type, &size) < 0 || (unsigned)dloc->type_offset >= size) - return -1; + return PERF_TMR_BAD_OFFSET; - return 1; + return PERF_TMR_OK; } - if (reg == dloc->fbreg) { - struct type_state_stack *stack; - - pr_debug_dtp(" fbreg\n"); + if (state->regs[reg].kind == TSR_KIND_POINTER) { + struct strbuf sb; - stack = find_stack_state(state, dloc->type_offset); - if (stack == NULL) - return 0; + strbuf_init(&sb, 32); + die_get_typename_from_type(&state->regs[reg].type, &sb); + pr_debug_dtp("(ptr->%s)", sb.buf); + strbuf_release(&sb); - if (stack->kind == TSR_KIND_CANARY) { - setup_stack_canary(dloc); - return -1; - } + /* + * Register holds a pointer (address) to the target variable. + * The type is the type of the variable it points to. + */ + *type_die = state->regs[reg].type; - if (stack->kind != TSR_KIND_TYPE) - return 0; + dloc->type_offset = dloc->op->offset + state->regs[reg].offset; - *type_die = stack->type; - /* Update the type offset from the start of slot */ - dloc->type_offset -= stack->offset; + /* Get the size of the actual type */ + if (dwarf_aggregate_size(type_die, &size) < 0 || + (unsigned)dloc->type_offset >= size) + return PERF_TMR_BAD_OFFSET; - return 1; + return PERF_TMR_OK; } - if (dloc->fb_cfa) { - struct type_state_stack *stack; - u64 pc = map__rip_2objdump(dloc->ms->map, dloc->ip); - int fbreg, fboff; - - pr_debug_dtp(" cfa\n"); + if (state->regs[reg].kind == TSR_KIND_PERCPU_POINTER) { + pr_debug_dtp("percpu ptr"); - if (die_get_cfa(dloc->di->dbg, pc, &fbreg, &fboff) < 0) - fbreg = -1; + /* + * It's actually a pointer but the address was calculated using + * some arithmetic. So it points to the actual type already. + */ + *type_die = state->regs[reg].type; - if (reg != fbreg) - return 0; + dloc->type_offset = dloc->op->offset; - stack = find_stack_state(state, dloc->type_offset - fboff); - if (stack == NULL) - return 0; + /* Get the size of the actual type */ + if (dwarf_aggregate_size(type_die, &size) < 0 || + (unsigned)dloc->type_offset >= size) + return PERF_TMR_BAIL_OUT; - if (stack->kind == TSR_KIND_CANARY) { - setup_stack_canary(dloc); - return -1; - } + return PERF_TMR_OK; + } - if (stack->kind != TSR_KIND_TYPE) - return 0; + if (state->regs[reg].kind == TSR_KIND_CANARY) { + pr_debug_dtp("stack canary"); - *type_die = stack->type; - /* Update the type offset from the start of slot */ - dloc->type_offset -= fboff + stack->offset; + /* + * This is a saved value of the stack canary which will be handled + * in the outer logic when it returns failure here. Pretend it's + * from the stack canary directly. 
+ */ + setup_stack_canary(dloc); - return -1; + return PERF_TMR_BAIL_OUT; } if (state->regs[reg].kind == TSR_KIND_PERCPU_BASE) { u64 var_addr = dloc->op->offset; int var_offset; - pr_debug_dtp(" percpu var\n"); + pr_debug_dtp("percpu var"); if (dloc->op->multi_regs) { int reg2 = dloc->op->reg2; @@ -1377,62 +1224,104 @@ static int check_matching_type(struct type_state *state, if (get_global_var_type(cu_die, dloc, dloc->ip, var_addr, &var_offset, type_die)) { dloc->type_offset = var_offset; - return 1; + return PERF_TMR_OK; } /* No need to retry per-cpu (global) variables */ - return -1; + return PERF_TMR_BAIL_OUT; } - if (state->regs[reg].ok && state->regs[reg].kind == TSR_KIND_POINTER) { - pr_debug_dtp(" percpu ptr\n"); +check_non_register: + if (reg == dloc->fbreg || reg == state->stack_reg) { + struct type_state_stack *stack; - /* - * It's actaully pointer but the address was calculated using - * some arithmetic. So it points to the actual type already. - */ - *type_die = state->regs[reg].type; + pr_debug_dtp("%s", reg == dloc->fbreg ? "fbreg" : "stack"); - dloc->type_offset = dloc->op->offset; + stack = find_stack_state(state, dloc->type_offset); + if (stack == NULL) { + if (retry) { + pr_debug_dtp(" : retry\n"); + retry = false; - /* Get the size of the actual type */ - if (dwarf_aggregate_size(type_die, &size) < 0 || - (unsigned)dloc->type_offset >= size) - return -1; + /* update type info if it's the first store to the stack */ + update_insn_state(state, dloc, cu_die, dl); + goto again; + } + return PERF_TMR_NO_TYPE; + } - return 1; + if (stack->kind == TSR_KIND_CANARY) { + setup_stack_canary(dloc); + return PERF_TMR_BAIL_OUT; + } + + if (stack->kind != TSR_KIND_TYPE) + return PERF_TMR_NO_TYPE; + + *type_die = stack->type; + /* Update the type offset from the start of slot */ + dloc->type_offset -= stack->offset; + + return PERF_TMR_OK; } - if (state->regs[reg].ok && state->regs[reg].kind == TSR_KIND_CANARY) { - pr_debug_dtp(" stack canary\n"); + if (dloc->fb_cfa) { + struct type_state_stack *stack; + u64 pc = map__rip_2objdump(dloc->ms->map, dloc->ip); + int fbreg, fboff; - /* - * This is a saved value of the stack canary which will be handled - * in the outer logic when it returns failure here. Pretend it's - * from the stack canary directly. 
- */ - setup_stack_canary(dloc); + pr_debug_dtp("cfa"); - return -1; + if (die_get_cfa(dloc->di->dbg, pc, &fbreg, &fboff) < 0) + fbreg = -1; + + if (reg != fbreg) + return PERF_TMR_NO_TYPE; + + stack = find_stack_state(state, dloc->type_offset - fboff); + if (stack == NULL) { + if (retry) { + pr_debug_dtp(" : retry\n"); + retry = false; + + /* update type info if it's the first store to the stack */ + update_insn_state(state, dloc, cu_die, dl); + goto again; + } + return PERF_TMR_NO_TYPE; + } + + if (stack->kind == TSR_KIND_CANARY) { + setup_stack_canary(dloc); + return PERF_TMR_BAIL_OUT; + } + + if (stack->kind != TSR_KIND_TYPE) + return PERF_TMR_NO_TYPE; + + *type_die = stack->type; + /* Update the type offset from the start of slot */ + dloc->type_offset -= fboff + stack->offset; + + return PERF_TMR_OK; } check_kernel: if (dso__kernel(map__dso(dloc->ms->map))) { u64 addr; - int offset; /* Direct this-cpu access like "%gs:0x34740" */ if (dloc->op->segment == INSN_SEG_X86_GS && dloc->op->imm && arch__is(dloc->arch, "x86")) { - pr_debug_dtp(" this-cpu var\n"); + pr_debug_dtp("this-cpu var"); addr = dloc->op->offset; if (get_global_var_type(cu_die, dloc, dloc->ip, addr, &offset, type_die)) { dloc->type_offset = offset; - return 1; + return PERF_TMR_OK; } - return -1; + return PERF_TMR_BAIL_OUT; } /* Access to global variable like "-0x7dcf0500(,%rdx,8)" */ @@ -1441,31 +1330,30 @@ check_kernel: if (get_global_var_type(cu_die, dloc, dloc->ip, addr, &offset, type_die)) { - pr_debug_dtp(" global var\n"); + pr_debug_dtp("global var"); dloc->type_offset = offset; - return 1; + return PERF_TMR_OK; } - pr_debug_dtp(" negative offset\n"); - return -1; + return PERF_TMR_BAIL_OUT; } } - pr_debug_dtp("\n"); - return 0; + return PERF_TMR_UNKNOWN; } /* Iterate instructions in basic blocks and update type table */ -static int find_data_type_insn(struct data_loc_info *dloc, - struct list_head *basic_blocks, - struct die_var_type *var_types, - Dwarf_Die *cu_die, Dwarf_Die *type_die) +static enum type_match_result find_data_type_insn(struct data_loc_info *dloc, + struct list_head *basic_blocks, + struct die_var_type *var_types, + Dwarf_Die *cu_die, + Dwarf_Die *type_die) { struct type_state state; struct symbol *sym = dloc->ms->sym; struct annotation *notes = symbol__annotation(sym); struct annotated_basic_block *bb; - int ret = 0; + enum type_match_result ret = PERF_TMR_UNKNOWN; init_type_state(&state, dloc->arch); @@ -1490,7 +1378,8 @@ static int find_data_type_insn(struct data_loc_info *dloc, if (this_ip == dloc->ip) { ret = check_matching_type(&state, dloc, - cu_die, type_die); + cu_die, dl, type_die); + pr_debug_dtp(" : %s\n", match_result_str(ret)); goto out; } @@ -1506,34 +1395,43 @@ out: return ret; } +static int arch_supports_insn_tracking(struct data_loc_info *dloc) +{ + if ((arch__is(dloc->arch, "x86")) || (arch__is(dloc->arch, "powerpc"))) + return 1; + return 0; +} + /* * Construct a list of basic blocks for each scope with variables and try to find * the data type by updating a type state table through instructions. 
*/ -static int find_data_type_block(struct data_loc_info *dloc, - Dwarf_Die *cu_die, Dwarf_Die *scopes, - int nr_scopes, Dwarf_Die *type_die) +static enum type_match_result find_data_type_block(struct data_loc_info *dloc, + Dwarf_Die *cu_die, + Dwarf_Die *scopes, + int nr_scopes, + Dwarf_Die *type_die) { LIST_HEAD(basic_blocks); struct die_var_type *var_types = NULL; u64 src_ip, dst_ip, prev_dst_ip; - int ret = -1; + enum type_match_result ret = PERF_TMR_UNKNOWN; /* TODO: other architecture support */ - if (!arch__is(dloc->arch, "x86")) - return -1; + if (!arch_supports_insn_tracking(dloc)) + return PERF_TMR_BAIL_OUT; prev_dst_ip = dst_ip = dloc->ip; for (int i = nr_scopes - 1; i >= 0; i--) { Dwarf_Addr base, start, end; LIST_HEAD(this_blocks); - int found; if (dwarf_ranges(&scopes[i], 0, &base, &start, &end) < 0) break; - pr_debug_dtp("scope: [%d/%d] (die:%lx)\n", - i + 1, nr_scopes, (long)dwarf_dieoffset(&scopes[i])); + pr_debug_dtp("scope: [%d/%d] ", i + 1, nr_scopes); + pr_debug_scope(&scopes[i]); + src_ip = map__objdump_2rip(dloc->ms->map, start); again: @@ -1558,10 +1456,17 @@ again: fixup_var_address(var_types, start); /* Find from start of this scope to the target instruction */ - found = find_data_type_insn(dloc, &basic_blocks, var_types, + ret = find_data_type_insn(dloc, &basic_blocks, var_types, cu_die, type_die); - if (found > 0) { + if (ret == PERF_TMR_OK) { char buf[64]; + int offset = dloc->op->offset; + const char *offset_sign = ""; + + if (offset < 0) { + offset = -offset; + offset_sign = "-"; + } if (dloc->op->multi_regs) snprintf(buf, sizeof(buf), "reg%d, reg%d", @@ -1569,14 +1474,12 @@ again: else snprintf(buf, sizeof(buf), "reg%d", dloc->op->reg1); - pr_debug_dtp("found by insn track: %#x(%s) type-offset=%#x\n", - dloc->op->offset, buf, dloc->type_offset); - pr_debug_type_name(type_die, TSR_KIND_TYPE); - ret = 0; + pr_debug_dtp("found by insn track: %s%#x(%s) type-offset=%#x\n", + offset_sign, offset, buf, dloc->type_offset); break; } - if (found < 0) + if (ret == PERF_TMR_BAIL_OUT) break; /* Go up to the next scope and find blocks to the start */ @@ -1595,14 +1498,17 @@ static int find_data_type_die(struct data_loc_info *dloc, Dwarf_Die *type_die) struct annotated_op_loc *loc = dloc->op; Dwarf_Die cu_die, var_die; Dwarf_Die *scopes = NULL; - int reg, offset; + int reg, offset = loc->offset; int ret = -1; int i, nr_scopes; int fbreg = -1; int fb_offset = 0; bool is_fbreg = false; + bool found = false; u64 pc; char buf[64]; + enum type_match_result result = PERF_TMR_UNKNOWN; + const char *offset_sign = ""; if (dloc->op->multi_regs) snprintf(buf, sizeof(buf), "reg%d, reg%d", dloc->op->reg1, dloc->op->reg2); @@ -1611,10 +1517,15 @@ static int find_data_type_die(struct data_loc_info *dloc, Dwarf_Die *type_die) else snprintf(buf, sizeof(buf), "reg%d", dloc->op->reg1); + if (offset < 0) { + offset = -offset; + offset_sign = "-"; + } + pr_debug_dtp("-----------------------------------------------------------\n"); - pr_debug_dtp("find data type for %#x(%s) at %s+%#"PRIx64"\n", - dloc->op->offset, buf, dloc->ms->sym->name, - dloc->ip - dloc->ms->sym->start); + pr_debug_dtp("find data type for %s%#x(%s) at %s+%#"PRIx64"\n", + offset_sign, offset, buf, + dloc->ms->sym->name, dloc->ip - dloc->ms->sym->start); /* * IP is a relative instruction address from the start of the map, as @@ -1644,7 +1555,7 @@ static int find_data_type_die(struct data_loc_info *dloc, Dwarf_Die *type_die) pr_debug_dtp("found by addr=%#"PRIx64" type_offset=%#x\n", dloc->var_addr, offset); 
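[Editor's note] The hunks above convert check_matching_type() and find_data_type_insn() from the old 1 (found) / 0 (not found) / -1 (bail out) int convention to enum type_match_result, and the new debug line prints the result through match_result_str(). That helper's body is not shown in this diff; a minimal sketch of such a mapping, assuming only the enumerators visible above (the strings are illustrative, not the actual ones):

	static const char *match_result_str(enum type_match_result tmr)
	{
		switch (tmr) {
		case PERF_TMR_OK:		/* type found at the target location */
			return "found";
		case PERF_TMR_UNKNOWN:		/* nothing known, keep looking */
			return "unknown";
		case PERF_TMR_NO_TYPE:		/* slot tracked but has no type info */
			return "no type information";
		case PERF_TMR_NO_POINTER:	/* register does not hold a pointer */
			return "no pointer";
		case PERF_TMR_NO_SIZE:		/* type size unavailable */
			return "no type size";
		case PERF_TMR_BAD_OFFSET:	/* offset is beyond the type size */
			return "offset beyond type size";
		case PERF_TMR_BAIL_OUT:		/* per-cpu/canary: no point retrying */
		default:
			return "bail out";
		}
	}
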
pr_debug_type_name(type_die, TSR_KIND_TYPE); - ret = 0; + found = true; goto out; } } @@ -1685,65 +1596,95 @@ retry: /* Search from the inner-most scope to the outer */ for (i = nr_scopes - 1; i >= 0; i--) { + Dwarf_Die mem_die; + int type_offset = offset; + if (reg == DWARF_REG_PC) { if (!die_find_variable_by_addr(&scopes[i], dloc->var_addr, - &var_die, &offset)) + &var_die, &type_offset)) continue; } else { /* Look up variables/parameters in this scope */ if (!die_find_variable_by_reg(&scopes[i], pc, reg, - &offset, is_fbreg, &var_die)) + &type_offset, is_fbreg, &var_die)) continue; } + pr_debug_dtp("found \"%s\" (die: %#lx) in scope=%d/%d (die: %#lx) ", + dwarf_diename(&var_die), (long)dwarf_dieoffset(&var_die), + i+1, nr_scopes, (long)dwarf_dieoffset(&scopes[i])); + /* Found a variable, see if it's correct */ - ret = check_variable(dloc, &var_die, type_die, reg, offset, is_fbreg); - if (ret == 0) { - pr_debug_dtp("found \"%s\" in scope=%d/%d (die: %#lx) ", - dwarf_diename(&var_die), i+1, nr_scopes, - (long)dwarf_dieoffset(&scopes[i])); + result = check_variable(dloc, &var_die, &mem_die, reg, type_offset, is_fbreg); + if (result == PERF_TMR_OK) { if (reg == DWARF_REG_PC) { pr_debug_dtp("addr=%#"PRIx64" type_offset=%#x\n", - dloc->var_addr, offset); + dloc->var_addr, type_offset); } else if (reg == DWARF_REG_FB || is_fbreg) { pr_debug_dtp("stack_offset=%#x type_offset=%#x\n", - fb_offset, offset); + fb_offset, type_offset); } else { - pr_debug_dtp("type_offset=%#x\n", offset); + pr_debug_dtp("type_offset=%#x\n", type_offset); + } + + if (!found || is_better_type(type_die, &mem_die)) { + *type_die = mem_die; + dloc->type_offset = type_offset; + found = true; } - pr_debug_location(&var_die, pc, reg); - pr_debug_type_name(type_die, TSR_KIND_TYPE); } else { - pr_debug_dtp("check variable \"%s\" failed (die: %#lx)\n", - dwarf_diename(&var_die), - (long)dwarf_dieoffset(&var_die)); - pr_debug_location(&var_die, pc, reg); - pr_debug_type_name(type_die, TSR_KIND_TYPE); + pr_debug_dtp("failed: %s\n", match_result_str(result)); } - dloc->type_offset = offset; - goto out; + + pr_debug_location(&var_die, pc, reg); + pr_debug_type_name(&mem_die, TSR_KIND_TYPE); } - if (loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) { + if (!found && loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) { reg = loc->reg2; goto retry; } - if (reg != DWARF_REG_PC) { - ret = find_data_type_block(dloc, &cu_die, scopes, - nr_scopes, type_die); - if (ret == 0) { + if (!found && reg != DWARF_REG_PC) { + result = find_data_type_block(dloc, &cu_die, scopes, + nr_scopes, type_die); + if (result == PERF_TMR_OK) { ann_data_stat.insn_track++; - goto out; + found = true; } } - if (ret < 0) { - pr_debug_dtp("no variable found\n"); - ann_data_stat.no_var++; +out: + pr_debug_dtp("final result: "); + if (found) { + pr_debug_type_name(type_die, TSR_KIND_TYPE); + ret = 0; + } else { + switch (result) { + case PERF_TMR_NO_TYPE: + case PERF_TMR_NO_POINTER: + pr_debug_dtp("%s\n", match_result_str(result)); + ann_data_stat.no_typeinfo++; + break; + case PERF_TMR_NO_SIZE: + pr_debug_dtp("%s\n", match_result_str(result)); + ann_data_stat.invalid_size++; + break; + case PERF_TMR_BAD_OFFSET: + pr_debug_dtp("%s\n", match_result_str(result)); + ann_data_stat.bad_offset++; + break; + case PERF_TMR_UNKNOWN: + case PERF_TMR_BAIL_OUT: + case PERF_TMR_OK: /* should not reach here */ + default: + pr_debug_dtp("no variable found\n"); + ann_data_stat.no_var++; + break; + } + ret = -1; } -out: free(scopes); return ret; } @@ -1764,16 
+1705,9 @@ out: */ struct annotated_data_type *find_data_type(struct data_loc_info *dloc) { - struct annotated_data_type *result = NULL; struct dso *dso = map__dso(dloc->ms->map); Dwarf_Die type_die; - dloc->di = debuginfo__new(dso__long_name(dso)); - if (dloc->di == NULL) { - pr_debug_dtp("cannot get the debug info\n"); - return NULL; - } - /* * The type offset is the same as instruction offset by default. * But when finding a global variable, the offset won't be valid. @@ -1783,13 +1717,9 @@ struct annotated_data_type *find_data_type(struct data_loc_info *dloc) dloc->fbreg = -1; if (find_data_type_die(dloc, &type_die) < 0) - goto out; - - result = dso__findnew_data_type(dso, &type_die); + return NULL; -out: - debuginfo__delete(dloc->di); - return result; + return dso__findnew_data_type(dso, &type_die); } static int alloc_data_type_histograms(struct annotated_data_type *adt, int nr_entries) @@ -1911,10 +1841,15 @@ static void print_annotated_data_header(struct hist_entry *he, struct evsel *evs struct evsel *pos; int i = 0; - for_each_group_evsel(pos, evsel) - printf(" event[%d] = %s\n", i++, pos->name); + nr_members = 0; + for_each_group_evsel(pos, evsel) { + if (symbol_conf.skip_empty && + evsel__hists(pos)->stats.nr_samples == 0) + continue; - nr_members = evsel->core.nr_members; + printf(" event[%d] = %s\n", i++, pos->name); + nr_members++; + } } if (symbol_conf.show_total_period) { @@ -1949,34 +1884,29 @@ static void print_annotated_data_type(struct annotated_data_type *mem_type, { struct annotated_member *child; struct type_hist *h = mem_type->histograms[evsel->core.idx]; - int i, nr_events = 1, samples = 0; + int i, nr_events = 0, samples = 0; u64 period = 0; int width = symbol_conf.show_total_period ? 11 : 7; + struct evsel *pos; - for (i = 0; i < member->size; i++) { - samples += h->addr[member->offset + i].nr_samples; - period += h->addr[member->offset + i].period; - } - print_annotated_data_value(h, period, samples); - - if (evsel__is_group_event(evsel)) { - struct evsel *pos; + for_each_group_evsel(pos, evsel) { + h = mem_type->histograms[pos->core.idx]; - for_each_group_member(pos, evsel) { - h = mem_type->histograms[pos->core.idx]; + if (symbol_conf.skip_empty && + evsel__hists(pos)->stats.nr_samples == 0) + continue; - samples = 0; - period = 0; - for (i = 0; i < member->size; i++) { - samples += h->addr[member->offset + i].nr_samples; - period += h->addr[member->offset + i].period; - } - print_annotated_data_value(h, period, samples); + samples = 0; + period = 0; + for (i = 0; i < member->size; i++) { + samples += h->addr[member->offset + i].nr_samples; + period += h->addr[member->offset + i].period; } - nr_events = evsel->core.nr_members; + print_annotated_data_value(h, period, samples); + nr_events++; } - printf(" %10d %10d %*s%s\t%s", + printf(" %#10x %#10x %*s%s\t%s", member->offset, member->size, indent, "", member->type_name, member->var_name ?: ""); diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h index 0a57d9f5ee78..869307c7f130 100644 --- a/tools/perf/util/annotate-data.h +++ b/tools/perf/util/annotate-data.h @@ -6,6 +6,12 @@ #include <linux/compiler.h> #include <linux/rbtree.h> #include <linux/types.h> +#include "dwarf-regs.h" +#include "annotate.h" + +#ifdef HAVE_LIBDW_SUPPORT +#include "debuginfo.h" +#endif struct annotated_op_loc; struct debuginfo; @@ -15,6 +21,24 @@ struct hist_entry; struct map_symbol; struct thread; +#define pr_debug_dtp(fmt, ...) 
\ +do { \ + if (debug_type_profile) \ + pr_info(fmt, ##__VA_ARGS__); \ + else \ + pr_debug3(fmt, ##__VA_ARGS__); \ +} while (0) + +enum type_state_kind { + TSR_KIND_INVALID = 0, + TSR_KIND_TYPE, + TSR_KIND_PERCPU_BASE, + TSR_KIND_CONST, + TSR_KIND_PERCPU_POINTER, + TSR_KIND_POINTER, + TSR_KIND_CANARY, +}; + /** * struct annotated_member - Type of member field * @node: List entry in the parent list @@ -100,9 +124,9 @@ struct data_loc_info { u64 var_addr; u8 cpumode; struct annotated_op_loc *op; + struct debuginfo *di; /* These are used internally */ - struct debuginfo *di; int fbreg; bool fb_cfa; @@ -142,7 +166,64 @@ struct annotated_data_stat { }; extern struct annotated_data_stat ann_data_stat; -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT +/* + * Type information in a register, valid when @ok is true. + * The @caller_saved registers are invalidated after a function call. + */ +struct type_state_reg { + Dwarf_Die type; + u32 imm_value; + /* + * The offset within the struct that the register points to. + * A value of 0 means the register points to the beginning. + * type_offset = op->offset + reg->offset + */ + s32 offset; + bool ok; + bool caller_saved; + u8 kind; + u8 copied_from; +}; + +/* Type information in a stack location, dynamically allocated */ +struct type_state_stack { + struct list_head list; + Dwarf_Die type; + int offset; + /* pointer offset, saves tsr->offset on the stack state */ + int ptr_offset; + int size; + bool compound; + u8 kind; +}; + +/* + * Maximum number of registers tracked in type_state. + * + * This limit must cover all supported architectures, since perf + * may analyze perf.data files generated on systems with a different + * register set. Use 32 as a safe upper bound instead of relying on + * build-arch specific values. + */ +#define TYPE_STATE_MAX_REGS 32 + +/* + * State table to maintain type info in each register and stack location. + * It'll be updated when new variable is allocated or type info is moved + * to a new location (register or stack). As it'd be used with the + * shortest path of basic blocks, it only maintains a single table. 
+ */ +struct type_state { + /* state of general purpose registers */ + struct type_state_reg regs[TYPE_STATE_MAX_REGS]; + /* state of stack location */ + struct list_head stack_vars; + /* return value register */ + int ret_reg; + /* stack pointer register */ + int stack_reg; +}; /* Returns data type at the location (ip, reg, offset) */ struct annotated_data_type *find_data_type(struct data_loc_info *dloc); @@ -158,9 +239,30 @@ void annotated_data_type__tree_delete(struct rb_root *root); /* Release all global variable information in the tree */ void global_var_type__tree_delete(struct rb_root *root); +/* Print data type annotation (including members) on stdout */ int hist_entry__annotate_data_tty(struct hist_entry *he, struct evsel *evsel); -#else /* HAVE_DWARF_SUPPORT */ +/* Get name of member field at the given offset in the data type */ +int annotated_data_type__get_member_name(struct annotated_data_type *adt, + char *buf, size_t sz, int member_offset); + +bool has_reg_type(struct type_state *state, int reg); +struct type_state_stack *findnew_stack_state(struct type_state *state, + int offset, u8 kind, + Dwarf_Die *type_die, + int ptr_offset); +void set_stack_state(struct type_state_stack *stack, int offset, u8 kind, + Dwarf_Die *type_die, int ptr_offset); +struct type_state_stack *find_stack_state(struct type_state *state, + int offset); +bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc, + u64 ip, u64 var_addr, int *var_offset, + Dwarf_Die *type_die); +bool get_global_var_info(struct data_loc_info *dloc, u64 addr, + const char **var_name, int *var_offset); +void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind); + +#else /* HAVE_LIBDW_SUPPORT */ static inline struct annotated_data_type * find_data_type(struct data_loc_info *dloc __maybe_unused) @@ -192,7 +294,15 @@ static inline int hist_entry__annotate_data_tty(struct hist_entry *he __maybe_un return -1; } -#endif /* HAVE_DWARF_SUPPORT */ +static inline int annotated_data_type__get_member_name(struct annotated_data_type *adt __maybe_unused, + char *buf __maybe_unused, + size_t sz __maybe_unused, + int member_offset __maybe_unused) +{ + return -1; +} + +#endif /* HAVE_LIBDW_SUPPORT */ #ifdef HAVE_SLANG_SUPPORT int hist_entry__annotate_data_tui(struct hist_entry *he, struct evsel *evsel, diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 1451caf25e77..cc7764455faf 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -25,6 +25,7 @@ #include "srcline.h" #include "units.h" #include "debug.h" +#include "debuginfo.h" #include "annotate.h" #include "annotate-data.h" #include "evsel.h" @@ -40,6 +41,7 @@ #include "namespaces.h" #include "thread.h" #include "hashmap.h" +#include "strbuf.h" #include <regex.h> #include <linux/bitops.h> #include <linux/kernel.h> @@ -47,6 +49,7 @@ #include <linux/zalloc.h> #include <subcmd/parse-options.h> #include <subcmd/run-command.h> +#include <math.h> /* FIXME: For the HE_COLORSET */ #include "ui/browser.h" @@ -84,6 +87,8 @@ struct annotated_data_type canary_type = { }, }; +#define NO_TYPE ((struct annotated_data_type *)-1UL) + /* symbol histogram: key = offset << 16 | evsel->core.idx */ static size_t sym_hist_hash(long key, void *ctx __maybe_unused) { @@ -206,7 +211,7 @@ static int __symbol__account_cycles(struct cyc_hist *ch, } static int __symbol__inc_addr_samples(struct map_symbol *ms, - struct annotated_source *src, int evidx, u64 addr, + struct annotated_source *src, struct evsel *evsel, u64 addr, struct perf_sample *sample) { 
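[Editor's note] As the "key = offset << 16 | evsel->core.idx" comment above documents, the per-symbol sample hashmap packs the instruction offset and the event index into a single key; that is what the hash_key assignment in the function below computes, so the event index has to fit in the low 16 bits. A minimal illustration of the packing (the helper name is hypothetical, not part of this patch):

	static inline long sym_hist_key(u64 offset, const struct evsel *evsel)
	{
		/* low 16 bits: event index, upper bits: offset within the symbol */
		return (long)((offset << 16) | (u16)evsel->core.idx);
	}
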
struct symbol *sym = ms->sym; @@ -225,14 +230,14 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms, } offset = addr - sym->start; - h = annotated_source__histogram(src, evidx); + h = annotated_source__histogram(src, evsel); if (h == NULL) { pr_debug("%s(%d): ENOMEM! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 ", func: %d\n", __func__, __LINE__, sym->name, sym->start, addr, sym->end, sym->type == STT_FUNC); return -ENOMEM; } - hash_key = offset << 16 | evidx; + hash_key = offset << 16 | evsel->core.idx; if (!hashmap__find(src->samples, hash_key, &entry)) { entry = zalloc(sizeof(*entry)); if (entry == NULL) @@ -249,7 +254,7 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms, pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64 ", evidx=%d] => nr_samples: %" PRIu64 ", period: %" PRIu64 "\n", - sym->start, sym->name, addr, addr - sym->start, evidx, + sym->start, sym->name, addr, addr - sym->start, evsel->core.idx, entry->nr_samples, entry->period); return 0; } @@ -265,22 +270,30 @@ struct annotated_branch *annotation__get_branch(struct annotation *notes) return notes->branch; } -static struct cyc_hist *symbol__cycles_hist(struct symbol *sym) +static struct annotated_branch *symbol__find_branch_hist(struct symbol *sym, + unsigned int br_cntr_nr) { struct annotation *notes = symbol__annotation(sym); struct annotated_branch *branch; + const size_t size = symbol__size(sym); branch = annotation__get_branch(notes); if (branch == NULL) return NULL; if (branch->cycles_hist == NULL) { - const size_t size = symbol__size(sym); - branch->cycles_hist = calloc(size, sizeof(struct cyc_hist)); + if (!branch->cycles_hist) + return NULL; + } + + if (br_cntr_nr && branch->br_cntr == NULL) { + branch->br_cntr = calloc(br_cntr_nr * size, sizeof(u64)); + if (!branch->br_cntr) + return NULL; } - return branch->cycles_hist; + return branch; } struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists) @@ -312,19 +325,48 @@ static int symbol__inc_addr_samples(struct map_symbol *ms, if (sym == NULL) return 0; src = symbol__hists(sym, evsel->evlist->core.nr_entries); - return src ? __symbol__inc_addr_samples(ms, src, evsel->core.idx, addr, sample) : 0; + return src ? 
__symbol__inc_addr_samples(ms, src, evsel, addr, sample) : 0; +} + +static int symbol__account_br_cntr(struct annotated_branch *branch, + struct evsel *evsel, + unsigned offset, + u64 br_cntr) +{ + unsigned int br_cntr_nr = evsel__leader(evsel)->br_cntr_nr; + unsigned int base = evsel__leader(evsel)->br_cntr_idx; + unsigned int off = offset * evsel->evlist->nr_br_cntr; + u64 *branch_br_cntr = branch->br_cntr; + unsigned int i, mask, width; + + if (!br_cntr || !branch_br_cntr) + return 0; + + perf_env__find_br_cntr_info(evsel__env(evsel), NULL, &width); + mask = (1L << width) - 1; + for (i = 0; i < br_cntr_nr; i++) { + u64 cntr = (br_cntr >> i * width) & mask; + + branch_br_cntr[off + i + base] += cntr; + if (cntr == mask) + branch_br_cntr[off + i + base] |= ANNOTATION__BR_CNTR_SATURATED_FLAG; + } + + return 0; } -static int symbol__account_cycles(u64 addr, u64 start, - struct symbol *sym, unsigned cycles) +static int symbol__account_cycles(u64 addr, u64 start, struct symbol *sym, + unsigned cycles, struct evsel *evsel, + u64 br_cntr) { - struct cyc_hist *cycles_hist; + struct annotated_branch *branch; unsigned offset; + int ret; if (sym == NULL) return 0; - cycles_hist = symbol__cycles_hist(sym); - if (cycles_hist == NULL) + branch = symbol__find_branch_hist(sym, evsel->evlist->nr_br_cntr); + if (!branch) return -ENOMEM; if (addr < sym->start || addr >= sym->end) return -ERANGE; @@ -336,15 +378,22 @@ static int symbol__account_cycles(u64 addr, u64 start, start = 0; } offset = addr - sym->start; - return __symbol__account_cycles(cycles_hist, + ret = __symbol__account_cycles(branch->cycles_hist, start ? start - sym->start : 0, offset, cycles, !!start); + + if (ret) + return ret; + + return symbol__account_br_cntr(branch, evsel, offset, br_cntr); } int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, struct addr_map_symbol *start, - unsigned cycles) + unsigned cycles, + struct evsel *evsel, + u64 br_cntr) { u64 saddr = 0; int err; @@ -370,7 +419,7 @@ int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, start ? start->addr : 0, ams->ms.sym ? 
ams->ms.sym->start + map__start(ams->ms.map) : 0, saddr); - err = symbol__account_cycles(ams->al_addr, saddr, ams->ms.sym, cycles); + err = symbol__account_cycles(ams->al_addr, saddr, ams->ms.sym, cycles, evsel, br_cntr); if (err) pr_debug2("account_cycles failed %d\n", err); return err; @@ -411,6 +460,7 @@ static void annotated_branch__delete(struct annotated_branch *branch) { if (branch) { zfree(&branch->cycles_hist); + free(branch->br_cntr); free(branch); } } @@ -454,8 +504,10 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 } } -static int annotation__compute_ipc(struct annotation *notes, size_t size) +static int annotation__compute_ipc(struct annotation *notes, size_t size, + struct evsel *evsel) { + unsigned int br_cntr_nr = evsel->evlist->nr_br_cntr; int err = 0; s64 offset; @@ -490,6 +542,20 @@ static int annotation__compute_ipc(struct annotation *notes, size_t size) al->cycles->max = ch->cycles_max; al->cycles->min = ch->cycles_min; } + if (al && notes->branch->br_cntr) { + if (!al->br_cntr) { + al->br_cntr = calloc(br_cntr_nr, sizeof(u64)); + if (!al->br_cntr) { + err = ENOMEM; + break; + } + } + al->num_aggr = ch->num_aggr; + al->br_cntr_nr = br_cntr_nr; + al->evsel = evsel; + memcpy(al->br_cntr, ¬es->branch->br_cntr[offset * br_cntr_nr], + br_cntr_nr * sizeof(u64)); + } } } @@ -501,8 +567,10 @@ static int annotation__compute_ipc(struct annotation *notes, size_t size) struct annotation_line *al; al = annotated_source__get_line(notes->src, offset); - if (al) + if (al) { zfree(&al->cycles); + zfree(&al->br_cntr); + } } } } @@ -692,20 +760,38 @@ static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_wi return 0; } +static struct annotated_data_type * +__hist_entry__get_data_type(struct hist_entry *he, struct arch *arch, + struct debuginfo *dbg, struct disasm_line *dl, + int *type_offset); + +static bool needs_type_info(struct annotated_data_type *data_type) +{ + if (data_type == NULL || data_type == NO_TYPE) + return false; + + if (verbose) + return true; + + return (data_type != &stackop_type) && (data_type != &canary_type); +} + static int -annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, - struct evsel *evsel, u64 len, int min_pcnt, int printed, - int max_lines, struct annotation_line *queue, int addr_fmt_width, - int percent_type) +annotation_line__print(struct annotation_line *al, struct annotation_print_data *apd, + struct annotation_options *opts, int printed, + struct annotation_line *queue) { + struct symbol *sym = apd->he->ms.sym; struct disasm_line *dl = container_of(al, struct disasm_line, al); + struct annotation *notes = symbol__annotation(sym); static const char *prev_line; + int max_lines = opts->max_lines; + int percent_type = opts->percent_type; if (al->offset != -1) { double max_percent = 0.0; int i, nr_percent = 1; const char *color; - struct annotation *notes = symbol__annotation(sym); for (i = 0; i < al->data_nr; i++) { double percent; @@ -720,19 +806,23 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start if (al->data_nr > nr_percent) nr_percent = al->data_nr; - if (max_percent < min_pcnt) + if (max_percent < opts->min_pcnt) return -1; if (max_lines && printed >= max_lines) return 1; if (queue != NULL) { + struct annotation_options queue_opts = { + .max_lines = 1, + .percent_type = percent_type, + }; + list_for_each_entry_from(queue, ¬es->src->source, node) { if (queue == al) break; - annotation_line__print(queue, sym, start, evsel, len, - 
0, 0, 1, NULL, addr_fmt_width, - percent_type); + annotation_line__print(queue, apd, &queue_opts, + /*printed=*/0, /*queue=*/NULL); } } @@ -757,7 +847,31 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start printf(" : "); - disasm_line__print(dl, start, addr_fmt_width); + disasm_line__print(dl, notes->src->start, apd->addr_fmt_width); + + if (opts->code_with_type && apd->dbg) { + struct annotated_data_type *data_type; + int offset = 0; + + data_type = __hist_entry__get_data_type(apd->he, apd->arch, + apd->dbg, dl, &offset); + if (needs_type_info(data_type)) { + char buf[4096]; + + printf("\t\t# data-type: %s", + data_type->self.type_name); + + if (data_type != &stackop_type && + data_type != &canary_type) + printf(" +%#x", offset); + + if (annotated_data_type__get_member_name(data_type, + buf, + sizeof(buf), + offset)) + printf(" (%s)", buf); + } + } /* * Also color the filename and line if needed, with @@ -775,18 +889,16 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start } else if (max_lines && printed >= max_lines) return 1; else { - int width = symbol_conf.show_total_period ? 12 : 8; + int width = annotation__pcnt_width(notes); if (queue) return -1; - if (evsel__is_group_event(evsel)) - width *= evsel->core.nr_members; - if (!*al->line) printf(" %*s:\n", width, " "); else - printf(" %*s: %-*d %s\n", width, " ", addr_fmt_width, al->line_nr, al->line); + printf(" %*s: %-*d %s\n", width, " ", apd->addr_fmt_width, + al->line_nr, al->line); } return 0; @@ -798,15 +910,14 @@ static void calc_percent(struct annotation *notes, s64 offset, s64 end) { struct hists *hists = evsel__hists(evsel); - int evidx = evsel->core.idx; - struct sym_hist *sym_hist = annotation__histogram(notes, evidx); + struct sym_hist *sym_hist = annotation__histogram(notes, evsel); unsigned int hits = 0; u64 period = 0; while (offset < end) { struct sym_hist_entry *entry; - entry = annotated_source__hist_entry(notes->src, evidx, offset); + entry = annotated_source__hist_entry(notes->src, evsel, offset); if (entry) { hits += entry->nr_samples; period += entry->period; @@ -851,6 +962,10 @@ static void annotation__calc_percent(struct annotation *notes, BUG_ON(i >= al->data_nr); + if (symbol_conf.skip_empty && + evsel__hists(evsel)->stats.nr_samples == 0) + continue; + data = &al->data[i++]; calc_percent(notes, evsel, data, al->offset, end); @@ -865,7 +980,7 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel) annotation__calc_percent(notes, evsel, symbol__size(sym)); } -static int evsel__get_arch(struct evsel *evsel, struct arch **parch) +int evsel__get_arch(struct evsel *evsel, struct arch **parch) { struct perf_env *env = evsel__env(evsel); const char *arch_name = perf_env__arch(env); @@ -900,14 +1015,13 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); struct annotate_args args = { - .evsel = evsel, .options = &annotate_opts, }; struct arch *arch = NULL; - int err; + int err, nr; err = evsel__get_arch(evsel, &arch); - if (err < 0) + if (err) return err; if (parch) @@ -925,6 +1039,19 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, return -1; } + nr = 0; + if (evsel__is_group_event(evsel)) { + struct evsel *pos; + + for_each_group_evsel(pos, evsel) { + if (symbol_conf.skip_empty && + evsel__hists(pos)->stats.nr_samples == 0) + continue; + nr++; + } + } + notes->src->nr_events = nr ? 
nr : 1; + if (annotate_opts.full_addr) notes->src->start = map__objdump_2mem(ms->map, ms->sym->start); else @@ -1060,15 +1187,14 @@ static void print_summary(struct rb_root *root, const char *filename) static void symbol__annotate_hits(struct symbol *sym, struct evsel *evsel) { - int evidx = evsel->core.idx; struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evidx); + struct sym_hist *h = annotation__histogram(notes, evsel); u64 len = symbol__size(sym), offset; for (offset = 0; offset < len; ++offset) { struct sym_hist_entry *entry; - entry = annotated_source__hist_entry(notes->src, evidx, offset); + entry = annotated_source__hist_entry(notes->src, evsel, offset); if (entry && entry->nr_samples != 0) printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2, sym->start + offset, entry->nr_samples); @@ -1089,8 +1215,9 @@ static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start) return 0; } -int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) +int hist_entry__annotate_printf(struct hist_entry *he, struct evsel *evsel) { + struct map_symbol *ms = &he->ms; struct map *map = ms->map; struct symbol *sym = ms->sym; struct dso *dso = map__dso(map); @@ -1098,15 +1225,17 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) const char *d_filename; const char *evsel_name = evsel__name(evsel); struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evsel->core.idx); + struct sym_hist *h = annotation__histogram(notes, evsel); struct annotation_line *pos, *queue = NULL; struct annotation_options *opts = &annotate_opts; - u64 start = map__rip_2objdump(map, sym->start); - int printed = 2, queue_len = 0, addr_fmt_width; + struct annotation_print_data apd = { + .he = he, + .evsel = evsel, + }; + int printed = 2, queue_len = 0; int more = 0; bool context = opts->context; - u64 len; - int width = symbol_conf.show_total_period ? 
12 : 8; + int width = annotation__pcnt_width(notes); int graph_dotted_len; char buf[512]; @@ -1119,10 +1248,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) else d_filename = basename(filename); - len = symbol__size(sym); - if (evsel__is_group_event(evsel)) { - width *= evsel->core.nr_members; evsel__group_desc(evsel, buf, sizeof(buf)); evsel_name = buf; } @@ -1140,7 +1266,10 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) if (verbose > 0) symbol__annotate_hits(sym, evsel); - addr_fmt_width = annotated_source__addr_fmt_width(¬es->src->source, start); + apd.addr_fmt_width = annotated_source__addr_fmt_width(¬es->src->source, + notes->src->start); + evsel__get_arch(evsel, &apd.arch); + apd.dbg = dso__debuginfo(dso); list_for_each_entry(pos, ¬es->src->source, node) { int err; @@ -1150,9 +1279,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) queue_len = 0; } - err = annotation_line__print(pos, sym, start, evsel, len, - opts->min_pcnt, printed, opts->max_lines, - queue, addr_fmt_width, opts->percent_type); + err = annotation_line__print(pos, &apd, opts, printed, queue); switch (err) { case 0: @@ -1183,6 +1310,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel) } } + debuginfo__delete(apd.dbg); free(filename); return more; @@ -1229,7 +1357,8 @@ static void FILE__write_graph(void *fp, int graph) fputs(s, fp); } -static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp) +static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, + struct annotation_print_data *apd) { struct annotation *notes = symbol__annotation(sym); struct annotation_write_ops wops = { @@ -1243,24 +1372,37 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp) }; struct annotation_line *al; + if (annotate_opts.code_with_type) { + evsel__get_arch(apd->evsel, &apd->arch); + apd->dbg = dso__debuginfo(map__dso(apd->he->ms.map)); + } + list_for_each_entry(al, ¬es->src->source, node) { if (annotation_line__filter(al)) continue; - annotation_line__write(al, notes, &wops); + annotation_line__write(al, notes, &wops, apd); fputc('\n', fp); wops.first_line = false; } + if (annotate_opts.code_with_type) + debuginfo__delete(apd->dbg); + return 0; } -int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel) +int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, + struct hist_entry *he) { const char *ev_name = evsel__name(evsel); char buf[1024]; char *filename; int err = -1; FILE *fp; + struct annotation_print_data apd = { + .he = he, + .evsel = evsel, + }; if (asprintf(&filename, "%s.annotation", ms->sym->name) < 0) return -1; @@ -1276,7 +1418,7 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel) fprintf(fp, "%s() %s\nEvent: %s\n\n", ms->sym->name, dso__long_name(map__dso(ms->map)), ev_name); - symbol__annotate_fprintf2(ms->sym, fp); + symbol__annotate_fprintf2(ms->sym, fp, &apd); fclose(fp); err = 0; @@ -1285,18 +1427,18 @@ out_free_filename: return err; } -void symbol__annotate_zero_histogram(struct symbol *sym, int evidx) +void symbol__annotate_zero_histogram(struct symbol *sym, struct evsel *evsel) { struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evidx); + struct sym_hist *h = annotation__histogram(notes, evsel); memset(h, 0, sizeof(*notes->src->histograms) * notes->src->nr_histograms); } -void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) +void 
symbol__annotate_decay_histogram(struct symbol *sym, struct evsel *evsel) { struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evidx); + struct sym_hist *h = annotation__histogram(notes, evsel); struct annotation_line *al; h->nr_samples = 0; @@ -1306,7 +1448,7 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) if (al->offset == -1) continue; - entry = annotated_source__hist_entry(notes->src, evidx, al->offset); + entry = annotated_source__hist_entry(notes->src, evsel, al->offset); if (entry == NULL) continue; @@ -1323,6 +1465,7 @@ void annotated_source__purge(struct annotated_source *as) list_del_init(&al->node); disasm_line__free(disasm_line(al)); } + as->tried_source = false; } static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp) @@ -1520,12 +1663,17 @@ static void symbol__calc_lines(struct map_symbol *ms, struct rb_root *root) annotation__calc_lines(notes, ms, root); } -int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel) +int hist_entry__tty_annotate2(struct hist_entry *he, struct evsel *evsel) { + struct map_symbol *ms = &he->ms; struct dso *dso = map__dso(ms->map); struct symbol *sym = ms->sym; struct rb_root source_line = RB_ROOT; struct hists *hists = evsel__hists(evsel); + struct annotation_print_data apd = { + .he = he, + .evsel = evsel, + }; char buf[1024]; int err; @@ -1548,15 +1696,16 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel) hists__scnprintf_title(hists, buf, sizeof(buf)); fprintf(stdout, "%s, [percent: %s]\n%s() %s\n", buf, percent_type_str(annotate_opts.percent_type), sym->name, dso__long_name(dso)); - symbol__annotate_fprintf2(sym, stdout); + symbol__annotate_fprintf2(sym, stdout, &apd); annotated_source__purge(symbol__annotation(sym)->src); return 0; } -int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel) +int hist_entry__tty_annotate(struct hist_entry *he, struct evsel *evsel) { + struct map_symbol *ms = &he->ms; struct dso *dso = map__dso(ms->map); struct symbol *sym = ms->sym; struct rb_root source_line = RB_ROOT; @@ -1580,7 +1729,7 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel) print_summary(&source_line, dso__long_name(dso)); } - symbol__annotate_printf(ms, evsel); + hist_entry__annotate_printf(he, evsel); annotated_source__purge(symbol__annotation(sym)->src); @@ -1594,13 +1743,12 @@ bool ui__has_annotation(void) static double annotation_line__max_percent(struct annotation_line *al, - struct annotation *notes, unsigned int percent_type) { double percent_max = 0.0; int i; - for (i = 0; i < notes->src->nr_events; i++) { + for (i = 0; i < al->data_nr; i++) { double percent; percent = annotation_data__percent(&al->data[i], @@ -1613,7 +1761,7 @@ static double annotation_line__max_percent(struct annotation_line *al, return percent_max; } -static void disasm_line__write(struct disasm_line *dl, struct annotation *notes, +static int disasm_line__write(struct disasm_line *dl, struct annotation *notes, void *obj, char *bf, size_t size, void (*obj__printf)(void *obj, const char *fmt, ...), void (*obj__write_graph)(void *obj, int graph)) @@ -1641,8 +1789,8 @@ call_like: obj__printf(obj, " "); } - disasm_line__scnprintf(dl, bf, size, !annotate_opts.use_offset, - notes->src->widths.max_ins_name); + return disasm_line__scnprintf(dl, bf, size, !annotate_opts.use_offset, + notes->src->widths.max_ins_name) + 2; } static void ipc_coverage_string(char *bf, int size, struct annotation *notes) @@ -1662,24 
+1810,225 @@ static void ipc_coverage_string(char *bf, int size, struct annotation *notes) ipc, coverage); } -static void __annotation_line__write(struct annotation_line *al, struct annotation *notes, - bool first_line, bool current_entry, bool change_color, int width, - void *obj, unsigned int percent_type, - int (*obj__set_color)(void *obj, int color), - void (*obj__set_percent_color)(void *obj, double percent, bool current), - int (*obj__set_jumps_percent_color)(void *obj, int nr, bool current), - void (*obj__printf)(void *obj, const char *fmt, ...), - void (*obj__write_graph)(void *obj, int graph)) +int annotation_br_cntr_abbr_list(char **str, struct evsel *evsel, bool header) +{ + struct evsel *pos; + struct strbuf sb; + + if (evsel->evlist->nr_br_cntr <= 0) + return -ENOTSUP; + + strbuf_init(&sb, /*hint=*/ 0); + + if (header && strbuf_addf(&sb, "# Branch counter abbr list:\n")) + goto err; + + evlist__for_each_entry(evsel->evlist, pos) { + if (!(pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) + continue; + if (header && strbuf_addf(&sb, "#")) + goto err; + + if (strbuf_addf(&sb, " %s = %s\n", pos->name, pos->abbr_name)) + goto err; + } + + if (header && strbuf_addf(&sb, "#")) + goto err; + if (strbuf_addf(&sb, " '-' No event occurs\n")) + goto err; + + if (header && strbuf_addf(&sb, "#")) + goto err; + if (strbuf_addf(&sb, " '+' Event occurrences may be lost due to branch counter saturation\n")) + goto err; + + *str = strbuf_detach(&sb, NULL); + + return 0; +err: + strbuf_release(&sb); + return -ENOMEM; +} + +/* Assume the branch counter saturates at 3 */ +#define ANNOTATION_BR_CNTR_SATURATION 3 + +int annotation_br_cntr_entry(char **str, int br_cntr_nr, + u64 *br_cntr, int num_aggr, + struct evsel *evsel) +{ + struct evsel *pos = evsel ? evlist__first(evsel->evlist) : NULL; + bool saturated = false; + int i, j, avg, used; + struct strbuf sb; + + strbuf_init(&sb, /*hint=*/ 0); + for (i = 0; i < br_cntr_nr; i++) { + used = 0; + avg = ceil((double)(br_cntr[i] & ~ANNOTATION__BR_CNTR_SATURATED_FLAG) / + (double)num_aggr); + + /* + * A histogram with the abbr name is displayed by default. + * With -v, the exact branch counter values are displayed. 
+ */ + if (verbose) { + evlist__for_each_entry_from(evsel->evlist, pos) { + if ((pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) && + (pos->br_cntr_idx == i)) + break; + } + if (strbuf_addstr(&sb, pos->abbr_name)) + goto err; + + if (!br_cntr[i]) { + if (strbuf_addstr(&sb, "=-")) + goto err; + } else { + if (strbuf_addf(&sb, "=%d", avg)) + goto err; + } + if (br_cntr[i] & ANNOTATION__BR_CNTR_SATURATED_FLAG) { + if (strbuf_addch(&sb, '+')) + goto err; + } else { + if (strbuf_addch(&sb, ' ')) + goto err; + } + + if ((i < br_cntr_nr - 1) && strbuf_addch(&sb, ',')) + goto err; + continue; + } + + if (strbuf_addch(&sb, '|')) + goto err; + + if (!br_cntr[i]) { + if (strbuf_addch(&sb, '-')) + goto err; + used++; + } else { + evlist__for_each_entry_from(evsel->evlist, pos) { + if ((pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) && + (pos->br_cntr_idx == i)) + break; + } + if (br_cntr[i] & ANNOTATION__BR_CNTR_SATURATED_FLAG) + saturated = true; + + for (j = 0; j < avg; j++, used++) { + /* Print + if the number of logged events > 3 */ + if (j >= ANNOTATION_BR_CNTR_SATURATION) { + saturated = true; + break; + } + if (strbuf_addstr(&sb, pos->abbr_name)) + goto err; + } + + if (saturated) { + if (strbuf_addch(&sb, '+')) + goto err; + used++; + } + pos = list_next_entry(pos, core.node); + } + + for (j = used; j < ANNOTATION_BR_CNTR_SATURATION + 1; j++) { + if (strbuf_addch(&sb, ' ')) + goto err; + } + } + + if (!verbose && strbuf_addch(&sb, br_cntr_nr ? '|' : ' ')) + goto err; + + *str = strbuf_detach(&sb, NULL); + + return 0; +err: + strbuf_release(&sb); + return -ENOMEM; +} + +struct type_hash_entry { + struct annotated_data_type *type; + int offset; +}; +static int disasm_line__snprint_type_info(struct disasm_line *dl, + char *buf, int len, + struct annotation_print_data *apd) { - double percent_max = annotation_line__max_percent(al, notes, percent_type); - int pcnt_width = annotation__pcnt_width(notes), - cycles_width = annotation__cycles_width(notes); + struct annotated_data_type *data_type = NULL; + struct type_hash_entry *entry = NULL; + char member[256]; + int offset = 0; + int printed; + + scnprintf(buf, len, " "); + + if (!annotate_opts.code_with_type || apd->dbg == NULL) + return 1; + + if (apd->type_hash) { + hashmap__find(apd->type_hash, dl->al.offset, &entry); + if (entry != NULL) { + data_type = entry->type; + offset = entry->offset; + } + } + + if (data_type == NULL) + data_type = __hist_entry__get_data_type(apd->he, apd->arch, apd->dbg, dl, &offset); + + if (apd->type_hash && entry == NULL) { + entry = malloc(sizeof(*entry)); + if (entry != NULL) { + entry->type = data_type; + entry->offset = offset; + hashmap__add(apd->type_hash, dl->al.offset, entry); + } + } + + if (!needs_type_info(data_type)) + return 1; + + printed = scnprintf(buf, len, "\t\t# data-type: %s", data_type->self.type_name); + + if (data_type != &stackop_type && data_type != &canary_type && len > printed) + printed += scnprintf(buf + printed, len - printed, " +%#x", offset); + + if (annotated_data_type__get_member_name(data_type, member, sizeof(member), offset) && + len > printed) { + printed += scnprintf(buf + printed, len - printed, " (%s)", member); + } + return printed; +} + +void annotation_line__write(struct annotation_line *al, struct annotation *notes, + const struct annotation_write_ops *wops, + struct annotation_print_data *apd) +{ + bool current_entry = wops->current_entry; + bool change_color = wops->change_color; + double percent_max = annotation_line__max_percent(al, 
annotate_opts.percent_type); + int width = wops->width; + int pcnt_width = annotation__pcnt_width(notes); + int cycles_width = annotation__cycles_width(notes); bool show_title = false; char bf[256]; int printed; - - if (first_line && (al->offset == -1 || percent_max == 0.0)) { + void *obj = wops->obj; + int (*obj__set_color)(void *obj, int color) = wops->set_color; + void (*obj__set_percent_color)(void *obj, double percent, bool current) = wops->set_percent_color; + int (*obj__set_jumps_percent_color)(void *obj, int nr, bool current) = wops->set_jumps_percent_color; + void (*obj__printf)(void *obj, const char *fmt, ...) = wops->printf; + void (*obj__write_graph)(void *obj, int graph) = wops->write_graph; + + if (wops->first_line && (al->offset == -1 || percent_max == 0.0)) { if (notes->branch && al->cycles) { if (al->cycles->ipc == 0.0 && al->cycles->avg == 0) show_title = true; @@ -1690,19 +2039,20 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati if (al->offset != -1 && percent_max != 0.0) { int i; - for (i = 0; i < notes->src->nr_events; i++) { + for (i = 0; i < al->data_nr; i++) { double percent; - percent = annotation_data__percent(&al->data[i], percent_type); + percent = annotation_data__percent(&al->data[i], + annotate_opts.percent_type); obj__set_percent_color(obj, percent, current_entry); if (symbol_conf.show_total_period) { obj__printf(obj, "%11" PRIu64 " ", al->data[i].he.period); } else if (symbol_conf.show_nr_samples) { - obj__printf(obj, "%6" PRIu64 " ", + obj__printf(obj, "%7" PRIu64 " ", al->data[i].he.nr_samples); } else { - obj__printf(obj, "%6.2f ", percent); + obj__printf(obj, "%7.2f ", percent); } } } else { @@ -1716,6 +2066,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati symbol_conf.show_nr_samples ? 
"Samples" : "Percent"); } } + width -= pcnt_width; if (notes->branch) { if (al->cycles && al->cycles->ipc) @@ -1758,16 +2109,34 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati "Cycle(min/max)"); } + if (annotate_opts.show_br_cntr) { + if (show_title) { + obj__printf(obj, "%*s ", + ANNOTATION__BR_CNTR_WIDTH, + "Branch Counter"); + } else { + char *buf; + + if (!annotation_br_cntr_entry(&buf, al->br_cntr_nr, al->br_cntr, + al->num_aggr, al->evsel)) { + obj__printf(obj, "%*s ", ANNOTATION__BR_CNTR_WIDTH, buf); + free(buf); + } + } + } + if (show_title && !*al->line) { ipc_coverage_string(bf, sizeof(bf), notes); obj__printf(obj, "%*s", ANNOTATION__AVG_IPC_WIDTH, bf); } } + width -= cycles_width; obj__printf(obj, " "); + width -= 1; if (!*al->line) - obj__printf(obj, "%-*s", width - pcnt_width - cycles_width, " "); + obj__printf(obj, "%-*s", width, " "); else if (al->offset == -1) { if (al->line_nr && annotate_opts.show_linenr) printed = scnprintf(bf, sizeof(bf), "%-*d ", @@ -1776,7 +2145,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati printed = scnprintf(bf, sizeof(bf), "%-*s ", notes->src->widths.addr, " "); obj__printf(obj, bf); - obj__printf(obj, "%-*s", width - printed - pcnt_width - cycles_width + 1, al->line); + width -= printed; + obj__printf(obj, "%-*s", width, al->line); } else { u64 addr = al->offset; int color = -1; @@ -1819,22 +2189,18 @@ print_addr: if (change_color) obj__set_color(obj, color); - disasm_line__write(disasm_line(al), notes, obj, bf, sizeof(bf), obj__printf, obj__write_graph); + width -= printed; - obj__printf(obj, "%-*s", width - pcnt_width - cycles_width - 3 - printed, bf); - } + printed = disasm_line__write(disasm_line(al), notes, obj, bf, sizeof(bf), + obj__printf, obj__write_graph); -} + obj__printf(obj, "%s", bf); + width -= printed; + + disasm_line__snprint_type_info(disasm_line(al), bf, sizeof(bf), apd); + obj__printf(obj, "%-*s", width, bf); + } -void annotation_line__write(struct annotation_line *al, struct annotation *notes, - struct annotation_write_ops *wops) -{ - __annotation_line__write(al, notes, wops->first_line, wops->current_entry, - wops->change_color, wops->width, wops->obj, - annotate_opts.percent_type, - wops->set_color, wops->set_percent_color, - wops->set_jumps_percent_color, wops->printf, - wops->write_graph); } int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, @@ -1843,10 +2209,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); size_t size = symbol__size(sym); - int nr_pcnt = 1, err; - - if (evsel__is_group_event(evsel)) - nr_pcnt = evsel->core.nr_members; + int err; err = symbol__annotate(ms, evsel, parch); if (err) @@ -1857,19 +2220,68 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, annotation__set_index(notes); annotation__mark_jump_targets(notes, sym); - err = annotation__compute_ipc(notes, size); + err = annotation__compute_ipc(notes, size, evsel); if (err) return err; annotation__init_column_widths(notes, sym); - notes->src->nr_events = nr_pcnt; - annotation__update_column_widths(notes); sym->annotate2 = 1; return 0; } +const char * const perf_disassembler__strs[] = { + [PERF_DISASM_UNKNOWN] = "unknown", + [PERF_DISASM_LLVM] = "llvm", + [PERF_DISASM_CAPSTONE] = "capstone", + [PERF_DISASM_OBJDUMP] = "objdump", +}; + + +static void annotation_options__add_disassembler(struct annotation_options *options, + enum perf_disassembler 
dis) +{ + for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers); i++) { + if (options->disassemblers[i] == dis) { + /* Disassembler is already present, so don't add it again. */ + return; + } + if (options->disassemblers[i] == PERF_DISASM_UNKNOWN) { + /* Found a free slot. */ + options->disassemblers[i] = dis; + return; + } + } + pr_err("Failed to add disassembler %d\n", dis); +} + +static int annotation_options__add_disassemblers_str(struct annotation_options *options, + const char *str) +{ + while (str && *str != '\0') { + const char *comma = strchr(str, ','); + int len = comma ? comma - str : (int)strlen(str); + bool match = false; + + for (u8 i = 0; i < ARRAY_SIZE(perf_disassembler__strs); i++) { + const char *dis_str = perf_disassembler__strs[i]; + + if (len == (int)strlen(dis_str) && !strncmp(str, dis_str, len)) { + annotation_options__add_disassembler(options, i); + match = true; + break; + } + } + if (!match) { + pr_err("Invalid disassembler '%.*s'\n", len, str); + return -1; + } + str = comma ? comma + 1 : NULL; + } + return 0; +} + static int annotation__config(const char *var, const char *value, void *data) { struct annotation_options *opt = data; @@ -1884,6 +2296,11 @@ static int annotation__config(const char *var, const char *value, void *data) opt->offset_level = ANNOTATION__MAX_OFFSET_LEVEL; else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL) opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; + } else if (!strcmp(var, "annotate.disassemblers")) { + int err = annotation_options__add_disassemblers_str(opt, value); + + if (err) + return err; } else if (!strcmp(var, "annotate.hide_src_code")) { opt->hide_src_code = perf_config_bool("hide_src_code", value); } else if (!strcmp(var, "annotate.jump_arrows")) { @@ -1941,6 +2358,8 @@ void annotation_options__init(void) opt->annotate_src = true; opt->offset_level = ANNOTATION__OFFSET_JUMP_TARGETS; opt->percent_type = PERCENT_PERIOD_LOCAL; + opt->hide_src_code = true; + opt->hide_src_code_on_title = true; } void annotation_options__exit(void) @@ -1949,9 +2368,25 @@ void annotation_options__exit(void) zfree(&annotate_opts.objdump_path); } +static void annotation_options__default_init_disassemblers(struct annotation_options *options) +{ + if (options->disassemblers[0] != PERF_DISASM_UNKNOWN) { + /* Already initialized.
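+ * e.g. because the user pinned an order via the annotate.disassemblers + * config key handled in annotation__config() above, say + * 'disassemblers = llvm,objdump' in the [annotate] section of + * ~/.perfconfig (illustrative value).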
*/ + return; + } +#ifdef HAVE_LIBLLVM_SUPPORT + annotation_options__add_disassembler(options, PERF_DISASM_LLVM); +#endif +#ifdef HAVE_LIBCAPSTONE_SUPPORT + annotation_options__add_disassembler(options, PERF_DISASM_CAPSTONE); +#endif + annotation_options__add_disassembler(options, PERF_DISASM_OBJDUMP); +} + void annotation_config__init(void) { perf_config(annotation__config, &annotate_opts); + annotation_options__default_init_disassemblers(&annotate_opts); } static unsigned int parse_percent_type(char *str1, char *str2) @@ -2060,7 +2495,7 @@ static int extract_reg_offset(struct arch *arch, const char *str, if (regname == NULL) return -1; - op_loc->reg1 = get_dwarf_regnum(regname, 0); + op_loc->reg1 = get_dwarf_regnum(regname, arch->e_machine, arch->e_flags); free(regname); /* Get the second register */ @@ -2073,7 +2508,7 @@ static int extract_reg_offset(struct arch *arch, const char *str, if (regname == NULL) return -1; - op_loc->reg2 = get_dwarf_regnum(regname, 0); + op_loc->reg2 = get_dwarf_regnum(regname, arch->e_machine, arch->e_flags); free(regname); } return 0; @@ -2123,20 +2558,33 @@ int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, for_each_insn_op_loc(loc, i, op_loc) { const char *insn_str = ops->source.raw; bool multi_regs = ops->source.multi_regs; + bool mem_ref = ops->source.mem_ref; if (i == INSN_OP_TARGET) { insn_str = ops->target.raw; multi_regs = ops->target.multi_regs; + mem_ref = ops->target.mem_ref; } /* Invalidate the register by default */ op_loc->reg1 = -1; op_loc->reg2 = -1; - if (insn_str == NULL) - continue; + if (insn_str == NULL) { + if (!arch__is(arch, "powerpc")) + continue; + } - if (strchr(insn_str, arch->objdump.memory_ref_char)) { + /* + * For powerpc, call the get_powerpc_regs() function, which extracts + * the required fields for op_loc, i.e. reg1, reg2 and offset, from + * the raw instruction.
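+ * e.g. (illustrative) for a D-form load such as 'lwz r9, 8(r31)', the + * register fields and the 16-bit displacement are decoded directly + * from the 32-bit dl->raw.raw_insn rather than parsed back out of the + * objdump text.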
+ */ + if (arch__is(arch, "powerpc")) { + op_loc->mem_ref = mem_ref; + op_loc->multi_regs = multi_regs; + get_powerpc_regs(dl->raw.raw_insn, !i, op_loc); + } else if (strchr(insn_str, arch->objdump.memory_ref_char)) { op_loc->mem_ref = true; op_loc->multi_regs = multi_regs; extract_reg_offset(arch, insn_str, op_loc); @@ -2160,7 +2608,7 @@ int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, return -1; if (*s == arch->objdump.register_char) - op_loc->reg1 = get_dwarf_regnum(s, 0); + op_loc->reg1 = get_dwarf_regnum(s, arch->e_machine, arch->e_flags); else if (*s == arch->objdump.imm_char) { op_loc->offset = strtol(s + 1, &p, 0); if (p && p != s + 1) @@ -2216,7 +2664,7 @@ static struct annotated_item_stat *annotate_data_stat(struct list_head *head, return NULL; istat->name = strdup(name); - if (istat->name == NULL) { + if ((istat->name == NULL) || (!strlen(istat->name))) { free(istat); return NULL; } @@ -2230,6 +2678,7 @@ static bool is_stack_operation(struct arch *arch, struct disasm_line *dl) if (arch__is(arch, "x86")) { if (!strncmp(dl->ins.name, "push", 4) || !strncmp(dl->ins.name, "pop", 3) || + !strncmp(dl->ins.name, "call", 4) || !strncmp(dl->ins.name, "ret", 3)) return true; } @@ -2249,6 +2698,20 @@ static bool is_stack_canary(struct arch *arch, struct annotated_op_loc *loc) return false; } +/** + * Returns true if the instruction has a memory operand without + * performing a load/store + */ +static bool is_address_gen_insn(struct arch *arch, struct disasm_line *dl) +{ + if (arch__is(arch, "x86")) { + if (!strncmp(dl->ins.name, "lea", 3)) + return true; + } + + return false; +} + static struct disasm_line * annotation__prev_asm_line(struct annotation *notes, struct disasm_line *curr) { @@ -2313,92 +2776,70 @@ u64 annotate_calc_pcrel(struct map_symbol *ms, u64 ip, int offset, return map__rip_2objdump(ms->map, addr); } -/** - * hist_entry__get_data_type - find data type for given hist entry - * @he: hist entry - * - * This function first annotates the instruction at @he->ip and extracts - * register and offset info from it. Then it searches the DWARF debug - * info to get a variable and type information using the address, register, - * and offset. - */ -struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) +static struct debuginfo_cache { + struct dso *dso; + struct debuginfo *dbg; +} di_cache; + +void debuginfo_cache__delete(void) +{ + dso__put(di_cache.dso); + di_cache.dso = NULL; + + debuginfo__delete(di_cache.dbg); + di_cache.dbg = NULL; +} + +static struct annotated_data_type * +__hist_entry__get_data_type(struct hist_entry *he, struct arch *arch, + struct debuginfo *dbg, struct disasm_line *dl, + int *type_offset) { struct map_symbol *ms = &he->ms; - struct evsel *evsel = hists_to_evsel(he->hists); - struct arch *arch; - struct disasm_line *dl; struct annotated_insn_loc loc; struct annotated_op_loc *op_loc; struct annotated_data_type *mem_type; struct annotated_item_stat *istat; - u64 ip = he->ip; int i; - ann_data_stat.total++; - - if (ms->map == NULL || ms->sym == NULL) { - ann_data_stat.no_sym++; - return NULL; - } - - if (!symbol_conf.init_annotation) { - ann_data_stat.no_sym++; - return NULL; - } - - /* Make sure it has the disasm of the function */ - if (symbol__annotate(ms, evsel, &arch) < 0) { - ann_data_stat.no_insn++; - return NULL; - } - - /* - * Get a disasm to extract the location from the insn. - * This is too slow... 
- */ - dl = find_disasm_line(ms->sym, ip, /*allow_update=*/true); - if (dl == NULL) { - ann_data_stat.no_insn++; - return NULL; - } - -retry: istat = annotate_data_stat(&ann_insn_stat, dl->ins.name); if (istat == NULL) { ann_data_stat.no_insn++; - return NULL; + return NO_TYPE; } if (annotate_get_insn_location(arch, dl, &loc) < 0) { ann_data_stat.no_insn_ops++; istat->bad++; - return NULL; + return NO_TYPE; } if (is_stack_operation(arch, dl)) { istat->good++; - he->mem_type_off = 0; + *type_offset = 0; return &stackop_type; } + if (is_address_gen_insn(arch, dl)) { + istat->bad++; + ann_data_stat.no_mem_ops++; + return NO_TYPE; + } + for_each_insn_op_loc(&loc, i, op_loc) { struct data_loc_info dloc = { .arch = arch, .thread = he->thread, .ms = ms, - /* Recalculate IP for LOCK prefix or insn fusion */ .ip = ms->sym->start + dl->al.offset, .cpumode = he->cpumode, .op = op_loc, + .di = dbg, }; if (!op_loc->mem_ref && op_loc->segment == INSN_SEG_NONE) continue; - /* Recalculate IP because of LOCK prefix or insn fusion */ - ip = ms->sym->start + dl->al.offset; - /* PC-relative addressing */ if (op_loc->reg1 == DWARF_REG_PC) { dloc.var_addr = annotate_calc_pcrel(ms, dloc.ip, @@ -2416,7 +2857,7 @@ retry: if (mem_type == NULL && is_stack_canary(arch, op_loc)) { istat->good++; - he->mem_type_off = 0; + *type_offset = 0; return &canary_type; } @@ -2426,15 +2867,95 @@ retry: istat->bad++; if (symbol_conf.annotate_data_sample) { + struct evsel *evsel = hists_to_evsel(he->hists); + annotated_data_type__update_samples(mem_type, evsel, dloc.type_offset, he->stat.nr_events, he->stat.period); } - he->mem_type_off = dloc.type_offset; - return mem_type; + *type_offset = dloc.type_offset; + return mem_type ?: NO_TYPE; + } + + /* retry with a fused instruction */ + return NULL; +} + +/** + * hist_entry__get_data_type - find data type for given hist entry + * @he: hist entry + * + * This function first annotates the instruction at @he->ip and extracts + * register and offset info from it. Then it searches the DWARF debug + * info to get a variable and type information using the address, register, + * and offset. + */ +struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) +{ + struct map_symbol *ms = &he->ms; + struct evsel *evsel = hists_to_evsel(he->hists); + struct arch *arch; + struct disasm_line *dl; + struct annotated_data_type *mem_type; + struct annotated_item_stat *istat; + u64 ip = he->ip; + + ann_data_stat.total++; + + if (ms->map == NULL || ms->sym == NULL) { + ann_data_stat.no_sym++; + return NULL; + } + + if (!symbol_conf.init_annotation) { + ann_data_stat.no_sym++; + return NULL; + } + + /* + * di_cache holds a pair of values, but code below assumes + * di_cache.dso can be compared/updated and di_cache.dbg can be + * read/updated independently from each other. That assumption only + * holds in single threaded code. + */ + assert(perf_singlethreaded); + + if (map__dso(ms->map) != di_cache.dso) { + dso__put(di_cache.dso); + di_cache.dso = dso__get(map__dso(ms->map)); + + debuginfo__delete(di_cache.dbg); + di_cache.dbg = dso__debuginfo(di_cache.dso); + } + + if (di_cache.dbg == NULL) { + ann_data_stat.no_dbginfo++; + return NULL; } + /* Make sure it has the disasm of the function */ + if (symbol__annotate(ms, evsel, &arch) < 0) { + ann_data_stat.no_insn++; + return NULL; + } + + /* + * Get a disasm to extract the location from the insn. + * This is too slow... 
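+ * it walks the symbol's whole disassembly list looking for the sample + * offset on every call.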
+ */ + dl = find_disasm_line(ms->sym, ip, /*allow_update=*/true); + if (dl == NULL) { + ann_data_stat.no_insn++; + return NULL; + } + +retry: + mem_type = __hist_entry__get_data_type(he, arch, di_cache.dbg, dl, + &he->mem_type_off); + if (mem_type) + return mem_type == NO_TYPE ? NULL : mem_type; + /* * Some instructions can be fused and the actual memory access came * from the previous instruction. @@ -2453,7 +2974,9 @@ retry: } ann_data_stat.no_mem_ops++; - istat->bad++; + istat = annotate_data_stat(&ann_insn_stat, dl->ins.name); + if (istat) + istat->bad++; return NULL; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index d5c821c22f79..d4990bff29a7 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -14,6 +14,8 @@ #include "spark.h" #include "hashmap.h" #include "disasm.h" +#include "branch.h" +#include "evsel.h" struct hist_browser_timer; struct hist_entry; @@ -22,7 +24,6 @@ struct map_symbol; struct addr_map_symbol; struct option; struct perf_sample; -struct evsel; struct symbol; struct annotated_data_type; @@ -30,10 +31,20 @@ struct annotated_data_type; #define ANNOTATION__CYCLES_WIDTH 6 #define ANNOTATION__MINMAX_CYCLES_WIDTH 19 #define ANNOTATION__AVG_IPC_WIDTH 36 +#define ANNOTATION__BR_CNTR_WIDTH 30 #define ANNOTATION_DUMMY_LEN 256 +enum perf_disassembler { + PERF_DISASM_UNKNOWN = 0, + PERF_DISASM_LLVM, + PERF_DISASM_CAPSTONE, + PERF_DISASM_OBJDUMP, +}; +#define MAX_DISASSEMBLERS (PERF_DISASM_OBJDUMP + 1) + struct annotation_options { bool hide_src_code, + hide_src_code_on_title, use_offset, jump_arrows, print_lines, @@ -43,9 +54,13 @@ struct annotation_options { show_nr_jumps, show_minmax_cycle, show_asm_raw, + show_br_cntr, annotate_src, + code_with_type, full_addr; u8 offset_level; + u8 disassemblers[MAX_DISASSEMBLERS]; + u8 disassembler_used; int min_pcnt; int max_lines; int context; @@ -103,6 +118,10 @@ struct annotation_line { char *fileloc; char *path; struct cycles_info *cycles; + int num_aggr; + int br_cntr_nr; + u64 *br_cntr; + struct evsel *evsel; int jump_sources; u32 idx; int idx_asm; @@ -113,11 +132,16 @@ struct annotation_line { struct disasm_line { struct ins ins; struct ins_operands ops; - + union { + u8 bytes[4]; + u32 raw_insn; + } raw; /* This needs to be at the end. 
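+ * (struct annotation_line ends with a flexible array of per-event + * annotation_data entries, so any member placed after 'al' would be + * clobbered).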
*/ struct annotation_line al; }; +extern const char * const perf_disassembler__strs[]; + void annotation_line__add(struct annotation_line *al, struct list_head *head); static inline double annotation_data__percent(struct annotation_data *data, @@ -175,8 +199,20 @@ struct annotation_write_ops { void (*write_graph)(void *obj, int graph); }; +struct annotation_print_data { + struct hist_entry *he; + struct evsel *evsel; + struct arch *arch; + struct debuginfo *dbg; + /* save data type info keyed by al->offset */ + struct hashmap *type_hash; + /* It'll be set in hist_entry__annotate_printf() */ + int addr_fmt_width; +}; + void annotation_line__write(struct annotation_line *al, struct annotation *notes, - struct annotation_write_ops *ops); + const struct annotation_write_ops *ops, + struct annotation_print_data *apd); int __annotation__scnprintf_samples_period(struct annotation *notes, char *bf, size_t size, @@ -270,6 +306,7 @@ struct annotated_source { int nr_entries; int nr_asm_entries; int max_jump_sources; + bool tried_source; u64 start; struct { u8 addr; @@ -285,6 +322,9 @@ struct annotated_source { struct annotation_line *annotated_source__get_line(struct annotated_source *src, s64 offset); +/* A branch counter once saturated */ +#define ANNOTATION__BR_CNTR_SATURATED_FLAG (1ULL << 63) + /** * struct annotated_branch - basic block and IPC information for a symbol. * @@ -294,6 +334,7 @@ struct annotation_line *annotated_source__get_line(struct annotated_source *src, * @cover_insn: Number of distinct, actually executed instructions. * @cycles_hist: Array of cyc_hist for each instruction. * @max_coverage: Maximum number of covered basic block (used for block-range). + * @br_cntr: Array of the occurrences of events (branch counters) during a block. * * This struct is used by two different codes when the sample has branch stack * and cycles information. annotation__compute_ipc() calculates average IPC @@ -310,6 +351,7 @@ struct annotated_branch { unsigned int cover_insn; struct cyc_hist *cycles_hist; u64 max_coverage; + u64 *br_cntr; }; struct LOCKABLE annotation { @@ -336,7 +378,7 @@ static inline int annotation__cycles_width(struct annotation *notes) static inline int annotation__pcnt_width(struct annotation *notes) { - return (symbol_conf.show_total_period ? 12 : 7) * notes->src->nr_events; + return (symbol_conf.show_total_period ? 12 : 8) * notes->src->nr_events; } static inline bool annotation_line__filter(struct annotation_line *al) @@ -344,24 +386,31 @@ static inline bool annotation_line__filter(struct annotation_line *al) return annotate_opts.hide_src_code && al->offset == -1; } +static inline u8 annotation__br_cntr_width(void) +{ + return annotate_opts.show_br_cntr ? 
ANNOTATION__BR_CNTR_WIDTH : 0; +} + void annotation__update_column_widths(struct annotation *notes); void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms); -static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx) +static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, + const struct evsel *evsel) { - return &src->histograms[idx]; + return &src->histograms[evsel->core.idx]; } -static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx) +static inline struct sym_hist *annotation__histogram(struct annotation *notes, + const struct evsel *evsel) { - return annotated_source__histogram(notes->src, idx); + return annotated_source__histogram(notes->src, evsel); } static inline struct sym_hist_entry * -annotated_source__hist_entry(struct annotated_source *src, int idx, u64 offset) +annotated_source__hist_entry(struct annotated_source *src, const struct evsel *evsel, u64 offset) { struct sym_hist_entry *entry; - long key = offset << 16 | idx; + long key = offset << 16 | evsel->core.idx; if (!hashmap__find(src->samples, key, &entry)) return NULL; @@ -380,7 +429,9 @@ struct annotated_branch *annotation__get_branch(struct annotation *notes); int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, struct addr_map_symbol *start, - unsigned cycles); + unsigned cycles, + struct evsel *evsel, + u64 br_cntr); int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample, struct evsel *evsel, u64 addr); @@ -413,36 +464,25 @@ enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP, SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE, SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF, + SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE, __SYMBOL_ANNOTATE_ERRNO__END, }; int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen); -int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel); -void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); -void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); +void symbol__annotate_zero_histogram(struct symbol *sym, struct evsel *evsel); +void symbol__annotate_decay_histogram(struct symbol *sym, struct evsel *evsel); void annotated_source__purge(struct annotated_source *as); -int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel); +int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, + struct hist_entry *he); bool ui__has_annotation(void); -int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel); - -int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel); - -#ifdef HAVE_SLANG_SUPPORT -int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt); -#else -static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused, - struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused) -{ - return 0; -} -#endif +int hist_entry__annotate_printf(struct hist_entry *he, struct evsel *evsel); +int hist_entry__tty_annotate(struct hist_entry *he, struct evsel *evsel); +int hist_entry__tty_annotate2(struct hist_entry *he, struct evsel *evsel); void annotation_options__init(void); void annotation_options__exit(void); @@ -540,4 +580,11 @@ struct annotated_basic_block { int annotate_get_basic_blocks(struct symbol *sym, s64 src, s64 dst, struct list_head *head); +void debuginfo_cache__delete(void); + +int 
annotation_br_cntr_entry(char **str, int br_cntr_nr, u64 *br_cntr, + int num_aggr, struct evsel *evsel); +int annotation_br_cntr_abbr_list(char **str, struct evsel *evsel, bool header); + +int evsel__get_arch(struct evsel *evsel, struct arch **parch); #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/arm-spe-decoder/Build b/tools/perf/util/arm-spe-decoder/Build index f8dae13fc876..ab500e0efe24 100644 --- a/tools/perf/util/arm-spe-decoder/Build +++ b/tools/perf/util/arm-spe-decoder/Build @@ -1 +1 @@ -perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o +perf-util-y += arm-spe-pkt-decoder.o arm-spe-decoder.o diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index ba807071d3c1..9e02b2bdd117 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -28,7 +28,8 @@ static u64 arm_spe_calc_ip(int index, u64 payload) /* Instruction virtual address or Branch target address */ if (index == SPE_ADDR_PKT_HDR_INDEX_INS || - index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) { + index == SPE_ADDR_PKT_HDR_INDEX_BRANCH || + index == SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH) { ns = SPE_ADDR_PKT_GET_NS(payload); el = SPE_ADDR_PKT_GET_EL(payload); @@ -181,6 +182,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) decoder->record.virt_addr = ip; else if (idx == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) decoder->record.phys_addr = ip; + else if (idx == SPE_ADDR_PKT_HDR_INDEX_PREV_BRANCH) + decoder->record.prev_br_tgt = ip; break; case ARM_SPE_COUNTER: if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT) @@ -197,16 +200,76 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) decoder->record.op |= ARM_SPE_OP_ST; else decoder->record.op |= ARM_SPE_OP_LD; - if (SPE_OP_PKT_IS_LDST_SVE(payload)) - decoder->record.op |= ARM_SPE_OP_SVE_LDST; + + if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload)) { + decoder->record.op |= ARM_SPE_OP_GP_REG; + } else if (SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload)) { + decoder->record.op |= ARM_SPE_OP_SIMD_FP; + } else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload)) { + decoder->record.op |= ARM_SPE_OP_UNSPEC_REG; + } else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload)) { + decoder->record.op |= ARM_SPE_OP_NV_SYSREG; + } else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload)) { + decoder->record.op |= ARM_SPE_OP_MTE_TAG; + } else if (SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(payload)) { + if (payload & SPE_OP_PKT_AR) + decoder->record.op |= ARM_SPE_OP_AR; + if (payload & SPE_OP_PKT_EXCL) + decoder->record.op |= ARM_SPE_OP_EXCL; + if (payload & SPE_OP_PKT_AT) + decoder->record.op |= ARM_SPE_OP_ATOMIC; + } else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) { + decoder->record.op |= ARM_SPE_OP_SVE; + if (payload & SPE_OP_PKT_SVE_PRED) + decoder->record.op |= ARM_SPE_OP_PRED; + if (payload & SPE_OP_PKT_SVE_SG) + decoder->record.op |= ARM_SPE_OP_SG; + } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload)) { + decoder->record.op |= ARM_SPE_OP_MEMCPY; + } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload)) { + decoder->record.op |= ARM_SPE_OP_MEMSET; + } else if (SPE_OP_PKT_LDST_SUBCLASS_GCS(payload)) { + decoder->record.op |= ARM_SPE_OP_GCS; + if (payload & SPE_OP_PKT_GCS_COMM) + decoder->record.op |= ARM_SPE_OP_COMM; + } + break; case SPE_OP_PKT_HDR_CLASS_OTHER: decoder->record.op |= ARM_SPE_OP_OTHER; - if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) - decoder->record.op |= ARM_SPE_OP_SVE_OTHER; + if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload)) { + decoder->record.op 
|= ARM_SPE_OP_SVE | ARM_SPE_OP_DP; + if (payload & SPE_OP_PKT_OTHER_FP) + decoder->record.op |= ARM_SPE_OP_FP; + if (payload & SPE_OP_PKT_SVE_PRED) + decoder->record.op |= ARM_SPE_OP_PRED; + } else if (SPE_OP_PKT_OTHER_SUBCLASS_SME(payload)) { + decoder->record.op |= ARM_SPE_OP_SME; + if (payload & SPE_OP_PKT_OTHER_FP) + decoder->record.op |= ARM_SPE_OP_FP; + } else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) { + if (payload & SPE_OP_PKT_OTHER_ASE) + decoder->record.op |= ARM_SPE_OP_ASE; + if (payload & SPE_OP_PKT_OTHER_FP) + decoder->record.op |= ARM_SPE_OP_FP; + if (payload & SPE_OP_PKT_COND) + decoder->record.op |= ARM_SPE_OP_COND; + } break; case SPE_OP_PKT_HDR_CLASS_BR_ERET: decoder->record.op |= ARM_SPE_OP_BRANCH_ERET; + if (payload & SPE_OP_PKT_COND) + decoder->record.op |= ARM_SPE_OP_BR_COND; + if (payload & SPE_OP_PKT_INDIRECT_BRANCH) + decoder->record.op |= ARM_SPE_OP_BR_INDIRECT; + if (payload & SPE_OP_PKT_GCS) + decoder->record.op |= ARM_SPE_OP_BR_GCS; + if (SPE_OP_PKT_CR_BL(payload)) + decoder->record.op |= ARM_SPE_OP_BR_CR_BL; + if (SPE_OP_PKT_CR_RET(payload)) + decoder->record.op |= ARM_SPE_OP_BR_CR_RET; + if (SPE_OP_PKT_CR_NON_BL_RET(payload)) + decoder->record.op |= ARM_SPE_OP_BR_CR_NON_BL_RET; break; default: pr_err("Get packet error!\n"); @@ -214,36 +277,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) } break; case ARM_SPE_EVENTS: - if (payload & BIT(EV_L1D_REFILL)) - decoder->record.type |= ARM_SPE_L1D_MISS; - - if (payload & BIT(EV_L1D_ACCESS)) - decoder->record.type |= ARM_SPE_L1D_ACCESS; - - if (payload & BIT(EV_TLB_WALK)) - decoder->record.type |= ARM_SPE_TLB_MISS; - - if (payload & BIT(EV_TLB_ACCESS)) - decoder->record.type |= ARM_SPE_TLB_ACCESS; - - if (payload & BIT(EV_LLC_MISS)) - decoder->record.type |= ARM_SPE_LLC_MISS; - - if (payload & BIT(EV_LLC_ACCESS)) - decoder->record.type |= ARM_SPE_LLC_ACCESS; - - if (payload & BIT(EV_REMOTE_ACCESS)) - decoder->record.type |= ARM_SPE_REMOTE_ACCESS; - - if (payload & BIT(EV_MISPRED)) - decoder->record.type |= ARM_SPE_BRANCH_MISS; - - if (payload & BIT(EV_PARTIAL_PREDICATE)) - decoder->record.type |= ARM_SPE_SVE_PARTIAL_PRED; - - if (payload & BIT(EV_EMPTY_PREDICATE)) - decoder->record.type |= ARM_SPE_SVE_EMPTY_PRED; - + decoder->record.type = payload; break; case ARM_SPE_DATA_SOURCE: decoder->record.source = payload; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 1443c28545a9..3310e05122f0 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -13,67 +13,112 @@ #include "arm-spe-pkt-decoder.h" -enum arm_spe_sample_type { - ARM_SPE_L1D_ACCESS = 1 << 0, - ARM_SPE_L1D_MISS = 1 << 1, - ARM_SPE_LLC_ACCESS = 1 << 2, - ARM_SPE_LLC_MISS = 1 << 3, - ARM_SPE_TLB_ACCESS = 1 << 4, - ARM_SPE_TLB_MISS = 1 << 5, - ARM_SPE_BRANCH_MISS = 1 << 6, - ARM_SPE_REMOTE_ACCESS = 1 << 7, - ARM_SPE_SVE_PARTIAL_PRED = 1 << 8, - ARM_SPE_SVE_EMPTY_PRED = 1 << 9, -}; +#define ARM_SPE_L1D_ACCESS BIT(EV_L1D_ACCESS) +#define ARM_SPE_L1D_MISS BIT(EV_L1D_REFILL) +#define ARM_SPE_LLC_ACCESS BIT(EV_LLC_ACCESS) +#define ARM_SPE_LLC_MISS BIT(EV_LLC_MISS) +#define ARM_SPE_TLB_ACCESS BIT(EV_TLB_ACCESS) +#define ARM_SPE_TLB_MISS BIT(EV_TLB_WALK) +#define ARM_SPE_BRANCH_MISS BIT(EV_MISPRED) +#define ARM_SPE_BRANCH_NOT_TAKEN BIT(EV_NOT_TAKEN) +#define ARM_SPE_REMOTE_ACCESS BIT(EV_REMOTE_ACCESS) +#define ARM_SPE_SVE_PARTIAL_PRED BIT(EV_PARTIAL_PREDICATE) +#define ARM_SPE_SVE_EMPTY_PRED 
BIT(EV_EMPTY_PREDICATE) +#define ARM_SPE_IN_TXN BIT(EV_TRANSACTIONAL) +#define ARM_SPE_L2D_ACCESS BIT(EV_L2D_ACCESS) +#define ARM_SPE_L2D_MISS BIT(EV_L2D_MISS) +#define ARM_SPE_RECENTLY_FETCHED BIT(EV_RECENTLY_FETCHED) +#define ARM_SPE_DATA_SNOOPED BIT(EV_DATA_SNOOPED) +#define ARM_SPE_HITM BIT(EV_CACHE_DATA_MODIFIED) enum arm_spe_op_type { /* First level operation type */ ARM_SPE_OP_OTHER = 1 << 0, ARM_SPE_OP_LDST = 1 << 1, ARM_SPE_OP_BRANCH_ERET = 1 << 2, +}; + +enum arm_spe_2nd_op_ldst { + ARM_SPE_OP_GP_REG = 1 << 8, + ARM_SPE_OP_UNSPEC_REG = 1 << 9, + ARM_SPE_OP_NV_SYSREG = 1 << 10, + ARM_SPE_OP_SIMD_FP = 1 << 11, + ARM_SPE_OP_SVE = 1 << 12, + ARM_SPE_OP_MTE_TAG = 1 << 13, + ARM_SPE_OP_MEMCPY = 1 << 14, + ARM_SPE_OP_MEMSET = 1 << 15, + ARM_SPE_OP_GCS = 1 << 16, + ARM_SPE_OP_SME = 1 << 17, + ARM_SPE_OP_ASE = 1 << 18, + + /* Assisted information for memory / SIMD */ + ARM_SPE_OP_LD = 1 << 20, + ARM_SPE_OP_ST = 1 << 21, + ARM_SPE_OP_ATOMIC = 1 << 22, + ARM_SPE_OP_EXCL = 1 << 23, + ARM_SPE_OP_AR = 1 << 24, + ARM_SPE_OP_DP = 1 << 25, /* Data processing */ + ARM_SPE_OP_PRED = 1 << 26, /* Predicated */ + ARM_SPE_OP_SG = 1 << 27, /* Gather/Scatter */ + ARM_SPE_OP_COMM = 1 << 28, /* Common */ + ARM_SPE_OP_FP = 1 << 29, /* Floating-point */ + ARM_SPE_OP_COND = 1 << 30, /* Conditional */ +}; + +enum arm_spe_2nd_op_branch { + ARM_SPE_OP_BR_COND = 1 << 8, + ARM_SPE_OP_BR_INDIRECT = 1 << 9, + ARM_SPE_OP_BR_GCS = 1 << 10, + ARM_SPE_OP_BR_CR_BL = 1 << 11, + ARM_SPE_OP_BR_CR_RET = 1 << 12, + ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 13, +}; + +enum arm_spe_common_data_source { + ARM_SPE_COMMON_DS_L1D = 0x0, + ARM_SPE_COMMON_DS_L2 = 0x8, + ARM_SPE_COMMON_DS_PEER_CORE = 0x9, + ARM_SPE_COMMON_DS_LOCAL_CLUSTER = 0xa, + ARM_SPE_COMMON_DS_SYS_CACHE = 0xb, + ARM_SPE_COMMON_DS_PEER_CLUSTER = 0xc, + ARM_SPE_COMMON_DS_REMOTE = 0xd, + ARM_SPE_COMMON_DS_DRAM = 0xe, +}; - /* Second level operation type for OTHER */ - ARM_SPE_OP_SVE_OTHER = 1 << 16, - ARM_SPE_OP_SVE_FP = 1 << 17, - ARM_SPE_OP_SVE_PRED_OTHER = 1 << 18, - - /* Second level operation type for LDST */ - ARM_SPE_OP_LD = 1 << 16, - ARM_SPE_OP_ST = 1 << 17, - ARM_SPE_OP_ATOMIC = 1 << 18, - ARM_SPE_OP_EXCL = 1 << 19, - ARM_SPE_OP_AR = 1 << 20, - ARM_SPE_OP_SIMD_FP = 1 << 21, - ARM_SPE_OP_GP_REG = 1 << 22, - ARM_SPE_OP_UNSPEC_REG = 1 << 23, - ARM_SPE_OP_NV_SYSREG = 1 << 24, - ARM_SPE_OP_SVE_LDST = 1 << 25, - ARM_SPE_OP_SVE_PRED_LDST = 1 << 26, - ARM_SPE_OP_SVE_SG = 1 << 27, - - /* Second level operation type for BRANCH_ERET */ - ARM_SPE_OP_BR_COND = 1 << 16, - ARM_SPE_OP_BR_INDIRECT = 1 << 17, +enum arm_spe_ampereone_data_source { + ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE = 0x0, + ARM_SPE_AMPEREONE_SLC = 0x3, + ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE = 0x5, + ARM_SPE_AMPEREONE_DDR = 0x7, + ARM_SPE_AMPEREONE_L1D = 0x8, + ARM_SPE_AMPEREONE_L2D = 0x9, }; -enum arm_spe_neoverse_data_source { - ARM_SPE_NV_L1D = 0x0, - ARM_SPE_NV_L2 = 0x8, - ARM_SPE_NV_PEER_CORE = 0x9, - ARM_SPE_NV_LOCAL_CLUSTER = 0xa, - ARM_SPE_NV_SYS_CACHE = 0xb, - ARM_SPE_NV_PEER_CLUSTER = 0xc, - ARM_SPE_NV_REMOTE = 0xd, - ARM_SPE_NV_DRAM = 0xe, +enum arm_spe_hisi_hip_data_source { + ARM_SPE_HISI_HIP_PEER_CPU = 0, + ARM_SPE_HISI_HIP_PEER_CPU_HITM = 1, + ARM_SPE_HISI_HIP_L3 = 2, + ARM_SPE_HISI_HIP_L3_HITM = 3, + ARM_SPE_HISI_HIP_PEER_CLUSTER = 4, + ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM = 5, + ARM_SPE_HISI_HIP_REMOTE_SOCKET = 6, + ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM = 7, + ARM_SPE_HISI_HIP_LOCAL_MEM = 8, + ARM_SPE_HISI_HIP_REMOTE_MEM = 9, + ARM_SPE_HISI_HIP_NC_DEV = 13, + ARM_SPE_HISI_HIP_L2 = 16, + 
ARM_SPE_HISI_HIP_L2_HITM = 17, + ARM_SPE_HISI_HIP_L1 = 18, }; struct arm_spe_record { - enum arm_spe_sample_type type; + u64 type; int err; u32 op; u32 latency; u64 from_ip; u64 to_ip; + u64 prev_br_tgt; u64 timestamp; u64 virt_addr; u64 phys_addr; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index a454c6737563..5769ba2f4140 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -10,24 +10,11 @@ #include <byteswap.h> #include <linux/bitops.h> #include <stdarg.h> +#include <linux/kernel.h> +#include <linux/unaligned.h> #include "arm-spe-pkt-decoder.h" -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#define le16_to_cpu bswap_16 -#define le32_to_cpu bswap_32 -#define le64_to_cpu bswap_64 -#define memcpy_le64(d, s, n) do { \ - memcpy((d), (s), (n)); \ - *(d) = le64_to_cpu(*(d)); \ -} while (0) -#else -#define le16_to_cpu -#define le32_to_cpu -#define le64_to_cpu -#define memcpy_le64 memcpy -#endif - static const char * const arm_spe_packet_name[] = { [ARM_SPE_PAD] = "PAD", [ARM_SPE_END] = "END", @@ -70,9 +57,9 @@ static int arm_spe_get_payload(const unsigned char *buf, size_t len, switch (payload_len) { case 1: packet->payload = *(uint8_t *)buf; break; - case 2: packet->payload = le16_to_cpu(*(uint16_t *)buf); break; - case 4: packet->payload = le32_to_cpu(*(uint32_t *)buf); break; - case 8: packet->payload = le64_to_cpu(*(uint64_t *)buf); break; + case 2: packet->payload = get_unaligned_le16(buf); break; + case 4: packet->payload = get_unaligned_le32(buf); break; + case 8: packet->payload = get_unaligned_le64(buf); break; default: return ARM_SPE_BAD_PACKET; } @@ -321,10 +308,26 @@ static int arm_spe_pkt_desc_event(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " REMOTE-ACCESS"); if (payload & BIT(EV_ALIGNMENT)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " ALIGNMENT"); + if (payload & BIT(EV_TRANSACTIONAL)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " TXN"); if (payload & BIT(EV_PARTIAL_PREDICATE)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-PARTIAL-PRED"); if (payload & BIT(EV_EMPTY_PREDICATE)) arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-EMPTY-PRED"); + if (payload & BIT(EV_L2D_ACCESS)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " L2D-ACCESS"); + if (payload & BIT(EV_L2D_MISS)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " L2D-MISS"); + if (payload & BIT(EV_CACHE_DATA_MODIFIED)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " HITM"); + if (payload & BIT(EV_RECENTLY_FETCHED)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " LFB"); + if (payload & BIT(EV_DATA_SNOOPED)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SNOOPED"); + if (payload & BIT(EV_STREAMING_SVE_MODE)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " STREAMING-SVE"); + if (payload & BIT(EV_SMCU)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SMCU"); return err; } @@ -337,7 +340,7 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, switch (packet->index) { case SPE_OP_PKT_HDR_CLASS_OTHER: - if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) { + if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, "SVE-OTHER"); /* SVE effective vector length */ @@ -348,8 +351,21 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP"); if (payload & SPE_OP_PKT_SVE_PRED) arm_spe_pkt_out_string(&err, &buf, &buf_len, " 
PRED"); - } else { + } else if (SPE_OP_PKT_OTHER_SUBCLASS_SME(payload)) { + arm_spe_pkt_out_string(&err, &buf, &buf_len, "SME-OTHER"); + + /* SME effective vector length or tile size */ + arm_spe_pkt_out_string(&err, &buf, &buf_len, " ETS %d", + SPE_OP_PKG_SME_ETS(payload)); + + if (payload & SPE_OP_PKT_OTHER_FP) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP"); + } else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER"); + if (payload & SPE_OP_PKT_OTHER_ASE) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " ASE"); + if (payload & SPE_OP_PKT_OTHER_FP) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP"); arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s", payload & SPE_OP_PKT_COND ? "COND-SELECT" : "INSN-OTHER"); @@ -359,42 +375,30 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, payload & 0x1 ? "ST" : "LD"); - if (SPE_OP_PKT_IS_LDST_ATOMIC(payload)) { + if (SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(payload)) { if (payload & SPE_OP_PKT_AT) arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT"); if (payload & SPE_OP_PKT_EXCL) arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL"); if (payload & SPE_OP_PKT_AR) arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR"); - } - - switch (SPE_OP_PKT_LDST_SUBCLASS_GET(payload)) { - case SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP: + } else if (SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_GP_REG: + } else if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG: + } else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG: + } else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG: + } else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " MTE-TAG"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_MEMCPY: + } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMCPY"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_MEMSET: + } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMSET"); - break; - default: - break; - } + } else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) { + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-SME-REG"); - if (SPE_OP_PKT_IS_LDST_SVE(payload)) { /* SVE effective vector length */ arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d", SPE_OP_PKG_SVE_EVL(payload)); @@ -403,6 +407,10 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED"); if (payload & SPE_OP_PKT_SVE_SG) arm_spe_pkt_out_string(&err, &buf, &buf_len, " SG"); + } else if (SPE_OP_PKT_LDST_SUBCLASS_GCS(payload)) { + arm_spe_pkt_out_string(&err, &buf, &buf_len, " GCS"); + if (payload & SPE_OP_PKT_GCS_COMM) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " COMM"); } break; case SPE_OP_PKT_HDR_CLASS_BR_ERET: @@ -410,10 +418,16 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, if (payload & SPE_OP_PKT_COND) arm_spe_pkt_out_string(&err, &buf, &buf_len, " COND"); - - if (SPE_OP_PKT_IS_INDIRECT_BRANCH(payload)) + if (payload & 
SPE_OP_PKT_INDIRECT_BRANCH) arm_spe_pkt_out_string(&err, &buf, &buf_len, " IND"); - + if (payload & SPE_OP_PKT_GCS) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " GCS"); + if (SPE_OP_PKT_CR_BL(payload)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-BL"); + if (SPE_OP_PKT_CR_RET(payload)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-RET"); + if (SPE_OP_PKT_CR_NON_BL_RET(payload)) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " CR-NON-BL-RET"); break; default: /* Unknown index */ diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index 464a912b221c..adf4cde320aa 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -7,6 +7,7 @@ #ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__ #define INCLUDE__ARM_SPE_PKT_DECODER_H__ +#include <linux/bitfield.h> #include <stddef.h> #include <stdint.h> @@ -104,8 +105,16 @@ enum arm_spe_events { EV_LLC_MISS = 9, EV_REMOTE_ACCESS = 10, EV_ALIGNMENT = 11, + EV_TRANSACTIONAL = 16, EV_PARTIAL_PREDICATE = 17, EV_EMPTY_PREDICATE = 18, + EV_L2D_ACCESS = 19, + EV_L2D_MISS = 20, + EV_CACHE_DATA_MODIFIED = 21, + EV_RECENTLY_FETCHED = 22, + EV_DATA_SNOOPED = 23, + EV_STREAMING_SVE_MODE = 24, + EV_SMCU = 25, }; /* Operation packet header */ @@ -114,27 +123,39 @@ enum arm_spe_events { #define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1 #define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2 -#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8) +#define SPE_OP_PKT_OTHER_SUBCLASS_OTHER(v) (((v) & GENMASK_ULL(7, 3)) == 0x0) +#define SPE_OP_PKT_OTHER_SUBCLASS_SVE(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8) +#define SPE_OP_PKT_OTHER_SUBCLASS_SME(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x88) -#define SPE_OP_PKT_COND BIT(0) +#define SPE_OP_PKT_OTHER_ASE BIT(2) +#define SPE_OP_PKT_OTHER_FP BIT(1) + +/* + * SME effective vector length or tile size (ETS) is stored in byte 0 + * bits [6:4,2]; the length is rounded up to a power of two and uses 128 + * as one step, so the ETS calculation is: + * + * 128 * (2 ^ bits [6:4,2]) = 128 << (bits [6:4,2]) + */ +#define SPE_OP_PKG_SME_ETS(v) (128 << (FIELD_GET(GENMASK_ULL(6, 4), (v)) << 1 | \ + (FIELD_GET(BIT(2), (v))))) -#define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1)) -#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0 -#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4 -#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG 0x10 -#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG 0x30 -#define SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG 0x14 -#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY 0x20 -#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET 0x25 +#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x0) +#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(v) (((v) & GENMASK_ULL(7, 1)) == 0x4) +#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x10) +#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(v) (((v) & GENMASK_ULL(7, 1)) == 0x30) +#define SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(v) (((v) & GENMASK_ULL(7, 1)) == 0x14) +#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(v) (((v) & GENMASK_ULL(7, 1)) == 0x20) +#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET(v) (((v) & GENMASK_ULL(7, 0)) == 0x25) -#define SPE_OP_PKT_IS_LDST_ATOMIC(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2) +#define SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2) #define SPE_OP_PKT_AR BIT(4) #define SPE_OP_PKT_EXCL BIT(3) #define SPE_OP_PKT_AT BIT(2) #define SPE_OP_PKT_ST BIT(0) -#define 
SPE_OP_PKT_IS_LDST_SVE(v) (((v) & (BIT(3) | BIT(1))) == 0x8) +#define SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(v) (((v) & (BIT(3) | BIT(1))) == 0x8) #define SPE_OP_PKT_SVE_SG BIT(7) /* @@ -148,7 +169,17 @@ enum arm_spe_events { #define SPE_OP_PKT_SVE_PRED BIT(2) #define SPE_OP_PKT_SVE_FP BIT(1) -#define SPE_OP_PKT_IS_INDIRECT_BRANCH(v) (((v) & GENMASK_ULL(7, 1)) == 0x2) +#define SPE_OP_PKT_LDST_SUBCLASS_GCS(v) (((v) & (GENMASK_ULL(7, 3) | BIT(1))) == 0x40) + +#define SPE_OP_PKT_GCS_COMM BIT(2) + +#define SPE_OP_PKT_CR_MASK GENMASK_ULL(4, 3) +#define SPE_OP_PKT_CR_BL(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 1) +#define SPE_OP_PKT_CR_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 2) +#define SPE_OP_PKT_CR_NON_BL_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 3) +#define SPE_OP_PKT_GCS BIT(2) +#define SPE_OP_PKT_INDIRECT_BRANCH BIT(1) +#define SPE_OP_PKT_COND BIT(0) const char *arm_spe_pkt_name(enum arm_spe_pkt_type); diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index afbd5869f6bf..dc19e72258f3 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -37,6 +37,25 @@ #include "../../arch/arm64/include/asm/cputype.h" #define MAX_TIMESTAMP (~0ULL) +#define is_ldst_op(op) (!!((op) & ARM_SPE_OP_LDST)) + +#define is_simd_op(op) (!!((op) & (ARM_SPE_OP_SIMD_FP | ARM_SPE_OP_SVE | \ + ARM_SPE_OP_SME | ARM_SPE_OP_ASE))) + +#define is_mem_op(op) (is_ldst_op(op) || is_simd_op(op)) + +#define ARM_SPE_CACHE_EVENT(lvl) \ + (ARM_SPE_##lvl##_ACCESS | ARM_SPE_##lvl##_MISS) + +#define arm_spe_is_cache_level(type, lvl) \ + ((type) & ARM_SPE_CACHE_EVENT(lvl)) + +#define arm_spe_is_cache_hit(type, lvl) \ + (((type) & ARM_SPE_CACHE_EVENT(lvl)) == ARM_SPE_##lvl##_ACCESS) + +#define arm_spe_is_cache_miss(type, lvl) \ + ((type) & ARM_SPE_##lvl##_MISS) + struct arm_spe { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -46,7 +65,6 @@ struct arm_spe { struct perf_session *session; struct machine *machine; u32 pmu_type; - u64 midr; struct perf_tsc_conversion tc; @@ -61,7 +79,6 @@ struct arm_spe { u8 sample_remote_access; u8 sample_memory; u8 sample_instructions; - u64 instructions_sample_period; u64 l1d_miss_id; u64 l1d_access_id; @@ -69,7 +86,7 @@ struct arm_spe { u64 llc_access_id; u64 tlb_miss_id; u64 tlb_access_id; - u64 branch_miss_id; + u64 branch_id; u64 remote_access_id; u64 memory_id; u64 instructions_id; @@ -78,6 +95,11 @@ struct arm_spe { unsigned long num_events; u8 use_ctx_pkt_for_pid; + + u64 **metadata; + u64 metadata_ver; + u64 metadata_nr_cpu; + bool is_homogeneous; }; struct arm_spe_queue { @@ -95,9 +117,23 @@ struct arm_spe_queue { u64 time; u64 timestamp; struct thread *thread; - u64 period_instructions; + u64 sample_count; + u32 flags; + struct branch_stack *last_branch; +}; + +struct data_source_handle { + const struct midr_range *midr_ranges; + void (*ds_synth)(const struct arm_spe_record *record, + union perf_mem_data_src *data_src); }; +#define DS(range, func) \ + { \ + .midr_ranges = range, \ + .ds_synth = arm_spe__synth_##func, \ + } + static void arm_spe_dump(struct arm_spe *spe __maybe_unused, unsigned char *buf, size_t len) { @@ -118,7 +154,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused, else pkt_len = 1; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < pkt_len; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < 16; i++) @@ -208,12 +244,22 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, speq->pid = -1; speq->tid = -1; 
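 /* pid/tid start out unknown here and are resolved later, e.g. via arm_spe_set_tid() from context packets */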
speq->cpu = -1; - speq->period_instructions = 0; /* params set */ params.get_trace = arm_spe_get_trace; params.data = speq; + if (spe->synth_opts.last_branch) { + size_t sz = sizeof(struct branch_stack); + + /* Allocate up to two entries for PBT + TGT */ + sz += sizeof(struct branch_entry) * + min(spe->synth_opts.last_branch_sz, 2U); + speq->last_branch = zalloc(sz); + if (!speq->last_branch) + goto out_free; + } + /* create new decoder */ speq->decoder = arm_spe_decoder_new(&params); if (!speq->decoder) @@ -223,6 +269,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, out_free: zfree(&speq->event_buf); + zfree(&speq->last_branch); free(speq); return NULL; @@ -273,14 +320,38 @@ static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) return 0; } +static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, int cpu) +{ + u64 i; + + if (!spe->metadata) + return NULL; + + /* CPU ID is -1 for per-thread mode */ + if (cpu < 0) { + /* + * On a heterogeneous system, a CPU ID of -1 is not enough to + * confirm that the data source packet is supported. + */ + if (!spe->is_homogeneous) + return NULL; + + /* On a homogeneous system, simply use CPU0's metadata */ + return spe->metadata[0]; + } + + for (i = 0; i < spe->metadata_nr_cpu; i++) + if (spe->metadata[i][ARM_SPE_CPU] == (u64)cpu) + return spe->metadata[i]; + + return NULL; +} + static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record) { struct simd_flags simd_flags = {}; - if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST)) - simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE; - - if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER)) + if (record->op & ARM_SPE_OP_SVE) simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE; if (record->type & ARM_SPE_SVE_PARTIAL_PRED) @@ -306,7 +377,7 @@ static void arm_spe_prep_sample(struct arm_spe *spe, sample->cpumode = arm_spe_cpumode(spe, sample->ip); sample->pid = speq->pid; sample->tid = speq->tid; - sample->period = 1; + sample->period = spe->synth_opts.period; sample->cpu = speq->cpu; sample->simd_flags = arm_spe__synth_simd_flags(record); @@ -315,6 +386,88 @@ static void arm_spe_prep_sample(struct arm_spe *spe, event->sample.header.size = sizeof(struct perf_event_header); } +static void arm_spe__prep_branch_stack(struct arm_spe_queue *speq) +{ + struct arm_spe *spe = speq->spe; + struct arm_spe_record *record = &speq->decoder->record; + struct branch_stack *bstack = speq->last_branch; + struct branch_flags *bs_flags; + unsigned int last_branch_sz = spe->synth_opts.last_branch_sz; + bool have_tgt = !!(speq->flags & PERF_IP_FLAG_BRANCH); + bool have_pbt = last_branch_sz >= (have_tgt + 1U) && record->prev_br_tgt; + size_t sz = sizeof(struct branch_stack) + + sizeof(struct branch_entry) * min(last_branch_sz, 2U) /* PBT + TGT */; + int i = 0; + + /* Clean up branch stack */ + memset(bstack, 0x0, sz); + + if (!have_tgt && !have_pbt) + return; + + if (have_tgt) { + bstack->entries[i].from = record->from_ip; + bstack->entries[i].to = record->to_ip; + + bs_flags = &bstack->entries[i].flags; + bs_flags->value = 0; + + if (record->op & ARM_SPE_OP_BR_CR_BL) { + if (record->op & ARM_SPE_OP_BR_COND) + bs_flags->type |= PERF_BR_COND_CALL; + else + bs_flags->type |= PERF_BR_CALL; + /* + * Indirect branch instruction without link (e.g. BR), + * treat this case as a function return.
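+ * An indirect branch that does not set the link register most often + * ends a function (a heuristic, not an architectural guarantee), hence + * the return-style classification below.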
+ */ + } else if (record->op & ARM_SPE_OP_BR_CR_RET || + record->op & ARM_SPE_OP_BR_INDIRECT) { + if (record->op & ARM_SPE_OP_BR_COND) + bs_flags->type |= PERF_BR_COND_RET; + else + bs_flags->type |= PERF_BR_RET; + } else if (record->op & ARM_SPE_OP_BR_CR_NON_BL_RET) { + if (record->op & ARM_SPE_OP_BR_COND) + bs_flags->type |= PERF_BR_COND; + else + bs_flags->type |= PERF_BR_UNCOND; + } else { + if (record->op & ARM_SPE_OP_BR_COND) + bs_flags->type |= PERF_BR_COND; + else + bs_flags->type |= PERF_BR_UNKNOWN; + } + + if (record->type & ARM_SPE_BRANCH_MISS) { + bs_flags->mispred = 1; + bs_flags->predicted = 0; + } else { + bs_flags->mispred = 0; + bs_flags->predicted = 1; + } + + if (record->type & ARM_SPE_BRANCH_NOT_TAKEN) + bs_flags->not_taken = 1; + + if (record->type & ARM_SPE_IN_TXN) + bs_flags->in_tx = 1; + + bs_flags->cycles = min(record->latency, 0xFFFFU); + i++; + } + + if (have_pbt) { + bs_flags = &bstack->entries[i].flags; + bs_flags->type |= PERF_BR_UNKNOWN; + bstack->entries[i].to = record->prev_br_tgt; + i++; + } + + bstack->nr = i; + bstack->hw_idx = -1ULL; +} + static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type) { event->header.size = perf_event__sample_event_size(sample, type, 0); @@ -343,23 +496,28 @@ arm_spe_deliver_synth_event(struct arm_spe *spe, } static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq, - u64 spe_events_id, u64 data_src) + u64 spe_events_id, + union perf_mem_data_src data_src) { struct arm_spe *spe = speq->spe; struct arm_spe_record *record = &speq->decoder->record; union perf_event *event = speq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; + perf_sample__init(&sample, /*all=*/true); arm_spe_prep_sample(spe, speq, event, &sample); sample.id = spe_events_id; sample.stream_id = spe_events_id; sample.addr = record->virt_addr; sample.phys_addr = record->phys_addr; - sample.data_src = data_src; + sample.data_src = data_src.val; sample.weight = record->latency; - return arm_spe_deliver_synth_event(spe, speq, event, &sample); + ret = arm_spe_deliver_synth_event(spe, speq, event, &sample); + perf_sample__exit(&sample); + return ret; } static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, @@ -368,56 +526,115 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, struct arm_spe *spe = speq->spe; struct arm_spe_record *record = &speq->decoder->record; union perf_event *event = speq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; + perf_sample__init(&sample, /*all=*/true); arm_spe_prep_sample(spe, speq, event, &sample); sample.id = spe_events_id; sample.stream_id = spe_events_id; sample.addr = record->to_ip; sample.weight = record->latency; + sample.flags = speq->flags; + sample.branch_stack = speq->last_branch; - return arm_spe_deliver_synth_event(spe, speq, event, &sample); + ret = arm_spe_deliver_synth_event(spe, speq, event, &sample); + perf_sample__exit(&sample); + return ret; } static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, - u64 spe_events_id, u64 data_src) + u64 spe_events_id, + union perf_mem_data_src data_src) { struct arm_spe *spe = speq->spe; struct arm_spe_record *record = &speq->decoder->record; union perf_event *event = speq->event_buf; - struct perf_sample sample = { .ip = 0, }; - - /* - * Handles perf instruction sampling period. 
- */ - speq->period_instructions++; - if (speq->period_instructions < spe->instructions_sample_period) - return 0; - speq->period_instructions = 0; + struct perf_sample sample; + int ret; + perf_sample__init(&sample, /*all=*/true); arm_spe_prep_sample(spe, speq, event, &sample); sample.id = spe_events_id; sample.stream_id = spe_events_id; - sample.addr = record->virt_addr; + sample.addr = record->to_ip; sample.phys_addr = record->phys_addr; - sample.data_src = data_src; - sample.period = spe->instructions_sample_period; + sample.data_src = data_src.val; sample.weight = record->latency; + sample.flags = speq->flags; + sample.branch_stack = speq->last_branch; - return arm_spe_deliver_synth_event(spe, speq, event, &sample); + ret = arm_spe_deliver_synth_event(spe, speq, event, &sample); + perf_sample__exit(&sample); + return ret; } -static const struct midr_range neoverse_spe[] = { +static const struct midr_range common_ds_encoding_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X4), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), {}, }; -static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record, - union perf_mem_data_src *data_src) +static const struct midr_range ampereone_ds_encoding_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_AMPERE1A), + {}, +}; + +static const struct midr_range hisi_hip_ds_encoding_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_HISI_HIP12), + {}, +}; + +static void arm_spe__sample_flags(struct arm_spe_queue *speq) +{ + const struct arm_spe_record *record = &speq->decoder->record; + + speq->flags = 0; + if (record->op & ARM_SPE_OP_BRANCH_ERET) { + speq->flags = PERF_IP_FLAG_BRANCH; + + if (record->type & ARM_SPE_BRANCH_MISS) + speq->flags |= PERF_IP_FLAG_BRANCH_MISS; + + if (record->type & ARM_SPE_BRANCH_NOT_TAKEN) + speq->flags |= PERF_IP_FLAG_NOT_TAKEN; + + if (record->type & ARM_SPE_IN_TXN) + speq->flags |= PERF_IP_FLAG_IN_TX; + + if (record->op & ARM_SPE_OP_BR_COND) + speq->flags |= PERF_IP_FLAG_CONDITIONAL; + + if (record->op & ARM_SPE_OP_BR_CR_BL) + speq->flags |= PERF_IP_FLAG_CALL; + else if (record->op & ARM_SPE_OP_BR_CR_RET) + speq->flags |= PERF_IP_FLAG_RETURN; + /* + * Indirect branch instruction without link (e.g. BR), + * take it as a function return. 
+ */ + else if (record->op & ARM_SPE_OP_BR_INDIRECT) + speq->flags |= PERF_IP_FLAG_RETURN; + } +} + +static void arm_spe__synth_data_source_common(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) { /* * Even though four levels of cache hierarchy are possible, no known @@ -439,17 +656,17 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec } switch (record->source) { - case ARM_SPE_NV_L1D: + case ARM_SPE_COMMON_DS_L1D: data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; break; - case ARM_SPE_NV_L2: + case ARM_SPE_COMMON_DS_L2: data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; break; - case ARM_SPE_NV_PEER_CORE: + case ARM_SPE_COMMON_DS_PEER_CORE: data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; @@ -458,8 +675,8 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec * We don't know if this is L1, L2 but we do know it was a cache-2-cache * transfer, so set SNOOPX_PEER */ - case ARM_SPE_NV_LOCAL_CLUSTER: - case ARM_SPE_NV_PEER_CLUSTER: + case ARM_SPE_COMMON_DS_LOCAL_CLUSTER: + case ARM_SPE_COMMON_DS_PEER_CLUSTER: data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; @@ -467,7 +684,7 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec /* * System cache is assumed to be L3 */ - case ARM_SPE_NV_SYS_CACHE: + case ARM_SPE_COMMON_DS_SYS_CACHE: data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; data_src->mem_snoop = PERF_MEM_SNOOP_HIT; @@ -476,61 +693,321 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec * We don't know what level it hit in, except it came from the other * socket */ - case ARM_SPE_NV_REMOTE: - data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1; + case ARM_SPE_COMMON_DS_REMOTE: + data_src->mem_lvl = PERF_MEM_LVL_NA; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_COMMON_DS_DRAM: + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + default: + break; + } +} + +/* + * Source is IMPDEF. Here we convert the source code used on AmpereOne cores + * to the common (Neoverse, Cortex) to avoid duplicating the decoding code. 
+ */ +static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + struct arm_spe_record common_record; + + switch (record->source) { + case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE: + common_record.source = ARM_SPE_COMMON_DS_PEER_CORE; + break; + case ARM_SPE_AMPEREONE_SLC: + common_record.source = ARM_SPE_COMMON_DS_SYS_CACHE; + break; + case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE: + common_record.source = ARM_SPE_COMMON_DS_REMOTE; + break; + case ARM_SPE_AMPEREONE_DDR: + common_record.source = ARM_SPE_COMMON_DS_DRAM; + break; + case ARM_SPE_AMPEREONE_L1D: + common_record.source = ARM_SPE_COMMON_DS_L1D; + break; + case ARM_SPE_AMPEREONE_L2D: + common_record.source = ARM_SPE_COMMON_DS_L2; + break; + default: + pr_warning_once("AmpereOne: Unknown data source (0x%x)\n", + record->source); + return; + } + + common_record.op = record->op; + arm_spe__synth_data_source_common(&common_record, data_src); +} + +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + /* Use common synthesis method to handle store operations */ + if (record->op & ARM_SPE_OP_ST) { + arm_spe__synth_data_source_common(record, data_src); + return; + } + + switch (record->source) { + case ARM_SPE_HISI_HIP_PEER_CPU: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_PEER_CPU_HITM: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_L3: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HIT; + break; + case ARM_SPE_HISI_HIP_L3_HITM: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + break; + case ARM_SPE_HISI_HIP_PEER_CLUSTER: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_REMOTE_SOCKET: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2; data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; break; - case ARM_SPE_NV_DRAM: + case ARM_SPE_HISI_HIP_LOCAL_MEM: data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; break; + case ARM_SPE_HISI_HIP_REMOTE_MEM: + data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + break; + case 
ARM_SPE_HISI_HIP_NC_DEV: + data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_HISI_HIP_L2: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_HISI_HIP_L2_HITM: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + break; + case ARM_SPE_HISI_HIP_L1: + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; default: break; } } -static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record, - union perf_mem_data_src *data_src) +static const struct data_source_handle data_source_handles[] = { + DS(common_ds_encoding_cpus, data_source_common), + DS(ampereone_ds_encoding_cpus, data_source_ampereone), + DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip), +}; + +static void arm_spe__synth_ld_memory_level(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) { - if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) { - data_src->mem_lvl = PERF_MEM_LVL_L3; + /* + * To find a cache hit, search in ascending order from the lower level + * caches to the higher level caches. This reflects the best scenario + * for a cache hit. + */ + if (arm_spe_is_cache_hit(record->type, L1D)) { + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + } else if (record->type & ARM_SPE_RECENTLY_FETCHED) { + data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_LFB; + } else if (arm_spe_is_cache_hit(record->type, L2D)) { + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + } else if (arm_spe_is_cache_hit(record->type, LLC)) { + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + /* + * To find a cache miss, search in descending order from the higher + * level cache to the lower level cache. This represents the worst + * scenario for a cache miss. + */ + } else if (arm_spe_is_cache_miss(record->type, LLC)) { + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_MISS; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + } else if (arm_spe_is_cache_miss(record->type, L2D)) { + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_MISS; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + } else if (arm_spe_is_cache_miss(record->type, L1D)) { + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + } +} - if (record->type & ARM_SPE_LLC_MISS) - data_src->mem_lvl |= PERF_MEM_LVL_MISS; - else - data_src->mem_lvl |= PERF_MEM_LVL_HIT; - } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) { +static void arm_spe__synth_st_memory_level(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + /* Record the greatest level info for a store operation. */ + if (arm_spe_is_cache_level(record->type, LLC)) { + data_src->mem_lvl = PERF_MEM_LVL_L3; + data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, LLC) ? 
+ PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + } else if (arm_spe_is_cache_level(record->type, L2D)) { + data_src->mem_lvl = PERF_MEM_LVL_L2; + data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L2D) ? + PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + } else if (arm_spe_is_cache_level(record->type, L1D)) { data_src->mem_lvl = PERF_MEM_LVL_L1; + data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L1D) ? + PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + } +} - if (record->type & ARM_SPE_L1D_MISS) - data_src->mem_lvl |= PERF_MEM_LVL_MISS; - else - data_src->mem_lvl |= PERF_MEM_LVL_HIT; +static void arm_spe__synth_memory_level(struct arm_spe_queue *speq, + const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + struct arm_spe *spe = speq->spe; + + /* + * The data source packet contains more info for cache levels for + * peer snooping. So respect the memory level if has been set by + * data source parsing. + */ + if (!data_src->mem_lvl) { + if (data_src->mem_op == PERF_MEM_OP_LOAD) + arm_spe__synth_ld_memory_level(record, data_src); + if (data_src->mem_op == PERF_MEM_OP_STORE) + arm_spe__synth_st_memory_level(record, data_src); + } + + if (!data_src->mem_lvl) { + data_src->mem_lvl = PERF_MEM_LVL_NA; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA; + } + + /* + * If 'mem_snoop' has been set by data source packet, skip to set + * it at here. + */ + if (!data_src->mem_snoop) { + if (record->type & ARM_SPE_DATA_SNOOPED) { + if (record->type & ARM_SPE_HITM) + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + else + data_src->mem_snoop = PERF_MEM_SNOOP_HIT; + } else { + u64 *metadata = + arm_spe__get_metadata_by_cpu(spe, speq->cpu); + + /* + * Set NA ("Not available") mode if no meta data or the + * SNOOPED event is not supported. 
+ */ + if (!metadata || + !(metadata[ARM_SPE_CAP_EVENT_FILTER] & ARM_SPE_DATA_SNOOPED)) + data_src->mem_snoop = PERF_MEM_SNOOP_NA; + else + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + } + } + + if (!data_src->mem_remote) { + if (record->type & ARM_SPE_REMOTE_ACCESS) + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + } +} + +static void arm_spe__synth_ds(struct arm_spe_queue *speq, + const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + struct arm_spe *spe = speq->spe; + u64 *metadata = NULL; + u64 midr; + unsigned int i; + + /* Metadata version 1 assumes all CPUs are the same (old behavior) */ + if (spe->metadata_ver == 1) { + const char *cpuid; + + pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n"); + cpuid = perf_env__cpuid(perf_session__env(spe->session)); + midr = strtol(cpuid, NULL, 16); + } else { + metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu); + if (!metadata) + return; + + midr = metadata[ARM_SPE_CPU_MIDR]; } - if (record->type & ARM_SPE_REMOTE_ACCESS) - data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1; + for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) { + if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) { + return data_source_handles[i].ds_synth(record, data_src); + } + } + + return; } -static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) +static union perf_mem_data_src +arm_spe__synth_data_source(struct arm_spe_queue *speq, + const struct arm_spe_record *record) { - union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; - bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); + union perf_mem_data_src data_src = {}; + + if (!is_mem_op(record->op)) + return data_src; if (record->op & ARM_SPE_OP_LD) data_src.mem_op = PERF_MEM_OP_LOAD; else if (record->op & ARM_SPE_OP_ST) data_src.mem_op = PERF_MEM_OP_STORE; else - return 0; + data_src.mem_op = PERF_MEM_OP_NA; - if (is_neoverse) - arm_spe__synth_data_source_neoverse(record, &data_src); - else - arm_spe__synth_data_source_generic(record, &data_src); + arm_spe__synth_ds(speq, record, &data_src); + arm_spe__synth_memory_level(speq, record, &data_src); if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) { data_src.mem_dtlb = PERF_MEM_TLB_WK; @@ -541,17 +1018,26 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m data_src.mem_dtlb |= PERF_MEM_TLB_HIT; } - return data_src.val; + return data_src; } static int arm_spe_sample(struct arm_spe_queue *speq) { const struct arm_spe_record *record = &speq->decoder->record; struct arm_spe *spe = speq->spe; - u64 data_src; + union perf_mem_data_src data_src; int err; - data_src = arm_spe__synth_data_source(record, spe->midr); + /* + * Discard all samples until period is reached + */ + speq->sample_count++; + if (speq->sample_count < spe->synth_opts.period) + return 0; + speq->sample_count = 0; + + arm_spe__sample_flags(speq); + data_src = arm_spe__synth_data_source(speq, record); if (spe->sample_flc) { if (record->type & ARM_SPE_L1D_MISS) { @@ -601,8 +1087,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq) } } - if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) { - err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id); + if (spe->synth_opts.last_branch && + (spe->sample_branch || spe->sample_instructions)) + arm_spe__prep_branch_stack(speq); + + if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) { + err = arm_spe__synth_branch_sample(speq, spe->branch_id); if (err) return err; } @@ 
-615,11 +1105,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq) return err; } - /* - * When data_src is zero it means the record is not a memory operation, - * skip to synthesize memory sample for this case. - */ - if (spe->sample_memory && data_src) { + if (spe->sample_memory && is_mem_op(record->op)) { err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src); if (err) return err; @@ -899,7 +1385,7 @@ static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event, static int arm_spe_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, - struct perf_tool *tool) + const struct perf_tool *tool) { int err = 0; u64 timestamp; @@ -947,7 +1433,7 @@ static int arm_spe_process_event(struct perf_session *session, static int arm_spe_process_auxtrace_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace); @@ -985,7 +1471,7 @@ static int arm_spe_process_auxtrace_event(struct perf_session *session, } static int arm_spe_flush(struct perf_session *session __maybe_unused, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace); @@ -1016,6 +1502,73 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused, return 0; } +static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size) +{ + u64 *metadata; + + metadata = zalloc(per_cpu_size); + if (!metadata) + return NULL; + + memcpy(metadata, buf, per_cpu_size); + return metadata; +} + +static void arm_spe__free_metadata(u64 **metadata, int nr_cpu) +{ + int i; + + for (i = 0; i < nr_cpu; i++) + zfree(&metadata[i]); + free(metadata); +} + +static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info, + u64 *ver, int *nr_cpu) +{ + u64 *ptr = (u64 *)info->priv; + u64 metadata_size; + u64 **metadata = NULL; + int hdr_sz, per_cpu_sz, i; + + metadata_size = info->header.size - + sizeof(struct perf_record_auxtrace_info); + + /* Metadata version 1 */ + if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) { + *ver = 1; + *nr_cpu = 0; + /* No per CPU metadata */ + return NULL; + } + + *ver = ptr[ARM_SPE_HEADER_VERSION]; + hdr_sz = ptr[ARM_SPE_HEADER_SIZE]; + *nr_cpu = ptr[ARM_SPE_CPUS_NUM]; + + metadata = calloc(*nr_cpu, sizeof(*metadata)); + if (!metadata) + return NULL; + + /* Locate the start address of per CPU metadata */ + ptr += hdr_sz; + per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu); + + for (i = 0; i < *nr_cpu; i++) { + metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz); + if (!metadata[i]) + goto err_per_cpu_metadata; + + ptr += per_cpu_sz / sizeof(u64); + } + + return metadata; + +err_per_cpu_metadata: + arm_spe__free_metadata(metadata, *nr_cpu); + return NULL; +} + static void arm_spe_free_queue(void *priv) { struct arm_spe_queue *speq = priv; @@ -1025,6 +1578,7 @@ static void arm_spe_free_queue(void *priv) thread__zput(speq->thread); arm_spe_decoder_free(speq->decoder); zfree(&speq->event_buf); + zfree(&speq->last_branch); free(speq); } @@ -1050,6 +1604,7 @@ static void arm_spe_free(struct perf_session *session) auxtrace_heap__free(&spe->heap); arm_spe_free_events(session); session->auxtrace = NULL; + arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu); free(spe); } @@ -1061,45 +1616,61 @@ static bool 
arm_spe_evsel_is_auxtrace(struct perf_session *session, return evsel->core.attr.type == spe->pmu_type; } -static const char * const arm_spe_info_fmts[] = { - [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n", +static const char * const metadata_hdr_v1_fmts[] = { + [ARM_SPE_PMU_TYPE] = " PMU Type :%"PRId64"\n", + [ARM_SPE_PER_CPU_MMAPS] = " Per CPU mmaps :%"PRId64"\n", }; -static void arm_spe_print_info(__u64 *arr) -{ - if (!dump_trace) - return; - - fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]); -} +static const char * const metadata_hdr_fmts[] = { + [ARM_SPE_HEADER_VERSION] = " Header version :%"PRId64"\n", + [ARM_SPE_HEADER_SIZE] = " Header size :%"PRId64"\n", + [ARM_SPE_PMU_TYPE_V2] = " PMU type v2 :%"PRId64"\n", + [ARM_SPE_CPUS_NUM] = " CPU number :%"PRId64"\n", +}; -struct arm_spe_synth { - struct perf_tool dummy_tool; - struct perf_session *session; +static const char * const metadata_per_cpu_fmts[] = { + [ARM_SPE_MAGIC] = " Magic :0x%"PRIx64"\n", + [ARM_SPE_CPU] = " CPU # :%"PRId64"\n", + [ARM_SPE_CPU_NR_PARAMS] = " Num of params :%"PRId64"\n", + [ARM_SPE_CPU_MIDR] = " MIDR :0x%"PRIx64"\n", + [ARM_SPE_CPU_PMU_TYPE] = " PMU Type :%"PRId64"\n", + [ARM_SPE_CAP_MIN_IVAL] = " Min Interval :%"PRId64"\n", + [ARM_SPE_CAP_EVENT_FILTER] = " Event Filter :0x%"PRIx64"\n", }; -static int arm_spe_event_synth(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample __maybe_unused, - struct machine *machine __maybe_unused) +static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr) { - struct arm_spe_synth *arm_spe_synth = - container_of(tool, struct arm_spe_synth, dummy_tool); + unsigned int i, cpu, hdr_size, cpu_num, cpu_size; + const char * const *hdr_fmts; - return perf_session__deliver_synth_event(arm_spe_synth->session, - event, NULL); -} + if (!dump_trace) + return; -static int arm_spe_synth_event(struct perf_session *session, - struct perf_event_attr *attr, u64 id) -{ - struct arm_spe_synth arm_spe_synth; + if (spe->metadata_ver == 1) { + cpu_num = 0; + hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX; + hdr_fmts = metadata_hdr_v1_fmts; + } else { + cpu_num = arr[ARM_SPE_CPUS_NUM]; + hdr_size = arr[ARM_SPE_HEADER_SIZE]; + hdr_fmts = metadata_hdr_fmts; + } - memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth)); - arm_spe_synth.session = session; + for (i = 0; i < hdr_size; i++) + fprintf(stdout, hdr_fmts[i], arr[i]); - return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1, - &id, arm_spe_event_synth); + arr += hdr_size; + for (cpu = 0; cpu < cpu_num; cpu++) { + /* + * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS + * are fixed. The sequential parameter size is decided by the + * field 'ARM_SPE_CPU_NR_PARAMS'. 
+ */ + cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS]; + for (i = 0; i < cpu_size; i++) + fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]); + arr += cpu_size; + } } static void arm_spe_set_event_name(struct evlist *evlist, u64 id, @@ -1161,18 +1732,16 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) attr.exclude_guest = evsel->core.attr.exclude_guest; attr.sample_id_all = evsel->core.attr.sample_id_all; attr.read_format = evsel->core.attr.read_format; + attr.sample_period = spe->synth_opts.period; /* create new id val to be a fixed offset from evsel id */ - id = evsel->core.id[0] + 1000000000; - - if (!id) - id = 1; + id = auxtrace_synth_id_range_start(evsel); if (spe->synth_opts.flc) { spe->sample_flc = true; /* Level 1 data cache miss */ - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->l1d_miss_id = id; @@ -1180,7 +1749,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) id += 1; /* Level 1 data cache access */ - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->l1d_access_id = id; @@ -1192,7 +1761,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) spe->sample_llc = true; /* Last level cache miss */ - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->llc_miss_id = id; @@ -1200,7 +1769,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) id += 1; /* Last level cache access */ - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->llc_access_id = id; @@ -1212,7 +1781,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) spe->sample_tlb = true; /* TLB miss */ - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->tlb_miss_id = id; @@ -1220,7 +1789,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) id += 1; /* TLB access */ - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->tlb_access_id = id; @@ -1228,15 +1797,28 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) id += 1; } + if (spe->synth_opts.last_branch) { + if (spe->synth_opts.last_branch_sz > 2) + pr_debug("Arm SPE supports only two bstack entries (PBT+TGT).\n"); + + attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + /* + * We don't use the hardware index, but the sample generation + * code uses the new format branch_stack with this field, + * so the event attributes must indicate that it's present. 
+ */ + attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; + } + if (spe->synth_opts.branches) { spe->sample_branch = true; - /* Branch miss */ - err = arm_spe_synth_event(session, &attr, id); + /* Branch */ + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; - spe->branch_miss_id = id; - arm_spe_set_event_name(evlist, id, "branch-miss"); + spe->branch_id = id; + arm_spe_set_event_name(evlist, id, "branch"); id += 1; } @@ -1244,7 +1826,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) spe->sample_remote_access = true; /* Remote access */ - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->remote_access_id = id; @@ -1255,7 +1837,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) if (spe->synth_opts.mem) { spe->sample_memory = true; - err = arm_spe_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->memory_id = id; @@ -1264,47 +1846,70 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) } if (spe->synth_opts.instructions) { - if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) { - pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n"); - goto synth_instructions_out; - } - if (spe->synth_opts.period > 1) - pr_warning("Arm SPE has a hardware-based sample period.\n" - "Additional instruction events will be discarded by --itrace\n"); - spe->sample_instructions = true; attr.config = PERF_COUNT_HW_INSTRUCTIONS; - attr.sample_period = spe->synth_opts.period; - spe->instructions_sample_period = attr.sample_period; - err = arm_spe_synth_event(session, &attr, id); + + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; spe->instructions_id = id; arm_spe_set_event_name(evlist, id, "instructions"); } -synth_instructions_out: return 0; } +static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu) +{ + u64 midr; + int i; + + if (!nr_cpu) + return false; + + for (i = 0; i < nr_cpu; i++) { + if (!metadata[i]) + return false; + + if (i == 0) { + midr = metadata[i][ARM_SPE_CPU_MIDR]; + continue; + } + + if (midr != metadata[i][ARM_SPE_CPU_MIDR]) + return false; + } + + return true; +} + int arm_spe_process_auxtrace_info(union perf_event *event, struct perf_session *session) { struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; - size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX; + size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE; struct perf_record_time_conv *tc = &session->time_conv; - const char *cpuid = perf_env__cpuid(session->evlist->env); - u64 midr = strtol(cpuid, NULL, 16); struct arm_spe *spe; - int err; + u64 **metadata = NULL; + u64 metadata_ver; + int nr_cpu, err; if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + min_sz) return -EINVAL; + metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver, + &nr_cpu); + if (!metadata && metadata_ver != 1) { + pr_err("Failed to parse Arm SPE metadata.\n"); + return -EINVAL; + } + spe = zalloc(sizeof(struct arm_spe)); - if (!spe) - return -ENOMEM; + if (!spe) { + err = -ENOMEM; + goto err_free_metadata; + } err = auxtrace_queues__init(&spe->queues); if (err) @@ -1313,8 +1918,14 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->session = session; spe->machine = &session->machines.host; /* No kvm support */ 
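	/*
	 * Annotation (not part of the patch): version 1 metadata keeps the
	 * PMU type at ARM_SPE_PMU_TYPE, while the v2 header moves it to
	 * ARM_SPE_PMU_TYPE_V2, so the assignment below has to branch on
	 * the metadata version parsed from the auxtrace info.
	 */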
spe->auxtrace_type = auxtrace_info->type; - spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; - spe->midr = midr; + if (metadata_ver == 1) + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; + else + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2]; + spe->metadata = metadata; + spe->metadata_ver = metadata_ver; + spe->metadata_nr_cpu = nr_cpu; + spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu); spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); @@ -1347,15 +1958,28 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace; session->auxtrace = &spe->auxtrace; - arm_spe_print_info(&auxtrace_info->priv[0]); + arm_spe_print_info(spe, &auxtrace_info->priv[0]); if (dump_trace) return 0; - if (session->itrace_synth_opts && session->itrace_synth_opts->set) + if (session->itrace_synth_opts && session->itrace_synth_opts->set) { spe->synth_opts = *session->itrace_synth_opts; - else + } else { itrace_synth_opts__set_default(&spe->synth_opts, false); + /* Default nanoseconds period not supported */ + spe->synth_opts.period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS; + spe->synth_opts.period = 1; + } + + if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) { + ui__error("You must only use i (instructions) --itrace period with Arm SPE. e.g --itrace=i1i\n"); + err = -EINVAL; + goto err_free_queues; + } + if (spe->synth_opts.period > 1) + ui__warning("Arm SPE has a hardware-based sampling period.\n\n" + "--itrace periods > 1i downsample by an interval of n SPE samples rather than n instructions.\n"); err = arm_spe_synth_events(spe, session); if (err) @@ -1375,5 +1999,7 @@ err_free_queues: session->auxtrace = NULL; err_free: free(spe); +err_free_metadata: + arm_spe__free_metadata(metadata, nr_cpu); return err; } diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h index 4f4900c18f3e..3966df1856d8 100644 --- a/tools/perf/util/arm-spe.h +++ b/tools/perf/util/arm-spe.h @@ -12,10 +12,48 @@ enum { ARM_SPE_PMU_TYPE, ARM_SPE_PER_CPU_MMAPS, + ARM_SPE_AUXTRACE_V1_PRIV_MAX, +}; + +#define ARM_SPE_AUXTRACE_V1_PRIV_SIZE \ + (ARM_SPE_AUXTRACE_V1_PRIV_MAX * sizeof(u64)) + +enum { + /* + * The old metadata format (defined above) does not include a + * field for version number. Version 1 is reserved and starts + * from version 2. 
+ */ + ARM_SPE_HEADER_VERSION, + /* Number of sizeof(u64) */ + ARM_SPE_HEADER_SIZE, + /* PMU type shared by CPUs */ + ARM_SPE_PMU_TYPE_V2, + /* Number of CPUs */ + ARM_SPE_CPUS_NUM, ARM_SPE_AUXTRACE_PRIV_MAX, }; -#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64)) +enum { + /* Magic number */ + ARM_SPE_MAGIC, + /* CPU logical number in system */ + ARM_SPE_CPU, + /* Number of parameters */ + ARM_SPE_CPU_NR_PARAMS, + /* CPU MIDR */ + ARM_SPE_CPU_MIDR, + /* Associated PMU type */ + ARM_SPE_CPU_PMU_TYPE, + /* Minimal interval */ + ARM_SPE_CAP_MIN_IVAL, + /* Event filter */ + ARM_SPE_CAP_EVENT_FILTER, + ARM_SPE_CPU_PRIV_MAX, +}; + +#define ARM_SPE_HEADER_CURRENT_VERSION 2 + union perf_event; struct perf_session; diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c index 4940be4a0569..958afe8b821e 100644 --- a/tools/perf/util/arm64-frame-pointer-unwind-support.c +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c @@ -4,6 +4,7 @@ #include "event.h" #include "perf_regs.h" // SMPL_REG_MASK #include "unwind.h" +#include <string.h> #define perf_event_arm_regs perf_event_arm64_regs #include "../../arch/arm64/include/uapi/asm/perf_regs.h" @@ -16,8 +17,13 @@ struct entries { static bool get_leaf_frame_caller_enabled(struct perf_sample *sample) { - return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs - && sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR); + struct regs_dump *regs; + + if (callchain_param.record_mode != CALLCHAIN_FP) + return false; + + regs = perf_sample__user_regs(sample); + return regs->regs && regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_LR); } static int add_entry(struct unwind_entry *entry, void *arg) @@ -32,7 +38,7 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr { int ret; struct entries entries = {}; - struct regs_dump old_regs = sample->user_regs; + struct regs_dump old_regs, *regs; if (!get_leaf_frame_caller_enabled(sample)) return 0; @@ -42,19 +48,20 @@ u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thr * and set its mask. SP is not used when doing the unwinding but it * still needs to be set to prevent failures. 
 */
-
-	if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) {
-		sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC);
-		sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1];
+	regs = perf_sample__user_regs(sample);
+	memcpy(&old_regs, regs, sizeof(*regs));
+	if (!(regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) {
+		regs->cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC);
+		regs->cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1];
 	}
 
-	if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) {
-		sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP);
-		sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0;
+	if (!(regs->mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) {
+		regs->cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP);
+		regs->cache_regs[PERF_REG_ARM64_SP] = 0;
 	}
 
 	ret = unwind__get_entries(add_entry, &entries, thread, sample, 2, true);
-	sample->user_regs = old_regs;
+	memcpy(regs, &old_regs, sizeof(*regs));
 
 	if (ret || entries.length != 2)
 		return ret;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index e2f317063eec..a224687ffbc1 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -55,12 +55,29 @@
 #include "hisi-ptt.h"
 #include "s390-cpumsf.h"
 #include "util/mmap.h"
+#include "powerpc-vpadtl.h"
 #include <linux/ctype.h>
 #include "symbol/kallsyms.h"
 #include <internal/lib.h>
 #include "util/sample.h"
 
+#define AUXTRACE_SYNTH_EVENT_ID_OFFSET 1000000000ULL
+
+/*
+ * Event IDs are allocated sequentially, so a big offset from any
+ * existing ID will reach an unused range.
+ */
+u64 auxtrace_synth_id_range_start(struct evsel *evsel)
+{
+	u64 id = evsel->core.id[0] + AUXTRACE_SYNTH_EVENT_ID_OFFSET;
+
+	if (!id)
+		id = 1;
+
+	return id;
+}
+
 /*
  * Make a group from 'leader' to 'last', requiring that the events were not
  * already grouped to a different leader.
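
(Annotation, not part of the patch: a minimal sketch of how a trace decoder is
expected to consume the ID range reserved by auxtrace_synth_id_range_start()
above. The helper below is hypothetical and mirrors the "id += 1" pattern that
arm_spe_synth_events() uses earlier in this diff; the event names in the
comments are illustrative only.)

#include <linux/types.h>

/* Hand out consecutive IDs for two synthesized events from the range. */
static u64 alloc_synth_ids(u64 range_start, u64 *miss_id, u64 *access_id)
{
	u64 id = range_start;	/* evsel->core.id[0] + AUXTRACE_SYNTH_EVENT_ID_OFFSET */

	*miss_id = id;		/* e.g. the "l1d-miss" event */
	id += 1;
	*access_id = id;	/* e.g. the "l1d-access" event */
	id += 1;
	return id;		/* next unused ID in the reserved range */
}
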
@@ -185,10 +202,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, if (per_cpu) { mp->cpu = perf_cpu_map__cpu(evlist->core.all_cpus, idx); - if (evlist->core.threads) - mp->tid = perf_thread_map__pid(evlist->core.threads, 0); - else - mp->tid = -1; + mp->tid = perf_thread_map__pid(evlist->core.threads, 0); } else { mp->cpu.cpu = -1; mp->tid = perf_thread_map__pid(evlist->core.threads, idx); @@ -671,11 +685,11 @@ int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx) { struct evsel *evsel; - if (!itr->evlist || !itr->pmu) + if (!itr->evlist) return -EINVAL; evlist__for_each_entry(itr->evlist, evsel) { - if (evsel->core.attr.type == itr->pmu->type) { + if (evsel__is_aux_event(evsel)) { if (evsel->disabled) return 0; return evlist__enable_event_idx(itr->evlist, evsel, idx); @@ -810,19 +824,76 @@ no_opt: return auxtrace_validate_aux_sample_size(evlist, opts); } -void auxtrace_regroup_aux_output(struct evlist *evlist) +static struct aux_action_opt { + const char *str; + u32 aux_action; + bool aux_event_opt; +} aux_action_opts[] = { + {"start-paused", BIT(0), true}, + {"pause", BIT(1), false}, + {"resume", BIT(2), false}, + {.str = NULL}, +}; + +static const struct aux_action_opt *auxtrace_parse_aux_action_str(const char *str) +{ + const struct aux_action_opt *opt; + + if (!str) + return NULL; + + for (opt = aux_action_opts; opt->str; opt++) + if (!strcmp(str, opt->str)) + return opt; + + return NULL; +} + +int auxtrace_parse_aux_action(struct evlist *evlist) { - struct evsel *evsel, *aux_evsel = NULL; struct evsel_config_term *term; + struct evsel *aux_evsel = NULL; + struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel__is_aux_event(evsel)) + bool is_aux_event = evsel__is_aux_event(evsel); + const struct aux_action_opt *opt; + + if (is_aux_event) aux_evsel = evsel; - term = evsel__get_config_term(evsel, AUX_OUTPUT); + term = evsel__get_config_term(evsel, AUX_ACTION); + if (!term) { + if (evsel__get_config_term(evsel, AUX_OUTPUT)) + goto regroup; + continue; + } + opt = auxtrace_parse_aux_action_str(term->val.str); + if (!opt) { + pr_err("Bad aux-action '%s'\n", term->val.str); + return -EINVAL; + } + if (opt->aux_event_opt && !is_aux_event) { + pr_err("aux-action '%s' can only be used with AUX area event\n", + term->val.str); + return -EINVAL; + } + if (!opt->aux_event_opt && is_aux_event) { + pr_err("aux-action '%s' cannot be used for AUX area event itself\n", + term->val.str); + return -EINVAL; + } + evsel->core.attr.aux_action = opt->aux_action; +regroup: /* If possible, group with the AUX event */ - if (term && aux_evsel) + if (aux_evsel) evlist__regroup(evlist, aux_evsel, evsel); + if (!evsel__is_aux_event(evsel__leader(evsel))) { + pr_err("Events with aux-action must have AUX area event group leader\n"); + return -EINVAL; + } } + + return 0; } struct auxtrace_record *__weak @@ -1116,16 +1187,19 @@ static int auxtrace_queue_data_cb(struct perf_session *session, if (!qd->samples || event->header.type != PERF_RECORD_SAMPLE) return 0; + perf_sample__init(&sample, /*all=*/false); err = evlist__parse_sample(session->evlist, event, &sample); if (err) - return err; - - if (!sample.aux_sample.size) - return 0; + goto out; - offset += sample.aux_sample.data - (void *)event; + if (sample.aux_sample.size) { + offset += sample.aux_sample.data - (void *)event; - return session->auxtrace->queue_data(session, &sample, NULL, offset); + err = session->auxtrace->queue_data(session, &sample, NULL, offset); + } +out: + perf_sample__exit(&sample); + 
return err; } int auxtrace_queue_data(struct perf_session *session, bool samples, bool events) @@ -1240,7 +1314,7 @@ void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int } int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, - struct perf_tool *tool, + const struct perf_tool *tool, struct perf_session *session, perf_event__handler_t process) { @@ -1305,7 +1379,8 @@ static void unleader_auxtrace(struct perf_session *session) } } -int perf_event__process_auxtrace_info(struct perf_session *session, +int perf_event__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { enum auxtrace_type type = event->auxtrace_info.type; @@ -1333,6 +1408,9 @@ int perf_event__process_auxtrace_info(struct perf_session *session, case PERF_AUXTRACE_HISI_PTT: err = hisi_ptt_process_auxtrace_info(event, session); break; + case PERF_AUXTRACE_VPA_DTL: + err = powerpc_vpadtl_process_auxtrace_info(event, session); + break; case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; @@ -1346,7 +1424,8 @@ int perf_event__process_auxtrace_info(struct perf_session *session, return 0; } -s64 perf_event__process_auxtrace(struct perf_session *session, +s64 perf_event__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { s64 err; @@ -1743,7 +1822,8 @@ void events_stats__auxtrace_error_warn(const struct events_stats *stats) } } -int perf_event__process_auxtrace_error(struct perf_session *session, +int perf_event__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { if (auxtrace__dont_decode(session)) @@ -1830,8 +1910,8 @@ int __weak compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail) } static int __auxtrace_mmap__read(struct mmap *map, - struct auxtrace_record *itr, - struct perf_tool *tool, process_auxtrace_t fn, + struct auxtrace_record *itr, struct perf_env *env, + const struct perf_tool *tool, process_auxtrace_t fn, bool snapshot, size_t snapshot_size) { struct auxtrace_mmap *mm = &map->auxtrace_mmap; @@ -1840,7 +1920,7 @@ static int __auxtrace_mmap__read(struct mmap *map, size_t size, head_off, old_off, len1, len2, padding; union perf_event ev; void *data1, *data2; - int kernel_is_64_bit = perf_env__kernel_is_64_bit(evsel__env(NULL)); + int kernel_is_64_bit = perf_env__kernel_is_64_bit(env); head = auxtrace_mmap__read_head(mm, kernel_is_64_bit); @@ -1942,17 +2022,18 @@ static int __auxtrace_mmap__read(struct mmap *map, } int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr, - struct perf_tool *tool, process_auxtrace_t fn) + struct perf_env *env, const struct perf_tool *tool, + process_auxtrace_t fn) { - return __auxtrace_mmap__read(map, itr, tool, fn, false, 0); + return __auxtrace_mmap__read(map, itr, env, tool, fn, false, 0); } int auxtrace_mmap__read_snapshot(struct mmap *map, - struct auxtrace_record *itr, - struct perf_tool *tool, process_auxtrace_t fn, + struct auxtrace_record *itr, struct perf_env *env, + const struct perf_tool *tool, process_auxtrace_t fn, size_t snapshot_size) { - return __auxtrace_mmap__read(map, itr, tool, fn, true, snapshot_size); + return __auxtrace_mmap__read(map, itr, env, tool, fn, true, snapshot_size); } /** @@ -2829,7 +2910,7 @@ int auxtrace_parse_filters(struct evlist *evlist) } int auxtrace__process_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool 
*tool) + struct perf_sample *sample, const struct perf_tool *tool) { if (!session->auxtrace) return 0; @@ -2847,7 +2928,7 @@ void auxtrace__dump_auxtrace_sample(struct perf_session *session, session->auxtrace->dump_auxtrace_sample(session, sample); } -int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool) +int auxtrace__flush_events(struct perf_session *session, const struct perf_tool *tool) { if (!session->auxtrace) return 0; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 8a6ec9565835..6947f3f284c0 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -8,21 +8,17 @@ #define __PERF_AUXTRACE_H #include <sys/types.h> -#include <errno.h> -#include <stdbool.h> -#include <stddef.h> #include <stdio.h> // FILE -#include <linux/list.h> #include <linux/perf_event.h> #include <linux/types.h> -#include <perf/cpumap.h> -#include <asm/bitsperlong.h> #include <asm/barrier.h> +#include <perf/cpumap.h> union perf_event; struct perf_session; struct evlist; struct evsel; +struct perf_env; struct perf_tool; struct mmap; struct perf_sample; @@ -49,6 +45,7 @@ enum auxtrace_type { PERF_AUXTRACE_ARM_SPE, PERF_AUXTRACE_S390_CPUMSF, PERF_AUXTRACE_HISI_PTT, + PERF_AUXTRACE_VPA_DTL, }; enum itrace_period_type { @@ -75,7 +72,6 @@ enum itrace_period_type { * (not fully accurate, since CYC packets are only emitted * together with other events, such as branches) * @branches: whether to synthesize 'branches' events - * (branch misses only for Arm SPE) * @transactions: whether to synthesize events for transactions * @ptwrites: whether to synthesize events for ptwrites * @pwr_events: whether to synthesize power events @@ -208,17 +204,17 @@ struct auxtrace { int (*process_event)(struct perf_session *session, union perf_event *event, struct perf_sample *sample, - struct perf_tool *tool); + const struct perf_tool *tool); int (*process_auxtrace_event)(struct perf_session *session, union perf_event *event, - struct perf_tool *tool); + const struct perf_tool *tool); int (*queue_data)(struct perf_session *session, struct perf_sample *sample, union perf_event *event, u64 data_offset); void (*dump_auxtrace_sample)(struct perf_session *session, struct perf_sample *sample); int (*flush_events)(struct perf_session *session, - struct perf_tool *tool); + const struct perf_tool *tool); void (*free_events)(struct perf_session *session); void (*free)(struct perf_session *session); bool (*evsel_is_auxtrace)(struct perf_session *session, @@ -411,7 +407,6 @@ struct auxtrace_record { int (*read_finish)(struct auxtrace_record *itr, int idx); unsigned int alignment; unsigned int default_aux_sample_size; - struct perf_pmu *pmu; struct evlist *evlist; }; @@ -459,8 +454,6 @@ struct addr_filters { struct auxtrace_cache; -#ifdef HAVE_AUXTRACE_SUPPORT - u64 compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm); int compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail); @@ -508,17 +501,18 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, struct evlist *evlist, struct evsel *evsel, int idx); -typedef int (*process_auxtrace_t)(struct perf_tool *tool, +typedef int (*process_auxtrace_t)(const struct perf_tool *tool, struct mmap *map, union perf_event *event, void *data1, size_t len1, void *data2, size_t len2); int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr, - struct perf_tool *tool, process_auxtrace_t fn); + struct perf_env *env, const struct perf_tool *tool, + process_auxtrace_t fn); int 
auxtrace_mmap__read_snapshot(struct mmap *map, - struct auxtrace_record *itr, - struct perf_tool *tool, process_auxtrace_t fn, + struct auxtrace_record *itr, struct perf_env *env, + const struct perf_tool *tool, process_auxtrace_t fn, size_t snapshot_size); int auxtrace_queues__init_nr(struct auxtrace_queues *queues, int nr_queues); @@ -580,7 +574,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, int auxtrace_parse_sample_options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts, const char *str); -void auxtrace_regroup_aux_output(struct evlist *evlist); +int auxtrace_parse_aux_action(struct evlist *evlist); int auxtrace_record__options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts); @@ -614,11 +608,14 @@ void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int int code, int cpu, pid_t pid, pid_t tid, u64 ip, const char *msg, u64 timestamp); -int perf_event__process_auxtrace_info(struct perf_session *session, +int perf_event__process_auxtrace_info(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); -s64 perf_event__process_auxtrace(struct perf_session *session, +s64 perf_event__process_auxtrace(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); -int perf_event__process_auxtrace_error(struct perf_session *session, +int perf_event__process_auxtrace_error(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, const char *str, int unset); @@ -639,19 +636,20 @@ int addr_filters__parse_bare_filter(struct addr_filters *filts, int auxtrace_parse_filters(struct evlist *evlist); int auxtrace__process_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool *tool); + struct perf_sample *sample, const struct perf_tool *tool); void auxtrace__dump_auxtrace_sample(struct perf_session *session, struct perf_sample *sample); -int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool); +int auxtrace__flush_events(struct perf_session *session, const struct perf_tool *tool); void auxtrace__free_events(struct perf_session *session); void auxtrace__free(struct perf_session *session); bool auxtrace__evsel_is_auxtrace(struct perf_session *session, struct evsel *evsel); +u64 auxtrace_synth_id_range_start(struct evsel *evsel); #define ITRACE_HELP \ " i[period]: synthesize instructions events\n" \ " y[period]: synthesize cycles events (same period as i)\n" \ -" b: synthesize branches events (branch misses for Arm SPE)\n" \ +" b: synthesize branches events\n" \ " c: synthesize branches events (calls only)\n" \ " r: synthesize branches events (returns only)\n" \ " x: synthesize transactions events\n" \ @@ -701,210 +699,4 @@ void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts) opts->range_num = 0; } -#else -#include "debug.h" - -static inline struct auxtrace_record * -auxtrace_record__init(struct evlist *evlist __maybe_unused, - int *err) -{ - *err = 0; - return NULL; -} - -static inline -void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused) -{ -} - -static inline -int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused, - struct evlist *evlist __maybe_unused, - struct record_opts *opts __maybe_unused) -{ - return 0; -} - -static inline -int perf_event__process_auxtrace_info(struct perf_session *session __maybe_unused, - 
union perf_event *event __maybe_unused) -{ - return 0; -} - -static inline -s64 perf_event__process_auxtrace(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused) -{ - return 0; -} - -static inline -int perf_event__process_auxtrace_error(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused) -{ - return 0; -} - -static inline -void perf_session__auxtrace_error_inc(struct perf_session *session - __maybe_unused, - union perf_event *event - __maybe_unused) -{ -} - -static inline -void events_stats__auxtrace_error_warn(const struct events_stats *stats - __maybe_unused) -{ -} - -static inline -int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts __maybe_unused, - const char *str __maybe_unused, int unset __maybe_unused) -{ - pr_err("AUX area tracing not supported\n"); - return -EINVAL; -} - -static inline -int itrace_parse_synth_opts(const struct option *opt __maybe_unused, - const char *str __maybe_unused, - int unset __maybe_unused) -{ - pr_err("AUX area tracing not supported\n"); - return -EINVAL; -} - -static inline -int auxtrace_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused, - struct record_opts *opts __maybe_unused, - const char *str) -{ - if (!str) - return 0; - pr_err("AUX area tracing not supported\n"); - return -EINVAL; -} - -static inline -int auxtrace_parse_sample_options(struct auxtrace_record *itr __maybe_unused, - struct evlist *evlist __maybe_unused, - struct record_opts *opts __maybe_unused, - const char *str) -{ - if (!str) - return 0; - pr_err("AUX area tracing not supported\n"); - return -EINVAL; -} - -static inline -void auxtrace_regroup_aux_output(struct evlist *evlist __maybe_unused) -{ -} - -static inline -int auxtrace__process_event(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_sample *sample __maybe_unused, - struct perf_tool *tool __maybe_unused) -{ - return 0; -} - -static inline -void auxtrace__dump_auxtrace_sample(struct perf_session *session __maybe_unused, - struct perf_sample *sample __maybe_unused) -{ -} - -static inline -int auxtrace__flush_events(struct perf_session *session __maybe_unused, - struct perf_tool *tool __maybe_unused) -{ - return 0; -} - -static inline -void auxtrace__free_events(struct perf_session *session __maybe_unused) -{ -} - -static inline -void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache __maybe_unused) -{ -} - -static inline -void auxtrace__free(struct perf_session *session __maybe_unused) -{ -} - -static inline -int auxtrace_index__write(int fd __maybe_unused, - struct list_head *head __maybe_unused) -{ - return -EINVAL; -} - -static inline -int auxtrace_index__process(int fd __maybe_unused, - u64 size __maybe_unused, - struct perf_session *session __maybe_unused, - bool needs_swap __maybe_unused) -{ - return -EINVAL; -} - -static inline -void auxtrace_index__free(struct list_head *head __maybe_unused) -{ -} - -static inline -bool auxtrace__evsel_is_auxtrace(struct perf_session *session __maybe_unused, - struct evsel *evsel __maybe_unused) -{ - return false; -} - -static inline -int auxtrace_parse_filters(struct evlist *evlist __maybe_unused) -{ - return 0; -} - -int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, - struct auxtrace_mmap_params *mp, - void *userpg, int fd); -void auxtrace_mmap__munmap(struct auxtrace_mmap *mm); -void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp, - off_t auxtrace_offset, - unsigned int auxtrace_pages, - bool 
auxtrace_overwrite); -void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, - struct evlist *evlist, - struct evsel *evsel, int idx); - -#define ITRACE_HELP "" - -static inline -void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts - __maybe_unused, - struct perf_time_interval *ptime_range - __maybe_unused, - int range_num __maybe_unused) -{ -} - -static inline -void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts - __maybe_unused) -{ -} - -#endif - #endif diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index 04068d48683f..649392bee7ed 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -40,16 +40,32 @@ static struct block_header_column { [PERF_HPP_REPORT__BLOCK_DSO] = { .name = "Shared Object", .width = 20, + }, + [PERF_HPP_REPORT__BLOCK_BRANCH_COUNTER] = { + .name = "Branch Counter", + .width = 30, } }; -struct block_info *block_info__new(void) +static struct block_info *block_info__new(unsigned int br_cntr_nr) { - return zalloc(sizeof(struct block_info)); + struct block_info *bi = zalloc(sizeof(struct block_info)); + + if (bi && br_cntr_nr) { + bi->br_cntr = calloc(br_cntr_nr, sizeof(u64)); + if (!bi->br_cntr) { + free(bi); + return NULL; + } + } + + return bi; } void block_info__delete(struct block_info *bi) { + if (bi) + free(bi->br_cntr); free(bi); } @@ -86,7 +102,8 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, static void init_block_info(struct block_info *bi, struct symbol *sym, struct cyc_hist *ch, int offset, - u64 total_cycles) + u64 total_cycles, unsigned int br_cntr_nr, + u64 *br_cntr, struct evsel *evsel) { bi->sym = sym; bi->start = ch->start; @@ -99,10 +116,18 @@ static void init_block_info(struct block_info *bi, struct symbol *sym, memcpy(bi->cycles_spark, ch->cycles_spark, NUM_SPARKS * sizeof(u64)); + + if (br_cntr && br_cntr_nr) { + bi->br_cntr_nr = br_cntr_nr; + memcpy(bi->br_cntr, &br_cntr[offset * br_cntr_nr], + br_cntr_nr * sizeof(u64)); + } + bi->evsel = evsel; } int block_info__process_sym(struct hist_entry *he, struct block_hist *bh, - u64 *block_cycles_aggr, u64 total_cycles) + u64 *block_cycles_aggr, u64 total_cycles, + unsigned int br_cntr_nr) { struct annotation *notes; struct cyc_hist *ch; @@ -125,12 +150,14 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh, struct block_info *bi; struct hist_entry *he_block; - bi = block_info__new(); + bi = block_info__new(br_cntr_nr); if (!bi) return -1; init_block_info(bi, he->ms.sym, &ch[i], i, - total_cycles); + total_cycles, br_cntr_nr, + notes->branch->br_cntr, + hists_to_evsel(he->hists)); cycles += bi->cycles_aggr / bi->num_aggr; he_block = hists__add_entry_block(&bh->block_hists, @@ -327,6 +354,24 @@ static void init_block_header(struct block_fmt *block_fmt) fmt->width = block_column_width; } +static int block_branch_counter_entry(struct perf_hpp_fmt *fmt, + struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt); + struct block_info *bi = he->block_info; + char *buf; + int ret; + + if (annotation_br_cntr_entry(&buf, bi->br_cntr_nr, bi->br_cntr, + bi->num_aggr, bi->evsel)) + return 0; + + ret = scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf); + free(buf); + return ret; +} + static void hpp_register(struct block_fmt *block_fmt, int idx, struct perf_hpp_list *hpp_list) { @@ -357,6 +402,9 @@ static void hpp_register(struct block_fmt *block_fmt, int idx, case PERF_HPP_REPORT__BLOCK_DSO: 
fmt->entry = block_dso_entry; break; + case PERF_HPP_REPORT__BLOCK_BRANCH_COUNTER: + fmt->entry = block_branch_counter_entry; + break; default: return; } @@ -390,7 +438,7 @@ static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts, static int process_block_report(struct hists *hists, struct block_report *block_report, u64 total_cycles, int *block_hpps, - int nr_hpps) + int nr_hpps, unsigned int br_cntr_nr) { struct rb_node *next = rb_first_cached(&hists->entries); struct block_hist *bh = &block_report->hist; @@ -405,7 +453,7 @@ static int process_block_report(struct hists *hists, while (next) { he = rb_entry(next, struct hist_entry, rb_node); block_info__process_sym(he, bh, &block_report->cycles, - total_cycles); + total_cycles, br_cntr_nr); next = rb_next(&he->rb_node); } @@ -435,7 +483,7 @@ struct block_report *block_info__create_report(struct evlist *evlist, struct hists *hists = evsel__hists(pos); process_block_report(hists, &block_reports[i], total_cycles, - block_hpps, nr_hpps); + block_hpps, nr_hpps, evlist->nr_br_cntr); i++; } diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h index 0b9e1aad4c55..b9329dc3ab59 100644 --- a/tools/perf/util/block-info.h +++ b/tools/perf/util/block-info.h @@ -18,6 +18,9 @@ struct block_info { u64 total_cycles; int num; int num_aggr; + int br_cntr_nr; + u64 *br_cntr; + struct evsel *evsel; }; struct block_fmt { @@ -36,6 +39,7 @@ enum { PERF_HPP_REPORT__BLOCK_AVG_CYCLES, PERF_HPP_REPORT__BLOCK_RANGE, PERF_HPP_REPORT__BLOCK_DSO, + PERF_HPP_REPORT__BLOCK_BRANCH_COUNTER, PERF_HPP_REPORT__BLOCK_MAX_INDEX }; @@ -46,7 +50,6 @@ struct block_report { int nr_fmts; }; -struct block_info *block_info__new(void); void block_info__delete(struct block_info *bi); int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right); @@ -55,7 +58,8 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, struct hist_entry *left, struct hist_entry *right); int block_info__process_sym(struct hist_entry *he, struct block_hist *bh, - u64 *block_cycles_aggr, u64 total_cycles); + u64 *block_cycles_aggr, u64 total_cycles, + unsigned int br_cntr_nr); struct block_report *block_info__create_report(struct evlist *evlist, u64 total_cycles, diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 827695cd0408..2298cd396c42 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -1,13 +1,21 @@ // SPDX-License-Identifier: GPL-2.0 #include <errno.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> #include <stdlib.h> +#include <string.h> #include <bpf/bpf.h> #include <bpf/btf.h> #include <bpf/libbpf.h> +#include <linux/bpf.h> #include <linux/btf.h> #include <linux/err.h> +#include <linux/perf_event.h> #include <linux/string.h> +#include <linux/zalloc.h> #include <internal/lib.h> +#include <perf/event.h> #include <symbol/kallsyms.h> #include "bpf-event.h" #include "bpf-utils.h" @@ -151,6 +159,362 @@ static int synthesize_bpf_prog_name(char *buf, int size, return name_len; } +#ifdef HAVE_LIBBPF_STRINGS_SUPPORT + +#define BPF_METADATA_PREFIX "bpf_metadata_" +#define BPF_METADATA_PREFIX_LEN (sizeof(BPF_METADATA_PREFIX) - 1) + +static bool name_has_bpf_metadata_prefix(const char **s) +{ + if (strncmp(*s, BPF_METADATA_PREFIX, BPF_METADATA_PREFIX_LEN) != 0) + return false; + *s += BPF_METADATA_PREFIX_LEN; + return true; +} + +struct bpf_metadata_map { + struct btf *btf; + const struct btf_type *datasec; + void *rodata; + size_t rodata_size; + unsigned int num_vars; +}; + 
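/*
 * Annotation, not part of the patch: name_has_bpf_metadata_prefix() above
 * both tests for the "bpf_metadata_" prefix and advances the caller's
 * pointer past it. A minimal usage sketch (the variable name is
 * hypothetical):
 *
 *	const char *name = "bpf_metadata_build_id";
 *
 *	if (name_has_bpf_metadata_prefix(&name))
 *		printf("%s\n", name);	// name now points at "build_id"
 */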
+static int bpf_metadata_read_map_data(__u32 map_id, struct bpf_metadata_map *map) +{ + int map_fd; + struct bpf_map_info map_info; + __u32 map_info_len; + int key; + struct btf *btf; + const struct btf_type *datasec; + struct btf_var_secinfo *vsi; + unsigned int vlen, vars; + void *rodata; + + map_fd = bpf_map_get_fd_by_id(map_id); + if (map_fd < 0) + return -1; + + memset(&map_info, 0, sizeof(map_info)); + map_info_len = sizeof(map_info); + if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len) < 0) + goto out_close; + + /* If it's not an .rodata map, don't bother. */ + if (map_info.type != BPF_MAP_TYPE_ARRAY || + map_info.key_size != sizeof(int) || + map_info.max_entries != 1 || + !map_info.btf_value_type_id || + !strstr(map_info.name, ".rodata")) { + goto out_close; + } + + btf = btf__load_from_kernel_by_id(map_info.btf_id); + if (!btf) + goto out_close; + datasec = btf__type_by_id(btf, map_info.btf_value_type_id); + if (!btf_is_datasec(datasec)) + goto out_free_btf; + + /* + * If there aren't any variables with the "bpf_metadata_" prefix, + * don't bother. + */ + vlen = btf_vlen(datasec); + vsi = btf_var_secinfos(datasec); + vars = 0; + for (unsigned int i = 0; i < vlen; i++, vsi++) { + const struct btf_type *t_var = btf__type_by_id(btf, vsi->type); + const char *name = btf__name_by_offset(btf, t_var->name_off); + + if (name_has_bpf_metadata_prefix(&name)) + vars++; + } + if (vars == 0) + goto out_free_btf; + + rodata = zalloc(map_info.value_size); + if (!rodata) + goto out_free_btf; + key = 0; + if (bpf_map_lookup_elem(map_fd, &key, rodata)) { + free(rodata); + goto out_free_btf; + } + close(map_fd); + + map->btf = btf; + map->datasec = datasec; + map->rodata = rodata; + map->rodata_size = map_info.value_size; + map->num_vars = vars; + return 0; + +out_free_btf: + btf__free(btf); +out_close: + close(map_fd); + return -1; +} + +struct format_btf_ctx { + char *buf; + size_t buf_size; + size_t buf_idx; +}; + +static void format_btf_cb(void *arg, const char *fmt, va_list ap) +{ + int n; + struct format_btf_ctx *ctx = (struct format_btf_ctx *)arg; + + n = vsnprintf(ctx->buf + ctx->buf_idx, ctx->buf_size - ctx->buf_idx, + fmt, ap); + ctx->buf_idx += n; + if (ctx->buf_idx >= ctx->buf_size) + ctx->buf_idx = ctx->buf_size; +} + +static void format_btf_variable(struct btf *btf, char *buf, size_t buf_size, + const struct btf_type *t, const void *btf_data) +{ + struct format_btf_ctx ctx = { + .buf = buf, + .buf_idx = 0, + .buf_size = buf_size, + }; + const struct btf_dump_type_data_opts opts = { + .sz = sizeof(struct btf_dump_type_data_opts), + .skip_names = 1, + .compact = 1, + .emit_strings = 1, + }; + struct btf_dump *d; + size_t btf_size; + + d = btf_dump__new(btf, format_btf_cb, &ctx, NULL); + btf_size = btf__resolve_size(btf, t->type); + btf_dump__dump_type_data(d, t->type, btf_data, btf_size, &opts); + btf_dump__free(d); +} + +static void bpf_metadata_fill_event(struct bpf_metadata_map *map, + struct perf_record_bpf_metadata *bpf_metadata_event) +{ + struct btf_var_secinfo *vsi; + unsigned int i, vlen; + + memset(bpf_metadata_event->prog_name, 0, BPF_PROG_NAME_LEN); + vlen = btf_vlen(map->datasec); + vsi = btf_var_secinfos(map->datasec); + + for (i = 0; i < vlen; i++, vsi++) { + const struct btf_type *t_var = btf__type_by_id(map->btf, + vsi->type); + const char *name = btf__name_by_offset(map->btf, + t_var->name_off); + const __u64 nr_entries = bpf_metadata_event->nr_entries; + struct perf_record_bpf_metadata_entry *entry; + + if (!name_has_bpf_metadata_prefix(&name)) + continue; + 
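+		/*
+		 * name now points past the "bpf_metadata_" prefix; the
+		 * remainder becomes the entry key below, while the variable's
+		 * BTF-formatted value becomes the entry value.
+		 */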
+ if (nr_entries >= (__u64)map->num_vars) + break; + + entry = &bpf_metadata_event->entries[nr_entries]; + memset(entry, 0, sizeof(*entry)); + snprintf(entry->key, BPF_METADATA_KEY_LEN, "%s", name); + format_btf_variable(map->btf, entry->value, + BPF_METADATA_VALUE_LEN, t_var, + map->rodata + vsi->offset); + bpf_metadata_event->nr_entries++; + } +} + +static void bpf_metadata_free_map_data(struct bpf_metadata_map *map) +{ + btf__free(map->btf); + free(map->rodata); +} + +static struct bpf_metadata *bpf_metadata_alloc(__u32 nr_prog_tags, + __u32 nr_variables) +{ + struct bpf_metadata *metadata; + size_t event_size; + + metadata = zalloc(sizeof(struct bpf_metadata)); + if (!metadata) + return NULL; + + metadata->prog_names = zalloc(nr_prog_tags * sizeof(char *)); + if (!metadata->prog_names) { + bpf_metadata_free(metadata); + return NULL; + } + for (__u32 prog_index = 0; prog_index < nr_prog_tags; prog_index++) { + metadata->prog_names[prog_index] = zalloc(BPF_PROG_NAME_LEN); + if (!metadata->prog_names[prog_index]) { + bpf_metadata_free(metadata); + return NULL; + } + metadata->nr_prog_names++; + } + + event_size = sizeof(metadata->event->bpf_metadata) + + nr_variables * sizeof(metadata->event->bpf_metadata.entries[0]); + metadata->event = zalloc(event_size); + if (!metadata->event) { + bpf_metadata_free(metadata); + return NULL; + } + metadata->event->bpf_metadata = (struct perf_record_bpf_metadata) { + .header = { + .type = PERF_RECORD_BPF_METADATA, + .size = event_size, + }, + .nr_entries = 0, + }; + + return metadata; +} + +static struct bpf_metadata *bpf_metadata_create(struct bpf_prog_info *info) +{ + struct bpf_metadata *metadata; + const __u32 *map_ids = (__u32 *)(uintptr_t)info->map_ids; + + for (__u32 map_index = 0; map_index < info->nr_map_ids; map_index++) { + struct bpf_metadata_map map; + + if (bpf_metadata_read_map_data(map_ids[map_index], &map) != 0) + continue; + + metadata = bpf_metadata_alloc(info->nr_prog_tags, map.num_vars); + if (!metadata) + continue; + + bpf_metadata_fill_event(&map, &metadata->event->bpf_metadata); + + for (__u32 index = 0; index < info->nr_prog_tags; index++) { + synthesize_bpf_prog_name(metadata->prog_names[index], + BPF_PROG_NAME_LEN, info, + map.btf, index); + } + + bpf_metadata_free_map_data(&map); + + return metadata; + } + + return NULL; +} + +static int synthesize_perf_record_bpf_metadata(const struct bpf_metadata *metadata, + const struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + const size_t event_size = metadata->event->header.size; + union perf_event *event; + int err = 0; + + event = zalloc(event_size + machine->id_hdr_size); + if (!event) + return -1; + memcpy(event, metadata->event, event_size); + memset((void *)event + event->header.size, 0, machine->id_hdr_size); + event->header.size += machine->id_hdr_size; + for (__u32 index = 0; index < metadata->nr_prog_names; index++) { + memcpy(event->bpf_metadata.prog_name, + metadata->prog_names[index], BPF_PROG_NAME_LEN); + err = perf_tool__process_synth_event(tool, event, machine, + process); + if (err != 0) + break; + } + + free(event); + return err; +} + +void bpf_metadata_free(struct bpf_metadata *metadata) +{ + if (metadata == NULL) + return; + for (__u32 index = 0; index < metadata->nr_prog_names; index++) + free(metadata->prog_names[index]); + free(metadata->prog_names); + free(metadata->event); + free(metadata); +} + +#else /* HAVE_LIBBPF_STRINGS_SUPPORT */ + +static struct bpf_metadata *bpf_metadata_create(struct bpf_prog_info *info 
__maybe_unused) +{ + return NULL; +} + +static int synthesize_perf_record_bpf_metadata(const struct bpf_metadata *metadata __maybe_unused, + const struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +void bpf_metadata_free(struct bpf_metadata *metadata __maybe_unused) +{ +} + +#endif /* HAVE_LIBBPF_STRINGS_SUPPORT */ + +struct bpf_metadata_final_ctx { + const struct perf_tool *tool; + perf_event__handler_t process; + struct machine *machine; +}; + +static void synthesize_final_bpf_metadata_cb(struct bpf_prog_info_node *node, + void *data) +{ + struct bpf_metadata_final_ctx *ctx = (struct bpf_metadata_final_ctx *)data; + struct bpf_metadata *metadata = node->metadata; + int err; + + if (metadata == NULL) + return; + err = synthesize_perf_record_bpf_metadata(metadata, ctx->tool, + ctx->process, ctx->machine); + if (err != 0) { + const char *prog_name = metadata->prog_names[0]; + + if (prog_name != NULL) + pr_warning("Couldn't synthesize final BPF metadata for %s.\n", prog_name); + else + pr_warning("Couldn't synthesize final BPF metadata.\n"); + } + bpf_metadata_free(metadata); + node->metadata = NULL; +} + +void perf_event__synthesize_final_bpf_metadata(struct perf_session *session, + perf_event__handler_t process) +{ + struct perf_env *env = &session->header.env; + struct bpf_metadata_final_ctx ctx = { + .tool = session->tool, + .process = process, + .machine = &session->machines.host, + }; + + perf_env__iterate_bpf_prog_info(env, synthesize_final_bpf_metadata_cb, + &ctx); +} + /* * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf * program. One PERF_RECORD_BPF_EVENT is generated for the program. And @@ -170,9 +534,10 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, { struct perf_record_ksymbol *ksymbol_event = &event->ksymbol; struct perf_record_bpf_event *bpf_event = &event->bpf; - struct perf_tool *tool = session->tool; + const struct perf_tool *tool = session->tool; struct bpf_prog_info_node *info_node; struct perf_bpil *info_linear; + struct bpf_metadata *metadata; struct bpf_prog_info *info; struct btf *btf = NULL; struct perf_env *env; @@ -184,7 +549,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, * for perf-record and perf-report use header.env; * otherwise, use global perf_env. */ - env = session->data ? &session->header.env : &perf_env; + env = perf_session__env(session); arrays = 1UL << PERF_BPIL_JITED_KSYMS; arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; @@ -193,6 +558,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, arrays |= 1UL << PERF_BPIL_JITED_INSNS; arrays |= 1UL << PERF_BPIL_LINE_INFO; arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO; + arrays |= 1UL << PERF_BPIL_MAP_IDS; info_linear = get_bpf_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { @@ -289,7 +655,17 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, } info_node->info_linear = info_linear; - perf_env__insert_bpf_prog_info(env, info_node); + info_node->metadata = NULL; + if (!perf_env__insert_bpf_prog_info(env, info_node)) { + /* + * Insert failed, likely because of a duplicate event + * made by the sideband thread. Ignore synthesizing the + * metadata. + */ + free(info_node); + goto out; + } + /* info_linear is now owned by info_node and shouldn't be freed below. 
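+			 * Clearing the local pointer keeps the shared cleanup
+			 * path at 'out' from double-freeing it.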
*/ info_linear = NULL; /* @@ -298,6 +674,15 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, */ err = perf_tool__process_synth_event(tool, event, machine, process); + + /* Synthesize PERF_RECORD_BPF_METADATA */ + metadata = bpf_metadata_create(info); + if (metadata != NULL) { + err = synthesize_perf_record_bpf_metadata(metadata, + tool, process, + machine); + bpf_metadata_free(metadata); + } } out: @@ -310,7 +695,7 @@ struct kallsyms_parse { union perf_event *event; perf_event__handler_t process; struct machine *machine; - struct perf_tool *tool; + const struct perf_tool *tool; }; static int @@ -448,18 +833,18 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, return err; } -static void perf_env__add_bpf_info(struct perf_env *env, u32 id) +static int perf_env__add_bpf_info(struct perf_env *env, u32 id) { struct bpf_prog_info_node *info_node; struct perf_bpil *info_linear; struct btf *btf = NULL; u64 arrays; u32 btf_id; - int fd; + int fd, err = 0; fd = bpf_prog_get_fd_by_id(id); if (fd < 0) - return; + return -EINVAL; arrays = 1UL << PERF_BPIL_JITED_KSYMS; arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; @@ -468,10 +853,12 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) arrays |= 1UL << PERF_BPIL_JITED_INSNS; arrays |= 1UL << PERF_BPIL_LINE_INFO; arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO; + arrays |= 1UL << PERF_BPIL_MAP_IDS; info_linear = get_bpf_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { pr_debug("%s: failed to get BPF program info. aborting\n", __func__); + err = PTR_ERR(info_linear); goto out; } @@ -480,36 +867,48 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) info_node = malloc(sizeof(struct bpf_prog_info_node)); if (info_node) { info_node->info_linear = info_linear; - perf_env__insert_bpf_prog_info(env, info_node); - } else + info_node->metadata = bpf_metadata_create(&info_linear->info); + if (!perf_env__insert_bpf_prog_info(env, info_node)) { + pr_debug("%s: duplicate add bpf info request for id %u\n", + __func__, btf_id); + free(info_linear); + free(info_node); + goto out; + } + } else { free(info_linear); + err = -ENOMEM; + goto out; + } if (btf_id == 0) goto out; btf = btf__load_from_kernel_by_id(btf_id); - if (libbpf_get_error(btf)) { - pr_debug("%s: failed to get BTF of id %u, aborting\n", - __func__, btf_id); - goto out; + if (!btf) { + err = -errno; + pr_debug("%s: failed to get BTF of id %u %d\n", __func__, btf_id, err); + } else { + perf_env__fetch_btf(env, btf_id, btf); } - perf_env__fetch_btf(env, btf_id, btf); out: btf__free(btf); close(fd); + return err; } static int bpf_event__sb_cb(union perf_event *event, void *data) { struct perf_env *env = data; + int ret = 0; if (event->header.type != PERF_RECORD_BPF_EVENT) return -1; switch (event->bpf.type) { case PERF_BPF_EVENT_PROG_LOAD: - perf_env__add_bpf_info(env, event->bpf.id); + ret = perf_env__add_bpf_info(env, event->bpf.id); case PERF_BPF_EVENT_PROG_UNLOAD: /* @@ -523,7 +922,7 @@ static int bpf_event__sb_cb(union perf_event *event, void *data) break; } - return 0; + return ret; } int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env) diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index e2f0420905f5..60d2c6637af5 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -17,8 +17,15 @@ struct record_opts; struct evlist; struct target; +struct bpf_metadata { + union perf_event *event; + char **prog_names; + __u64 nr_prog_names; +}; + struct 
bpf_prog_info_node { struct perf_bpil *info_linear; + struct bpf_metadata *metadata; struct rb_node rb_node; }; @@ -36,6 +43,7 @@ int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env); void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, struct perf_env *env, FILE *fp); +void bpf_metadata_free(struct bpf_metadata *metadata); #else static inline int machine__process_bpf(struct machine *machine __maybe_unused, union perf_event *event __maybe_unused, @@ -56,5 +64,10 @@ static inline void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info _ { } + +static inline void bpf_metadata_free(struct bpf_metadata *metadata __maybe_unused) +{ + +} #endif // HAVE_LIBBPF_SUPPORT #endif diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c index b51544996046..1a2e7b388d57 100644 --- a/tools/perf/util/bpf-filter.c +++ b/tools/perf/util/bpf-filter.c @@ -1,12 +1,62 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/** + * Generic event filter for sampling events in BPF. + * + * The BPF program is fixed and just to read filter expressions in the 'filters' + * map and compare the sample data in order to reject samples that don't match. + * Each filter expression contains a sample flag (term) to compare, an operation + * (==, >=, and so on) and a value. + * + * Note that each entry has an array of filter expressions and it only succeeds + * when all of the expressions are satisfied. But it supports the logical OR + * using a GROUP operation which is satisfied when any of its member expression + * is evaluated to true. But it doesn't allow nested GROUP operations for now. + * + * To support non-root users, the filters map can be loaded and pinned in the BPF + * filesystem by root (perf record --setup-filter pin). Then each user will get + * a new entry in the shared filters map to fill the filter expressions. And the + * BPF program will find the filter using (task-id, event-id) as a key. + * + * The pinned BPF object (shared for regular users) has: + * + * event_hash | + * | | | + * event->id ---> | id | ---+ idx_hash | filters + * | | | | | | | | + * | .... | +-> | idx | --+--> | exprs | ---> perf_bpf_filter_entry[] + * | | | | | | .op + * task id (tgid) --------------+ | .... | | | ... | .term (+ part) + * | .value + * | + * ======= (root would skip this part) ======== (compares it in a loop) + * + * This is used for per-task use cases while system-wide profiling (normally from + * root user) uses a separate copy of the program and the maps for its own so that + * it can proceed even if a lot of non-root users are using the filters at the + * same time. In this case the filters map has a single entry and no need to use + * the hash maps to get the index (key) of the filters map (IOW it's always 0). + * + * The BPF program returns 1 to accept the sample or 0 to drop it. + * The 'dropped' map is to keep how many samples it dropped by the filter and + * it will be reported as lost samples. 
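+ *
+ * Example (illustrative): after root pins the shared objects with
+ *   'perf record --setup-filter pin',
+ * a regular user can drop low-period samples with something like
+ *   'perf record -e cycles --filter "period > 10000" -- ./workload',
+ * where ',' joins expressions as a logical AND and '||' as an OR.
+ *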
+ */ #include <stdlib.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/stat.h> #include <bpf/bpf.h> #include <linux/err.h> +#include <linux/list.h> +#include <api/fs/fs.h> #include <internal/xyarray.h> +#include <perf/threadmap.h> +#include "util/cap.h" #include "util/debug.h" #include "util/evsel.h" +#include "util/target.h" +#include "util/bpf-utils.h" #include "util/bpf-filter.h" #include <util/bpf-filter-flex.h> @@ -17,11 +67,21 @@ #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) -#define __PERF_SAMPLE_TYPE(st, opt) { st, #st, opt } -#define PERF_SAMPLE_TYPE(_st, opt) __PERF_SAMPLE_TYPE(PERF_SAMPLE_##_st, opt) +#define __PERF_SAMPLE_TYPE(tt, st, opt) { tt, #st, opt } +#define PERF_SAMPLE_TYPE(_st, opt) __PERF_SAMPLE_TYPE(PBF_TERM_##_st, PERF_SAMPLE_##_st, opt) + +/* Index in the pinned 'filters' map. Should be released after use. */ +struct pinned_filter_idx { + struct list_head list; + struct evsel *evsel; + u64 event_id; + int hash_idx; +}; + +static LIST_HEAD(pinned_filters); static const struct perf_sample_info { - u64 type; + enum perf_bpf_filter_term type; const char *name; const char *option; } sample_table[] = { @@ -42,14 +102,17 @@ static const struct perf_sample_info { PERF_SAMPLE_TYPE(TRANSACTION, "--transaction"), PERF_SAMPLE_TYPE(CODE_PAGE_SIZE, "--code-page-size"), PERF_SAMPLE_TYPE(DATA_PAGE_SIZE, "--data-page-size"), + PERF_SAMPLE_TYPE(CGROUP, "--all-cgroups"), }; -static const struct perf_sample_info *get_sample_info(u64 flags) +static int get_pinned_fd(const char *name); + +static const struct perf_sample_info *get_sample_info(enum perf_bpf_filter_term type) { size_t i; for (i = 0; i < ARRAY_SIZE(sample_table); i++) { - if (sample_table[i].type == flags) + if (sample_table[i].type == type) return &sample_table[i]; } return NULL; @@ -59,9 +122,15 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr * { const struct perf_sample_info *info; - if (evsel->core.attr.sample_type & expr->sample_flags) + if (expr->term >= PBF_TERM_SAMPLE_START && expr->term <= PBF_TERM_SAMPLE_END && + (evsel->core.attr.sample_type & (1 << (expr->term - PBF_TERM_SAMPLE_START)))) return 0; + if (expr->term == PBF_TERM_UID || expr->term == PBF_TERM_GID) { + /* Not dependent on the sample_type as computed from a BPF helper. 
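+		 * (uid and gid can be read at filter time, e.g. via the
+		 * bpf_get_current_uid_gid() helper, so no sample bit is
+		 * needed.)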
*/ + return 0; + } + if (expr->op == PBF_OP_GROUP_BEGIN) { struct perf_bpf_filter_expr *group; @@ -72,10 +141,10 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr * return 0; } - info = get_sample_info(expr->sample_flags); + info = get_sample_info(expr->term); if (info == NULL) { - pr_err("Error: %s event does not have sample flags %lx\n", - evsel__name(evsel), expr->sample_flags); + pr_err("Error: %s event does not have sample flags %d\n", + evsel__name(evsel), expr->term); return -1; } @@ -85,95 +154,463 @@ static int check_sample_flags(struct evsel *evsel, struct perf_bpf_filter_expr * return -1; } -int perf_bpf_filter__prepare(struct evsel *evsel) +static int get_filter_entries(struct evsel *evsel, struct perf_bpf_filter_entry *entry) { - int i, x, y, fd; - struct sample_filter_bpf *skel; - struct bpf_program *prog; - struct bpf_link *link; + int i = 0; struct perf_bpf_filter_expr *expr; - skel = sample_filter_bpf__open_and_load(); - if (!skel) { - pr_err("Failed to load perf sample-filter BPF skeleton\n"); - return -1; - } - - i = 0; - fd = bpf_map__fd(skel->maps.filters); list_for_each_entry(expr, &evsel->bpf_filters, list) { - struct perf_bpf_filter_entry entry = { - .op = expr->op, - .part = expr->part, - .flags = expr->sample_flags, - .value = expr->val, - }; - if (check_sample_flags(evsel, expr) < 0) - return -1; + return -EINVAL; + + if (i == MAX_FILTERS) + return -E2BIG; - bpf_map_update_elem(fd, &i, &entry, BPF_ANY); + entry[i].op = expr->op; + entry[i].part = expr->part; + entry[i].term = expr->term; + entry[i].value = expr->val; i++; if (expr->op == PBF_OP_GROUP_BEGIN) { struct perf_bpf_filter_expr *group; list_for_each_entry(group, &expr->groups, list) { - struct perf_bpf_filter_entry group_entry = { - .op = group->op, - .part = group->part, - .flags = group->sample_flags, - .value = group->val, - }; - bpf_map_update_elem(fd, &i, &group_entry, BPF_ANY); + if (i == MAX_FILTERS) + return -E2BIG; + + entry[i].op = group->op; + entry[i].part = group->part; + entry[i].term = group->term; + entry[i].value = group->val; i++; } - memset(&entry, 0, sizeof(entry)); - entry.op = PBF_OP_GROUP_END; - bpf_map_update_elem(fd, &i, &entry, BPF_ANY); + if (i == MAX_FILTERS) + return -E2BIG; + + entry[i].op = PBF_OP_GROUP_END; i++; } } - if (i > MAX_FILTERS) { - pr_err("Too many filters: %d (max = %d)\n", i, MAX_FILTERS); + if (i < MAX_FILTERS) { + /* to terminate the loop early */ + entry[i].op = PBF_OP_DONE; + i++; + } + return 0; +} + +static int convert_to_tgid(int tid) +{ + char path[128]; + char *buf, *p, *q; + int tgid; + size_t len; + + scnprintf(path, sizeof(path), "%d/status", tid); + if (procfs__read_str(path, &buf, &len) < 0) return -1; + + p = strstr(buf, "Tgid:"); + if (p == NULL) { + free(buf); + return -1; + } + + tgid = strtol(p + 6, &q, 0); + free(buf); + if (*q != '\n') + return -1; + + return tgid; +} + +/* + * The event might be closed already so we cannot get the list of ids using FD + * like in create_event_hash() below, let's iterate the event_hash map and + * delete all entries that have the event id as a key. + */ +static void destroy_event_hash(u64 event_id) +{ + int fd; + u64 key, *prev_key = NULL; + int num = 0, alloced = 32; + u64 *ids = calloc(alloced, sizeof(*ids)); + + if (ids == NULL) + return; + + fd = get_pinned_fd("event_hash"); + if (fd < 0) { + pr_debug("cannot get fd for 'event_hash' map\n"); + free(ids); + return; + } + + /* Iterate the whole map to collect keys for the event id. 
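+ * Deletion is deferred until after the walk: deleting the current key
+ * would make the next bpf_map_get_next_key() call fail to find it and
+ * restart the iteration from the beginning of the map.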
*/ + while (!bpf_map_get_next_key(fd, prev_key, &key)) { + u64 id; + + if (bpf_map_lookup_elem(fd, &key, &id) == 0 && id == event_id) { + if (num == alloced) { + void *tmp; + + alloced *= 2; + tmp = realloc(ids, alloced * sizeof(*ids)); + if (tmp == NULL) + break; + + ids = tmp; + } + ids[num++] = key; + } + + prev_key = &key; } + + for (int i = 0; i < num; i++) + bpf_map_delete_elem(fd, &ids[i]); + + free(ids); + close(fd); +} + +/* + * Return a representative id if ok, or 0 for failures. + * + * The perf_event->id is good for this, but an evsel would have multiple + * instances for CPUs and tasks. So pick up the first id and setup a hash + * from id of each instance to the representative id (the first one). + */ +static u64 create_event_hash(struct evsel *evsel) +{ + int x, y, fd; + u64 the_id = 0, id; + + fd = get_pinned_fd("event_hash"); + if (fd < 0) { + pr_err("cannot get fd for 'event_hash' map\n"); + return 0; + } + + for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) { + for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) { + int ret = ioctl(FD(evsel, x, y), PERF_EVENT_IOC_ID, &id); + + if (ret < 0) { + pr_err("Failed to get the event id\n"); + if (the_id) + destroy_event_hash(the_id); + return 0; + } + + if (the_id == 0) + the_id = id; + + bpf_map_update_elem(fd, &id, &the_id, BPF_ANY); + } + } + + close(fd); + return the_id; +} + +static void destroy_idx_hash(struct pinned_filter_idx *pfi) +{ + int fd, nr; + struct perf_thread_map *threads; + + fd = get_pinned_fd("filters"); + bpf_map_delete_elem(fd, &pfi->hash_idx); + close(fd); + + if (pfi->event_id) + destroy_event_hash(pfi->event_id); + + threads = perf_evsel__threads(&pfi->evsel->core); + if (threads == NULL) + return; + + fd = get_pinned_fd("idx_hash"); + nr = perf_thread_map__nr(threads); + for (int i = 0; i < nr; i++) { + /* The target task might be dead already, just try the pid */ + struct idx_hash_key key = { + .evt_id = pfi->event_id, + .tgid = perf_thread_map__pid(threads, i), + }; + + bpf_map_delete_elem(fd, &key); + } + close(fd); +} + +/* Maintain a hashmap from (tgid, event-id) to filter index */ +static int create_idx_hash(struct evsel *evsel, struct perf_bpf_filter_entry *entry) +{ + int filter_idx; + int fd, nr, last; + u64 event_id = 0; + struct pinned_filter_idx *pfi = NULL; + struct perf_thread_map *threads; + + fd = get_pinned_fd("filters"); + if (fd < 0) { + pr_err("cannot get fd for 'filters' map\n"); + return fd; + } + + /* Find the first available entry in the filters map */ + for (filter_idx = 0; filter_idx < MAX_FILTERS; filter_idx++) { + if (bpf_map_update_elem(fd, &filter_idx, entry, BPF_NOEXIST) == 0) + break; + } + close(fd); + + if (filter_idx == MAX_FILTERS) { + pr_err("Too many users for the filter map\n"); + return -EBUSY; + } + + pfi = zalloc(sizeof(*pfi)); + if (pfi == NULL) { + pr_err("Cannot save pinned filter index\n"); + return -ENOMEM; + } + + pfi->evsel = evsel; + pfi->hash_idx = filter_idx; + + event_id = create_event_hash(evsel); + if (event_id == 0) { + pr_err("Cannot update the event hash\n"); + goto err; + } + + pfi->event_id = event_id; + + threads = perf_evsel__threads(&evsel->core); + if (threads == NULL) { + pr_err("Cannot get the thread list of the event\n"); + goto err; + } + + /* save the index to a hash map */ + fd = get_pinned_fd("idx_hash"); + if (fd < 0) { + pr_err("cannot get fd for 'idx_hash' map\n"); + goto err; + } + + last = -1; + nr = perf_thread_map__nr(threads); + for (int i = 0; i < nr; i++) { + int pid = perf_thread_map__pid(threads, i); + int tgid; + 
struct idx_hash_key key = { + .evt_id = event_id, + }; + + /* it actually needs tgid, let's get tgid from /proc. */ + tgid = convert_to_tgid(pid); + if (tgid < 0) { + /* the thread may be dead, ignore. */ + continue; + } + + if (tgid == last) + continue; + last = tgid; + key.tgid = tgid; + + if (bpf_map_update_elem(fd, &key, &filter_idx, BPF_ANY) < 0) { + pr_err("Failed to update the idx_hash\n"); + close(fd); + goto err; + } + pr_debug("bpf-filter: idx_hash (task=%d,%s) -> %d\n", + tgid, evsel__name(evsel), filter_idx); + } + + list_add(&pfi->list, &pinned_filters); + close(fd); + return filter_idx; + +err: + destroy_idx_hash(pfi); + free(pfi); + return -1; +} + +int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target) +{ + int i, x, y, fd, ret; + struct sample_filter_bpf *skel = NULL; + struct bpf_program *prog; + struct bpf_link *link; + struct perf_bpf_filter_entry *entry; + bool needs_idx_hash = !target__has_cpu(target); +#if LIBBPF_CURRENT_VERSION_GEQ(1, 7) + DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts, + .dont_enable = true); +#else + DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); +#endif + + entry = calloc(MAX_FILTERS, sizeof(*entry)); + if (entry == NULL) + return -1; + + ret = get_filter_entries(evsel, entry); + if (ret < 0) { + pr_err("Failed to process filter entries\n"); + goto err; + } + + if (needs_idx_hash && geteuid() != 0) { + int zero = 0; + + /* The filters map is shared among other processes */ + ret = create_idx_hash(evsel, entry); + if (ret < 0) + goto err; + + fd = get_pinned_fd("dropped"); + if (fd < 0) { + ret = fd; + goto err; + } + + /* Reset the lost count */ + bpf_map_update_elem(fd, &ret, &zero, BPF_ANY); + close(fd); + + fd = get_pinned_fd("perf_sample_filter"); + if (fd < 0) { + ret = fd; + goto err; + } + + for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) { + for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) { + ret = ioctl(FD(evsel, x, y), PERF_EVENT_IOC_SET_BPF, fd); + if (ret < 0) { + pr_err("Failed to attach perf sample-filter\n"); + close(fd); + goto err; + } + } + } + + close(fd); + free(entry); + return 0; + } + + skel = sample_filter_bpf__open_and_load(); + if (!skel) { + ret = -errno; + pr_err("Failed to load perf sample-filter BPF skeleton\n"); + goto err; + } + + i = 0; + fd = bpf_map__fd(skel->maps.filters); + + /* The filters map has only one entry in this case */ + if (bpf_map_update_elem(fd, &i, entry, BPF_ANY) < 0) { + ret = -errno; + pr_err("Failed to update the filter map\n"); + goto err; + } + prog = skel->progs.perf_sample_filter; for (x = 0; x < xyarray__max_x(evsel->core.fd); x++) { for (y = 0; y < xyarray__max_y(evsel->core.fd); y++) { - link = bpf_program__attach_perf_event(prog, FD(evsel, x, y)); + link = bpf_program__attach_perf_event_opts(prog, FD(evsel, x, y), + &pe_opts); if (IS_ERR(link)) { pr_err("Failed to attach perf sample-filter program\n"); - return PTR_ERR(link); + ret = PTR_ERR(link); + goto err; } } } + free(entry); evsel->bpf_skel = skel; return 0; + +err: + free(entry); + if (!list_empty(&pinned_filters)) { + struct pinned_filter_idx *pfi, *tmp; + + list_for_each_entry_safe(pfi, tmp, &pinned_filters, list) { + destroy_idx_hash(pfi); + list_del(&pfi->list); + free(pfi); + } + } + sample_filter_bpf__destroy(skel); + return ret; } int perf_bpf_filter__destroy(struct evsel *evsel) { struct perf_bpf_filter_expr *expr, *tmp; + struct pinned_filter_idx *pfi, *pos; list_for_each_entry_safe(expr, tmp, &evsel->bpf_filters, list) { list_del(&expr->list); free(expr); } 
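+	/*
+	 * bpf_skel is NULL when the pinned-object path was taken in
+	 * perf_bpf_filter__prepare(); the generated destroy helper
+	 * tolerates a NULL argument.
+	 */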
sample_filter_bpf__destroy(evsel->bpf_skel); + + list_for_each_entry_safe(pfi, pos, &pinned_filters, list) { + destroy_idx_hash(pfi); + list_del(&pfi->list); + free(pfi); + } return 0; } u64 perf_bpf_filter__lost_count(struct evsel *evsel) { - struct sample_filter_bpf *skel = evsel->bpf_skel; + int count = 0; + + if (list_empty(&evsel->bpf_filters)) + return 0; + + if (!list_empty(&pinned_filters)) { + int fd = get_pinned_fd("dropped"); + struct pinned_filter_idx *pfi; + + if (fd < 0) + return 0; + + list_for_each_entry(pfi, &pinned_filters, list) { + if (pfi->evsel != evsel) + continue; - return skel ? skel->bss->dropped : 0; + bpf_map_lookup_elem(fd, &pfi->hash_idx, &count); + break; + } + close(fd); + } else if (evsel->bpf_skel) { + struct sample_filter_bpf *skel = evsel->bpf_skel; + int fd = bpf_map__fd(skel->maps.dropped); + int idx = 0; + + bpf_map_lookup_elem(fd, &idx, &count); + } + + return count; } -struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flags, int part, +struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term term, + int part, enum perf_bpf_filter_op op, unsigned long val) { @@ -181,7 +618,7 @@ struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flag expr = malloc(sizeof(*expr)); if (expr != NULL) { - expr->sample_flags = sample_flags; + expr->term = term; expr->part = part; expr->op = op; expr->val = val; @@ -190,11 +627,38 @@ struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flag return expr; } +static bool check_bpf_filter_capable(void) +{ + bool used_root; + + if (perf_cap__capable(CAP_BPF, &used_root)) + return true; + + if (!used_root) { + /* Check if root already pinned the filter programs and maps */ + int fd = get_pinned_fd("filters"); + + if (fd >= 0) { + close(fd); + return true; + } + } + + pr_err("Error: BPF filter only works for %s!\n" + "\tPlease run 'perf record --setup-filter pin' as root first.\n", + used_root ? 
"root" : "users with the CAP_BPF capability"); + + return false; +} + int perf_bpf_filter__parse(struct list_head *expr_head, const char *str) { YY_BUFFER_STATE buffer; int ret; + if (!check_bpf_filter_capable()) + return -EPERM; + buffer = perf_bpf_filter__scan_string(str); ret = perf_bpf_filter_parse(expr_head); @@ -205,3 +669,139 @@ int perf_bpf_filter__parse(struct list_head *expr_head, const char *str) return ret; } + +int perf_bpf_filter__pin(void) +{ + struct sample_filter_bpf *skel; + char *path = NULL; + int dir_fd, ret = -1; + + skel = sample_filter_bpf__open(); + if (!skel) { + ret = -errno; + pr_err("Failed to open perf sample-filter BPF skeleton\n"); + goto err; + } + + /* pinned program will use pid-hash */ + bpf_map__set_max_entries(skel->maps.filters, MAX_FILTERS); + bpf_map__set_max_entries(skel->maps.event_hash, MAX_EVT_HASH); + bpf_map__set_max_entries(skel->maps.idx_hash, MAX_IDX_HASH); + bpf_map__set_max_entries(skel->maps.dropped, MAX_FILTERS); + skel->rodata->use_idx_hash = 1; + + if (sample_filter_bpf__load(skel) < 0) { + ret = -errno; + pr_err("Failed to load perf sample-filter BPF skeleton\n"); + goto err; + } + + if (asprintf(&path, "%s/fs/bpf/%s", sysfs__mountpoint(), + PERF_BPF_FILTER_PIN_PATH) < 0) { + ret = -errno; + pr_err("Failed to allocate pathname in the BPF-fs\n"); + goto err; + } + + ret = bpf_object__pin(skel->obj, path); + if (ret < 0) { + pr_err("Failed to pin BPF filter objects\n"); + goto err; + } + + /* setup access permissions for the pinned objects */ + dir_fd = open(path, O_PATH); + if (dir_fd < 0) { + bpf_object__unpin(skel->obj, path); + ret = dir_fd; + goto err; + } + + /* BPF-fs root has the sticky bit */ + if (fchmodat(dir_fd, "..", 01755, 0) < 0) { + pr_debug("chmod for BPF-fs failed\n"); + ret = -errno; + goto err_close; + } + + /* perf_filter directory */ + if (fchmodat(dir_fd, ".", 0755, 0) < 0) { + pr_debug("chmod for perf_filter directory failed?\n"); + ret = -errno; + goto err_close; + } + + /* programs need write permission for some reason */ + if (fchmodat(dir_fd, "perf_sample_filter", 0777, 0) < 0) { + pr_debug("chmod for perf_sample_filter failed\n"); + ret = -errno; + } + /* maps */ + if (fchmodat(dir_fd, "filters", 0666, 0) < 0) { + pr_debug("chmod for filters failed\n"); + ret = -errno; + } + if (fchmodat(dir_fd, "event_hash", 0666, 0) < 0) { + pr_debug("chmod for event_hash failed\n"); + ret = -errno; + } + if (fchmodat(dir_fd, "idx_hash", 0666, 0) < 0) { + pr_debug("chmod for idx_hash failed\n"); + ret = -errno; + } + if (fchmodat(dir_fd, "dropped", 0666, 0) < 0) { + pr_debug("chmod for dropped failed\n"); + ret = -errno; + } + +err_close: + close(dir_fd); + +err: + free(path); + sample_filter_bpf__destroy(skel); + return ret; +} + +int perf_bpf_filter__unpin(void) +{ + struct sample_filter_bpf *skel; + char *path = NULL; + int ret = -1; + + skel = sample_filter_bpf__open_and_load(); + if (!skel) { + ret = -errno; + pr_err("Failed to open perf sample-filter BPF skeleton\n"); + goto err; + } + + if (asprintf(&path, "%s/fs/bpf/%s", sysfs__mountpoint(), + PERF_BPF_FILTER_PIN_PATH) < 0) { + ret = -errno; + pr_err("Failed to allocate pathname in the BPF-fs\n"); + goto err; + } + + ret = bpf_object__unpin(skel->obj, path); + +err: + free(path); + sample_filter_bpf__destroy(skel); + return ret; +} + +static int get_pinned_fd(const char *name) +{ + char *path = NULL; + int fd; + + if (asprintf(&path, "%s/fs/bpf/%s/%s", sysfs__mountpoint(), + PERF_BPF_FILTER_PIN_PATH, name) < 0) + return -1; + + fd = bpf_obj_get(path); + + 
free(path); + return fd; +} diff --git a/tools/perf/util/bpf-filter.h b/tools/perf/util/bpf-filter.h index 7afd159411b8..818c554b91b2 100644 --- a/tools/perf/util/bpf-filter.h +++ b/tools/perf/util/bpf-filter.h @@ -5,35 +5,48 @@ #include <linux/list.h> #include "bpf_skel/sample-filter.h" +#include "util/debug.h" struct perf_bpf_filter_expr { struct list_head list; struct list_head groups; enum perf_bpf_filter_op op; int part; - unsigned long sample_flags; + enum perf_bpf_filter_term term; unsigned long val; }; struct evsel; +struct target; + +/* path in BPF-fs for the pinned program and maps */ +#define PERF_BPF_FILTER_PIN_PATH "perf_filter" #ifdef HAVE_BPF_SKEL -struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(unsigned long sample_flags, int part, +struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term term, + int part, enum perf_bpf_filter_op op, unsigned long val); int perf_bpf_filter__parse(struct list_head *expr_head, const char *str); -int perf_bpf_filter__prepare(struct evsel *evsel); +int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target); int perf_bpf_filter__destroy(struct evsel *evsel); u64 perf_bpf_filter__lost_count(struct evsel *evsel); +int perf_bpf_filter__pin(void); +int perf_bpf_filter__unpin(void); #else /* !HAVE_BPF_SKEL */ +#include <errno.h> + static inline int perf_bpf_filter__parse(struct list_head *expr_head __maybe_unused, const char *str __maybe_unused) { + pr_err("Error: BPF filter is requested but perf is not built with BPF.\n" + "\tPlease make sure to build with libbpf and BPF skeleton.\n"); return -EOPNOTSUPP; } -static inline int perf_bpf_filter__prepare(struct evsel *evsel __maybe_unused) +static inline int perf_bpf_filter__prepare(struct evsel *evsel __maybe_unused, + struct target *target __maybe_unused) { return -EOPNOTSUPP; } @@ -45,5 +58,13 @@ static inline u64 perf_bpf_filter__lost_count(struct evsel *evsel __maybe_unused { return 0; } +static inline int perf_bpf_filter__pin(void) +{ + return -EOPNOTSUPP; +} +static inline int perf_bpf_filter__unpin(void) +{ + return -EOPNOTSUPP; +} #endif /* HAVE_BPF_SKEL*/ #endif /* PERF_UTIL_BPF_FILTER_H */ diff --git a/tools/perf/util/bpf-filter.l b/tools/perf/util/bpf-filter.l index d4ff0f1345cd..6aa65ade3385 100644 --- a/tools/perf/util/bpf-filter.l +++ b/tools/perf/util/bpf-filter.l @@ -9,20 +9,32 @@ #include "bpf-filter.h" #include "bpf-filter-bison.h" -static int sample(unsigned long sample_flag) +extern int perf_bpf_filter_needs_path; + +static int sample(enum perf_bpf_filter_term term) { - perf_bpf_filter_lval.sample.type = sample_flag; + perf_bpf_filter_needs_path = 0; + perf_bpf_filter_lval.sample.term = term; perf_bpf_filter_lval.sample.part = 0; return BFT_SAMPLE; } -static int sample_part(unsigned long sample_flag, int part) +static int sample_part(enum perf_bpf_filter_term term, int part) { - perf_bpf_filter_lval.sample.type = sample_flag; + perf_bpf_filter_needs_path = 0; + perf_bpf_filter_lval.sample.term = term; perf_bpf_filter_lval.sample.part = part; return BFT_SAMPLE; } +static int sample_path(enum perf_bpf_filter_term term) +{ + perf_bpf_filter_needs_path = 1; + perf_bpf_filter_lval.sample.term = term; + perf_bpf_filter_lval.sample.part = 0; + return BFT_SAMPLE_PATH; +} + static int operator(enum perf_bpf_filter_op op) { perf_bpf_filter_lval.op = op; @@ -48,10 +60,15 @@ static int constant(int val) return BFT_NUM; } -static int error(const char *str) +static int path_or_error(void) { - printf("perf_bpf_filter: Unexpected filter %s: %s\n", str, 
perf_bpf_filter_text); - return BFT_ERROR; + if (!perf_bpf_filter_needs_path) { + printf("perf_bpf_filter: Error: Unexpected item: %s\n", + perf_bpf_filter_text); + return BFT_ERROR; + } + perf_bpf_filter_lval.path = perf_bpf_filter_text; + return BFT_PATH; } %} @@ -59,6 +76,7 @@ static int error(const char *str) num_dec [0-9]+ num_hex 0[Xx][0-9a-fA-F]+ space [ \t]+ +path [^ \t\n,]+ ident [_a-zA-Z][_a-zA-Z0-9]+ %% @@ -67,34 +85,37 @@ ident [_a-zA-Z][_a-zA-Z0-9]+ {num_hex} { return value(16); } {space} { } -ip { return sample(PERF_SAMPLE_IP); } -id { return sample(PERF_SAMPLE_ID); } -tid { return sample(PERF_SAMPLE_TID); } -pid { return sample_part(PERF_SAMPLE_TID, 1); } -cpu { return sample(PERF_SAMPLE_CPU); } -time { return sample(PERF_SAMPLE_TIME); } -addr { return sample(PERF_SAMPLE_ADDR); } -period { return sample(PERF_SAMPLE_PERIOD); } -txn { return sample(PERF_SAMPLE_TRANSACTION); } -weight { return sample(PERF_SAMPLE_WEIGHT); } -weight1 { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 1); } -weight2 { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 2); } -weight3 { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 3); } -ins_lat { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 2); } /* alias for weight2 */ -p_stage_cyc { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 3); } /* alias for weight3 */ -retire_lat { return sample_part(PERF_SAMPLE_WEIGHT_STRUCT, 3); } /* alias for weight3 */ -phys_addr { return sample(PERF_SAMPLE_PHYS_ADDR); } -code_pgsz { return sample(PERF_SAMPLE_CODE_PAGE_SIZE); } -data_pgsz { return sample(PERF_SAMPLE_DATA_PAGE_SIZE); } -mem_op { return sample_part(PERF_SAMPLE_DATA_SRC, 1); } -mem_lvlnum { return sample_part(PERF_SAMPLE_DATA_SRC, 2); } -mem_lvl { return sample_part(PERF_SAMPLE_DATA_SRC, 2); } /* alias for mem_lvlnum */ -mem_snoop { return sample_part(PERF_SAMPLE_DATA_SRC, 3); } /* include snoopx */ -mem_remote { return sample_part(PERF_SAMPLE_DATA_SRC, 4); } -mem_lock { return sample_part(PERF_SAMPLE_DATA_SRC, 5); } -mem_dtlb { return sample_part(PERF_SAMPLE_DATA_SRC, 6); } -mem_blk { return sample_part(PERF_SAMPLE_DATA_SRC, 7); } -mem_hops { return sample_part(PERF_SAMPLE_DATA_SRC, 8); } +ip { return sample(PBF_TERM_IP); } +id { return sample(PBF_TERM_ID); } +tid { return sample(PBF_TERM_TID); } +pid { return sample_part(PBF_TERM_TID, 1); } +cpu { return sample(PBF_TERM_CPU); } +time { return sample(PBF_TERM_TIME); } +addr { return sample(PBF_TERM_ADDR); } +period { return sample(PBF_TERM_PERIOD); } +txn { return sample(PBF_TERM_TRANSACTION); } +weight { return sample(PBF_TERM_WEIGHT); } +weight1 { return sample_part(PBF_TERM_WEIGHT_STRUCT, 1); } +weight2 { return sample_part(PBF_TERM_WEIGHT_STRUCT, 2); } +weight3 { return sample_part(PBF_TERM_WEIGHT_STRUCT, 3); } +ins_lat { return sample_part(PBF_TERM_WEIGHT_STRUCT, 2); } /* alias for weight2 */ +p_stage_cyc { return sample_part(PBF_TERM_WEIGHT_STRUCT, 3); } /* alias for weight3 */ +retire_lat { return sample_part(PBF_TERM_WEIGHT_STRUCT, 3); } /* alias for weight3 */ +phys_addr { return sample(PBF_TERM_PHYS_ADDR); } +code_pgsz { return sample(PBF_TERM_CODE_PAGE_SIZE); } +data_pgsz { return sample(PBF_TERM_DATA_PAGE_SIZE); } +mem_op { return sample_part(PBF_TERM_DATA_SRC, 1); } +mem_lvlnum { return sample_part(PBF_TERM_DATA_SRC, 2); } +mem_lvl { return sample_part(PBF_TERM_DATA_SRC, 2); } /* alias for mem_lvlnum */ +mem_snoop { return sample_part(PBF_TERM_DATA_SRC, 3); } /* include snoopx */ +mem_remote { return sample_part(PBF_TERM_DATA_SRC, 4); } +mem_lock { return sample_part(PBF_TERM_DATA_SRC, 5); } 
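+ /* The mem_* parts select subfields of perf_mem_data_src; an
+    illustrative filter would be: 'mem_op == load, mem_lvlnum >= 2'. */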
+mem_dtlb { return sample_part(PBF_TERM_DATA_SRC, 6); } +mem_blk { return sample_part(PBF_TERM_DATA_SRC, 7); } +mem_hops { return sample_part(PBF_TERM_DATA_SRC, 8); } +uid { return sample(PBF_TERM_UID); } +gid { return sample(PBF_TERM_GID); } +cgroup { return sample_path(PBF_TERM_CGROUP); } "==" { return operator(PBF_OP_EQ); } "!=" { return operator(PBF_OP_NEQ); } @@ -153,7 +174,6 @@ hops3 { return constant(PERF_MEM_HOPS_3); } "," { return ','; } "||" { return BFT_LOGICAL_OR; } -{ident} { return error("ident"); } -. { return error("input"); } +{path} { return path_or_error(); } %% diff --git a/tools/perf/util/bpf-filter.y b/tools/perf/util/bpf-filter.y index 0e4d6de3c2ad..5a79a8e7a45b 100644 --- a/tools/perf/util/bpf-filter.y +++ b/tools/perf/util/bpf-filter.y @@ -12,9 +12,13 @@ #include <linux/compiler.h> #include <linux/list.h> #include "bpf-filter.h" +#include "cgroup.h" int perf_bpf_filter_lex(void); +/* To indicate if the current term needs a pathname or not */ +int perf_bpf_filter_needs_path; + static void perf_bpf_filter_error(struct list_head *expr __maybe_unused, char const *msg) { @@ -26,20 +30,22 @@ static void perf_bpf_filter_error(struct list_head *expr __maybe_unused, %union { unsigned long num; + char *path; struct { - unsigned long type; + enum perf_bpf_filter_term term; int part; } sample; enum perf_bpf_filter_op op; struct perf_bpf_filter_expr *expr; } -%token BFT_SAMPLE BFT_OP BFT_ERROR BFT_NUM BFT_LOGICAL_OR +%token BFT_SAMPLE BFT_SAMPLE_PATH BFT_OP BFT_ERROR BFT_NUM BFT_LOGICAL_OR BFT_PATH %type <expr> filter_term filter_expr %destructor { free ($$); } <expr> -%type <sample> BFT_SAMPLE +%type <sample> BFT_SAMPLE BFT_SAMPLE_PATH %type <op> BFT_OP %type <num> BFT_NUM +%type <path> BFT_PATH %% @@ -62,7 +68,8 @@ filter_term BFT_LOGICAL_OR filter_expr if ($1->op == PBF_OP_GROUP_BEGIN) { expr = $1; } else { - expr = perf_bpf_filter_expr__new(0, 0, PBF_OP_GROUP_BEGIN, 1); + expr = perf_bpf_filter_expr__new(PBF_TERM_NONE, /*part=*/0, + PBF_OP_GROUP_BEGIN, /*val=*/1); list_add_tail(&$1->list, &expr->groups); } expr->val++; @@ -78,7 +85,25 @@ filter_expr filter_expr: BFT_SAMPLE BFT_OP BFT_NUM { - $$ = perf_bpf_filter_expr__new($1.type, $1.part, $2, $3); + $$ = perf_bpf_filter_expr__new($1.term, $1.part, $2, $3); +} +| +BFT_SAMPLE_PATH BFT_OP BFT_PATH +{ + struct cgroup *cgrp; + unsigned long cgroup_id = 0; + + if ($2 != PBF_OP_EQ && $2 != PBF_OP_NEQ) { + printf("perf_bpf_filter: cgroup accepts '==' or '!=' only\n"); + YYERROR; + } + + cgrp = cgroup__new($3, /*do_open=*/false); + if (cgrp && read_cgroup_id(cgrp) == 0) + cgroup_id = cgrp->id; + + $$ = perf_bpf_filter_expr__new($1.term, $1.part, $2, cgroup_id); + cgroup__put(cgrp); } %% diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h deleted file mode 100644 index 66dcf751ef65..000000000000 --- a/tools/perf/util/bpf-prologue.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2015, He Kuang <hekuang@huawei.com> - * Copyright (C) 2015, Huawei Inc. 
- */ -#ifndef __BPF_PROLOGUE_H -#define __BPF_PROLOGUE_H - -struct probe_trace_arg; -struct bpf_insn; - -#define BPF_PROLOGUE_MAX_ARGS 3 -#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3 -#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2 - -#ifdef HAVE_BPF_PROLOGUE -int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, - struct bpf_insn *new_prog, size_t *new_cnt, - size_t cnt_space); -#else -#include <linux/compiler.h> -#include <errno.h> - -static inline int -bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused, - int nargs __maybe_unused, - struct bpf_insn *new_prog __maybe_unused, - size_t *new_cnt, - size_t cnt_space __maybe_unused) -{ - if (!new_cnt) - return -EINVAL; - *new_cnt = 0; - return -ENOTSUP; -} -#endif -#endif /* __BPF_PROLOGUE_H */ diff --git a/tools/perf/util/bpf-trace-summary.c b/tools/perf/util/bpf-trace-summary.c new file mode 100644 index 000000000000..cf6e1e4402d5 --- /dev/null +++ b/tools/perf/util/bpf-trace-summary.c @@ -0,0 +1,465 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <errno.h> +#include <inttypes.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> + +#include "dwarf-regs.h" /* for EM_HOST */ +#include "syscalltbl.h" +#include "util/cgroup.h" +#include "util/hashmap.h" +#include "util/trace.h" +#include "util/util.h" +#include <bpf/bpf.h> +#include <linux/rbtree.h> +#include <linux/time64.h> +#include <tools/libc_compat.h> /* reallocarray */ + +#include "bpf_skel/syscall_summary.h" +#include "bpf_skel/syscall_summary.skel.h" + + +static struct syscall_summary_bpf *skel; +static struct rb_root cgroups = RB_ROOT; + +int trace_prepare_bpf_summary(enum trace_summary_mode mode) +{ + skel = syscall_summary_bpf__open(); + if (skel == NULL) { + fprintf(stderr, "failed to open syscall summary bpf skeleton\n"); + return -1; + } + + if (mode == SUMMARY__BY_THREAD) + skel->rodata->aggr_mode = SYSCALL_AGGR_THREAD; + else if (mode == SUMMARY__BY_CGROUP) + skel->rodata->aggr_mode = SYSCALL_AGGR_CGROUP; + else + skel->rodata->aggr_mode = SYSCALL_AGGR_CPU; + + if (cgroup_is_v2("perf_event") > 0) + skel->rodata->use_cgroup_v2 = 1; + + if (syscall_summary_bpf__load(skel) < 0) { + fprintf(stderr, "failed to load syscall summary bpf skeleton\n"); + return -1; + } + + if (syscall_summary_bpf__attach(skel) < 0) { + fprintf(stderr, "failed to attach syscall summary bpf skeleton\n"); + return -1; + } + + if (mode == SUMMARY__BY_CGROUP) + read_all_cgroups(&cgroups); + + return 0; +} + +void trace_start_bpf_summary(void) +{ + skel->bss->enabled = 1; +} + +void trace_end_bpf_summary(void) +{ + skel->bss->enabled = 0; +} + +struct syscall_node { + int syscall_nr; + struct syscall_stats stats; +}; + +static double rel_stddev(struct syscall_stats *stat) +{ + double variance, average; + + if (stat->count < 2) + return 0; + + average = (double)stat->total_time / stat->count; + + variance = stat->squared_sum; + variance -= (stat->total_time * stat->total_time) / stat->count; + variance /= stat->count - 1; + + return 100 * sqrt(variance / stat->count) / average; +} + +/* + * The syscall_data is to maintain syscall stats ordered by total time. + * It supports different summary modes like per-thread or global. + * + * For per-thread stats, it uses two-level data strurcture - + * syscall_data is keyed by TID and has an array of nodes which + * represents each syscall for the thread. + * + * For global stats, it's still two-level technically but we don't need + * per-cpu analysis so it's keyed by the syscall number to combine stats + * from different CPUs. 
And syscall_data always has a syscall_node so + * it can effectively work as flat hierarchy. + * + * For per-cgroup stats, it uses two-level data structure like thread + * syscall_data is keyed by CGROUP and has an array of node which + * represents each syscall for the cgroup. + */ +struct syscall_data { + u64 key; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU, cgroup if AGGR_CGROUP */ + int nr_events; + int nr_nodes; + u64 total_time; + struct syscall_node *nodes; +}; + +static int datacmp(const void *a, const void *b) +{ + const struct syscall_data * const *sa = a; + const struct syscall_data * const *sb = b; + + return (*sa)->total_time > (*sb)->total_time ? -1 : 1; +} + +static int nodecmp(const void *a, const void *b) +{ + const struct syscall_node *na = a; + const struct syscall_node *nb = b; + + return na->stats.total_time > nb->stats.total_time ? -1 : 1; +} + +static size_t sc_node_hash(long key, void *ctx __maybe_unused) +{ + return key; +} + +static bool sc_node_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + +static int print_common_stats(struct syscall_data *data, int max_summary, FILE *fp) +{ + int printed = 0; + + if (max_summary == 0 || max_summary > data->nr_nodes) + max_summary = data->nr_nodes; + + for (int i = 0; i < max_summary; i++) { + struct syscall_node *node = &data->nodes[i]; + struct syscall_stats *stat = &node->stats; + double total = (double)(stat->total_time) / NSEC_PER_MSEC; + double min = (double)(stat->min_time) / NSEC_PER_MSEC; + double max = (double)(stat->max_time) / NSEC_PER_MSEC; + double avg = total / stat->count; + const char *name; + + /* TODO: support other ABIs */ + name = syscalltbl__name(EM_HOST, node->syscall_nr); + if (name) + printed += fprintf(fp, " %-15s", name); + else + printed += fprintf(fp, " syscall:%-7d", node->syscall_nr); + + printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n", + stat->count, stat->error, total, min, avg, max, + rel_stddev(stat)); + } + return printed; +} + +static int update_thread_stats(struct hashmap *hash, struct syscall_key *map_key, + struct syscall_stats *map_data) +{ + struct syscall_data *data; + struct syscall_node *nodes; + + if (!hashmap__find(hash, map_key->cpu_or_tid, &data)) { + data = zalloc(sizeof(*data)); + if (data == NULL) + return -ENOMEM; + + data->key = map_key->cpu_or_tid; + if (hashmap__add(hash, data->key, data) < 0) { + free(data); + return -ENOMEM; + } + } + + /* update thread total stats */ + data->nr_events += map_data->count; + data->total_time += map_data->total_time; + + nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes)); + if (nodes == NULL) + return -ENOMEM; + + data->nodes = nodes; + nodes = &data->nodes[data->nr_nodes++]; + nodes->syscall_nr = map_key->nr; + + /* each thread has an entry for each syscall, just use the stat */ + memcpy(&nodes->stats, map_data, sizeof(*map_data)); + return 0; +} + +static int print_thread_stat(struct syscall_data *data, int max_summary, FILE *fp) +{ + int printed = 0; + + qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp); + + printed += fprintf(fp, " thread (%d), ", (int)data->key); + printed += fprintf(fp, "%d events\n\n", data->nr_events); + + printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); + + printed += print_common_stats(data, max_summary, fp); + printed += 
fprintf(fp, "\n\n"); + + return printed; +} + +static int print_thread_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp) +{ + int printed = 0; + + for (int i = 0; i < nr_data; i++) + printed += print_thread_stat(data[i], max_summary, fp); + + return printed; +} + +static int update_total_stats(struct hashmap *hash, struct syscall_key *map_key, + struct syscall_stats *map_data) +{ + struct syscall_data *data; + struct syscall_stats *stat; + + if (!hashmap__find(hash, map_key->nr, &data)) { + data = zalloc(sizeof(*data)); + if (data == NULL) + return -ENOMEM; + + data->nodes = zalloc(sizeof(*data->nodes)); + if (data->nodes == NULL) { + free(data); + return -ENOMEM; + } + + data->nr_nodes = 1; + data->key = map_key->nr; + data->nodes->syscall_nr = data->key; + + if (hashmap__add(hash, data->key, data) < 0) { + free(data->nodes); + free(data); + return -ENOMEM; + } + } + + /* update total stats for this syscall */ + data->nr_events += map_data->count; + data->total_time += map_data->total_time; + + /* This is sum of the same syscall from different CPUs */ + stat = &data->nodes->stats; + + stat->total_time += map_data->total_time; + stat->squared_sum += map_data->squared_sum; + stat->count += map_data->count; + stat->error += map_data->error; + + if (stat->max_time < map_data->max_time) + stat->max_time = map_data->max_time; + if (stat->min_time > map_data->min_time || stat->min_time == 0) + stat->min_time = map_data->min_time; + + return 0; +} + +static int print_total_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp) +{ + int printed = 0; + int nr_events = 0; + + for (int i = 0; i < nr_data; i++) + nr_events += data[i]->nr_events; + + printed += fprintf(fp, " total, %d events\n\n", nr_events); + + printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); + + if (max_summary == 0 || max_summary > nr_data) + max_summary = nr_data; + + for (int i = 0; i < max_summary; i++) + printed += print_common_stats(data[i], max_summary, fp); + + printed += fprintf(fp, "\n\n"); + return printed; +} + +static int update_cgroup_stats(struct hashmap *hash, struct syscall_key *map_key, + struct syscall_stats *map_data) +{ + struct syscall_data *data; + struct syscall_node *nodes; + + if (!hashmap__find(hash, map_key->cgroup, &data)) { + data = zalloc(sizeof(*data)); + if (data == NULL) + return -ENOMEM; + + data->key = map_key->cgroup; + if (hashmap__add(hash, data->key, data) < 0) { + free(data); + return -ENOMEM; + } + } + + /* update thread total stats */ + data->nr_events += map_data->count; + data->total_time += map_data->total_time; + + nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes)); + if (nodes == NULL) + return -ENOMEM; + + data->nodes = nodes; + nodes = &data->nodes[data->nr_nodes++]; + nodes->syscall_nr = map_key->nr; + + /* each thread has an entry for each syscall, just use the stat */ + memcpy(&nodes->stats, map_data, sizeof(*map_data)); + return 0; +} + +static int print_cgroup_stat(struct syscall_data *data, int max_summary, FILE *fp) +{ + int printed = 0; + struct cgroup *cgrp = __cgroup__find(&cgroups, data->key); + + qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp); + + if (cgrp) + printed += fprintf(fp, " cgroup %s,", cgrp->name); + else + printed += fprintf(fp, " cgroup id:%lu,", (unsigned long)data->key); + + 
printed += fprintf(fp, " %d events\n\n", data->nr_events); + + printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); + + printed += print_common_stats(data, max_summary, fp); + printed += fprintf(fp, "\n\n"); + + return printed; +} + +static int print_cgroup_stats(struct syscall_data **data, int nr_data, int max_summary, FILE *fp) +{ + int printed = 0; + + for (int i = 0; i < nr_data; i++) + printed += print_cgroup_stat(data[i], max_summary, fp); + + return printed; +} + +int trace_print_bpf_summary(FILE *fp, int max_summary) +{ + struct bpf_map *map = skel->maps.syscall_stats_map; + struct syscall_key *prev_key, key; + struct syscall_data **data = NULL; + struct hashmap schash; + struct hashmap_entry *entry; + int nr_data = 0; + int printed = 0; + int i; + size_t bkt; + + hashmap__init(&schash, sc_node_hash, sc_node_equal, /*ctx=*/NULL); + + printed = fprintf(fp, "\n Summary of events:\n\n"); + + /* get stats from the bpf map */ + prev_key = NULL; + while (!bpf_map__get_next_key(map, prev_key, &key, sizeof(key))) { + struct syscall_stats stat; + + if (!bpf_map__lookup_elem(map, &key, sizeof(key), &stat, sizeof(stat), 0)) { + switch (skel->rodata->aggr_mode) { + case SYSCALL_AGGR_THREAD: + update_thread_stats(&schash, &key, &stat); + break; + case SYSCALL_AGGR_CPU: + update_total_stats(&schash, &key, &stat); + break; + case SYSCALL_AGGR_CGROUP: + update_cgroup_stats(&schash, &key, &stat); + break; + default: + break; + } + } + + prev_key = &key; + } + + nr_data = hashmap__size(&schash); + data = calloc(nr_data, sizeof(*data)); + if (data == NULL) + goto out; + + i = 0; + hashmap__for_each_entry(&schash, entry, bkt) + data[i++] = entry->pvalue; + + qsort(data, nr_data, sizeof(*data), datacmp); + + switch (skel->rodata->aggr_mode) { + case SYSCALL_AGGR_THREAD: + printed += print_thread_stats(data, nr_data, max_summary, fp); + break; + case SYSCALL_AGGR_CPU: + printed += print_total_stats(data, nr_data, max_summary, fp); + break; + case SYSCALL_AGGR_CGROUP: + printed += print_cgroup_stats(data, nr_data, max_summary, fp); + break; + default: + break; + } + + for (i = 0; i < nr_data && data; i++) { + free(data[i]->nodes); + free(data[i]); + } + free(data); + +out: + hashmap__clear(&schash); + return printed; +} + +void trace_cleanup_bpf_summary(void) +{ + if (!RB_EMPTY_ROOT(&cgroups)) { + struct cgroup *cgrp, *tmp; + + rbtree_postorder_for_each_entry_safe(cgrp, tmp, &cgroups, node) + cgroup__put(cgrp); + + cgroups = RB_ROOT; + } + + syscall_summary_bpf__destroy(skel); +} diff --git a/tools/perf/util/bpf-utils.c b/tools/perf/util/bpf-utils.c index 80b1d2b3729b..5a66dc8594aa 100644 --- a/tools/perf/util/bpf-utils.c +++ b/tools/perf/util/bpf-utils.c @@ -20,7 +20,7 @@ struct bpil_array_desc { */ }; -static struct bpil_array_desc bpil_array_desc[] = { +static const struct bpil_array_desc bpil_array_desc[] = { [PERF_BPIL_JITED_INSNS] = { offsetof(struct bpf_prog_info, jited_prog_insns), offsetof(struct bpf_prog_info, jited_prog_len), @@ -115,7 +115,7 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) __u32 info_len = sizeof(info); __u32 data_len = 0; int i, err; - void *ptr; + __u8 *ptr; if (arrays >> PERF_BPIL_LAST_ARRAY) return ERR_PTR(-EINVAL); @@ -126,15 +126,15 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) pr_debug("can't get prog info: %s", strerror(errno)); return ERR_PTR(-EFAULT); } + if 
(info.type >= __MAX_BPF_PROG_TYPE) + pr_debug("%s:%d: unexpected program type %u\n", __func__, __LINE__, info.type); /* step 2: calculate total size of all arrays */ for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + const struct bpil_array_desc *desc = &bpil_array_desc[i]; bool include_array = (arrays & (1UL << i)) > 0; - struct bpil_array_desc *desc; __u32 count, size; - desc = bpil_array_desc + i; - /* kernel is too old to support this field */ if (info_len < desc->array_offset + sizeof(__u32) || info_len < desc->count_offset + sizeof(__u32) || @@ -163,19 +163,20 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) ptr = info_linear->data; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; + const struct bpil_array_desc *desc = &bpil_array_desc[i]; __u32 count, size; if ((arrays & (1UL << i)) == 0) continue; - desc = bpil_array_desc + i; count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); bpf_prog_info_set_offset_u32(&info_linear->info, desc->count_offset, count); bpf_prog_info_set_offset_u32(&info_linear->info, desc->size_offset, size); + assert(ptr >= info_linear->data); + assert(ptr < &info_linear->data[data_len]); bpf_prog_info_set_offset_u64(&info_linear->info, desc->array_offset, ptr_to_u64(ptr)); @@ -189,27 +190,45 @@ get_bpf_prog_info_linear(int fd, __u64 arrays) free(info_linear); return ERR_PTR(-EFAULT); } + if (info_linear->info.type >= __MAX_BPF_PROG_TYPE) { + pr_debug("%s:%d: unexpected program type %u\n", + __func__, __LINE__, info_linear->info.type); + } /* step 6: verify the data */ + ptr = info_linear->data; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; - __u32 v1, v2; + const struct bpil_array_desc *desc = &bpil_array_desc[i]; + __u32 count1, count2, size1, size2; + __u64 ptr2; if ((arrays & (1UL << i)) == 0) continue; - desc = bpil_array_desc + i; - v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + count1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + count2 = bpf_prog_info_read_offset_u32(&info_linear->info, desc->count_offset); - if (v1 != v2) - pr_warning("%s: mismatch in element count\n", __func__); + if (count1 != count2) { + pr_warning("%s: mismatch in element count %u vs %u\n", __func__, count1, count2); + free(info_linear); + return ERR_PTR(-ERANGE); + } - v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); - v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + size1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + size2 = bpf_prog_info_read_offset_u32(&info_linear->info, desc->size_offset); - if (v1 != v2) - pr_warning("%s: mismatch in rec size\n", __func__); + if (size1 != size2) { + pr_warning("%s: mismatch in rec size %u vs %u\n", __func__, size1, size2); + free(info_linear); + return ERR_PTR(-ERANGE); + } + ptr2 = bpf_prog_info_read_offset_u64(&info_linear->info, desc->array_offset); + if (ptr_to_u64(ptr) != ptr2) { + pr_warning("%s: mismatch in array %p vs %llx\n", __func__, ptr, ptr2); + free(info_linear); + return ERR_PTR(-ERANGE); + } + ptr += roundup(count1 * size1, sizeof(__u64)); } /* step 7: update info_len and data_len */ @@ -224,13 +243,12 @@ void bpil_addr_to_offs(struct perf_bpil *info_linear) int i; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; + const struct bpil_array_desc *desc = 
&bpil_array_desc[i]; __u64 addr, offs; if ((info_linear->arrays & (1UL << i)) == 0) continue; - desc = bpil_array_desc + i; addr = bpf_prog_info_read_offset_u64(&info_linear->info, desc->array_offset); offs = addr - ptr_to_u64(info_linear->data); @@ -244,13 +262,12 @@ void bpil_offs_to_addr(struct perf_bpil *info_linear) int i; for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { - struct bpil_array_desc *desc; + const struct bpil_array_desc *desc = &bpil_array_desc[i]; __u64 addr, offs; if ((info_linear->arrays & (1UL << i)) == 0) continue; - desc = bpil_array_desc + i; offs = bpf_prog_info_read_offset_u64(&info_linear->info, desc->array_offset); addr = offs + ptr_to_u64(info_linear->data); diff --git a/tools/perf/util/bpf-utils.h b/tools/perf/util/bpf-utils.h index 86a5055cdfad..a8bc1a232968 100644 --- a/tools/perf/util/bpf-utils.h +++ b/tools/perf/util/bpf-utils.h @@ -8,6 +8,16 @@ #ifdef HAVE_LIBBPF_SUPPORT #include <bpf/libbpf.h> +#include <bpf/libbpf_version.h> + +#define LIBBPF_CURRENT_VERSION_GEQ(major, minor) \ + (LIBBPF_MAJOR_VERSION > (major) || \ + (LIBBPF_MAJOR_VERSION == (major) && LIBBPF_MINOR_VERSION >= (minor))) + +#if LIBBPF_CURRENT_VERSION_GEQ(1, 7) +// libbpf 1.7+ supports the btf_dump_type_data_opts.emit_strings option. +#define HAVE_LIBBPF_STRINGS_SUPPORT 1 +#endif /* * Get bpf_prog_info in continuous memory diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 7a8af60e0f51..a5882b582205 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -6,10 +6,14 @@ #include <limits.h> #include <unistd.h> #include <sys/file.h> +#include <sys/resource.h> #include <sys/time.h> #include <linux/err.h> +#include <linux/list.h> #include <linux/zalloc.h> #include <api/fs/fs.h> +#include <bpf/bpf.h> +#include <bpf/btf.h> #include <perf/bpf_perf.h> #include "bpf_counter.h" @@ -28,13 +32,67 @@ #include "bpf_skel/bperf_leader.skel.h" #include "bpf_skel/bperf_follower.skel.h" +struct bpf_counter { + void *skel; + struct list_head list; +}; + #define ATTR_MAP_SIZE 16 -static inline void *u64_to_ptr(__u64 ptr) +static void *u64_to_ptr(__u64 ptr) { return (void *)(unsigned long)ptr; } + +void set_max_rlimit(void) +{ + struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; + + setrlimit(RLIMIT_MEMLOCK, &rinf); +} + +static __u32 bpf_link_get_id(int fd) +{ + struct bpf_link_info link_info = { .id = 0, }; + __u32 link_info_len = sizeof(link_info); + + bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); + return link_info.id; +} + +static __u32 bpf_link_get_prog_id(int fd) +{ + struct bpf_link_info link_info = { .id = 0, }; + __u32 link_info_len = sizeof(link_info); + + bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); + return link_info.prog_id; +} + +static __u32 bpf_map_get_id(int fd) +{ + struct bpf_map_info map_info = { .id = 0, }; + __u32 map_info_len = sizeof(map_info); + + bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len); + return map_info.id; +} + +/* trigger the leader program on a cpu */ +int bperf_trigger_reading(int prog_fd, int cpu) +{ + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .ctx_in = NULL, + .ctx_size_in = 0, + .flags = BPF_F_TEST_RUN_ON_CPU, + .cpu = cpu, + .retval = 0, + ); + + return bpf_prog_test_run_opts(prog_fd, &opts); +} + static struct bpf_counter *bpf_counter_alloc(void) { struct bpf_counter *counter; @@ -278,6 +336,7 @@ static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx { struct bpf_prog_profiler_bpf *skel; struct bpf_counter *counter; + int cpu = 
perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx).cpu; int ret; list_for_each_entry(counter, &evsel->bpf_counter_list, list) { @@ -285,7 +344,7 @@ static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx assert(skel != NULL); ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events), - &cpu_map_idx, &fd, BPF_ANY); + &cpu, &fd, BPF_ANY); if (ret) return ret; } @@ -393,7 +452,7 @@ static int bperf_check_target(struct evsel *evsel, return 0; } -static struct perf_cpu_map *all_cpu_map; +static __u32 filter_entry_cnt; static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, struct perf_event_attr_map_entry *entry) @@ -401,6 +460,7 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, struct bperf_leader_bpf *skel = bperf_leader_bpf__open(); int link_fd, diff_map_fd, err; struct bpf_link *link = NULL; + struct perf_thread_map *threads; if (!skel) { pr_err("Failed to open leader skeleton\n"); @@ -436,7 +496,11 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, * following evsel__open_per_cpu call */ evsel->leader_skel = skel; - evsel__open_per_cpu(evsel, all_cpu_map, -1); + assert(!perf_cpu_map__has_any_cpu_or_is_empty(evsel->core.cpus)); + /* Always open system wide. */ + threads = thread_map__new_by_tid(-1); + evsel__open(evsel, evsel->core.cpus, threads); + perf_thread_map__put(threads); out: bperf_leader_bpf__destroy(skel); @@ -444,22 +508,36 @@ out: return err; } +static int bperf_attach_follower_program(struct bperf_follower_bpf *skel, + enum bperf_filter_type filter_type, + bool inherit) +{ + struct bpf_link *link; + int err = 0; + + if ((filter_type == BPERF_FILTER_PID || + filter_type == BPERF_FILTER_TGID) && inherit) + /* attach all follower bpf progs to enable event inheritance */ + err = bperf_follower_bpf__attach(skel); + else { + link = bpf_program__attach(skel->progs.fexit_XXX); + if (IS_ERR(link)) + err = PTR_ERR(link); + } + + return err; +} + static int bperf__load(struct evsel *evsel, struct target *target) { struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff}; int attr_map_fd, diff_map_fd = -1, err; enum bperf_filter_type filter_type; - __u32 filter_entry_cnt, i; + __u32 i; if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt)) return -1; - if (!all_cpu_map) { - all_cpu_map = perf_cpu_map__new_online_cpus(); - if (!all_cpu_map) - return -1; - } - evsel->bperf_leader_prog_fd = -1; evsel->bperf_leader_link_fd = -1; @@ -529,9 +607,6 @@ static int bperf__load(struct evsel *evsel, struct target *target) /* set up reading map */ bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings, filter_entry_cnt); - /* set up follower filter based on target */ - bpf_map__set_max_entries(evsel->follower_skel->maps.filter, - filter_entry_cnt); err = bperf_follower_bpf__load(evsel->follower_skel); if (err) { pr_err("Failed to load follower skeleton\n"); @@ -543,6 +618,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) for (i = 0; i < filter_entry_cnt; i++) { int filter_map_fd; __u32 key; + struct bperf_filter_value fval = { i, 0 }; if (filter_type == BPERF_FILTER_PID || filter_type == BPERF_FILTER_TGID) @@ -553,12 +629,14 @@ static int bperf__load(struct evsel *evsel, struct target *target) break; filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter); - bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY); + bpf_map_update_elem(filter_map_fd, &key, &fval, BPF_ANY); } evsel->follower_skel->bss->type = filter_type; + 
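Note that both install_pe callbacks in bpf_counter.c now key the per-cpu "events" map by the logical CPU number rather than by the dense index into the evsel's cpu map; the two only coincide when profiling all CPUs starting from 0. A minimal sketch of the distinction using the libperf cpumap API (the "2,4,6" list is a made-up example, not taken from this patch):

#include <perf/cpumap.h>

static int map_idx_vs_cpu(void)
{
	/* a cpu map restricted to CPUs 2, 4 and 6 */
	struct perf_cpu_map *cpus = perf_cpu_map__new("2,4,6");
	int idx = 1;					/* second entry of the map... */
	int cpu = perf_cpu_map__cpu(cpus, idx).cpu;	/* ...is CPU 4 */

	perf_cpu_map__put(cpus);
	/* a BPF map keyed by CPU must be updated at key 4 here, not key 1 */
	return cpu;
}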
evsel->follower_skel->bss->inherit = target->inherit; - err = bperf_follower_bpf__attach(evsel->follower_skel); + err = bperf_attach_follower_program(evsel->follower_skel, filter_type, + target->inherit); out: if (err && evsel->bperf_leader_link_fd >= 0) @@ -577,9 +655,10 @@ out: static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd) { struct bperf_leader_bpf *skel = evsel->leader_skel; + int cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx).cpu; return bpf_map_update_elem(bpf_map__fd(skel->maps.events), - &cpu_map_idx, &fd, BPF_ANY); + &cpu, &fd, BPF_ANY); } /* @@ -588,13 +667,12 @@ static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd) */ static int bperf_sync_counters(struct evsel *evsel) { - int num_cpu, i, cpu; + struct perf_cpu cpu; + int idx; + + perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) + bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu.cpu); - num_cpu = perf_cpu_map__nr(all_cpu_map); - for (i = 0; i < num_cpu; i++) { - cpu = perf_cpu_map__cpu(all_cpu_map, i).cpu; - bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu); - } return 0; } @@ -623,7 +701,7 @@ static int bperf__read(struct evsel *evsel) bperf_sync_counters(evsel); reading_map_fd = bpf_map__fd(skel->maps.accum_readings); - for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) { + for (i = 0; i < filter_entry_cnt; i++) { struct perf_cpu entry; __u32 cpu; @@ -764,7 +842,7 @@ struct bpf_counter_ops bperf_ops = { extern struct bpf_counter_ops bperf_cgrp_ops; -static inline bool bpf_counter_skip(struct evsel *evsel) +static bool bpf_counter_skip(struct evsel *evsel) { return evsel->bpf_counter_ops == NULL; } diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index c6d21c07b14c..658d8e7d507e 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -2,18 +2,10 @@ #ifndef __PERF_BPF_COUNTER_H #define __PERF_BPF_COUNTER_H 1 -#include <linux/list.h> -#include <sys/resource.h> - -#ifdef HAVE_LIBBPF_SUPPORT -#include <bpf/bpf.h> -#include <bpf/btf.h> -#include <bpf/libbpf.h> -#endif - struct evsel; struct target; -struct bpf_counter; + +#ifdef HAVE_BPF_SKEL typedef int (*bpf_counter_evsel_op)(struct evsel *evsel); typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel, @@ -22,6 +14,7 @@ typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel, int cpu_map_idx, int fd); +/* Shared ops between bpf_counter, bpf_counter_cgroup, etc. 
*/ struct bpf_counter_ops { bpf_counter_evsel_target_op load; bpf_counter_evsel_op enable; @@ -31,13 +24,6 @@ struct bpf_counter_ops { bpf_counter_evsel_install_pe_op install_pe; }; -struct bpf_counter { - void *skel; - struct list_head list; -}; - -#ifdef HAVE_BPF_SKEL - int bpf_counter__load(struct evsel *evsel, struct target *target); int bpf_counter__enable(struct evsel *evsel); int bpf_counter__disable(struct evsel *evsel); @@ -45,6 +31,9 @@ int bpf_counter__read(struct evsel *evsel); void bpf_counter__destroy(struct evsel *evsel); int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd); +int bperf_trigger_reading(int prog_fd, int cpu); +void set_max_rlimit(void); + #else /* HAVE_BPF_SKEL */ #include <linux/err.h> @@ -83,55 +72,4 @@ static inline int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, #endif /* HAVE_BPF_SKEL */ -static inline void set_max_rlimit(void) -{ - struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; - - setrlimit(RLIMIT_MEMLOCK, &rinf); -} - -#ifdef HAVE_BPF_SKEL - -static inline __u32 bpf_link_get_id(int fd) -{ - struct bpf_link_info link_info = { .id = 0, }; - __u32 link_info_len = sizeof(link_info); - - bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); - return link_info.id; -} - -static inline __u32 bpf_link_get_prog_id(int fd) -{ - struct bpf_link_info link_info = { .id = 0, }; - __u32 link_info_len = sizeof(link_info); - - bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); - return link_info.prog_id; -} - -static inline __u32 bpf_map_get_id(int fd) -{ - struct bpf_map_info map_info = { .id = 0, }; - __u32 map_info_len = sizeof(map_info); - - bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len); - return map_info.id; -} - -/* trigger the leader program on a cpu */ -static inline int bperf_trigger_reading(int prog_fd, int cpu) -{ - DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, - .ctx_in = NULL, - .ctx_size_in = 0, - .flags = BPF_F_TEST_RUN_ON_CPU, - .cpu = cpu, - .retval = 0, - ); - - return bpf_prog_test_run_opts(prog_fd, &opts); -} -#endif /* HAVE_BPF_SKEL */ - #endif /* __PERF_BPF_COUNTER_H */ diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index ea29c372f339..17d7196c6589 100644 --- a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -4,6 +4,7 @@ /* Copyright (c) 2021 Google */ #include <assert.h> +#include <errno.h> #include <limits.h> #include <unistd.h> #include <sys/file.h> @@ -13,6 +14,7 @@ #include <linux/zalloc.h> #include <linux/perf_event.h> #include <api/fs/fs.h> +#include <bpf/bpf.h> #include <perf/bpf_perf.h> #include "affinity.h" @@ -26,6 +28,7 @@ #include "cpumap.h" #include "thread_map.h" +#include "bpf_skel/bperf_cgroup.h" #include "bpf_skel/bperf_cgroup.skel.h" static struct perf_event_attr cgrp_switch_attr = { @@ -41,6 +44,55 @@ static struct bperf_cgroup_bpf *skel; #define FD(evt, cpu) (*(int *)xyarray__entry(evt->core.fd, cpu, 0)) +static void setup_rodata(struct bperf_cgroup_bpf *sk, int evlist_size) +{ + int map_size, total_cpus = cpu__max_cpu().cpu; + + sk->rodata->num_cpus = total_cpus; + sk->rodata->num_events = evlist_size / nr_cgroups; + + if (cgroup_is_v2("perf_event") > 0) + sk->rodata->use_cgroup_v2 = 1; + + BUG_ON(evlist_size % nr_cgroups != 0); + + /* we need one copy of events per cpu for reading */ + map_size = total_cpus * evlist_size / nr_cgroups; + bpf_map__set_max_entries(sk->maps.events, map_size); + bpf_map__set_max_entries(sk->maps.cgrp_idx, nr_cgroups); + /* previous result is saved in a per-cpu 
array */ + map_size = evlist_size / nr_cgroups; + bpf_map__set_max_entries(sk->maps.prev_readings, map_size); + /* cgroup result needs all events (per-cpu) */ + map_size = evlist_size; + bpf_map__set_max_entries(sk->maps.cgrp_readings, map_size); +} + +static void test_max_events_program_load(void) +{ +#ifndef NDEBUG + /* + * Test that the program verifies with the maximum number of events. If + * this test fails, unfortunately perf needs recompiling with a lower + * BPERF_CGROUP__MAX_EVENTS to avoid BPF verifier issues. + */ + int err, max_events = BPERF_CGROUP__MAX_EVENTS * nr_cgroups; + struct bperf_cgroup_bpf *test_skel = bperf_cgroup_bpf__open(); + + if (!test_skel) { + pr_err("Failed to open cgroup skeleton\n"); + return; + } + setup_rodata(test_skel, max_events); + err = bperf_cgroup_bpf__load(test_skel); + if (err) { + pr_err("Failed to load cgroup skeleton with max events %d.\n", + BPERF_CGROUP__MAX_EVENTS); + } + bperf_cgroup_bpf__destroy(test_skel); +#endif +} + static int bperf_load_program(struct evlist *evlist) { struct bpf_link *link; @@ -49,32 +101,18 @@ int i, j; struct perf_cpu cpu; int total_cpus = cpu__max_cpu().cpu; - int map_size, map_fd; - int prog_fd, err; + int map_fd, prog_fd, err; + + set_max_rlimit(); + + test_max_events_program_load(); skel = bperf_cgroup_bpf__open(); if (!skel) { pr_err("Failed to open cgroup skeleton\n"); return -1; } - - skel->rodata->num_cpus = total_cpus; - skel->rodata->num_events = evlist->core.nr_entries / nr_cgroups; - - BUG_ON(evlist->core.nr_entries % nr_cgroups != 0); - - /* we need one copy of events per cpu for reading */ - map_size = total_cpus * evlist->core.nr_entries / nr_cgroups; - bpf_map__set_max_entries(skel->maps.events, map_size); - bpf_map__set_max_entries(skel->maps.cgrp_idx, nr_cgroups); - /* previous result is saved in a per-cpu array */ - map_size = evlist->core.nr_entries / nr_cgroups; - bpf_map__set_max_entries(skel->maps.prev_readings, map_size); - /* cgroup result needs all events (per-cpu) */ - map_size = evlist->core.nr_entries; - bpf_map__set_max_entries(skel->maps.cgrp_readings, map_size); - - set_max_rlimit(); + setup_rodata(skel, evlist->core.nr_entries); err = bperf_cgroup_bpf__load(skel); if (err) { @@ -82,9 +120,6 @@ goto out; } - if (cgroup_is_v2("perf_event") > 0) - skel->bss->use_cgroup_v2 = 1; - err = -1; cgrp_switch = evsel__new(&cgrp_switch_attr); @@ -185,7 +220,8 @@ } static int bperf_cgrp__install_pe(struct evsel *evsel __maybe_unused, - int cpu __maybe_unused, int fd __maybe_unused) + int cpu_map_idx __maybe_unused, + int fd __maybe_unused) { /* nothing to do */ return 0; } diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c index 7a4297d8fd2c..c456d24efa30 100644 --- a/tools/perf/util/bpf_ftrace.c +++ b/tools/perf/util/bpf_ftrace.c @@ -1,8 +1,10 @@ -#include <stdio.h> +#include <errno.h> #include <fcntl.h> #include <stdint.h> +#include <stdio.h> #include <stdlib.h> +#include <bpf/bpf.h> #include <linux/err.h> #include "util/ftrace.h" @@ -11,6 +13,7 @@ #include "util/debug.h" #include "util/evlist.h" #include "util/bpf_counter.h" +#include "util/stat.h" #include "util/bpf_skel/func_latency.skel.h" @@ -20,15 +23,26 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) { int fd, err; int i, ncpus = 1, ntasks = 1; - struct filter_entry *func; + struct filter_entry *func = NULL; - if 
(!list_is_singular(&ftrace->filters)) { - pr_err("ERROR: %s target function(s).\n", - list_empty(&ftrace->filters) ? "No" : "Too many"); - return -1; - } + if (!list_empty(&ftrace->filters)) { + if (!list_is_singular(&ftrace->filters)) { + pr_err("ERROR: Too many target functions.\n"); + return -1; + } + func = list_first_entry(&ftrace->filters, struct filter_entry, list); + } else { + int count = 0; + struct list_head *pos; - func = list_first_entry(&ftrace->filters, struct filter_entry, list); + list_for_each(pos, &ftrace->event_pair) + count++; + + if (count != 2) { + pr_err("ERROR: Needs two target events.\n"); + return -1; + } + } skel = func_latency_bpf__open(); if (!skel) { @@ -36,17 +50,28 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) return -1; } + skel->rodata->bucket_range = ftrace->bucket_range; + skel->rodata->min_latency = ftrace->min_latency; + skel->rodata->bucket_num = ftrace->bucket_num; + if (ftrace->bucket_range && ftrace->bucket_num) { + bpf_map__set_max_entries(skel->maps.latency, ftrace->bucket_num); + } + /* don't need to set cpu filter for system-wide mode */ if (ftrace->target.cpu_list) { ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus); bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); + skel->rodata->has_cpu = 1; } if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) { ntasks = perf_thread_map__nr(ftrace->evlist->core.threads); bpf_map__set_max_entries(skel->maps.task_filter, ntasks); + skel->rodata->has_task = 1; } + skel->rodata->use_nsec = ftrace->use_nsec; + set_max_rlimit(); err = func_latency_bpf__load(skel); @@ -59,7 +84,6 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) u32 cpu; u8 val = 1; - skel->bss->has_cpu = 1; fd = bpf_map__fd(skel->maps.cpu_filter); for (i = 0; i < ncpus; i++) { @@ -72,7 +96,6 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) u32 pid; u8 val = 1; - skel->bss->has_task = 1; fd = bpf_map__fd(skel->maps.task_filter); for (i = 0; i < ntasks; i++) { @@ -81,22 +104,46 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) } } - skel->bss->use_nsec = ftrace->use_nsec; + skel->bss->min = INT64_MAX; - skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin, - false, func->name); - if (IS_ERR(skel->links.func_begin)) { - pr_err("Failed to attach fentry program\n"); - err = PTR_ERR(skel->links.func_begin); - goto out; - } + if (func) { + skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin, + false, func->name); + if (IS_ERR(skel->links.func_begin)) { + pr_err("Failed to attach fentry program\n"); + err = PTR_ERR(skel->links.func_begin); + goto out; + } - skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end, - true, func->name); - if (IS_ERR(skel->links.func_end)) { - pr_err("Failed to attach fexit program\n"); - err = PTR_ERR(skel->links.func_end); - goto out; + skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end, + true, func->name); + if (IS_ERR(skel->links.func_end)) { + pr_err("Failed to attach fexit program\n"); + err = PTR_ERR(skel->links.func_end); + goto out; + } + } else { + struct filter_entry *event; + + event = list_first_entry(&ftrace->event_pair, struct filter_entry, list); + + skel->links.event_begin = bpf_program__attach_raw_tracepoint(skel->progs.event_begin, + event->name); + if (IS_ERR(skel->links.event_begin)) { + pr_err("Failed to attach first tracepoint program\n"); + err = PTR_ERR(skel->links.event_begin); + goto out; + } + + 
event = list_next_entry(event, list); + + skel->links.event_end = bpf_program__attach_raw_tracepoint(skel->progs.event_end, + event->name); + if (IS_ERR(skel->links.event_end)) { + pr_err("Failed to attach second tracepoint program\n"); + err = PTR_ERR(skel->links.event_end); + goto out; + } } /* XXX: we don't actually use this fd - just for poll() */ @@ -118,8 +165,8 @@ int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused) return 0; } -int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, - int buckets[]) +int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace, + int buckets[], struct stats *stats) { int i, fd, err; u32 idx; @@ -132,7 +179,7 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, if (hist == NULL) return -ENOMEM; - for (idx = 0; idx < NUM_BUCKET; idx++) { + for (idx = 0; idx < skel->rodata->bucket_num; idx++) { err = bpf_map_lookup_elem(fd, &idx, hist); if (err) { buckets[idx] = 0; @@ -143,6 +190,19 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, buckets[idx] += hist[i]; } + if (skel->bss->count) { + stats->mean = skel->bss->total / skel->bss->count; + stats->n = skel->bss->count; + stats->max = skel->bss->max; + stats->min = skel->bss->min; + + if (!ftrace->use_nsec) { + stats->mean /= 1000; + stats->max /= 1000; + stats->min /= 1000; + } + } + free(hist); return 0; } diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c index 44f0f708a15d..5cff755c71fa 100644 --- a/tools/perf/util/bpf_kwork.c +++ b/tools/perf/util/bpf_kwork.c @@ -176,8 +176,6 @@ static int setup_filters(struct perf_kwork *kwork) bpf_map_update_elem(fd, &cpu.cpu, &val, BPF_ANY); } perf_cpu_map__put(map); - - skel->bss->has_cpu_filter = 1; } if (kwork->profile_name != NULL) { @@ -197,8 +195,6 @@ static int setup_filters(struct perf_kwork *kwork) key = 0; bpf_map_update_elem(fd, &key, kwork->profile_name, BPF_ANY); - - skel->bss->has_name_filter = 1; } return 0; @@ -239,6 +235,11 @@ int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork) class_bpf->load_prepare(kwork); } + if (kwork->cpu_list != NULL) + skel->rodata->has_cpu_filter = 1; + if (kwork->profile_name != NULL) + skel->rodata->has_name_filter = 1; + if (kwork_trace_bpf__load(skel)) { pr_debug("Failed to load kwork trace skeleton\n"); goto out; @@ -284,7 +285,7 @@ static int add_work(struct perf_kwork *kwork, (bpf_trace->get_work_name(key, &tmp.name))) return -1; - work = perf_kwork_add_work(kwork, tmp.class, &tmp); + work = kwork->add_work(kwork, tmp.class, &tmp); if (work == NULL) return -1; diff --git a/tools/perf/util/bpf_kwork_top.c b/tools/perf/util/bpf_kwork_top.c index 22a3b00a1e23..b6f187dd9136 100644 --- a/tools/perf/util/bpf_kwork_top.c +++ b/tools/perf/util/bpf_kwork_top.c @@ -151,14 +151,12 @@ static int setup_filters(struct perf_kwork *kwork) bpf_map_update_elem(fd, &cpu.cpu, &val, BPF_ANY); } perf_cpu_map__put(map); - - skel->bss->has_cpu_filter = 1; } return 0; } -int perf_kwork__top_prepare_bpf(struct perf_kwork *kwork __maybe_unused) +int perf_kwork__top_prepare_bpf(struct perf_kwork *kwork) { struct bpf_program *prog; struct kwork_class *class; @@ -193,6 +191,9 @@ int perf_kwork__top_prepare_bpf(struct perf_kwork *kwork __maybe_unused) class_bpf->load_prepare(); } + if (kwork->cpu_list) + skel->rodata->has_cpu_filter = 1; + if (kwork_top_bpf__load(skel)) { pr_debug("Failed to load kwork top skeleton\n"); goto out; @@ -254,7 +255,7 @@ static int add_work(struct perf_kwork *kwork, struct work_key *key, 
bpf_trace = kwork_class_bpf_supported_list[type]; tmp.class = bpf_trace->class; - work = perf_kwork_add_work(kwork, tmp.class, &tmp); + work = kwork->add_work(kwork, tmp.class, &tmp); if (!work) return -1; diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index b4cb3fe5cc25..7b5671f13c53 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -2,6 +2,7 @@ #include "util/cgroup.h" #include "util/debug.h" #include "util/evlist.h" +#include "util/hashmap.h" #include "util/machine.h" #include "util/map.h" #include "util/symbol.h" @@ -11,21 +12,181 @@ #include "util/lock-contention.h" #include <linux/zalloc.h> #include <linux/string.h> +#include <api/fs/fs.h> #include <bpf/bpf.h> +#include <bpf/btf.h> #include <inttypes.h> #include "bpf_skel/lock_contention.skel.h" #include "bpf_skel/lock_data.h" static struct lock_contention_bpf *skel; +static bool has_slab_iter; +static struct hashmap slab_hash; + +static size_t slab_cache_hash(long key, void *ctx __maybe_unused) +{ + return key; +} + +static bool slab_cache_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + +static void check_slab_cache_iter(struct lock_contention *con) +{ + s32 ret; + + hashmap__init(&slab_hash, slab_cache_hash, slab_cache_equal, /*ctx=*/NULL); + + con->btf = btf__load_vmlinux_btf(); + if (con->btf == NULL) { + pr_debug("BTF loading failed: %s\n", strerror(errno)); + return; + } + + ret = btf__find_by_name_kind(con->btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT); + if (ret < 0) { + bpf_program__set_autoload(skel->progs.slab_cache_iter, false); + pr_debug("slab cache iterator is not available: %d\n", ret); + return; + } + + has_slab_iter = true; + + bpf_map__set_max_entries(skel->maps.slab_caches, con->map_nr_entries); +} + +static void run_slab_cache_iter(void) +{ + int fd; + char buf[256]; + long key, *prev_key; + + if (!has_slab_iter) + return; + + fd = bpf_iter_create(bpf_link__fd(skel->links.slab_cache_iter)); + if (fd < 0) { + pr_debug("cannot create slab cache iter: %d\n", fd); + return; + } + + /* This will run the bpf program */ + while (read(fd, buf, sizeof(buf)) > 0) + continue; + + close(fd); + + /* Read the slab cache map and build a hash with IDs */ + fd = bpf_map__fd(skel->maps.slab_caches); + prev_key = NULL; + while (!bpf_map_get_next_key(fd, prev_key, &key)) { + struct slab_cache_data *data; + + data = malloc(sizeof(*data)); + if (data == NULL) + break; + + if (bpf_map_lookup_elem(fd, &key, data) < 0) + break; + + hashmap__add(&slab_hash, data->id, data); + prev_key = &key; + } +} + +static void exit_slab_cache_iter(void) +{ + struct hashmap_entry *cur; + unsigned bkt; + + hashmap__for_each_entry(&slab_hash, cur, bkt) + free(cur->pvalue); + + hashmap__clear(&slab_hash); +} + +static void init_numa_data(struct lock_contention *con) +{ + struct symbol *sym; + struct map *kmap; + char *buf = NULL, *p; + size_t len; + long last = -1; + int ret; + + /* + * 'struct zone' is embedded in 'struct pglist_data' as an array. + * As we may not have full information of the struct zone in the + * (fake) vmlinux.h, let's get the actual size from BTF. 
+ */ + ret = btf__find_by_name_kind(con->btf, "zone", BTF_KIND_STRUCT); + if (ret < 0) { + pr_debug("cannot get type of struct zone: %d\n", ret); + return; + } + + ret = btf__resolve_size(con->btf, ret); + if (ret < 0) { + pr_debug("cannot get size of struct zone: %d\n", ret); + return; + } + skel->rodata->sizeof_zone = ret; + + /* UMA system doesn't have 'node_data[]' - just use contig_page_data. */ + sym = machine__find_kernel_symbol_by_name(con->machine, + "contig_page_data", + &kmap); + if (sym) { + skel->rodata->contig_page_data_addr = map__unmap_ip(kmap, sym->start); + map__put(kmap); + return; + } + + /* + * The 'node_data' is an array of pointers to struct pglist_data. + * It needs to follow the pointer for each node in BPF to get the + * address of struct pglist_data and its zones. + */ + sym = machine__find_kernel_symbol_by_name(con->machine, + "node_data", + &kmap); + if (sym == NULL) + return; + + skel->rodata->node_data_addr = map__unmap_ip(kmap, sym->start); + map__put(kmap); + + /* get the number of online nodes using the last node number + 1 */ + ret = sysfs__read_str("devices/system/node/online", &buf, &len); + if (ret < 0) { + pr_debug("failed to read online node: %d\n", ret); + return; + } + + p = buf; + while (p && *p) { + last = strtol(p, &p, 0); + + if (p && (*p == ',' || *p == '-' || *p == '\n')) + p++; + } + skel->rodata->nr_nodes = last + 1; + free(buf); +} int lock_contention_prepare(struct lock_contention *con) { int i, fd; - int ncpus = 1, ntasks = 1, ntypes = 1, naddrs = 1, ncgrps = 1; + int ncpus = 1, ntasks = 1, ntypes = 1, naddrs = 1, ncgrps = 1, nslabs = 1; struct evlist *evlist = con->evlist; struct target *target = con->target; + /* make sure it loads the kernel map before lookup */ + map__load(machine__kernel_map(con->machine)); + skel = lock_contention_bpf__open(); if (!skel) { pr_err("Failed to open lock-contention BPF skeleton\n"); @@ -41,19 +202,37 @@ int lock_contention_prepare(struct lock_contention *con) else bpf_map__set_max_entries(skel->maps.task_data, 1); - if (con->save_callstack) + if (con->save_callstack) { bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries); - else + if (con->owner) { + bpf_map__set_value_size(skel->maps.stack_buf, con->max_stack * sizeof(u64)); + bpf_map__set_key_size(skel->maps.owner_stacks, + con->max_stack * sizeof(u64)); + bpf_map__set_max_entries(skel->maps.owner_stacks, con->map_nr_entries); + bpf_map__set_max_entries(skel->maps.owner_data, con->map_nr_entries); + bpf_map__set_max_entries(skel->maps.owner_stat, con->map_nr_entries); + skel->rodata->max_stack = con->max_stack; + } + } else { bpf_map__set_max_entries(skel->maps.stacks, 1); + } - if (target__has_cpu(target)) + if (target__has_cpu(target)) { + skel->rodata->has_cpu = 1; ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); - if (target__has_task(target)) + } + if (target__has_task(target)) { + skel->rodata->has_task = 1; ntasks = perf_thread_map__nr(evlist->core.threads); - if (con->filters->nr_types) + } + if (con->filters->nr_types) { + skel->rodata->has_type = 1; ntypes = con->filters->nr_types; - if (con->filters->nr_cgrps) + } + if (con->filters->nr_cgrps) { + skel->rodata->has_cgroup = 1; ncgrps = con->filters->nr_cgrps; + } /* resolve lock name filters to addr */ if (con->filters->nr_syms) { @@ -82,6 +261,28 @@ int lock_contention_prepare(struct lock_contention *con) con->filters->addrs = addrs; } naddrs = con->filters->nr_addrs; + skel->rodata->has_addr = 1; + } + + /* resolve lock name in delays */ + if (con->nr_delays) { + 
struct symbol *sym; + struct map *kmap; + + for (i = 0; i < con->nr_delays; i++) { + sym = machine__find_kernel_symbol_by_name(con->machine, + con->delays[i].sym, + &kmap); + if (sym == NULL) { + pr_warning("ignore unknown symbol: %s\n", + con->delays[i].sym); + continue; + } + + con->delays[i].addr = map__unmap_ip(kmap, sym->start); + } + skel->rodata->lock_delay = 1; + bpf_map__set_max_entries(skel->maps.lock_delays, con->nr_delays); } bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); @@ -90,6 +291,27 @@ int lock_contention_prepare(struct lock_contention *con) bpf_map__set_max_entries(skel->maps.addr_filter, naddrs); bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps); + skel->rodata->stack_skip = con->stack_skip; + skel->rodata->aggr_mode = con->aggr_mode; + skel->rodata->needs_callstack = con->save_callstack; + skel->rodata->lock_owner = con->owner; + + if (con->aggr_mode == LOCK_AGGR_CGROUP || con->filters->nr_cgrps) { + if (cgroup_is_v2("perf_event")) + skel->rodata->use_cgroup_v2 = 1; + } + + check_slab_cache_iter(con); + + if (con->filters->nr_slabs && has_slab_iter) { + skel->rodata->has_slab = 1; + nslabs = con->filters->nr_slabs; + } + + bpf_map__set_max_entries(skel->maps.slab_filter, nslabs); + + init_numa_data(con); + if (lock_contention_bpf__load(skel) < 0) { pr_err("Failed to load lock-contention BPF skeleton\n"); return -1; @@ -99,7 +321,6 @@ int lock_contention_prepare(struct lock_contention *con) u32 cpu; u8 val = 1; - skel->bss->has_cpu = 1; fd = bpf_map__fd(skel->maps.cpu_filter); for (i = 0; i < ncpus; i++) { @@ -112,7 +333,6 @@ int lock_contention_prepare(struct lock_contention *con) u32 pid; u8 val = 1; - skel->bss->has_task = 1; fd = bpf_map__fd(skel->maps.task_filter); for (i = 0; i < ntasks; i++) { @@ -125,7 +345,6 @@ int lock_contention_prepare(struct lock_contention *con) u32 pid = evlist->workload.pid; u8 val = 1; - skel->bss->has_task = 1; fd = bpf_map__fd(skel->maps.task_filter); bpf_map_update_elem(fd, &pid, &val, BPF_ANY); } @@ -133,7 +352,6 @@ int lock_contention_prepare(struct lock_contention *con) if (con->filters->nr_types) { u8 val = 1; - skel->bss->has_type = 1; fd = bpf_map__fd(skel->maps.type_filter); for (i = 0; i < con->filters->nr_types; i++) @@ -143,7 +361,6 @@ int lock_contention_prepare(struct lock_contention *con) if (con->filters->nr_addrs) { u8 val = 1; - skel->bss->has_addr = 1; fd = bpf_map__fd(skel->maps.addr_filter); for (i = 0; i < con->filters->nr_addrs; i++) @@ -153,29 +370,55 @@ int lock_contention_prepare(struct lock_contention *con) if (con->filters->nr_cgrps) { u8 val = 1; - skel->bss->has_cgroup = 1; fd = bpf_map__fd(skel->maps.cgroup_filter); for (i = 0; i < con->filters->nr_cgrps; i++) bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY); } - /* these don't work well if in the rodata section */ - skel->bss->stack_skip = con->stack_skip; - skel->bss->aggr_mode = con->aggr_mode; - skel->bss->needs_callstack = con->save_callstack; - skel->bss->lock_owner = con->owner; + if (con->nr_delays) { + fd = bpf_map__fd(skel->maps.lock_delays); - if (con->aggr_mode == LOCK_AGGR_CGROUP) { - if (cgroup_is_v2("perf_event")) - skel->bss->use_cgroup_v2 = 1; + for (i = 0; i < con->nr_delays; i++) + bpf_map_update_elem(fd, &con->delays[i].addr, &con->delays[i].time, BPF_ANY); + } + if (con->aggr_mode == LOCK_AGGR_CGROUP) read_all_cgroups(&con->cgroups); - } bpf_program__set_autoload(skel->progs.collect_lock_syms, false); lock_contention_bpf__attach(skel); + + /* run the slab iterator after attaching */ + 
run_slab_cache_iter(); + + if (con->filters->nr_slabs) { + u8 val = 1; + int cache_fd; + long key, *prev_key; + + fd = bpf_map__fd(skel->maps.slab_filter); + + /* Read the slab cache map and build a hash with its address */ + cache_fd = bpf_map__fd(skel->maps.slab_caches); + prev_key = NULL; + while (!bpf_map_get_next_key(cache_fd, prev_key, &key)) { + struct slab_cache_data data; + + if (bpf_map_lookup_elem(cache_fd, &key, &data) < 0) + break; + + for (i = 0; i < con->filters->nr_slabs; i++) { + if (!strcmp(con->filters->slabs[i], data.name)) { + bpf_map_update_elem(fd, &key, &val, BPF_ANY); + break; + } + } + prev_key = &key; + } + } + return 0; } @@ -286,6 +529,9 @@ static void account_end_timestamp(struct lock_contention *con) goto next; for (int i = 0; i < total_cpus; i++) { + if (cpu_data[i].lock == 0) + continue; + update_lock_stat(stat_fd, -1, end_ts, aggr_mode, &cpu_data[i]); } @@ -315,7 +561,6 @@ static const char *lock_contention_get_name(struct lock_contention *con, { int idx = 0; u64 addr; - const char *name = ""; static char name_buf[KSYM_NAME_LEN]; struct symbol *sym; struct map *kmap; @@ -330,17 +575,19 @@ static const char *lock_contention_get_name(struct lock_contention *con, if (pid) { struct thread *t = machine__findnew_thread(machine, /*pid=*/-1, pid); - if (t == NULL) - return name; - if (!bpf_map_lookup_elem(task_fd, &pid, &task) && - thread__set_comm(t, task.comm, /*timestamp=*/0)) - name = task.comm; + if (t != NULL && + !bpf_map_lookup_elem(task_fd, &pid, &task) && + thread__set_comm(t, task.comm, /*timestamp=*/0)) { + snprintf(name_buf, sizeof(name_buf), "%s", task.comm); + return name_buf; + } } - return name; + return ""; } if (con->aggr_mode == LOCK_AGGR_ADDR) { int lock_fd = bpf_map__fd(skel->maps.lock_syms); + struct slab_cache_data *slab_data; /* per-process locks set upper bits of the flags */ if (flags & LCD_F_MMAP_LOCK) @@ -359,6 +606,17 @@ static const char *lock_contention_get_name(struct lock_contention *con, return "rq_lock"; } + if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr_or_cgroup, &flags)) { + if (flags == LOCK_CLASS_ZONE_LOCK) + return "zone_lock"; + } + + /* look slab_hash for dynamic locks in a slab object */ + if (hashmap__find(&slab_hash, flags & LCB_F_SLAB_ID_MASK, &slab_data)) { + snprintf(name_buf, sizeof(name_buf), "&%s", slab_data->name); + return name_buf; + } + return ""; } @@ -397,6 +655,63 @@ static const char *lock_contention_get_name(struct lock_contention *con, return name_buf; } +struct lock_stat *pop_owner_stack_trace(struct lock_contention *con) +{ + int stacks_fd, stat_fd; + u64 *stack_trace = NULL; + s32 stack_id; + struct contention_key ckey = {}; + struct contention_data cdata = {}; + size_t stack_size = con->max_stack * sizeof(*stack_trace); + struct lock_stat *st = NULL; + + stacks_fd = bpf_map__fd(skel->maps.owner_stacks); + stat_fd = bpf_map__fd(skel->maps.owner_stat); + if (!stacks_fd || !stat_fd) + goto out_err; + + stack_trace = zalloc(stack_size); + if (stack_trace == NULL) + goto out_err; + + if (bpf_map_get_next_key(stacks_fd, NULL, stack_trace)) + goto out_err; + + bpf_map_lookup_elem(stacks_fd, stack_trace, &stack_id); + ckey.stack_id = stack_id; + bpf_map_lookup_elem(stat_fd, &ckey, &cdata); + + st = zalloc(sizeof(struct lock_stat)); + if (!st) + goto out_err; + + st->name = strdup(stack_trace[0] ? 
lock_contention_get_name(con, NULL, stack_trace, 0) : + "unknown"); + if (!st->name) + goto out_err; + + st->flags = cdata.flags; + st->nr_contended = cdata.count; + st->wait_time_total = cdata.total_time; + st->wait_time_max = cdata.max_time; + st->wait_time_min = cdata.min_time; + st->callstack = stack_trace; + + if (cdata.count) + st->avg_wait_time = cdata.total_time / cdata.count; + + bpf_map_delete_elem(stacks_fd, stack_trace); + bpf_map_delete_elem(stat_fd, &ckey); + + return st; + +out_err: + free(stack_trace); + free(st); + + return NULL; +} + int lock_contention_read(struct lock_contention *con) { int fd, stack, err = 0; @@ -437,9 +752,6 @@ int lock_contention_read(struct lock_contention *con) bpf_prog_test_run_opts(prog_fd, &opts); } - /* make sure it loads the kernel map */ - maps__load_first(machine->kmaps); - prev_key = NULL; while (!bpf_map_get_next_key(fd, prev_key, &key)) { s64 ls_key; @@ -452,7 +764,7 @@ int lock_contention_read(struct lock_contention *con) if (con->save_callstack) { bpf_map_lookup_elem(stack, &key.stack_id, stack_trace); - if (!match_callstack_filter(machine, stack_trace)) { + if (!match_callstack_filter(machine, stack_trace, con->max_stack)) { con->nr_filtered += data.count; goto next; } @@ -533,5 +845,8 @@ int lock_contention_finish(struct lock_contention *con) cgroup__put(cgrp); } + exit_slab_cache_iter(); + btf__free(con->btf); + return 0; } diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c index c863ae0c5cb5..442f91b4e8e1 100644 --- a/tools/perf/util/bpf_map.c +++ b/tools/perf/util/bpf_map.c @@ -5,6 +5,7 @@ #include <bpf/libbpf.h> #include <linux/err.h> #include <linux/kernel.h> +#include <errno.h> #include <stdbool.h> #include <stdlib.h> #include <unistd.h> @@ -35,9 +36,6 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp) if (fd < 0) return fd; - if (!map) - return PTR_ERR(map); - err = -ENOMEM; key = malloc(bpf_map__key_size(map)); if (key == NULL) diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c index 6af36142dc5a..88e0660c4bff 100644 --- a/tools/perf/util/bpf_off_cpu.c +++ b/tools/perf/util/bpf_off_cpu.c @@ -13,6 +13,9 @@ #include "util/cgroup.h" #include "util/strlist.h" #include <bpf/bpf.h> +#include <bpf/btf.h> +#include <internal/xyarray.h> +#include <linux/time64.h> #include "bpf_skel/off_cpu.skel.h" @@ -36,34 +39,25 @@ union off_cpu_data { u64 array[1024 / sizeof(u64)]; }; +u64 off_cpu_raw[MAX_STACKS + 5]; + static int off_cpu_config(struct evlist *evlist) { + char off_cpu_event[64]; struct evsel *evsel; - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_BPF_OUTPUT, - .size = sizeof(attr), /* to capture ABI version */ - }; - char *evname = strdup(OFFCPU_EVENT); - if (evname == NULL) - return -ENOMEM; - - evsel = evsel__new(&attr); - if (!evsel) { - free(evname); - return -ENOMEM; + scnprintf(off_cpu_event, sizeof(off_cpu_event), "bpf-output/name=%s/", OFFCPU_EVENT); + if (parse_event(evlist, off_cpu_event)) { + pr_err("Failed to open off-cpu event\n"); + return -1; } - evsel->core.attr.freq = 1; - evsel->core.attr.sample_period = 1; - /* off-cpu analysis depends on stack trace */ - evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN; - - evlist__add(evlist, evsel); - - free(evsel->name); - evsel->name = evname; + evlist__for_each_entry(evlist, evsel) { + if (evsel__is_offcpu_event(evsel)) { + evsel->core.system_wide = true; + break; + } + } return 0; } @@ -71,21 +65,42 @@ static int off_cpu_config(struct evlist *evlist) static void off_cpu_start(void 
*arg) { struct evlist *evlist = arg; + struct evsel *evsel; + struct perf_cpu pcpu; + int i; /* update task filter for the given workload */ - if (!skel->bss->has_cpu && !skel->bss->has_task && + if (skel->rodata->has_task && skel->rodata->uses_tgid && perf_thread_map__pid(evlist->core.threads, 0) != -1) { int fd; u32 pid; u8 val = 1; - skel->bss->has_task = 1; - skel->bss->uses_tgid = 1; fd = bpf_map__fd(skel->maps.task_filter); pid = perf_thread_map__pid(evlist->core.threads, 0); bpf_map_update_elem(fd, &pid, &val, BPF_ANY); } + /* update BPF perf_event map */ + evsel = evlist__find_evsel_by_str(evlist, OFFCPU_EVENT); + if (evsel == NULL) { + pr_err("%s evsel not found\n", OFFCPU_EVENT); + return; + } + + perf_cpu_map__for_each_cpu(pcpu, i, evsel->core.cpus) { + int err; + int cpu_nr = pcpu.cpu; + + err = bpf_map__update_elem(skel->maps.offcpu_output, &cpu_nr, sizeof(int), + xyarray__entry(evsel->core.fd, cpu_nr, 0), + sizeof(int), BPF_ANY); + if (err) { + pr_err("Failed to update perf event map for direct off-cpu dumping\n"); + return; + } + } + skel->bss->enabled = 1; } @@ -102,6 +117,11 @@ static void check_sched_switch_args(void) const struct btf_type *t1, *t2, *t3; u32 type_id; + if (!btf) { + pr_debug("Missing btf, check if CONFIG_DEBUG_INFO_BTF is enabled\n"); + goto cleanup; + } + type_id = btf__find_by_name_kind(btf, "btf_trace_sched_switch", BTF_KIND_TYPEDEF); if ((s32)type_id < 0) @@ -148,6 +168,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, if (target->cpu_list) { ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); + skel->rodata->has_cpu = 1; } if (target->pid) { @@ -173,11 +194,16 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, ntasks = MAX_PROC; bpf_map__set_max_entries(skel->maps.task_filter, ntasks); + skel->rodata->has_task = 1; + skel->rodata->uses_tgid = 1; } else if (target__has_task(target)) { ntasks = perf_thread_map__nr(evlist->core.threads); bpf_map__set_max_entries(skel->maps.task_filter, ntasks); + skel->rodata->has_task = 1; } else if (target__none(target)) { bpf_map__set_max_entries(skel->maps.task_filter, MAX_PROC); + skel->rodata->has_task = 1; + skel->rodata->uses_tgid = 1; } if (evlist__first(evlist)->cgrp) { @@ -186,6 +212,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, if (!cgroup_is_v2("perf_event")) skel->rodata->uses_cgroup_v1 = true; + skel->rodata->has_cgroup = 1; } if (opts->record_cgroup) { @@ -208,7 +235,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, u32 cpu; u8 val = 1; - skel->bss->has_cpu = 1; fd = bpf_map__fd(skel->maps.cpu_filter); for (i = 0; i < ncpus; i++) { @@ -220,8 +246,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, if (target->pid) { u8 val = 1; - skel->bss->has_task = 1; - skel->bss->uses_tgid = 1; fd = bpf_map__fd(skel->maps.task_filter); strlist__for_each_entry(pos, pid_slist) { @@ -240,7 +264,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, u32 pid; u8 val = 1; - skel->bss->has_task = 1; fd = bpf_map__fd(skel->maps.task_filter); for (i = 0; i < ntasks; i++) { @@ -253,7 +276,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, struct evsel *evsel; u8 val = 1; - skel->bss->has_cgroup = 1; fd = bpf_map__fd(skel->maps.cgroup_filter); evlist__for_each_entry(evlist, evsel) { @@ -272,6 +294,8 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, } } + skel->bss->offcpu_thresh_ns = opts->off_cpu_thresh_ns; 
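A pattern repeated across the bpf_ftrace, bpf_kwork, bpf_lock_contention and bpf_off_cpu hunks: filter flags such as has_cpu, has_task and uses_tgid move from skel->bss, written after load, to skel->rodata, written between open and load. Values in .rodata are frozen read-only when the program is loaded, so the BPF verifier treats them as constants and can prune the branches they guard. A minimal sketch of the two phases with a hypothetical skeleton "foo" (names are illustrative, not from this patch):

static int foo_prepare(void)
{
	struct foo_bpf *skel = foo_bpf__open();

	if (!skel)
		return -1;

	/* .rodata: must be set between open and load; the verifier sees
	 * a constant and drops dead branches at load time */
	skel->rodata->has_task = 1;

	if (foo_bpf__load(skel)) {
		foo_bpf__destroy(skel);
		return -1;
	}

	/* .bss: stays writable after load, for run-time state */
	skel->bss->enabled = 1;
	return 0;
}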
+ err = off_cpu_bpf__attach(skel); if (err) { pr_err("Failed to attach off-cpu BPF skeleton\n"); @@ -295,6 +319,7 @@ int off_cpu_write(struct perf_session *session) { int bytes = 0, size; int fd, stack; + u32 raw_size; u64 sample_type, val, sid = 0; struct evsel *evsel; struct perf_data_file *file = &session->data->file; @@ -334,46 +359,54 @@ int off_cpu_write(struct perf_session *session) while (!bpf_map_get_next_key(fd, &prev, &key)) { int n = 1; /* start from perf_event_header */ - int ip_pos = -1; bpf_map_lookup_elem(fd, &key, &val); + /* zero-fill some of the fields, will be overwritten by raw_data when parsing */ if (sample_type & PERF_SAMPLE_IDENTIFIER) data.array[n++] = sid; - if (sample_type & PERF_SAMPLE_IP) { - ip_pos = n; + if (sample_type & PERF_SAMPLE_IP) data.array[n++] = 0; /* will be updated */ - } if (sample_type & PERF_SAMPLE_TID) - data.array[n++] = (u64)key.pid << 32 | key.tgid; + data.array[n++] = 0; if (sample_type & PERF_SAMPLE_TIME) data.array[n++] = tstamp; - if (sample_type & PERF_SAMPLE_ID) - data.array[n++] = sid; if (sample_type & PERF_SAMPLE_CPU) data.array[n++] = 0; if (sample_type & PERF_SAMPLE_PERIOD) - data.array[n++] = val; - if (sample_type & PERF_SAMPLE_CALLCHAIN) { - int len = 0; - - /* data.array[n] is callchain->nr (updated later) */ - data.array[n + 1] = PERF_CONTEXT_USER; - data.array[n + 2] = 0; - - bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]); - while (data.array[n + 2 + len]) + data.array[n++] = 0; + if (sample_type & PERF_SAMPLE_RAW) { + /* + * [ size ][ data ] + * [ data ] + * [ data ] + * [ data ] + * [ data ][ empty] + */ + int len = 0, i = 0; + void *raw_data = (void *)data.array + n * sizeof(u64); + + off_cpu_raw[i++] = (u64)key.pid << 32 | key.tgid; + off_cpu_raw[i++] = val; + + /* off_cpu_raw[i] is callchain->nr (updated later) */ + off_cpu_raw[i + 1] = PERF_CONTEXT_USER; + off_cpu_raw[i + 2] = 0; + + bpf_map_lookup_elem(stack, &key.stack_id, &off_cpu_raw[i + 2]); + while (off_cpu_raw[i + 2 + len]) len++; - /* update length of callchain */ - data.array[n] = len + 1; + off_cpu_raw[i] = len + 1; + i += len + 2; + + off_cpu_raw[i++] = key.cgroup_id; - /* update sample ip with the first callchain entry */ - if (ip_pos >= 0) - data.array[ip_pos] = data.array[n + 2]; + raw_size = i * sizeof(u64) + sizeof(u32); /* 4 bytes for alignment */ + memcpy(raw_data, &raw_size, sizeof(raw_size)); + memcpy(raw_data + sizeof(u32), off_cpu_raw, i * sizeof(u64)); - /* calculate sample callchain data array length */ - n += len + 2; + n += i + 1; } if (sample_type & PERF_SAMPLE_CGROUP) data.array[n++] = key.cgroup_id; diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index 0acbd74e8c76..2a6e61864ee0 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -7,9 +7,13 @@ */ #include "vmlinux.h" + #include <bpf/bpf_helpers.h> #include <linux/limits.h> +#define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1) +#define __PERF_ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) + /** * is_power_of_2() - check if a value is a power of two * @n: the value to check @@ -22,6 +26,8 @@ #define MAX_CPUS 4096 +#define TRACE_AUG_MAX_BUF 32 /* for buffer augmentation in perf trace */ + /* bpf-output associated map */ struct __augmented_syscalls__ { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); @@ -39,7 +45,7 @@ struct syscalls_sys_enter { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); __type(key, __u32); __type(value, __u32); 
- __uint(max_entries, 512); + __uint(max_entries, 1024); } syscalls_sys_enter SEC(".maps"); /* @@ -51,7 +57,7 @@ struct syscalls_sys_exit { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); __type(key, __u32); __type(value, __u32); - __uint(max_entries, 512); + __uint(max_entries, 1024); } syscalls_sys_exit SEC(".maps"); struct syscall_enter_args { @@ -66,19 +72,6 @@ struct syscall_exit_args { long ret; }; -struct augmented_arg { - unsigned int size; - int err; - char value[PATH_MAX]; -}; - -struct pids_filtered { - __uint(type, BPF_MAP_TYPE_HASH); - __type(key, pid_t); - __type(value, bool); - __uint(max_entries, 64); -} pids_filtered SEC(".maps"); - /* * Desired design of maximum size and alignment (see RFC2553) */ @@ -105,17 +98,27 @@ struct sockaddr_storage { }; }; -struct augmented_args_payload { - struct syscall_enter_args args; - union { - struct { - struct augmented_arg arg, arg2; - }; +struct augmented_arg { + unsigned int size; + int err; + union { + char value[PATH_MAX]; struct sockaddr_storage saddr; - char __data[sizeof(struct augmented_arg)]; }; }; +struct pids_filtered { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, pid_t); + __type(value, bool); + __uint(max_entries, 64); +} pids_filtered SEC(".maps"); + +struct augmented_args_payload { + struct syscall_enter_args args; + struct augmented_arg arg, arg2; // We have to reserve space for two arguments (rename, etc) +}; + // We need more tmp space than the BPF stack can give us struct augmented_args_tmp { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); @@ -124,6 +127,25 @@ struct augmented_args_tmp { __uint(max_entries, 1); } augmented_args_tmp SEC(".maps"); +struct beauty_map_enter { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, __u32[6]); + __uint(max_entries, 512); +} beauty_map_enter SEC(".maps"); + +struct beauty_payload_enter { + struct syscall_enter_args args; + struct augmented_arg aug_args[6]; +}; + +struct beauty_payload_enter_map { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, int); + __type(value, struct beauty_payload_enter); + __uint(max_entries, 1); +} beauty_payload_enter_map SEC(".maps"); + static inline struct augmented_args_payload *augmented_args_payload(void) { int key = 0; @@ -136,6 +158,11 @@ static inline int augmented__output(void *ctx, struct augmented_args_payload *ar return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len); } +static inline int augmented__beauty_output(void *ctx, void *data, int len) +{ + return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, data, len); +} + static inline unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len) { @@ -182,15 +209,17 @@ int sys_enter_connect(struct syscall_enter_args *args) struct augmented_args_payload *augmented_args = augmented_args_payload(); const void *sockaddr_arg = (const void *)args->args[1]; unsigned int socklen = args->args[2]; - unsigned int len = sizeof(augmented_args->args); + unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs if (augmented_args == NULL) return 1; /* Failure: don't filter */ - _Static_assert(is_power_of_2(sizeof(augmented_args->saddr)), "sizeof(augmented_args->saddr) needs to be a power of two"); - socklen &= sizeof(augmented_args->saddr) - 1; + _Static_assert(is_power_of_2(sizeof(augmented_args->arg.saddr)), "sizeof(augmented_args->arg.saddr) needs to be a power of two"); + socklen &= sizeof(augmented_args->arg.saddr) - 1; - 
bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg); + bpf_probe_read_user(&augmented_args->arg.saddr, socklen, sockaddr_arg); + augmented_args->arg.size = socklen; + augmented_args->arg.err = 0; return augmented__output(args, augmented_args, len + socklen); } @@ -201,14 +230,14 @@ int sys_enter_sendto(struct syscall_enter_args *args) struct augmented_args_payload *augmented_args = augmented_args_payload(); const void *sockaddr_arg = (const void *)args->args[4]; unsigned int socklen = args->args[5]; - unsigned int len = sizeof(augmented_args->args); + unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs if (augmented_args == NULL) return 1; /* Failure: don't filter */ - socklen &= sizeof(augmented_args->saddr) - 1; + socklen &= sizeof(augmented_args->arg.saddr) - 1; - bpf_probe_read_user(&augmented_args->saddr, socklen, sockaddr_arg); + bpf_probe_read_user(&augmented_args->arg.saddr, socklen, sockaddr_arg); return augmented__output(args, augmented_args, len + socklen); } @@ -249,30 +278,58 @@ int sys_enter_rename(struct syscall_enter_args *args) struct augmented_args_payload *augmented_args = augmented_args_payload(); const void *oldpath_arg = (const void *)args->args[0], *newpath_arg = (const void *)args->args[1]; - unsigned int len = sizeof(augmented_args->args), oldpath_len; + unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len; if (augmented_args == NULL) return 1; /* Failure: don't filter */ + len += 2 * sizeof(u64); // The overhead of size and err, just before the payload... + oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value)); - len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value)); + augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64)); + len += augmented_args->arg.size; + + /* Every read from userspace is limited to value size */ + if (augmented_args->arg.size > sizeof(augmented_args->arg.value)) + return 1; /* Failure: don't filter */ + + struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size; + + newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value)); + arg2->size = newpath_len; + + len += newpath_len; return augmented__output(args, augmented_args, len); } -SEC("tp/syscalls/sys_enter_renameat") -int sys_enter_renameat(struct syscall_enter_args *args) +SEC("tp/syscalls/sys_enter_renameat2") +int sys_enter_renameat2(struct syscall_enter_args *args) { struct augmented_args_payload *augmented_args = augmented_args_payload(); const void *oldpath_arg = (const void *)args->args[1], *newpath_arg = (const void *)args->args[3]; - unsigned int len = sizeof(augmented_args->args), oldpath_len; + unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len; if (augmented_args == NULL) return 1; /* Failure: don't filter */ + len += 2 * sizeof(u64); // The overhead of size and err, just before the payload... 
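For reference, the PERF_ALIGN() macro defined at the top of this file rounds the first string's length up to the next u64 boundary so that the second augmented_arg header starts aligned. A worked example of the arithmetic, not code from the patch:

/*
 *   PERF_ALIGN(x, 8) == (x + 7) & ~7
 *
 *   PERF_ALIGN(5, 8) = (5 + 7) & ~7 = 12 & ~7 = 8
 *   PERF_ALIGN(8, 8) = (8 + 7) & ~7 = 15 & ~7 = 8
 *   PERF_ALIGN(9, 8) = (9 + 7) & ~7 = 16 & ~7 = 16
 *
 * So for oldpath_len = 4, arg.size = PERF_ALIGN(4 + 1, 8) = 8 and the
 * second augmented_arg lands u64-aligned, 8 bytes into arg.value.
 */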
+ oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value)); - len += oldpath_len + augmented_arg__read_str((void *)(&augmented_args->arg) + oldpath_len, newpath_arg, sizeof(augmented_args->arg.value)); + augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64)); + len += augmented_args->arg.size; + + /* Every read from userspace is limited to value size */ + if (augmented_args->arg.size > sizeof(augmented_args->arg.value)) + return 1; /* Failure: don't filter */ + + struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size; + + newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value)); + arg2->size = newpath_len; + + len += newpath_len; return augmented__output(args, augmented_args, len); } @@ -293,26 +350,26 @@ int sys_enter_perf_event_open(struct syscall_enter_args *args) { struct augmented_args_payload *augmented_args = augmented_args_payload(); const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read; - unsigned int len = sizeof(augmented_args->args); + unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs if (augmented_args == NULL) goto failure; - if (bpf_probe_read_user(&augmented_args->__data, sizeof(*attr), attr) < 0) + if (bpf_probe_read_user(&augmented_args->arg.value, sizeof(*attr), attr) < 0) goto failure; - attr_read = (const struct perf_event_attr_size *)augmented_args->__data; + attr_read = (const struct perf_event_attr_size *)augmented_args->arg.value; __u32 size = attr_read->size; if (!size) size = PERF_ATTR_SIZE_VER0; - if (size > sizeof(augmented_args->__data)) + if (size > sizeof(augmented_args->arg.value)) goto failure; // Now that we read attr->size and tested it against the size limits, read it completely - if (bpf_probe_read_user(&augmented_args->__data, size, attr) < 0) + if (bpf_probe_read_user(&augmented_args->arg.value, size, attr) < 0) goto failure; return augmented__output(args, augmented_args, len + size); @@ -325,16 +382,16 @@ int sys_enter_clock_nanosleep(struct syscall_enter_args *args) { struct augmented_args_payload *augmented_args = augmented_args_payload(); const void *rqtp_arg = (const void *)args->args[2]; - unsigned int len = sizeof(augmented_args->args); + unsigned int len = sizeof(u64) + sizeof(augmented_args->args); // the size + err in all 'augmented_arg' structs __u32 size = sizeof(struct timespec64); if (augmented_args == NULL) goto failure; - if (size > sizeof(augmented_args->__data)) + if (size > sizeof(augmented_args->arg.value)) goto failure; - bpf_probe_read_user(&augmented_args->__data, size, rqtp_arg); + bpf_probe_read_user(&augmented_args->arg.value, size, rqtp_arg); return augmented__output(args, augmented_args, len + size); failure: @@ -352,10 +409,10 @@ int sys_enter_nanosleep(struct syscall_enter_args *args) if (augmented_args == NULL) goto failure; - if (size > sizeof(augmented_args->__data)) + if (size > sizeof(augmented_args->arg.value)) goto failure; - bpf_probe_read_user(&augmented_args->__data, size, req_arg); + bpf_probe_read_user(&augmented_args->arg.value, size, req_arg); return augmented__output(args, augmented_args, len + size); failure: @@ -372,6 +429,98 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) return bpf_map_lookup_elem(pids, &pid) != NULL; } +static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) +{ + bool augmented, do_output = false; + int 
zero = 0, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value); + u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */ + s64 aug_size, size; + unsigned int nr, *beauty_map; + struct beauty_payload_enter *payload; + void *arg, *payload_offset; + + /* fall back to do predefined tail call */ + if (args == NULL) + return 1; + + /* use syscall number to get beauty_map entry */ + nr = (__u32)args->syscall_nr; + beauty_map = bpf_map_lookup_elem(&beauty_map_enter, &nr); + + /* set up payload for output */ + payload = bpf_map_lookup_elem(&beauty_payload_enter_map, &zero); + payload_offset = (void *)&payload->aug_args; + + if (beauty_map == NULL || payload == NULL) + return 1; + + /* copy the sys_enter header, which has the syscall_nr */ + __builtin_memcpy(&payload->args, args, sizeof(struct syscall_enter_args)); + + /* + * Determine what type of argument and how many bytes to read from user space, using the + * value in the beauty_map. This is the relation of parameter type and its corresponding + * value in the beauty map, and how many bytes we read eventually: + * + * string: 1 -> size of string + * struct: size of struct -> size of struct + * buffer: -1 * (index of paired len) -> value of paired len (maximum: TRACE_AUG_MAX_BUF) + */ + for (int i = 0; i < 6; i++) { + arg = (void *)args->args[i]; + augmented = false; + size = beauty_map[i]; + aug_size = size; /* size of the augmented data read from user space */ + + if (size == 0 || arg == NULL) + continue; + + if (size == 1) { /* string */ + aug_size = bpf_probe_read_user_str(((struct augmented_arg *)payload_offset)->value, value_size, arg); + /* minimum of 0 to pass the verifier */ + if (aug_size < 0) + aug_size = 0; + + augmented = true; + } else if (size > 0 && size <= value_size) { /* struct */ + if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg)) + augmented = true; + } else if ((int)size < 0 && size >= -6) { /* buffer */ + index = -(size + 1); + barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick. + index &= 7; // Satisfy the bounds checking with the verifier in some kernels. + aug_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index]; + + if (aug_size > 0) { + if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg)) + augmented = true; + } + } + + /* Augmented data size is limited to sizeof(augmented_arg->unnamed union with value field) */ + if (aug_size > value_size) + aug_size = value_size; + + /* write data to payload */ + if (augmented) { + int written = offsetof(struct augmented_arg, value) + aug_size; + + if (written < 0 || written > sizeof(struct augmented_arg)) + return 1; + + ((struct augmented_arg *)payload_offset)->size = aug_size; + output += written; + payload_offset += written; + do_output = true; + } + } + + if (!do_output || (sizeof(struct syscall_enter_args) + output) > sizeof(struct beauty_payload_enter)) + return 1; + + return augmented__beauty_output(ctx, payload, sizeof(struct syscall_enter_args) + output); +} + SEC("tp/raw_syscalls/sys_enter") int sys_enter(struct syscall_enter_args *args) { @@ -400,7 +549,8 @@ int sys_enter(struct syscall_enter_args *args) * "!raw_syscalls:unaugmented" that will just return 1 to return the * unaugmented tracepoint payload. 
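 *
 * As a reading aid (hypothetical values, not from this patch): for
 * openat(dfd, filename, flags, mode) the tool side could fill
 * beauty_map_enter with {0, 1, 0, 0, 0, 0}, marking arg1 (filename) as a
 * string, while write(fd, buf, count) would get {0, -3, 0, 0, 0, 0},
 * marking arg1 (buf) as a buffer whose paired length is count, the third
 * argument, following the "-1 * (index of paired len)" encoding that
 * augment_sys_enter() decodes.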
*/ - bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr); + if (augment_sys_enter(args, &augmented_args->args)) + bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr); // If not found on the PROG_ARRAY syscalls map, then we're filtering it: return 0; diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c index 6a438e0102c5..c2298a2decc9 100644 --- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c @@ -1,14 +1,12 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2021 Facebook // Copyright (c) 2021 Google +#include "bperf_cgroup.h" #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> -#define MAX_LEVELS 10 // max cgroup hierarchy level: arbitrary -#define MAX_EVENTS 32 // max events per cgroup: arbitrary - // NOTE: many of map and global data will be modified before loading // from the userspace (perf tool) using the skeleton helpers. @@ -57,9 +55,9 @@ struct cgroup___old { const volatile __u32 num_events = 1; const volatile __u32 num_cpus = 1; +const volatile int use_cgroup_v2 = 0; int enabled = 0; -int use_cgroup_v2 = 0; int perf_subsys_id = -1; static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level) @@ -97,7 +95,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_subsys_id], cgroup); level = BPF_CORE_READ(cgrp, level); - for (cnt = 0; i < MAX_LEVELS; i++) { + for (cnt = 0; i < BPERF_CGROUP__MAX_LEVELS; i++) { __u64 cgrp_id; if (i > level) @@ -123,7 +121,7 @@ static inline int get_cgroup_v2_idx(__u32 *cgrps, int size) __u32 *elem; int cnt; - for (cnt = 0; i < MAX_LEVELS; i++) { + for (cnt = 0; i < BPERF_CGROUP__MAX_LEVELS; i++) { __u64 cgrp_id = bpf_get_current_ancestor_cgroup_id(i); if (cgrp_id == 0) @@ -148,17 +146,17 @@ static int bperf_cgroup_count(void) register int c = 0; struct bpf_perf_event_value val, delta, *prev_val, *cgrp_val; __u32 cpu = bpf_get_smp_processor_id(); - __u32 cgrp_idx[MAX_LEVELS]; + __u32 cgrp_idx[BPERF_CGROUP__MAX_LEVELS]; int cgrp_cnt; __u32 key, cgrp; long err; if (use_cgroup_v2) - cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, MAX_LEVELS); + cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, BPERF_CGROUP__MAX_LEVELS); else - cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, MAX_LEVELS); + cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, BPERF_CGROUP__MAX_LEVELS); - for ( ; idx < MAX_EVENTS; idx++) { + for ( ; idx < BPERF_CGROUP__MAX_EVENTS; idx++) { if (idx == num_events) break; @@ -186,7 +184,7 @@ static int bperf_cgroup_count(void) delta.enabled = val.enabled - prev_val->enabled; delta.running = val.running - prev_val->running; - for (c = 0; c < MAX_LEVELS; c++) { + for (c = 0; c < BPERF_CGROUP__MAX_LEVELS; c++) { if (c == cgrp_cnt) break; diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.h b/tools/perf/util/bpf_skel/bperf_cgroup.h new file mode 100644 index 000000000000..3fb84b19d39a --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf_cgroup.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Data structures shared between BPF and tools. */ +#ifndef __BPERF_CGROUP_H +#define __BPERF_CGROUP_H + +// These constants impact code size of bperf_cgroup.bpf.c that may result in BPF +// verifier issues. They are exposed to control the size and also to disable BPF +// counters when the number of user events is too large. 
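+//
+// A rough, illustrative cost model (not from the sources): bperf_cgroup_count()
+// loops over up to BPERF_CGROUP__MAX_EVENTS events and, for each one, walks up
+// to BPERF_CGROUP__MAX_LEVELS cgroup levels, so the verified code size grows
+// roughly with their product: 128 * 10 = 1280 update paths with the values below.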
+ +// max cgroup hierarchy level: arbitrary +#define BPERF_CGROUP__MAX_LEVELS 10 +// max events per cgroup: arbitrary +#define BPERF_CGROUP__MAX_EVENTS 128 + +#endif /* __BPERF_CGROUP_H */ diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c index f193998530d4..0595063139a3 100644 --- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c @@ -5,6 +5,8 @@ #include <bpf/bpf_tracing.h> #include "bperf_u.h" +#define MAX_ENTRIES 102400 + struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(key_size, sizeof(__u32)); @@ -22,25 +24,29 @@ struct { struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bperf_filter_value)); + __uint(max_entries, MAX_ENTRIES); + __uint(map_flags, BPF_F_NO_PREALLOC); } filter SEC(".maps"); enum bperf_filter_type type = 0; int enabled = 0; +int inherit; SEC("fexit/XXX") int BPF_PROG(fexit_XXX) { struct bpf_perf_event_value *diff_val, *accum_val; __u32 filter_key, zero = 0; - __u32 *accum_key; + __u32 accum_key; + struct bperf_filter_value *fval; if (!enabled) return 0; switch (type) { case BPERF_FILTER_GLOBAL: - accum_key = &zero; + accum_key = zero; goto do_add; case BPERF_FILTER_CPU: filter_key = bpf_get_smp_processor_id(); @@ -49,22 +55,34 @@ int BPF_PROG(fexit_XXX) filter_key = bpf_get_current_pid_tgid() & 0xffffffff; break; case BPERF_FILTER_TGID: - filter_key = bpf_get_current_pid_tgid() >> 32; + /* Use pid as the filter_key to exclude new task counts + * when inherit is disabled. Don't worry about the existing + * children in TGID losing their counts, bpf_counter has + * already added them to the filter map via perf_thread_map + * before this bpf prog runs. + */ + filter_key = inherit ? + bpf_get_current_pid_tgid() >> 32 : + bpf_get_current_pid_tgid() & 0xffffffff; break; default: return 0; } - accum_key = bpf_map_lookup_elem(&filter, &filter_key); - if (!accum_key) + fval = bpf_map_lookup_elem(&filter, &filter_key); + if (!fval) return 0; + accum_key = fval->accum_key; + if (fval->exited) + bpf_map_delete_elem(&filter, &filter_key); + do_add: diff_val = bpf_map_lookup_elem(&diff_readings, &zero); if (!diff_val) return 0; - accum_val = bpf_map_lookup_elem(&accum_readings, accum_key); + accum_val = bpf_map_lookup_elem(&accum_readings, &accum_key); if (!accum_val) return 0; @@ -75,4 +93,70 @@ do_add: return 0; } +/* The program is only used for PID or TGID filter types. */ +SEC("tp_btf/task_newtask") +int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags) +{ + __u32 parent_key, child_key; + struct bperf_filter_value *parent_fval; + struct bperf_filter_value child_fval = { 0 }; + + if (!enabled) + return 0; + + switch (type) { + case BPERF_FILTER_PID: + parent_key = bpf_get_current_pid_tgid() & 0xffffffff; + child_key = task->pid; + break; + case BPERF_FILTER_TGID: + parent_key = bpf_get_current_pid_tgid() >> 32; + child_key = task->tgid; + if (child_key == parent_key) + return 0; + break; + default: + return 0; + } + + /* Check if the current task is one of the target tasks to be counted */ + parent_fval = bpf_map_lookup_elem(&filter, &parent_key); + if (!parent_fval) + return 0; + + /* Start counting for the new task by adding it into filter map, + * inherit the accum key of its parent task so that they can be + * counted together. 
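+	 *
+	 * Illustrative example: with BPERF_FILTER_PID and inherit enabled,
+	 * if pid 100 is counted under accum_key K, a fork creating pid 101
+	 * installs filter[101] = { .accum_key = K, .exited = 0 }, so parent
+	 * and child accumulate into the same accum_readings slot.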
+ */ + child_fval.accum_key = parent_fval->accum_key; + child_fval.exited = 0; + bpf_map_update_elem(&filter, &child_key, &child_fval, BPF_NOEXIST); + + return 0; +} + +/* The program is only used for PID or TGID filter types. */ +SEC("tp_btf/sched_process_exit") +int BPF_PROG(on_exittask, struct task_struct *task) +{ + __u32 pid; + struct bperf_filter_value *fval; + + if (!enabled) + return 0; + + /* Stop counting for this task by removing it from filter map. + * For TGID type, if the pid can be found in the map, it means that + * this pid belongs to the leader task. After the task exits, the + * tgid of its child tasks (if any) will be 1, so the pid can be + * safely removed. + */ + pid = task->pid; + fval = bpf_map_lookup_elem(&filter, &pid); + if (fval) + fval->exited = 1; + + return 0; +} + char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/bpf_skel/bperf_u.h b/tools/perf/util/bpf_skel/bperf_u.h index 1ce0c2c905c1..4a4a753980be 100644 --- a/tools/perf/util/bpf_skel/bperf_u.h +++ b/tools/perf/util/bpf_skel/bperf_u.h @@ -11,4 +11,9 @@ enum bperf_filter_type { BPERF_FILTER_TGID, }; +struct bperf_filter_value { + __u32 accum_key; + __u8 exited; +}; + #endif /* __BPERF_STAT_U_H */ diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c index 9d01e3af7479..621e2022c8bc 100644 --- a/tools/perf/util/bpf_skel/func_latency.bpf.c +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c @@ -37,38 +37,104 @@ struct { int enabled = 0; -int has_cpu = 0; -int has_task = 0; -int use_nsec = 0; -SEC("kprobe/func") -int BPF_PROG(func_begin) +// stats +__s64 total; +__s64 count; +__s64 max; +__s64 min; + +const volatile int has_cpu = 0; +const volatile int has_task = 0; +const volatile int use_nsec = 0; +const volatile unsigned int bucket_range; +const volatile unsigned int min_latency; +const volatile unsigned int max_latency; +const volatile unsigned int bucket_num = NUM_BUCKET; + +static bool can_record(void) { - __u64 key, now; - - if (!enabled) - return 0; - - key = bpf_get_current_pid_tgid(); - if (has_cpu) { __u32 cpu = bpf_get_smp_processor_id(); __u8 *ok; ok = bpf_map_lookup_elem(&cpu_filter, &cpu); if (!ok) - return 0; + return false; } if (has_task) { - __u32 pid = key & 0xffffffff; + __u32 pid = bpf_get_current_pid_tgid(); __u8 *ok; ok = bpf_map_lookup_elem(&task_filter, &pid); if (!ok) - return 0; + return false; + } + return true; +} + +static void update_latency(__s64 delta) +{ + __u64 val = delta; + __u32 key = 0; + __u64 *hist; + __u64 cmp_base = use_nsec ? 1 : 1000; + + if (delta < 0) + return; + + if (bucket_range != 0) { + val = delta / cmp_base; + + if (min_latency > 0) { + if (val > min_latency) + val -= min_latency; + else + goto do_lookup; + } + + // Less than 1 unit (ms or ns), or, in the future, + // than the min latency desired. + if (val > 0) { // 1st entry: [ 1 unit .. 
bucket_range units ) + key = val / bucket_range + 1; + if (key >= bucket_num) + key = bucket_num - 1; + } + + goto do_lookup; + } + // calculate index using delta + for (key = 0; key < (bucket_num - 1); key++) { + if (delta < (cmp_base << key)) + break; } +do_lookup: + hist = bpf_map_lookup_elem(&latency, &key); + if (!hist) + return; + + __sync_fetch_and_add(hist, 1); + + __sync_fetch_and_add(&total, delta); // always in nsec + __sync_fetch_and_add(&count, 1); + + if (delta > max) + max = delta; + if (delta < min) + min = delta; +} + +SEC("kprobe/func") +int BPF_PROG(func_begin) +{ + __u64 key, now; + + if (!enabled || !can_record()) + return 0; + + key = bpf_get_current_pid_tgid(); now = bpf_ktime_get_ns(); // overwrite timestamp for nested functions @@ -81,7 +147,6 @@ int BPF_PROG(func_end) { __u64 tid; __u64 *start; - __u64 cmp_base = use_nsec ? 1 : 1000; if (!enabled) return 0; @@ -90,26 +155,44 @@ int BPF_PROG(func_end) start = bpf_map_lookup_elem(&functime, &tid); if (start) { - __s64 delta = bpf_ktime_get_ns() - *start; - __u32 key; - __u64 *hist; - + update_latency(bpf_ktime_get_ns() - *start); bpf_map_delete_elem(&functime, &tid); + } + + return 0; +} - if (delta < 0) - return 0; +SEC("raw_tp") +int BPF_PROG(event_begin) +{ + __u64 key, now; - // calculate index using delta - for (key = 0; key < (NUM_BUCKET - 1); key++) { - if (delta < (cmp_base << key)) - break; - } + if (!enabled || !can_record()) + return 0; - hist = bpf_map_lookup_elem(&latency, &key); - if (!hist) - return 0; + key = bpf_get_current_pid_tgid(); + now = bpf_ktime_get_ns(); - *hist += 1; + // overwrite timestamp for nested events + bpf_map_update_elem(&functime, &key, &now, BPF_ANY); + return 0; +} + +SEC("raw_tp") +int BPF_PROG(event_end) +{ + __u64 tid; + __u64 *start; + + if (!enabled) + return 0; + + tid = bpf_get_current_pid_tgid(); + + start = bpf_map_lookup_elem(&functime, &tid); + if (start) { + update_latency(bpf_ktime_get_ns() - *start); + bpf_map_delete_elem(&functime, &tid); } return 0; diff --git a/tools/perf/util/bpf_skel/kwork_top.bpf.c b/tools/perf/util/bpf_skel/kwork_top.bpf.c index 84c15ccbab44..6673386302e2 100644 --- a/tools/perf/util/bpf_skel/kwork_top.bpf.c +++ b/tools/perf/util/bpf_skel/kwork_top.bpf.c @@ -18,7 +18,7 @@ enum kwork_class_type { }; #define MAX_ENTRIES 102400 -#define MAX_NR_CPUS 2048 +#define MAX_NR_CPUS 4096 #define PF_KTHREAD 0x00200000 #define MAX_COMMAND_LEN 16 @@ -84,7 +84,7 @@ struct { int enabled = 0; -int has_cpu_filter = 0; +const volatile int has_cpu_filter = 0; __u64 from_timestamp = 0; __u64 to_timestamp = 0; diff --git a/tools/perf/util/bpf_skel/kwork_trace.bpf.c b/tools/perf/util/bpf_skel/kwork_trace.bpf.c index 063c124e0999..9ce9c8dddc4b 100644 --- a/tools/perf/util/bpf_skel/kwork_trace.bpf.c +++ b/tools/perf/util/bpf_skel/kwork_trace.bpf.c @@ -68,8 +68,9 @@ struct { } perf_kwork_name_filter SEC(".maps"); int enabled = 0; -int has_cpu_filter = 0; -int has_name_filter = 0; + +const volatile int has_cpu_filter = 0; +const volatile int has_name_filter = 0; static __always_inline int local_strncmp(const char *s1, unsigned int sz, const char *s2) @@ -79,7 +80,7 @@ static __always_inline int local_strncmp(const char *s1, for (i = 0; i < sz; i++) { ret = (unsigned char)s1[i] - (unsigned char)s2[i]; - if (ret || !s1[i] || !s2[i]) + if (ret || !s1[i]) break; } diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index d931a898c434..96e7d853b9ed 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ 
b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -11,6 +11,12 @@ /* for collect_lock_syms(). 4096 was rejected by the verifier */ #define MAX_CPUS 1024 +/* for collect_zone_lock(). It should be more than the actual zones. */ +#define MAX_ZONES 10 + +/* for do_lock_delay(). Arbitrarily set to 1 million. */ +#define MAX_LOOP (1U << 20) + /* lock contention flags from include/trace/events/lock.h */ #define LCB_F_SPIN (1U << 0) #define LCB_F_READ (1U << 1) @@ -27,6 +33,38 @@ struct { __uint(max_entries, MAX_ENTRIES); } stacks SEC(".maps"); +/* buffer for owner stacktrace */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, 1); +} stack_buf SEC(".maps"); + +/* a map for tracing owner stacktrace to owner stack id */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); // owner stacktrace + __uint(value_size, sizeof(__s32)); // owner stack id + __uint(max_entries, 1); +} owner_stacks SEC(".maps"); + +/* a map for tracing lock address to owner data */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); // lock address + __uint(value_size, sizeof(struct owner_tracing_data)); + __uint(max_entries, 1); +} owner_data SEC(".maps"); + +/* a map for contention_key (stores owner stack id) to contention data */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct contention_key)); + __uint(value_size, sizeof(struct contention_data)); + __uint(max_entries, 1); +} owner_stat SEC(".maps"); + /* maintain timestamp at the beginning of contention */ struct { __uint(type, BPF_MAP_TYPE_HASH); @@ -100,6 +138,27 @@ struct { __uint(max_entries, 1); } cgroup_filter SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(long)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} slab_filter SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(long)); + __uint(value_size, sizeof(struct slab_cache_data)); + __uint(max_entries, 1); +} slab_caches SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, 1); +} lock_delays SEC(".maps"); + struct rw_semaphore___old { struct task_struct *owner; } __attribute__((preserve_access_index)); @@ -116,25 +175,33 @@ struct mm_struct___new { struct rw_semaphore mmap_lock; } __attribute__((preserve_access_index)); +extern struct kmem_cache *bpf_get_kmem_cache(u64 addr) __ksym __weak; + /* control flags */ -int enabled; -int has_cpu; -int has_task; -int has_type; -int has_addr; -int has_cgroup; -int needs_callstack; -int stack_skip; -int lock_owner; - -int use_cgroup_v2; -int perf_subsys_id = -1; +const volatile int has_cpu; +const volatile int has_task; +const volatile int has_type; +const volatile int has_addr; +const volatile int has_cgroup; +const volatile int has_slab; +const volatile int needs_callstack; +const volatile int stack_skip; +const volatile int lock_owner; +const volatile int use_cgroup_v2; +const volatile int max_stack; +const volatile int lock_delay; /* determine the key of lock stat */ -int aggr_mode; +const volatile int aggr_mode; + +int enabled; + +int perf_subsys_id = -1; __u64 end_ts; +__u32 slab_cache_id; + /* error stat */ int task_fail; int stack_fail; @@ -144,6 +211,9 @@ int data_fail; int task_map_full; int data_map_full; +struct task_struct *bpf_task_from_pid(s32 pid) __ksym __weak; +void 
bpf_task_release(struct task_struct *p) __ksym __weak; + static inline __u64 get_current_cgroup_id(void) { struct task_struct *task; @@ -201,7 +271,7 @@ static inline int can_record(u64 *ctx) __u64 addr = ctx[0]; ok = bpf_map_lookup_elem(&addr_filter, &addr); - if (!ok) + if (!ok && !has_slab) return 0; } @@ -214,6 +284,17 @@ static inline int can_record(u64 *ctx) return 0; } + if (has_slab && bpf_get_kmem_cache) { + __u8 *ok; + __u64 addr = ctx[0]; + long kmem_cache_addr; + + kmem_cache_addr = (long)bpf_get_kmem_cache(addr); + ok = bpf_map_lookup_elem(&slab_filter, &kmem_cache_addr); + if (!ok) + return 0; + } + return 1; } @@ -317,14 +398,42 @@ static inline __u32 check_lock_type(__u64 lock, __u32 flags) return 0; } +static inline long delay_callback(__u64 idx, void *arg) +{ + __u64 target = *(__u64 *)arg; + + if (target <= bpf_ktime_get_ns()) + return 1; + + /* just to kill time */ + (void)bpf_get_prandom_u32(); + + return 0; +} + +static inline void do_lock_delay(__u64 duration) +{ + __u64 target = bpf_ktime_get_ns() + duration; + + bpf_loop(MAX_LOOP, delay_callback, &target, /*flags=*/0); +} + +static inline void check_lock_delay(__u64 lock) +{ + __u64 *delay; + + delay = bpf_map_lookup_elem(&lock_delays, &lock); + if (delay) + do_lock_delay(*delay); +} + static inline struct tstamp_data *get_tstamp_elem(__u32 flags) { __u32 pid; struct tstamp_data *pelem; /* Use per-cpu array map for spinlock and rwlock */ - if (flags == (LCB_F_SPIN | LCB_F_READ) || flags == LCB_F_SPIN || - flags == (LCB_F_SPIN | LCB_F_WRITE)) { + if ((flags & (LCB_F_SPIN | LCB_F_MUTEX)) == LCB_F_SPIN) { __u32 idx = 0; pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx); @@ -357,6 +466,61 @@ static inline struct tstamp_data *get_tstamp_elem(__u32 flags) return pelem; } +static inline s32 get_owner_stack_id(u64 *stacktrace) +{ + s32 *id, new_id; + static s64 id_gen = 1; + + id = bpf_map_lookup_elem(&owner_stacks, stacktrace); + if (id) + return *id; + + new_id = (s32)__sync_fetch_and_add(&id_gen, 1); + + bpf_map_update_elem(&owner_stacks, stacktrace, &new_id, BPF_NOEXIST); + + id = bpf_map_lookup_elem(&owner_stacks, stacktrace); + if (id) + return *id; + + return -1; +} + +static inline void update_contention_data(struct contention_data *data, u64 duration, u32 count) +{ + __sync_fetch_and_add(&data->total_time, duration); + __sync_fetch_and_add(&data->count, count); + + /* FIXME: need atomic operations */ + if (data->max_time < duration) + data->max_time = duration; + if (data->min_time > duration) + data->min_time = duration; +} + +static inline void update_owner_stat(u32 id, u64 duration, u32 flags) +{ + struct contention_key key = { + .stack_id = id, + .pid = 0, + .lock_addr_or_cgroup = 0, + }; + struct contention_data *data = bpf_map_lookup_elem(&owner_stat, &key); + + if (!data) { + struct contention_data first = { + .total_time = duration, + .max_time = duration, + .min_time = duration, + .count = 1, + .flags = flags, + }; + bpf_map_update_elem(&owner_stat, &key, &first, BPF_NOEXIST); + } else { + update_contention_data(data, duration, 1); + } +} + SEC("tp_btf/contention_begin") int contention_begin(u64 *ctx) { @@ -374,6 +538,72 @@ int contention_begin(u64 *ctx) pelem->flags = (__u32)ctx[1]; if (needs_callstack) { + u32 i = 0; + u32 id = 0; + int owner_pid; + u64 *buf; + struct task_struct *task; + struct owner_tracing_data *otdata; + + if (!lock_owner) + goto skip_owner; + + task = get_lock_owner(pelem->lock, pelem->flags); + if (!task) + goto skip_owner; + + owner_pid = BPF_CORE_READ(task, pid); + + buf = 
bpf_map_lookup_elem(&stack_buf, &i);
+		if (!buf)
+			goto skip_owner;
+		for (i = 0; i < max_stack; i++)
+			buf[i] = 0x0;
+
+		if (!bpf_task_from_pid)
+			goto skip_owner;
+
+		task = bpf_task_from_pid(owner_pid);
+		if (!task)
+			goto skip_owner;
+
+		bpf_get_task_stack(task, buf, max_stack * sizeof(unsigned long), 0);
+		bpf_task_release(task);
+
+		otdata = bpf_map_lookup_elem(&owner_data, &pelem->lock);
+		id = get_owner_stack_id(buf);
+
+		/*
+		 * Contention has just begun, or, in a corner case, `lock` is owned by a
+		 * process other than `owner_pid`. For the corner case we treat it as an
+		 * unexpected internal error and just ignore the previous tracing record.
+		 */
+		if (!otdata || otdata->pid != owner_pid) {
+			struct owner_tracing_data first = {
+				.pid = owner_pid,
+				.timestamp = pelem->timestamp,
+				.count = 1,
+				.stack_id = id,
+			};
+			bpf_map_update_elem(&owner_data, &pelem->lock, &first, BPF_ANY);
+		}
+		/* Contention is ongoing and a new waiter joins */
+		else {
+			__sync_fetch_and_add(&otdata->count, 1);
+
+			/*
+			 * The owner is the same, but the stacktrace might have changed.
+			 * In this case we store/update `owner_stat` based on the current
+			 * owner stack id.
+			 */
+			if (id != otdata->stack_id) {
+				update_owner_stat(id, pelem->timestamp - otdata->timestamp,
+						  pelem->flags);
+
+				otdata->timestamp = pelem->timestamp;
+				otdata->stack_id = id;
+			}
+		}
+skip_owner:
 		pelem->stack_id = bpf_get_stackid(ctx, &stacks,
 						  BPF_F_FAST_STACK_CMP | stack_skip);
 		if (pelem->stack_id < 0)
@@ -410,6 +640,7 @@ int contention_end(u64 *ctx)
 	struct tstamp_data *pelem;
 	struct contention_key key = {};
 	struct contention_data *data;
+	__u64 timestamp;
 	__u64 duration;
 	bool need_delete = false;
 
@@ -437,15 +668,88 @@ int contention_end(u64 *ctx)
 		need_delete = true;
 	}
 
-	duration = bpf_ktime_get_ns() - pelem->timestamp;
+	timestamp = bpf_ktime_get_ns();
+	duration = timestamp - pelem->timestamp;
 	if ((__s64)duration < 0) {
-		pelem->lock = 0;
-		if (need_delete)
-			bpf_map_delete_elem(&tstamp, &pid);
 		__sync_fetch_and_add(&time_fail, 1);
-		return 0;
+		goto out;
 	}
 
+	if (needs_callstack && lock_owner) {
+		struct owner_tracing_data *otdata = bpf_map_lookup_elem(&owner_data, &pelem->lock);
+
+		if (!otdata)
+			goto skip_owner;
+
+		/* Update `owner_stat` */
+		update_owner_stat(otdata->stack_id, timestamp - otdata->timestamp, pelem->flags);
+
+		/* No contention is occurring; delete the `lock` entry in `owner_data` */
+		if (otdata->count <= 1)
+			bpf_map_delete_elem(&owner_data, &pelem->lock);
+		/*
+		 * Contention is still ongoing, with a new owner (current task). `owner_data`
+		 * should be updated accordingly.
+		 */
+		else {
+			u32 i = 0;
+			s32 ret = (s32)ctx[1];
+			u64 *buf;
+
+			otdata->timestamp = timestamp;
+			__sync_fetch_and_add(&otdata->count, -1);
+
+			buf = bpf_map_lookup_elem(&stack_buf, &i);
+			if (!buf)
+				goto skip_owner;
+			for (i = 0; i < (u32)max_stack; i++)
+				buf[i] = 0x0;
+
+			/*
+			 * `ret` holds the return code of the lock function.
+			 * If `ret` is negative, the current task gave up waiting without
+			 * acquiring the lock. The owner is not changed, but we still need
+			 * to update the owner stack.
+			 */
+			if (ret < 0) {
+				s32 id = 0;
+				struct task_struct *task;
+
+				if (!bpf_task_from_pid)
+					goto skip_owner;
+
+				task = bpf_task_from_pid(otdata->pid);
+				if (!task)
+					goto skip_owner;
+
+				bpf_get_task_stack(task, buf,
+						   max_stack * sizeof(unsigned long), 0);
+				bpf_task_release(task);
+
+				id = get_owner_stack_id(buf);
+
+				/*
+				 * If the owner stack has changed, update the owner stack id for this lock.
+ */ + if (id != otdata->stack_id) + otdata->stack_id = id; + } + /* + * Otherwise, update tracing data with the current task, which is the new + * owner. + */ + else { + otdata->pid = pid; + /* + * We don't want to retrieve callstack here, since it is where the + * current task acquires the lock and provides no additional + * information. We simply assign -1 to invalidate it. + */ + otdata->stack_id = -1; + } + } + } +skip_owner: switch (aggr_mode) { case LOCK_AGGR_CALLER: key.stack_id = pelem->stack_id; @@ -477,11 +781,8 @@ int contention_end(u64 *ctx) data = bpf_map_lookup_elem(&lock_stat, &key); if (!data) { if (data_map_full) { - pelem->lock = 0; - if (need_delete) - bpf_map_delete_elem(&tstamp, &pid); __sync_fetch_and_add(&data_fail, 1); - return 0; + goto out; } struct contention_data first = { @@ -493,29 +794,50 @@ int contention_end(u64 *ctx) }; int err; - if (aggr_mode == LOCK_AGGR_ADDR) - first.flags |= check_lock_type(pelem->lock, pelem->flags); + if (aggr_mode == LOCK_AGGR_ADDR) { + first.flags |= check_lock_type(pelem->lock, + pelem->flags & LCB_F_TYPE_MASK); + + /* Check if it's from a slab object */ + if (bpf_get_kmem_cache) { + struct kmem_cache *s; + struct slab_cache_data *d; + + s = bpf_get_kmem_cache(pelem->lock); + if (s != NULL) { + /* + * Save the ID of the slab cache in the flags + * (instead of full address) to reduce the + * space in the contention_data. + */ + d = bpf_map_lookup_elem(&slab_caches, &s); + if (d != NULL) + first.flags |= d->id; + } + } + } err = bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST); if (err < 0) { + if (err == -EEXIST) { + /* it lost the race, try to get it again */ + data = bpf_map_lookup_elem(&lock_stat, &key); + if (data != NULL) + goto found; + } if (err == -E2BIG) data_map_full = 1; __sync_fetch_and_add(&data_fail, 1); } - pelem->lock = 0; - if (need_delete) - bpf_map_delete_elem(&tstamp, &pid); - return 0; + goto out; } - __sync_fetch_and_add(&data->total_time, duration); - __sync_fetch_and_add(&data->count, 1); +found: + update_contention_data(data, duration, 1); - /* FIXME: need atomic operations */ - if (data->max_time < duration) - data->max_time = duration; - if (data->min_time > duration) - data->min_time = duration; +out: + if (lock_delay) + check_lock_delay(pelem->lock); pelem->lock = 0; if (need_delete) @@ -525,6 +847,11 @@ int contention_end(u64 *ctx) extern struct rq runqueues __ksym; +const volatile __u64 contig_page_data_addr; +const volatile __u64 node_data_addr; +const volatile int nr_nodes; +const volatile int sizeof_zone; + struct rq___old { raw_spinlock_t lock; } __attribute__((preserve_access_index)); @@ -533,6 +860,59 @@ struct rq___new { raw_spinlock_t __lock; } __attribute__((preserve_access_index)); +static void collect_zone_lock(void) +{ + __u64 nr_zones, zone_off; + __u64 lock_addr, lock_off; + __u32 lock_flag = LOCK_CLASS_ZONE_LOCK; + + zone_off = offsetof(struct pglist_data, node_zones); + lock_off = offsetof(struct zone, lock); + + if (contig_page_data_addr) { + struct pglist_data *contig_page_data; + + contig_page_data = (void *)(long)contig_page_data_addr; + nr_zones = BPF_CORE_READ(contig_page_data, nr_zones); + + for (int i = 0; i < MAX_ZONES; i++) { + __u64 zone_addr; + + if (i >= nr_zones) + break; + + zone_addr = contig_page_data_addr + (sizeof_zone * i) + zone_off; + lock_addr = zone_addr + lock_off; + + bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY); + } + } else if (nr_nodes > 0) { + struct pglist_data **node_data = (void *)(long)node_data_addr; + + for (int i = 0; 
i < nr_nodes; i++) {
+			struct pglist_data *pgdat = NULL;
+			int err;
+
+			err = bpf_core_read(&pgdat, sizeof(pgdat), &node_data[i]);
+			if (err < 0 || pgdat == NULL)
+				break;
+
+			nr_zones = BPF_CORE_READ(pgdat, nr_zones);
+			for (int k = 0; k < MAX_ZONES; k++) {
+				__u64 zone_addr;
+
+				if (k >= nr_zones)
+					break;
+
+				zone_addr = (__u64)(void *)pgdat + (sizeof_zone * k) + zone_off;
+				lock_addr = zone_addr + lock_off;
+
+				bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY);
+			}
+		}
+	}
+}
+
 SEC("raw_tp/bpf_test_finish")
 int BPF_PROG(collect_lock_syms)
 {
@@ -554,6 +934,9 @@ int BPF_PROG(collect_lock_syms)
 		lock_flag = LOCK_CLASS_RQLOCK;
 		bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY);
 	}
+
+	collect_zone_lock();
+
 	return 0;
 }
 
@@ -564,4 +947,43 @@ int BPF_PROG(end_timestamp)
 	return 0;
 }
 
+/*
+ * bpf_iter__kmem_cache was added recently, so old kernels don't have it in
+ * their vmlinux.h. But we cannot add it here since that would cause a
+ * compiler error due to redefinition of the struct on later kernels.
+ *
+ * So it uses a CO-RE trick to access the member only if it has the type.
+ * This supports both old and new kernels without compiler errors.
+ */
+struct bpf_iter__kmem_cache___new {
+	struct kmem_cache *s;
+} __attribute__((preserve_access_index));
+
+SEC("iter/kmem_cache")
+int slab_cache_iter(void *ctx)
+{
+	struct kmem_cache *s = NULL;
+	struct slab_cache_data d;
+	const char *nameptr;
+
+	if (bpf_core_type_exists(struct bpf_iter__kmem_cache)) {
+		struct bpf_iter__kmem_cache___new *iter = ctx;
+
+		s = iter->s;
+	}
+
+	if (s == NULL)
+		return 0;
+
+	nameptr = s->name;
+	bpf_probe_read_kernel_str(d.name, sizeof(d.name), nameptr);
+
+	d.id = ++slab_cache_id << LCB_F_SLAB_ID_SHIFT;
+	if (d.id >= LCB_F_SLAB_ID_END)
+		return 0;
+
+	bpf_map_update_elem(&slab_caches, &s, &d, BPF_NOEXIST);
+	return 0;
+}
+
 char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
index 36af11faad03..28c5e5aced7f 100644
--- a/tools/perf/util/bpf_skel/lock_data.h
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -3,15 +3,22 @@
 #ifndef UTIL_BPF_SKEL_LOCK_DATA_H
 #define UTIL_BPF_SKEL_LOCK_DATA_H
 
+struct owner_tracing_data {
+	u32 pid; // Who has the lock.
+	u32 count; // How many waiters for this lock.
+	u64 timestamp; // Time when the current owner acquired the lock while contention was going on.
+	s32 stack_id; // Identifier for `owner_stat`; stored as the value in `owner_stacks`.
+};
+
 struct tstamp_data {
 	u64 timestamp;
 	u64 lock;
 	u32 flags;
-	u32 stack_id;
+	s32 stack_id;
 };
 
 struct contention_key {
-	u32 stack_id;
+	s32 stack_id;
 	u32 pid;
 	u64 lock_addr_or_cgroup;
 };
@@ -32,7 +39,15 @@ struct contention_task_data {
 #define LCD_F_MMAP_LOCK (1U << 31)
 #define LCD_F_SIGHAND_LOCK (1U << 30)
 
-#define LCB_F_MAX_FLAGS (1U << 7)
+#define LCB_F_SLAB_ID_SHIFT 16
+#define LCB_F_SLAB_ID_START (1U << 16)
+#define LCB_F_SLAB_ID_END (1U << 26)
+#define LCB_F_SLAB_ID_MASK 0x03FF0000U
+
+#define LCB_F_TYPE_MAX (1U << 7)
+#define LCB_F_TYPE_MASK 0x0000007FU
+
+#define SLAB_NAME_MAX 28
 
 struct contention_data {
 	u64 total_time;
@@ -52,6 +67,12 @@ enum lock_aggr_mode {
 enum lock_class_sym {
 	LOCK_CLASS_NONE,
 	LOCK_CLASS_RQLOCK,
+	LOCK_CLASS_ZONE_LOCK,
+};
+
+struct slab_cache_data {
+	u32 id;
+	char name[SLAB_NAME_MAX];
 };
 
 #endif /* UTIL_BPF_SKEL_LOCK_DATA_H */
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
index d877a0a9731f..72763bb8d1de 100644
--- a/tools/perf/util/bpf_skel/off_cpu.bpf.c
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -18,10 +18,19 @@
 #define MAX_STACKS 32
 #define MAX_ENTRIES 102400
+#define MAX_CPUS 4096
+#define MAX_OFFCPU_LEN 37
+
+// We have a 'struct stack' in vmlinux.h when building with GEN_VMLINUX_H=1
+struct __stack {
+	u64 array[MAX_STACKS];
+};
+
 struct tstamp_data {
 	__u32 stack_id;
 	__u32 state;
 	__u64 timestamp;
+	struct __stack stack;
 };
 
 struct offcpu_key {
@@ -39,6 +48,24 @@ struct {
 	__uint(max_entries, MAX_ENTRIES);
 } stacks SEC(".maps");
 
+struct offcpu_data {
+	u64 array[MAX_OFFCPU_LEN];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+	__uint(max_entries, MAX_CPUS);
+} offcpu_output SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct offcpu_data));
+	__uint(max_entries, 1);
+} offcpu_payload SEC(".maps");
+
 struct {
 	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
 	__uint(map_flags, BPF_F_NO_PREALLOC);
@@ -85,10 +112,11 @@ struct task_struct___old {
 } __attribute__((preserve_access_index));
 
 int enabled = 0;
-int has_cpu = 0;
-int has_task = 0;
-int has_cgroup = 0;
-int uses_tgid = 0;
+
+const volatile int has_cpu = 0;
+const volatile int has_task = 0;
+const volatile int has_cgroup = 0;
+const volatile int uses_tgid = 0;
 
 const volatile bool has_prev_state = false;
 const volatile bool needs_cgroup = false;
@@ -96,6 +124,8 @@ const volatile bool uses_cgroup_v1 = false;
 
 int perf_subsys_id = -1;
 
+__u64 offcpu_thresh_ns;
+
 /*
  * Old kernel used to call it task_struct->state and now it's '__state'.
* Use BPF CO-RE "ignored suffix rule" to deal with it like below: @@ -182,6 +212,47 @@ static inline int can_record(struct task_struct *t, int state) return 1; } +static inline int copy_stack(struct __stack *from, struct offcpu_data *to, int n) +{ + int len = 0; + + for (int i = 0; i < MAX_STACKS && from->array[i]; ++i, ++len) + to->array[n + 2 + i] = from->array[i]; + + return len; +} + +/** + * off_cpu_dump - dump off-cpu samples to ring buffer + * @data: payload for dumping off-cpu samples + * @key: off-cpu data + * @stack: stack trace of the task before being scheduled out + * + * If the threshold of off-cpu time is reached, acquire tid, period, callchain, and cgroup id + * information of the task, and dump it as a raw sample to perf ring buffer + */ +static int off_cpu_dump(void *ctx, struct offcpu_data *data, struct offcpu_key *key, + struct __stack *stack, __u64 delta) +{ + int n = 0, len = 0; + + data->array[n++] = (u64)key->tgid << 32 | key->pid; + data->array[n++] = delta; + + /* data->array[n] is callchain->nr (updated later) */ + data->array[n + 1] = PERF_CONTEXT_USER; + data->array[n + 2] = 0; + len = copy_stack(stack, data, n); + + /* update length of callchain */ + data->array[n] = len + 1; + n += len + 2; + + data->array[n++] = key->cgroup_id; + + return bpf_perf_event_output(ctx, &offcpu_output, BPF_F_CURRENT_CPU, data, n * sizeof(u64)); +} + static int off_cpu_stat(u64 *ctx, struct task_struct *prev, struct task_struct *next, int state) { @@ -206,6 +277,16 @@ static int off_cpu_stat(u64 *ctx, struct task_struct *prev, pelem->state = state; pelem->stack_id = stack_id; + /* + * If stacks are successfully collected by bpf_get_stackid(), collect them once more + * in task_storage for direct off-cpu sample dumping + */ + if (stack_id > 0 && bpf_get_stack(ctx, &pelem->stack, MAX_STACKS * sizeof(u64), BPF_F_USER_STACK)) { + /* + * This empty if block is used to avoid 'result unused warning' from bpf_get_stack(). + * If the collection fails, continue with the logic for the next task. + */ + } next: pelem = bpf_task_storage_get(&tstamp, next, NULL, 0); @@ -220,11 +301,19 @@ next: __u64 delta = ts - pelem->timestamp; __u64 *total; - total = bpf_map_lookup_elem(&off_cpu, &key); - if (total) - *total += delta; - else - bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY); + if (delta >= offcpu_thresh_ns) { + int zero = 0; + struct offcpu_data *data = bpf_map_lookup_elem(&offcpu_payload, &zero); + + if (data) + off_cpu_dump(ctx, data, &key, &pelem->stack, delta); + } else { + total = bpf_map_lookup_elem(&off_cpu, &key); + if (total) + *total += delta; + else + bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY); + } /* prevent to reuse the timestamp later */ pelem->timestamp = 0; diff --git a/tools/perf/util/bpf_skel/perf_version.h b/tools/perf/util/bpf_skel/perf_version.h new file mode 100644 index 000000000000..1ed5b2e59bf5 --- /dev/null +++ b/tools/perf/util/bpf_skel/perf_version.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef __PERF_VERSION_H__ +#define __PERF_VERSION_H__ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +/* + * This is used by tests/shell/record_bpf_metadata.sh + * to verify that BPF metadata generation works. + * + * PERF_VERSION is defined by a build rule at compile time. 
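+ *
+ * Sketch of how the macro is expected to be injected (a build-system
+ * detail, assumed here for illustration only; the version string is
+ * hypothetical):
+ *
+ *   clang --target=bpf -DPERF_VERSION='"6.14.0"' ...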
+ */ +const char bpf_metadata_perf_version[] SEC(".rodata") = PERF_VERSION; + +#endif /* __PERF_VERSION_H__ */ diff --git a/tools/perf/util/bpf_skel/sample-filter.h b/tools/perf/util/bpf_skel/sample-filter.h index 2e96e1ab084a..683fec85e71e 100644 --- a/tools/perf/util/bpf_skel/sample-filter.h +++ b/tools/perf/util/bpf_skel/sample-filter.h @@ -1,7 +1,9 @@ #ifndef PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H #define PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H -#define MAX_FILTERS 64 +#define MAX_FILTERS 64 +#define MAX_IDX_HASH (16 * 1024) +#define MAX_EVT_HASH (1024 * 1024) /* supported filter operations */ enum perf_bpf_filter_op { @@ -14,14 +16,57 @@ enum perf_bpf_filter_op { PBF_OP_AND, PBF_OP_GROUP_BEGIN, PBF_OP_GROUP_END, + PBF_OP_DONE, +}; + +enum perf_bpf_filter_term { + /* No term is in use. */ + PBF_TERM_NONE = 0, + /* Terms that correspond to PERF_SAMPLE_xx values. */ + PBF_TERM_SAMPLE_START = PBF_TERM_NONE + 1, + PBF_TERM_IP = PBF_TERM_SAMPLE_START + 0, /* SAMPLE_IP = 1U << 0 */ + PBF_TERM_TID = PBF_TERM_SAMPLE_START + 1, /* SAMPLE_TID = 1U << 1 */ + PBF_TERM_TIME = PBF_TERM_SAMPLE_START + 2, /* SAMPLE_TIME = 1U << 2 */ + PBF_TERM_ADDR = PBF_TERM_SAMPLE_START + 3, /* SAMPLE_ADDR = 1U << 3 */ + __PBF_UNUSED_TERM4 = PBF_TERM_SAMPLE_START + 4, /* SAMPLE_READ = 1U << 4 */ + __PBF_UNUSED_TERM5 = PBF_TERM_SAMPLE_START + 5, /* SAMPLE_CALLCHAIN = 1U << 5 */ + PBF_TERM_ID = PBF_TERM_SAMPLE_START + 6, /* SAMPLE_ID = 1U << 6 */ + PBF_TERM_CPU = PBF_TERM_SAMPLE_START + 7, /* SAMPLE_CPU = 1U << 7 */ + PBF_TERM_PERIOD = PBF_TERM_SAMPLE_START + 8, /* SAMPLE_PERIOD = 1U << 8 */ + __PBF_UNUSED_TERM9 = PBF_TERM_SAMPLE_START + 9, /* SAMPLE_STREAM_ID = 1U << 9 */ + __PBF_UNUSED_TERM10 = PBF_TERM_SAMPLE_START + 10, /* SAMPLE_RAW = 1U << 10 */ + __PBF_UNUSED_TERM11 = PBF_TERM_SAMPLE_START + 11, /* SAMPLE_BRANCH_STACK = 1U << 11 */ + __PBF_UNUSED_TERM12 = PBF_TERM_SAMPLE_START + 12, /* SAMPLE_REGS_USER = 1U << 12 */ + __PBF_UNUSED_TERM13 = PBF_TERM_SAMPLE_START + 13, /* SAMPLE_STACK_USER = 1U << 13 */ + PBF_TERM_WEIGHT = PBF_TERM_SAMPLE_START + 14, /* SAMPLE_WEIGHT = 1U << 14 */ + PBF_TERM_DATA_SRC = PBF_TERM_SAMPLE_START + 15, /* SAMPLE_DATA_SRC = 1U << 15 */ + __PBF_UNUSED_TERM16 = PBF_TERM_SAMPLE_START + 16, /* SAMPLE_IDENTIFIER = 1U << 16 */ + PBF_TERM_TRANSACTION = PBF_TERM_SAMPLE_START + 17, /* SAMPLE_TRANSACTION = 1U << 17 */ + __PBF_UNUSED_TERM18 = PBF_TERM_SAMPLE_START + 18, /* SAMPLE_REGS_INTR = 1U << 18 */ + PBF_TERM_PHYS_ADDR = PBF_TERM_SAMPLE_START + 19, /* SAMPLE_PHYS_ADDR = 1U << 19 */ + __PBF_UNUSED_TERM20 = PBF_TERM_SAMPLE_START + 20, /* SAMPLE_AUX = 1U << 20 */ + PBF_TERM_CGROUP = PBF_TERM_SAMPLE_START + 21, /* SAMPLE_CGROUP = 1U << 21 */ + PBF_TERM_DATA_PAGE_SIZE = PBF_TERM_SAMPLE_START + 22, /* SAMPLE_DATA_PAGE_SIZE = 1U << 22 */ + PBF_TERM_CODE_PAGE_SIZE = PBF_TERM_SAMPLE_START + 23, /* SAMPLE_CODE_PAGE_SIZE = 1U << 23 */ + PBF_TERM_WEIGHT_STRUCT = PBF_TERM_SAMPLE_START + 24, /* SAMPLE_WEIGHT_STRUCT = 1U << 24 */ + PBF_TERM_SAMPLE_END = PBF_TERM_WEIGHT_STRUCT, + /* Terms computed from BPF helpers. */ + PBF_TERM_UID, + PBF_TERM_GID, }; /* BPF map entry for filtering */ struct perf_bpf_filter_entry { enum perf_bpf_filter_op op; __u32 part; /* sub-sample type info when it has multiple values */ - __u64 flags; /* perf sample type flags */ + enum perf_bpf_filter_term term; __u64 value; }; -#endif /* PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H */
\ No newline at end of file +struct idx_hash_key { + __u64 evt_id; + __u32 tgid; + __u32 reserved; +}; + +#endif /* PERF_UTIL_BPF_SKEL_SAMPLE_FILTER_H */ diff --git a/tools/perf/util/bpf_skel/sample_filter.bpf.c b/tools/perf/util/bpf_skel/sample_filter.bpf.c index fb94f5280626..e5666d4c1722 100644 --- a/tools/perf/util/bpf_skel/sample_filter.bpf.c +++ b/tools/perf/util/bpf_skel/sample_filter.bpf.c @@ -9,13 +9,41 @@ /* BPF map that will be filled by user space */ struct filters { - __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(type, BPF_MAP_TYPE_HASH); __type(key, int); - __type(value, struct perf_bpf_filter_entry); - __uint(max_entries, MAX_FILTERS); + __type(value, struct perf_bpf_filter_entry[MAX_FILTERS]); + __uint(max_entries, 1); } filters SEC(".maps"); -int dropped; +/* + * An evsel has multiple instances for each CPU or task but we need a single + * id to be used as a key for the idx_hash. This hashmap would translate the + * instance's ID to a representative ID. + */ +struct event_hash { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u64); + __type(value, __u64); + __uint(max_entries, 1); +} event_hash SEC(".maps"); + +/* tgid/evtid to filter index */ +struct idx_hash { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct idx_hash_key); + __type(value, int); + __uint(max_entries, 1); +} idx_hash SEC(".maps"); + +/* tgid to filter index */ +struct lost_count { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); +} dropped SEC(".maps"); + +volatile const int use_idx_hash; void *bpf_cast_to_kern_ctx(void *) __ksym; @@ -48,31 +76,55 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx, { struct perf_sample_data___new *data = (void *)kctx->data; - if (!bpf_core_field_exists(data->sample_flags) || - (data->sample_flags & entry->flags) == 0) + if (!bpf_core_field_exists(data->sample_flags)) return 0; - switch (entry->flags) { - case PERF_SAMPLE_IP: +#define BUILD_CHECK_SAMPLE(x) \ + _Static_assert((1 << (PBF_TERM_##x - PBF_TERM_SAMPLE_START)) == PERF_SAMPLE_##x, \ + "Mismatched PBF term to sample bit " #x) + BUILD_CHECK_SAMPLE(IP); + BUILD_CHECK_SAMPLE(TID); + BUILD_CHECK_SAMPLE(TIME); + BUILD_CHECK_SAMPLE(ADDR); + BUILD_CHECK_SAMPLE(ID); + BUILD_CHECK_SAMPLE(CPU); + BUILD_CHECK_SAMPLE(PERIOD); + BUILD_CHECK_SAMPLE(WEIGHT); + BUILD_CHECK_SAMPLE(DATA_SRC); + BUILD_CHECK_SAMPLE(TRANSACTION); + BUILD_CHECK_SAMPLE(PHYS_ADDR); + BUILD_CHECK_SAMPLE(CGROUP); + BUILD_CHECK_SAMPLE(DATA_PAGE_SIZE); + BUILD_CHECK_SAMPLE(CODE_PAGE_SIZE); + BUILD_CHECK_SAMPLE(WEIGHT_STRUCT); +#undef BUILD_CHECK_SAMPLE + + /* For sample terms check the sample bit is set. 
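+	 * For instance, entry->term == PBF_TERM_CGROUP is at offset 21 from
+	 * PBF_TERM_SAMPLE_START, so this tests PERF_SAMPLE_CGROUP (1U << 21),
+	 * exactly the pairing the BUILD_CHECK_SAMPLE() asserts above verify.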
*/ + if (entry->term >= PBF_TERM_SAMPLE_START && entry->term <= PBF_TERM_SAMPLE_END && + (data->sample_flags & (1 << (entry->term - PBF_TERM_SAMPLE_START))) == 0) + return 0; + + switch (entry->term) { + case PBF_TERM_IP: return kctx->data->ip; - case PERF_SAMPLE_ID: + case PBF_TERM_ID: return kctx->data->id; - case PERF_SAMPLE_TID: + case PBF_TERM_TID: if (entry->part) return kctx->data->tid_entry.pid; else return kctx->data->tid_entry.tid; - case PERF_SAMPLE_CPU: + case PBF_TERM_CPU: return kctx->data->cpu_entry.cpu; - case PERF_SAMPLE_TIME: + case PBF_TERM_TIME: return kctx->data->time; - case PERF_SAMPLE_ADDR: + case PBF_TERM_ADDR: return kctx->data->addr; - case PERF_SAMPLE_PERIOD: + case PBF_TERM_PERIOD: return kctx->data->period; - case PERF_SAMPLE_TRANSACTION: + case PBF_TERM_TRANSACTION: return kctx->data->txn; - case PERF_SAMPLE_WEIGHT_STRUCT: + case PBF_TERM_WEIGHT_STRUCT: if (entry->part == 1) return kctx->data->weight.var1_dw; if (entry->part == 2) @@ -80,15 +132,17 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx, if (entry->part == 3) return kctx->data->weight.var3_w; /* fall through */ - case PERF_SAMPLE_WEIGHT: + case PBF_TERM_WEIGHT: return kctx->data->weight.full; - case PERF_SAMPLE_PHYS_ADDR: + case PBF_TERM_PHYS_ADDR: return kctx->data->phys_addr; - case PERF_SAMPLE_CODE_PAGE_SIZE: + case PBF_TERM_CGROUP: + return kctx->data->cgroup; + case PBF_TERM_CODE_PAGE_SIZE: return kctx->data->code_page_size; - case PERF_SAMPLE_DATA_PAGE_SIZE: + case PBF_TERM_DATA_PAGE_SIZE: return kctx->data->data_page_size; - case PERF_SAMPLE_DATA_SRC: + case PBF_TERM_DATA_SRC: if (entry->part == 1) return kctx->data->data_src.mem_op; if (entry->part == 2) @@ -110,13 +164,28 @@ static inline __u64 perf_get_sample(struct bpf_perf_event_data_kern *kctx, if (entry->part == 8) { union perf_mem_data_src___new *data = (void *)&kctx->data->data_src; - if (bpf_core_field_exists(data->mem_hops)) + if (__builtin_preserve_field_info(data->mem_hops, BPF_FIELD_EXISTS)) return data->mem_hops; return 0; } /* return the whole word */ return kctx->data->data_src.val; + case PBF_TERM_UID: + return bpf_get_current_uid_gid() & 0xFFFFFFFF; + case PBF_TERM_GID: + return bpf_get_current_uid_gid() >> 32; + case PBF_TERM_NONE: + case __PBF_UNUSED_TERM4: + case __PBF_UNUSED_TERM5: + case __PBF_UNUSED_TERM9: + case __PBF_UNUSED_TERM10: + case __PBF_UNUSED_TERM11: + case __PBF_UNUSED_TERM12: + case __PBF_UNUSED_TERM13: + case __PBF_UNUSED_TERM16: + case __PBF_UNUSED_TERM18: + case __PBF_UNUSED_TERM20: default: break; } @@ -140,39 +209,66 @@ int perf_sample_filter(void *ctx) __u64 sample_data; int in_group = 0; int group_result = 0; - int i; + int i, k; + int *losts; kctx = bpf_cast_to_kern_ctx(ctx); - for (i = 0; i < MAX_FILTERS; i++) { - int key = i; /* needed for verifier :( */ + k = 0; - entry = bpf_map_lookup_elem(&filters, &key); - if (entry == NULL) - break; - sample_data = perf_get_sample(kctx, entry); + if (use_idx_hash) { + struct idx_hash_key key = { + .tgid = bpf_get_current_pid_tgid() >> 32, + }; + __u64 eid = kctx->event->id; + __u64 *key_id; + int *idx; - switch (entry->op) { + /* get primary_event_id */ + if (kctx->event->parent) + eid = kctx->event->parent->id; + + key_id = bpf_map_lookup_elem(&event_hash, &eid); + if (key_id == NULL) + goto drop; + + key.evt_id = *key_id; + + idx = bpf_map_lookup_elem(&idx_hash, &key); + if (idx) + k = *idx; + else + goto drop; + } + + entry = bpf_map_lookup_elem(&filters, &k); + if (entry == NULL) + goto drop; + + for (i = 0; i < MAX_FILTERS; 
i++) { + sample_data = perf_get_sample(kctx, &entry[i]); + + switch (entry[i].op) { case PBF_OP_EQ: - CHECK_RESULT(sample_data, ==, entry->value) + CHECK_RESULT(sample_data, ==, entry[i].value) break; case PBF_OP_NEQ: - CHECK_RESULT(sample_data, !=, entry->value) + CHECK_RESULT(sample_data, !=, entry[i].value) break; case PBF_OP_GT: - CHECK_RESULT(sample_data, >, entry->value) + CHECK_RESULT(sample_data, >, entry[i].value) break; case PBF_OP_GE: - CHECK_RESULT(sample_data, >=, entry->value) + CHECK_RESULT(sample_data, >=, entry[i].value) break; case PBF_OP_LT: - CHECK_RESULT(sample_data, <, entry->value) + CHECK_RESULT(sample_data, <, entry[i].value) break; case PBF_OP_LE: - CHECK_RESULT(sample_data, <=, entry->value) + CHECK_RESULT(sample_data, <=, entry[i].value) break; case PBF_OP_AND: - CHECK_RESULT(sample_data, &, entry->value) + CHECK_RESULT(sample_data, &, entry[i].value) break; case PBF_OP_GROUP_BEGIN: in_group = 1; @@ -183,13 +279,19 @@ int perf_sample_filter(void *ctx) goto drop; in_group = 0; break; + case PBF_OP_DONE: + /* no failures so far, accept it */ + return 1; } } /* generate sample data */ return 1; drop: - __sync_fetch_and_add(&dropped, 1); + losts = bpf_map_lookup_elem(&dropped, &k); + if (losts != NULL) + __sync_fetch_and_add(losts, 1); + return 0; } diff --git a/tools/perf/util/bpf_skel/syscall_summary.bpf.c b/tools/perf/util/bpf_skel/syscall_summary.bpf.c new file mode 100644 index 000000000000..1bcd066a5199 --- /dev/null +++ b/tools/perf/util/bpf_skel/syscall_summary.bpf.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Trace raw_syscalls tracepoints to collect system call statistics. + */ + +#include "vmlinux.h" +#include "syscall_summary.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +/* This is to calculate a delta between sys-enter and sys-exit for each thread */ +struct syscall_trace { + int nr; /* syscall number is only available at sys-enter */ + int unused; + u64 timestamp; +}; + +#define MAX_ENTRIES (128 * 1024) + +struct syscall_trace_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); /* tid */ + __type(value, struct syscall_trace); + __uint(max_entries, MAX_ENTRIES); +} syscall_trace_map SEC(".maps"); + +struct syscall_stats_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct syscall_key); + __type(value, struct syscall_stats); + __uint(max_entries, MAX_ENTRIES); +} syscall_stats_map SEC(".maps"); + +int enabled; /* controlled from userspace */ + +const volatile enum syscall_aggr_mode aggr_mode; +const volatile int use_cgroup_v2; + +int perf_subsys_id = -1; + +static inline __u64 get_current_cgroup_id(void) +{ + struct task_struct *task; + struct cgroup *cgrp; + + if (use_cgroup_v2) + return bpf_get_current_cgroup_id(); + + task = bpf_get_current_task_btf(); + + if (perf_subsys_id == -1) { +#if __has_builtin(__builtin_preserve_enum_value) + perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id, + perf_event_cgrp_id); +#else + perf_subsys_id = perf_event_cgrp_id; +#endif + } + + cgrp = BPF_CORE_READ(task, cgroups, subsys[perf_subsys_id], cgroup); + return BPF_CORE_READ(cgrp, kn, id); +} + +static void update_stats(int cpu_or_tid, u64 cgroup_id, int nr, s64 duration, + long ret) +{ + struct syscall_key key = { + .cpu_or_tid = cpu_or_tid, + .cgroup = cgroup_id, + .nr = nr, + }; + struct syscall_stats *stats; + + stats = bpf_map_lookup_elem(&syscall_stats_map, &key); + if (stats == NULL) { + struct syscall_stats zero = {}; + + bpf_map_update_elem(&syscall_stats_map, 
&key, &zero, BPF_NOEXIST); + stats = bpf_map_lookup_elem(&syscall_stats_map, &key); + if (stats == NULL) + return; + } + + __sync_fetch_and_add(&stats->count, 1); + if (ret < 0) + __sync_fetch_and_add(&stats->error, 1); + + if (duration > 0) { + __sync_fetch_and_add(&stats->total_time, duration); + __sync_fetch_and_add(&stats->squared_sum, duration * duration); + if (stats->max_time < duration) + stats->max_time = duration; + if (stats->min_time > duration || stats->min_time == 0) + stats->min_time = duration; + } + + return; +} + +SEC("tp_btf/sys_enter") +int sys_enter(u64 *ctx) +{ + int tid; + struct syscall_trace st; + + if (!enabled) + return 0; + + st.nr = ctx[1]; /* syscall number */ + st.unused = 0; + st.timestamp = bpf_ktime_get_ns(); + + tid = bpf_get_current_pid_tgid(); + bpf_map_update_elem(&syscall_trace_map, &tid, &st, BPF_ANY); + + return 0; +} + +SEC("tp_btf/sys_exit") +int sys_exit(u64 *ctx) +{ + int tid; + int key = 0; + u64 cgroup = 0; + long ret = ctx[1]; /* return value of the syscall */ + struct syscall_trace *st; + s64 delta; + + if (!enabled) + return 0; + + tid = bpf_get_current_pid_tgid(); + st = bpf_map_lookup_elem(&syscall_trace_map, &tid); + if (st == NULL) + return 0; + + if (aggr_mode == SYSCALL_AGGR_THREAD) + key = tid; + else if (aggr_mode == SYSCALL_AGGR_CGROUP) + cgroup = get_current_cgroup_id(); + else + key = bpf_get_smp_processor_id(); + + delta = bpf_ktime_get_ns() - st->timestamp; + update_stats(key, cgroup, st->nr, delta, ret); + + bpf_map_delete_elem(&syscall_trace_map, &tid); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/util/bpf_skel/syscall_summary.h b/tools/perf/util/bpf_skel/syscall_summary.h new file mode 100644 index 000000000000..72ccccb45925 --- /dev/null +++ b/tools/perf/util/bpf_skel/syscall_summary.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Data structures shared between BPF and tools. 
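+ *
+ * Userspace is expected to reduce struct syscall_stats into latency
+ * statistics; an illustrative (not in-tree) reduction:
+ *
+ *   mean     = total_time / count
+ *   variance = squared_sum / count - mean * mean
+ *   stddev   = sqrt(variance)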
*/ +#ifndef UTIL_BPF_SKEL_SYSCALL_SUMMARY_H +#define UTIL_BPF_SKEL_SYSCALL_SUMMARY_H + +enum syscall_aggr_mode { + SYSCALL_AGGR_THREAD, + SYSCALL_AGGR_CPU, + SYSCALL_AGGR_CGROUP, +}; + +struct syscall_key { + u64 cgroup; + int cpu_or_tid; + int nr; +}; + +struct syscall_stats { + u64 total_time; + u64 squared_sum; + u64 max_time; + u64 min_time; + u32 count; + u32 error; +}; + +#endif /* UTIL_BPF_SKEL_SYSCALL_SUMMARY_H */ diff --git a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h index e9028235d771..a59ce912be18 100644 --- a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h +++ b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h @@ -15,6 +15,7 @@ typedef __u8 u8; typedef __u32 u32; +typedef __s32 s32; typedef __u64 u64; typedef __s64 s64; @@ -170,10 +171,16 @@ struct perf_sample_data { u32 cpu; } cpu_entry; u64 phys_addr; + u64 cgroup; u64 data_page_size; u64 code_page_size; } __attribute__((__aligned__(64))) __attribute__((preserve_access_index)); +struct perf_event { + struct perf_event *parent; + u64 id; +} __attribute__((preserve_access_index)); + struct bpf_perf_event_data_kern { struct perf_sample_data *data; struct perf_event *event; @@ -188,4 +195,21 @@ struct bpf_perf_event_data_kern { */ struct rq {}; +struct kmem_cache { + const char *name; +} __attribute__((preserve_access_index)); + +struct bpf_iter__kmem_cache { + struct kmem_cache *s; +} __attribute__((preserve_access_index)); + +struct zone { + spinlock_t lock; +} __attribute__((preserve_access_index)); + +struct pglist_data { + struct zone node_zones[6]; /* value for all possible config */ + int nr_zones; +} __attribute__((preserve_access_index)); + #endif // __VMLINUX_H diff --git a/tools/perf/util/bpf_trace_augment.c b/tools/perf/util/bpf_trace_augment.c new file mode 100644 index 000000000000..56ed17534caa --- /dev/null +++ b/tools/perf/util/bpf_trace_augment.c @@ -0,0 +1,143 @@ +#include <bpf/libbpf.h> +#include <internal/xyarray.h> + +#include "util/debug.h" +#include "util/evlist.h" +#include "util/trace_augment.h" + +#include "bpf_skel/augmented_raw_syscalls.skel.h" + +static struct augmented_raw_syscalls_bpf *skel; +static struct evsel *bpf_output; + +int augmented_syscalls__prepare(void) +{ + struct bpf_program *prog; + char buf[128]; + int err; + + skel = augmented_raw_syscalls_bpf__open(); + if (!skel) { + pr_debug("Failed to open augmented syscalls BPF skeleton\n"); + return -errno; + } + + /* + * Disable attaching the BPF programs except for sys_enter and + * sys_exit that tail call into this as necessary. 
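+	 * (They remain reachable at run time: sys_enter and sys_exit are
+	 * assumed to dispatch to them through a BPF_MAP_TYPE_PROG_ARRAY
+	 * tail call, roughly:
+	 *
+	 *	bpf_tail_call(args, &syscalls_sys_enter, syscall_nr);
+	 *
+	 * so only the two entry points need a direct attachment. This is an
+	 * illustrative sketch, not code quoted from the skeleton.)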
+ */ + bpf_object__for_each_program(prog, skel->obj) { + if (prog != skel->progs.sys_enter && prog != skel->progs.sys_exit) + bpf_program__set_autoattach(prog, /*autoattach=*/false); + } + + err = augmented_raw_syscalls_bpf__load(skel); + if (err < 0) { + libbpf_strerror(err, buf, sizeof(buf)); + pr_debug("Failed to load augmented syscalls BPF skeleton: %s\n", buf); + return err; + } + + augmented_raw_syscalls_bpf__attach(skel); + return 0; +} + +int augmented_syscalls__create_bpf_output(struct evlist *evlist) +{ + int err = parse_event(evlist, "bpf-output/no-inherit=1,name=__augmented_syscalls__/"); + + if (err) { + pr_err("ERROR: Setup BPF output event failed: %d\n", err); + return err; + } + + bpf_output = evlist__last(evlist); + assert(evsel__name_is(bpf_output, "__augmented_syscalls__")); + + return 0; +} + +void augmented_syscalls__setup_bpf_output(void) +{ + struct perf_cpu cpu; + int i; + + if (bpf_output == NULL) + return; + + /* + * Set up the __augmented_syscalls__ BPF map to hold for each + * CPU the bpf-output event's file descriptor. + */ + perf_cpu_map__for_each_cpu(cpu, i, bpf_output->core.cpus) { + int mycpu = cpu.cpu; + + bpf_map__update_elem(skel->maps.__augmented_syscalls__, + &mycpu, sizeof(mycpu), + xyarray__entry(bpf_output->core.fd, + mycpu, 0), + sizeof(__u32), BPF_ANY); + } +} + +int augmented_syscalls__set_filter_pids(unsigned int nr, pid_t *pids) +{ + bool value = true; + int err = 0; + + if (skel == NULL) + return 0; + + for (size_t i = 0; i < nr; ++i) { + err = bpf_map__update_elem(skel->maps.pids_filtered, &pids[i], + sizeof(*pids), &value, sizeof(value), + BPF_ANY); + if (err) + break; + } + return err; +} + +int augmented_syscalls__get_map_fds(int *enter_fd, int *exit_fd, int *beauty_fd) +{ + if (skel == NULL) + return -1; + + *enter_fd = bpf_map__fd(skel->maps.syscalls_sys_enter); + *exit_fd = bpf_map__fd(skel->maps.syscalls_sys_exit); + *beauty_fd = bpf_map__fd(skel->maps.beauty_map_enter); + + if (*enter_fd < 0 || *exit_fd < 0 || *beauty_fd < 0) { + pr_err("Error: failed to get syscall or beauty map fd\n"); + return -1; + } + + return 0; +} + +struct bpf_program *augmented_syscalls__unaugmented(void) +{ + return skel->progs.syscall_unaugmented; +} + +struct bpf_program *augmented_syscalls__find_by_title(const char *name) +{ + struct bpf_program *pos; + const char *sec_name; + + if (skel->obj == NULL) + return NULL; + + bpf_object__for_each_program(pos, skel->obj) { + sec_name = bpf_program__section_name(pos); + if (sec_name && !strcmp(sec_name, name)) + return pos; + } + + return NULL; +} + +void augmented_syscalls__cleanup(void) +{ + augmented_raw_syscalls_bpf__destroy(skel); +} diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c index ab760e267d41..3712be067464 100644 --- a/tools/perf/util/branch.c +++ b/tools/perf/util/branch.c @@ -46,7 +46,7 @@ const char *branch_new_type_name(int new_type) "FAULT_DATA", "FAULT_INST", /* - * TODO: This switch should happen on 'session->header.env.arch' + * TODO: This switch should happen on 'perf_session__env(session)->arch' * instead, because an arm64 platform perf recording could be * opened for analysis on other platforms as well. 
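 * (e.g. a perf.data file recorded on arm64 but opened with 'perf report'
 * on an x86_64 host should still resolve arm64 branch type names)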
*/ diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 87704d713ff6..7429530fa774 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -25,7 +25,8 @@ struct branch_flags { u64 spec:2; u64 new_type:4; u64 priv:3; - u64 reserved:31; + u64 not_taken:1; + u64 reserved:30; }; }; }; @@ -34,6 +35,7 @@ struct branch_info { struct addr_map_symbol from; struct addr_map_symbol to; struct branch_flags flags; + u64 branch_stack_cntr; char *srcline_from; char *srcline_to; }; diff --git a/tools/perf/util/btf.c b/tools/perf/util/btf.c new file mode 100644 index 000000000000..bb163fe87767 --- /dev/null +++ b/tools/perf/util/btf.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arnaldo Carvalho de Melo <acme@redhat.com> + * + * Copyright (C) 2024, Red Hat, Inc + */ + +#include <bpf/btf.h> +#include <util/btf.h> +#include <string.h> + +const struct btf_member *__btf_type__find_member_by_name(struct btf *btf, + int type_id, const char *member_name) +{ + const struct btf_type *t = btf__type_by_id(btf, type_id); + const struct btf_member *m; + int i; + + for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) { + const char *current_member_name = btf__name_by_offset(btf, m->name_off); + + if (!strcmp(current_member_name, member_name)) + return m; + } + + return NULL; +} diff --git a/tools/perf/util/btf.h b/tools/perf/util/btf.h new file mode 100644 index 000000000000..05e6e5bf23d6 --- /dev/null +++ b/tools/perf/util/btf.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_UTIL_BTF +#define __PERF_UTIL_BTF 1 + +struct btf; +struct btf_member; + +const struct btf_member *__btf_type__find_member_by_name(struct btf *btf, + int type_id, const char *member_name); +#endif // __PERF_UTIL_BTF diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 83a1581e8cf1..fdb35133fde4 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -42,10 +42,20 @@ static bool no_buildid_cache; -int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, +static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data __maybe_unused) +{ + struct map *map = node->ms.map; + + if (map) + dso__set_hit(map__dso(map)); + + return 0; +} + +int build_id__mark_dso_hit(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, - struct evsel *evsel __maybe_unused, + struct evsel *evsel, struct machine *machine) { struct addr_location al; @@ -63,63 +73,36 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, dso__set_hit(map__dso(al.map)); addr_location__exit(&al); - thread__put(thread); - return 0; -} -static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_sample *sample - __maybe_unused, - struct machine *machine) -{ - struct thread *thread = machine__findnew_thread(machine, - event->fork.pid, - event->fork.tid); + sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH, + /*symbols=*/false, mark_dso_hit_callback, /*data=*/NULL); - dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, - event->fork.ppid, event->fork.ptid); - - if (thread) { - machine__remove_thread(machine, thread); - thread__put(thread); - } + thread__put(thread); return 0; } -struct perf_tool build_id__mark_dso_hit_ops = { - .sample = build_id__mark_dso_hit, - .mmap = perf_event__process_mmap, - .mmap2 = perf_event__process_mmap2, - .fork = perf_event__process_fork, - .exit = 
perf_event__exit_del_thread, - .attr = perf_event__process_attr, - .build_id = perf_event__process_build_id, - .ordered_events = true, -}; - -int build_id__sprintf(const struct build_id *build_id, char *bf) +int build_id__snprintf(const struct build_id *build_id, char *bf, size_t bf_size) { - char *bid = bf; - const u8 *raw = build_id->data; - size_t i; + size_t offs = 0; - bf[0] = 0x0; - - for (i = 0; i < build_id->size; ++i) { - sprintf(bid, "%02x", *raw); - ++raw; - bid += 2; + if (build_id->size == 0) { + /* Ensure bf is always \0 terminated. */ + if (bf_size > 0) + bf[0] = '\0'; + return 0; } - return (bid - bf) + 1; + for (size_t i = 0; i < build_id->size && offs < bf_size; ++i) + offs += snprintf(bf + offs, bf_size - offs, "%02x", build_id->data[i]); + + return offs; } -int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) +int sysfs__snprintf_build_id(const char *root_dir, char *sbuild_id, size_t sbuild_id_size) { char notes[PATH_MAX]; - struct build_id bid; + struct build_id bid = { .size = 0, }; int ret; if (!root_dir) @@ -131,19 +114,19 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) if (ret < 0) return ret; - return build_id__sprintf(&bid, sbuild_id); + return build_id__snprintf(&bid, sbuild_id, sbuild_id_size); } -int filename__sprintf_build_id(const char *pathname, char *sbuild_id) +int filename__snprintf_build_id(const char *pathname, char *sbuild_id, size_t sbuild_id_size) { - struct build_id bid; + struct build_id bid = { .size = 0, }; int ret; ret = filename__read_build_id(pathname, &bid); if (ret < 0) return ret; - return build_id__sprintf(&bid, sbuild_id); + return build_id__snprintf(&bid, sbuild_id, sbuild_id_size); } /* asnprintf consolidates asprintf and snprintf */ @@ -244,9 +227,9 @@ static bool build_id_cache__valid_id(char *sbuild_id) return false; if (!strcmp(pathname, DSO__NAME_KALLSYMS)) - ret = sysfs__sprintf_build_id("/", real_sbuild_id); + ret = sysfs__snprintf_build_id("/", real_sbuild_id, sizeof(real_sbuild_id)); else if (pathname[0] == '/') - ret = filename__sprintf_build_id(pathname, real_sbuild_id); + ret = filename__snprintf_build_id(pathname, real_sbuild_id, sizeof(real_sbuild_id)); else ret = -EINVAL; /* Should we support other special DSO cache? */ if (ret >= 0) @@ -275,7 +258,7 @@ char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size, if (!dso__has_build_id(dso)) return NULL; - build_id__sprintf(dso__bid_const(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); linkname = build_id_cache__linkname(sbuild_id, NULL, 0); if (!linkname) return NULL; @@ -310,7 +293,7 @@ static int write_buildid(const char *name, size_t name_len, struct build_id *bid size_t len; len = name_len + 1; - len = PERF_ALIGN(len, NAME_ALIGN); + len = PERF_ALIGN(len, sizeof(u64)); memset(&b, 0, sizeof(b)); memcpy(&b.data, bid->data, bid->size); @@ -358,7 +341,7 @@ static int machine__write_buildid_table_cb(struct dso *dso, void *data) } in_kernel = dso__kernel(dso) || is_kernel_module(name, PERF_RECORD_MISC_CPUMODE_UNKNOWN); - return write_buildid(name, name_len, dso__bid(dso), args->machine->pid, + return write_buildid(name, name_len, &dso__id(dso)->build_id, args->machine->pid, in_kernel ? 
args->kmisc : args->umisc, args->fd); } @@ -801,7 +784,7 @@ static int build_id_cache__add_b(const struct build_id *bid, { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(bid, sbuild_id); + build_id__snprintf(bid, sbuild_id, sizeof(sbuild_id)); return __build_id_cache__add_s(sbuild_id, name, nsi, is_kallsyms, is_vdso, proper_name, root_dir); @@ -873,7 +856,7 @@ static int filename__read_build_id_ns(const char *filename, static bool dso__build_id_mismatch(struct dso *dso, const char *name) { - struct build_id bid; + struct build_id bid = { .size = 0, }; bool ret = false; mutex_lock(dso__lock(dso)); @@ -896,7 +879,7 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine, char *allocated_name = NULL; int ret = 0; - if (!dso__has_build_id(dso)) + if (!dso__has_build_id(dso) || !dso__hit(dso)) return 0; if (dso__is_kcore(dso)) { @@ -983,7 +966,10 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) void build_id__init(struct build_id *bid, const u8 *data, size_t size) { - WARN_ON(size > BUILD_ID_SIZE); + if (size > BUILD_ID_SIZE) { + pr_debug("Truncating build_id size from %zd\n", size); + size = BUILD_ID_SIZE; + } memcpy(bid->data, data, size); bid->size = size; } diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 3fa8bffb07ca..47e621cebe1b 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -13,20 +13,18 @@ struct build_id { u8 data[BUILD_ID_SIZE]; - size_t size; + u8 size; }; -struct nsinfo; - -extern struct perf_tool build_id__mark_dso_hit_ops; struct dso; struct feat_fd; +struct nsinfo; void build_id__init(struct build_id *bid, const u8 *data, size_t size); -int build_id__sprintf(const struct build_id *build_id, char *bf); +int build_id__snprintf(const struct build_id *build_id, char *bf, size_t bf_size); bool build_id__is_defined(const struct build_id *bid); -int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); -int filename__sprintf_build_id(const char *pathname, char *sbuild_id); +int sysfs__snprintf_build_id(const char *root_dir, char *sbuild_id, size_t sbuild_id_size); +int filename__snprintf_build_id(const char *pathname, char *sbuild_id, size_t sbuild_id_size); char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, size_t size); @@ -35,11 +33,11 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size, bool is_debug, bool is_kallsyms); -int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, +int build_id__mark_dso_hit(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct evsel *evsel, struct machine *machine); -int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event, +int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct evsel *evsel, struct machine *machine); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 1730b852a947..428e5350d7a2 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -275,9 +275,13 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) if (tok) { unsigned long size; - size = strtoul(tok, &name, 0); - if (size < (unsigned) sysctl__max_stack()) - param->max_stack = size; + if (!strncmp(tok, "defer", sizeof("defer"))) { + param->defer = true; + } else { + size = strtoul(tok, &name, 0); + if (size < (unsigned) 
sysctl__max_stack()) + param->max_stack = size; + } } break; @@ -314,6 +318,12 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) } while (0); free(buf); + + if (param->defer && param->record_mode != CALLCHAIN_FP) { + pr_err("callchain: deferred callchain only works with FP\n"); + return -EINVAL; + } + return ret; } @@ -589,9 +599,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) return -ENOMEM; } call->ip = cursor_node->ip; - call->ms = cursor_node->ms; - call->ms.map = map__get(call->ms.map); - call->ms.maps = maps__get(call->ms.maps); + map_symbol__copy(&call->ms, &cursor_node->ms); call->srcline = cursor_node->srcline; if (cursor_node->branch) { @@ -1094,9 +1102,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor, node->ip = ip; map_symbol__exit(&node->ms); - node->ms = *ms; - node->ms.maps = maps__get(ms->maps); - node->ms.map = map__get(ms->map); + map_symbol__copy(&node->ms, ms); node->branch = branch; node->nr_loop_iter = nr_loop_iter; node->iter_cycles = iter_cycles; @@ -1141,7 +1147,7 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, bool hide_unresolved) { - struct machine *machine = maps__machine(node->ms.maps); + struct machine *machine = node->ms.maps ? maps__machine(node->ms.maps) : NULL; maps__put(al->maps); al->maps = maps__get(node->ms.maps); @@ -1564,7 +1570,7 @@ int callchain_node__make_parent_list(struct callchain_node *node) goto out; *new = *chain; new->has_children = false; - new->ms.map = map__get(new->ms.map); + map_symbol__copy(&new->ms, &chain->ms); list_add_tail(&new->list, &head); } parent = parent->parent; @@ -1797,3 +1803,73 @@ s64 callchain_avg_cycles(struct callchain_node *cnode) return cycles; } + +int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel, + struct perf_sample *sample, int max_stack, + bool symbols, callchain_iter_fn cb, void *data) +{ + struct callchain_cursor *cursor = get_tls_callchain_cursor(); + int ret; + + if (!cursor) + return -ENOMEM; + + /* Fill in the callchain. */ + ret = __thread__resolve_callchain(thread, cursor, evsel, sample, + /*parent=*/NULL, /*root_al=*/NULL, + max_stack, symbols); + if (ret) + return ret; + + /* Switch from writing the callchain to reading it. */ + callchain_cursor_commit(cursor); + + while (1) { + struct callchain_cursor_node *node = callchain_cursor_current(cursor); + + if (!node) + break; + + ret = cb(node, data); + if (ret) + return ret; + + callchain_cursor_advance(cursor); + } + return 0; +} + +/* + * This function merges earlier samples (@sample_orig) waiting for deferred + * user callchains with the matching callchain record (@sample_callchain) + * which is delivered now. The @sample_orig->callchain should be released + * after use if ->deferred_callchain is set. 
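+ *
+ * An illustrative example (the names here are invented, not from this
+ * patch): with an original chain { K1, K2, PERF_CONTEXT_USER_DEFERRED,
+ * cookie } (nr = 4) and a deferred record carrying { U1, U2 } (nr = 2),
+ * the merged chain is { K1, K2, PERF_CONTEXT_USER_DEFERRED, U1, U2 }
+ * (nr = 5): everything but the trailing cookie is kept, then the
+ * deferred user entries are appended.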
+ */
+int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
+				     struct perf_sample *sample_callchain)
+{
+	u64 nr_orig = sample_orig->callchain->nr - 1;
+	u64 nr_deferred = sample_callchain->callchain->nr;
+	struct ip_callchain *callchain;
+
+	if (sample_orig->callchain->nr < 2) {
+		sample_orig->deferred_callchain = false;
+		return -EINVAL;
+	}
+
+	callchain = calloc(1 + nr_orig + nr_deferred, sizeof(u64));
+	if (callchain == NULL) {
+		sample_orig->deferred_callchain = false;
+		return -ENOMEM;
+	}
+
+	callchain->nr = nr_orig + nr_deferred;
+	/* copy original including PERF_CONTEXT_USER_DEFERRED (but not the cookie) */
+	memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64));
+	/* copy deferred user callchains */
+	memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips,
+	       nr_deferred * sizeof(u64));
+
+	sample_orig->callchain = callchain;
+	return 0;
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index d5c66345ae31..2a52af8c80ac 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -98,6 +98,7 @@ extern bool dwarf_callchain_users;
 
 struct callchain_param {
 	bool enabled;
+	bool defer;
 	enum perf_call_graph_mode record_mode;
 	u32 dump_size;
 	enum chain_mode mode;
@@ -311,4 +312,13 @@ u64 callchain_total_hits(struct hists *hists);
 
 s64 callchain_avg_cycles(struct callchain_node *cnode);
 
+typedef int (*callchain_iter_fn)(struct callchain_cursor_node *node, void *data);
+
+int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
+				    struct perf_sample *sample, int max_stack,
+				    bool symbols, callchain_iter_fn cb, void *data);
+
+int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
+				     struct perf_sample *sample_callchain);
+
 #endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cap.c b/tools/perf/util/cap.c
index c3ba841bbf37..24a0ea7e6d97 100644
--- a/tools/perf/util/cap.c
+++ b/tools/perf/util/cap.c
@@ -3,27 +3,47 @@
  * Capability utilities
  */
 
-#ifdef HAVE_LIBCAP_SUPPORT
-
 #include "cap.h"
-#include <stdbool.h>
-#include <sys/capability.h>
-
-bool perf_cap__capable(cap_value_t cap)
-{
-	cap_flag_value_t val;
-	cap_t caps = cap_get_proc();
-
-	if (!caps)
-		return false;
+#include "debug.h"
+#include <errno.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
 
-	if (cap_get_flag(caps, cap, CAP_EFFECTIVE, &val) != 0)
-		val = CAP_CLEAR;
+#define MAX_LINUX_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3
 
-	if (cap_free(caps) != 0)
-		return false;
-
-	return val == CAP_SET;
+bool perf_cap__capable(int cap, bool *used_root)
+{
+	struct __user_cap_header_struct header = {
+		.version = _LINUX_CAPABILITY_VERSION_3,
+		.pid = 0,
+	};
+	struct __user_cap_data_struct data[MAX_LINUX_CAPABILITY_U32S] = {};
+	__u32 cap_val;
+
+	*used_root = false;
+	while (syscall(SYS_capget, &header, &data[0]) == -1) {
+		/* Retry; the first attempt has set header.version correctly. */
+		if (errno == EINVAL && header.version != _LINUX_CAPABILITY_VERSION_3 &&
+		    header.version == _LINUX_CAPABILITY_VERSION_1)
+			continue;
+
+		pr_debug2("capget syscall failed (%s - %d) fall back on root check\n",
+			  strerror(errno), errno);
+		*used_root = true;
+		return geteuid() == 0;
+	}
+
+	/* Extract the relevant capability bit. */
+	if (cap >= 32) {
+		if (header.version == _LINUX_CAPABILITY_VERSION_3) {
+			cap_val = data[1].effective;
+		} else {
+			/* Capability beyond 32 is requested but only 32 are supported.
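+			 * For example, CAP_BPF (39) is tested as bit 7
+			 * (39 & 0x1f) of data[1].effective under
+			 * _LINUX_CAPABILITY_VERSION_3; a VERSION_1-only
+			 * kernel cannot report it at all.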
			 */
+			return false;
+		}
+	} else {
+		cap_val = data[0].effective;
+	}
+	return (cap_val & (1 << (cap & 0x1f))) != 0;
 }
-
-#endif /* HAVE_LIBCAP_SUPPORT */
diff --git a/tools/perf/util/cap.h b/tools/perf/util/cap.h
index ae52878c0b2e..c1b8ac033ccc 100644
--- a/tools/perf/util/cap.h
+++ b/tools/perf/util/cap.h
@@ -4,25 +4,6 @@
 
 #include <stdbool.h>
 #include <linux/capability.h>
-#include <linux/compiler.h>
-
-#ifdef HAVE_LIBCAP_SUPPORT
-
-#include <sys/capability.h>
-
-bool perf_cap__capable(cap_value_t cap);
-
-#else
-
-#include <unistd.h>
-#include <sys/types.h>
-
-static inline bool perf_cap__capable(int cap __maybe_unused)
-{
-	return geteuid() == 0;
-}
-
-#endif /* HAVE_LIBCAP_SUPPORT */
 
 /* For older systems */
 #ifndef CAP_SYSLOG
@@ -33,4 +14,11 @@ static inline bool perf_cap__capable(int cap __maybe_unused)
 #define CAP_PERFMON 38
 #endif
 
+#ifndef CAP_BPF
+#define CAP_BPF 39
+#endif
+
+/* Query if a capability is supported, used_root is set if the fallback root check was used. */
+bool perf_cap__capable(int cap, bool *used_root);
+
 #endif /* __PERF_CAP_H */
diff --git a/tools/perf/util/capstone.c b/tools/perf/util/capstone.c
new file mode 100644
index 000000000000..be5fd44b1f9d
--- /dev/null
+++ b/tools/perf/util/capstone.c
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "capstone.h"
+#include "annotate.h"
+#include "addr_location.h"
+#include "debug.h"
+#include "disasm.h"
+#include "dso.h"
+#include "machine.h"
+#include "map.h"
+#include "namespaces.h"
+#include "print_insn.h"
+#include "symbol.h"
+#include "thread.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+#include <capstone/capstone.h>
+#endif
+
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+static int capstone_init(struct machine *machine, csh *cs_handle, bool is64,
+			 bool disassembler_style)
+{
+	cs_arch arch;
+	cs_mode mode;
+
+	if (machine__is(machine, "x86_64") && is64) {
+		arch = CS_ARCH_X86;
+		mode = CS_MODE_64;
+	} else if (machine__normalized_is(machine, "x86")) {
+		arch = CS_ARCH_X86;
+		mode = CS_MODE_32;
+	} else if (machine__normalized_is(machine, "arm64")) {
+		arch = CS_ARCH_ARM64;
+		mode = CS_MODE_ARM;
+	} else if (machine__normalized_is(machine, "arm")) {
+		arch = CS_ARCH_ARM;
+		mode = CS_MODE_ARM + CS_MODE_V8;
+	} else if (machine__normalized_is(machine, "s390")) {
+		arch = CS_ARCH_SYSZ;
+		mode = CS_MODE_BIG_ENDIAN;
+	} else {
+		return -1;
+	}
+
+	if (cs_open(arch, mode, cs_handle) != CS_ERR_OK) {
+		pr_warning_once("cs_open failed\n");
+		return -1;
+	}
+
+	if (machine__normalized_is(machine, "x86")) {
+		/*
+		 * When capstone_init() is called from symbol__disassemble(),
+		 * whether CS_OPT_SYNTAX_ATT is set depends on the
+		 * disassembler_style option passed via the annotation args.
+		 */
+		if (disassembler_style)
+			cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
+		/*
+		 * Resolving address operands to symbols is implemented
+		 * on x86 by investigating instruction details.
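+		 * (CS_OPT_DETAIL makes cs_disasm() populate insn->detail,
+		 * which print_insn_x86() and print_capstone_detail() below
+		 * inspect for X86_OP_IMM and X86_OP_MEM operands.)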
+		 */
+		cs_option(*cs_handle, CS_OPT_DETAIL, CS_OPT_ON);
+	}
+
+	return 0;
+}
+#endif
+
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+static size_t print_insn_x86(struct thread *thread, u8 cpumode, cs_insn *insn,
+			     int print_opts, FILE *fp)
+{
+	struct addr_location al;
+	size_t printed = 0;
+
+	if (insn->detail && insn->detail->x86.op_count == 1) {
+		cs_x86_op *op = &insn->detail->x86.operands[0];
+
+		addr_location__init(&al);
+		if (op->type == X86_OP_IMM &&
+		    thread__find_symbol(thread, cpumode, op->imm, &al)) {
+			printed += fprintf(fp, "%s ", insn[0].mnemonic);
+			printed += symbol__fprintf_symname_offs(al.sym, &al, fp);
+			if (print_opts & PRINT_INSN_IMM_HEX)
+				printed += fprintf(fp, " [%#" PRIx64 "]", op->imm);
+			addr_location__exit(&al);
+			return printed;
+		}
+		addr_location__exit(&al);
+	}
+
+	printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str);
+	return printed;
+}
+#endif
+
+
+ssize_t capstone__fprintf_insn_asm(struct machine *machine __maybe_unused,
+				   struct thread *thread __maybe_unused,
+				   u8 cpumode __maybe_unused, bool is64bit __maybe_unused,
+				   const uint8_t *code __maybe_unused,
+				   size_t code_size __maybe_unused,
+				   uint64_t ip __maybe_unused, int *lenp __maybe_unused,
+				   int print_opts __maybe_unused, FILE *fp __maybe_unused)
+{
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+	size_t printed;
+	cs_insn *insn;
+	csh cs_handle;
+	size_t count;
+	int ret;
+
+	/* TODO: Try to initialize capstone only once, but that needs a proper place. */
+	ret = capstone_init(machine, &cs_handle, is64bit, true);
+	if (ret < 0)
+		return ret;
+
+	count = cs_disasm(cs_handle, code, code_size, ip, 1, &insn);
+	if (count > 0) {
+		if (machine__normalized_is(machine, "x86"))
+			printed = print_insn_x86(thread, cpumode, &insn[0], print_opts, fp);
+		else
+			printed = fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str);
+		if (lenp)
+			*lenp = insn->size;
+		cs_free(insn, count);
+	} else {
+		printed = -1;
+	}
+
+	cs_close(&cs_handle);
+	return printed;
+#else
+	return -1;
+#endif
+}
+
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+static void print_capstone_detail(cs_insn *insn, char *buf, size_t len,
+				  struct annotate_args *args, u64 addr)
+{
+	int i;
+	struct map *map = args->ms.map;
+	struct symbol *sym;
+
+	/* TODO: support more architectures */
+	if (!arch__is(args->arch, "x86"))
+		return;
+
+	if (insn->detail == NULL)
+		return;
+
+	for (i = 0; i < insn->detail->x86.op_count; i++) {
+		cs_x86_op *op = &insn->detail->x86.operands[i];
+		u64 orig_addr;
+
+		if (op->type != X86_OP_MEM)
+			continue;
+
+		/* only print RIP-based global symbols for now */
+		if (op->mem.base != X86_REG_RIP)
+			continue;
+
+		/* get the target address */
+		orig_addr = addr + insn->size + op->mem.disp;
+		addr = map__objdump_2mem(map, orig_addr);
+
+		if (dso__kernel(map__dso(map))) {
+			/*
+			 * The kernel maps can be split into sections, let's
+			 * find the map first and then search the symbol.
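+			 * For instance (illustrative), a RIP-relative access
+			 * like "mov 0x1234(%rip), %rax" may target a section
+			 * mapped separately from the instruction itself.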
+ */ + map = maps__find(map__kmaps(map), addr); + if (map == NULL) + continue; + } + + /* convert it to map-relative address for search */ + addr = map__map_ip(map, addr); + + sym = map__find_symbol(map, addr); + if (sym == NULL) + continue; + + if (addr == sym->start) { + scnprintf(buf, len, "\t# %"PRIx64" <%s>", + orig_addr, sym->name); + } else { + scnprintf(buf, len, "\t# %"PRIx64" <%s+%#"PRIx64">", + orig_addr, sym->name, addr - sym->start); + } + break; + } +} +#endif + +#ifdef HAVE_LIBCAPSTONE_SUPPORT +struct find_file_offset_data { + u64 ip; + u64 offset; +}; + +/* This will be called for each PHDR in an ELF binary */ +static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg) +{ + struct find_file_offset_data *data = arg; + + if (start <= data->ip && data->ip < start + len) { + data->offset = pgoff + data->ip - start; + return 1; + } + return 0; +} +#endif + +int symbol__disassemble_capstone(const char *filename __maybe_unused, + struct symbol *sym __maybe_unused, + struct annotate_args *args __maybe_unused) +{ +#ifdef HAVE_LIBCAPSTONE_SUPPORT + struct annotation *notes = symbol__annotation(sym); + struct map *map = args->ms.map; + struct dso *dso = map__dso(map); + u64 start = map__rip_2objdump(map, sym->start); + u64 offset; + int i, count, free_count; + bool is_64bit = false; + bool needs_cs_close = false; + /* Malloc-ed buffer containing instructions read from disk. */ + u8 *code_buf = NULL; + /* Pointer to code to be disassembled. */ + const u8 *buf; + u64 buf_len; + csh handle; + cs_insn *insn = NULL; + char disasm_buf[512]; + struct disasm_line *dl; + bool disassembler_style = false; + + if (args->options->objdump_path) + return -1; + + buf = dso__read_symbol(dso, filename, map, sym, + &code_buf, &buf_len, &is_64bit); + if (buf == NULL) + return errno; + + /* add the function address and name */ + scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", + start, sym->name); + + args->offset = -1; + args->line = disasm_buf; + args->line_nr = 0; + args->fileloc = NULL; + args->ms.sym = sym; + + dl = disasm_line__new(args); + if (dl == NULL) + goto err; + + annotation_line__add(&dl->al, ¬es->src->source); + + if (!args->options->disassembler_style || + !strcmp(args->options->disassembler_style, "att")) + disassembler_style = true; + + if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0) + goto err; + + needs_cs_close = true; + + free_count = count = cs_disasm(handle, buf, buf_len, start, buf_len, &insn); + for (i = 0, offset = 0; i < count; i++) { + int printed; + + printed = scnprintf(disasm_buf, sizeof(disasm_buf), + " %-7s %s", + insn[i].mnemonic, insn[i].op_str); + print_capstone_detail(&insn[i], disasm_buf + printed, + sizeof(disasm_buf) - printed, args, + start + offset); + + args->offset = offset; + args->line = disasm_buf; + + dl = disasm_line__new(args); + if (dl == NULL) + goto err; + + annotation_line__add(&dl->al, ¬es->src->source); + + offset += insn[i].size; + } + + /* It failed in the middle: probably due to unknown instructions */ + if (offset != buf_len) { + struct list_head *list = ¬es->src->source; + + /* Discard all lines and fallback to objdump */ + while (!list_empty(list)) { + dl = list_first_entry(list, struct disasm_line, al.node); + + list_del_init(&dl->al.node); + disasm_line__free(dl); + } + count = -1; + } + +out: + if (needs_cs_close) { + cs_close(&handle); + if (free_count > 0) + cs_free(insn, free_count); + } + free(code_buf); + return count < 0 ? 
count : 0; + +err: + if (needs_cs_close) { + struct disasm_line *tmp; + + /* + * It probably failed in the middle of the above loop. + * Release any resources it might add. + */ + list_for_each_entry_safe(dl, tmp, ¬es->src->source, al.node) { + list_del(&dl->al.node); + disasm_line__free(dl); + } + } + count = -1; + goto out; +#else + return -1; +#endif +} + +int symbol__disassemble_capstone_powerpc(const char *filename __maybe_unused, + struct symbol *sym __maybe_unused, + struct annotate_args *args __maybe_unused) +{ +#ifdef HAVE_LIBCAPSTONE_SUPPORT + struct annotation *notes = symbol__annotation(sym); + struct map *map = args->ms.map; + struct dso *dso = map__dso(map); + struct nscookie nsc; + u64 start = map__rip_2objdump(map, sym->start); + u64 end = map__rip_2objdump(map, sym->end); + u64 len = end - start; + u64 offset; + int i, fd, count; + bool is_64bit = false; + bool needs_cs_close = false; + u8 *buf = NULL; + struct find_file_offset_data data = { + .ip = start, + }; + csh handle; + char disasm_buf[512]; + struct disasm_line *dl; + u32 *line; + bool disassembler_style = false; + + if (args->options->objdump_path) + return -1; + + nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); + fd = open(filename, O_RDONLY); + nsinfo__mountns_exit(&nsc); + if (fd < 0) + return -1; + + if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data, + &is_64bit) == 0) + goto err; + + if (!args->options->disassembler_style || + !strcmp(args->options->disassembler_style, "att")) + disassembler_style = true; + + if (capstone_init(maps__machine(args->ms.maps), &handle, is_64bit, disassembler_style) < 0) + goto err; + + needs_cs_close = true; + + buf = malloc(len); + if (buf == NULL) + goto err; + + count = pread(fd, buf, len, data.offset); + close(fd); + fd = -1; + + if ((u64)count != len) + goto err; + + line = (u32 *)buf; + + /* add the function address and name */ + scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", + start, sym->name); + + args->offset = -1; + args->line = disasm_buf; + args->line_nr = 0; + args->fileloc = NULL; + args->ms.sym = sym; + + dl = disasm_line__new(args); + if (dl == NULL) + goto err; + + annotation_line__add(&dl->al, ¬es->src->source); + + /* + * TODO: enable disassm for powerpc + * count = cs_disasm(handle, buf, len, start, len, &insn); + * + * For now, only binary code is saved in disassembled line + * to be used in "type" and "typeoff" sort keys. Each raw code + * is 32 bit instruction. So use "len/4" to get the number of + * entries. + */ + count = len/4; + + for (i = 0, offset = 0; i < count; i++) { + args->offset = offset; + sprintf(args->line, "%x", line[i]); + + dl = disasm_line__new(args); + if (dl == NULL) + break; + + annotation_line__add(&dl->al, ¬es->src->source); + + offset += 4; + } + + /* It failed in the middle */ + if (offset != len) { + struct list_head *list = ¬es->src->source; + + /* Discard all lines and fallback to objdump */ + while (!list_empty(list)) { + dl = list_first_entry(list, struct disasm_line, al.node); + + list_del_init(&dl->al.node); + disasm_line__free(dl); + } + count = -1; + } + +out: + if (needs_cs_close) + cs_close(&handle); + free(buf); + return count < 0 ? 
count : 0; + +err: + if (fd >= 0) + close(fd); + count = -1; + goto out; +#else + return -1; +#endif +} diff --git a/tools/perf/util/capstone.h b/tools/perf/util/capstone.h new file mode 100644 index 000000000000..0f030ea034b6 --- /dev/null +++ b/tools/perf/util/capstone.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_CAPSTONE_H +#define __PERF_CAPSTONE_H + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <linux/types.h> + +struct annotate_args; +struct machine; +struct symbol; +struct thread; + +ssize_t capstone__fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpumode, + bool is64bit, const uint8_t *code, size_t code_size, + uint64_t ip, int *lenp, int print_opts, FILE *fp); +int symbol__disassemble_capstone(const char *filename, struct symbol *sym, + struct annotate_args *args); +int symbol__disassemble_capstone_powerpc(const char *filename, struct symbol *sym, + struct annotate_args *args); + +#endif /* __PERF_CAPSTONE_H */ diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 0f759dd96db7..040eb75f0804 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -10,6 +10,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <sys/statfs.h> +#include <errno.h> #include <fcntl.h> #include <stdlib.h> #include <string.h> @@ -413,8 +414,7 @@ static bool has_pattern_string(const char *str) return !!strpbrk(str, "{}[]()|*+?^$"); } -int evlist__expand_cgroup(struct evlist *evlist, const char *str, - struct rblist *metric_events, bool open_cgroup) +int evlist__expand_cgroup(struct evlist *evlist, const char *str, bool open_cgroup) { struct evlist *orig_list, *tmp_list; struct evsel *pos, *evsel, *leader; @@ -440,12 +440,8 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, evlist__splice_list_tail(orig_list, &evlist->core.entries); evlist->core.nr_entries = 0; - if (metric_events) { - orig_metric_events = *metric_events; - rblist__init(metric_events); - } else { - rblist__init(&orig_metric_events); - } + orig_metric_events = evlist->metric_events; + metricgroup__rblist_init(&evlist->metric_events); if (has_pattern_string(str)) prefix_len = match_cgroups(str); @@ -473,7 +469,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, leader = NULL; evlist__for_each_entry(orig_list, pos) { - evsel = evsel__clone(pos); + evsel = evsel__clone(/*dest=*/NULL, pos); if (evsel == NULL) goto out_err; @@ -490,12 +486,10 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, cgroup__put(cgrp); nr_cgroups++; - if (metric_events) { - if (metricgroup__copy_metric_events(tmp_list, cgrp, - metric_events, - &orig_metric_events) < 0) - goto out_err; - } + if (metricgroup__copy_metric_events(tmp_list, cgrp, + &evlist->metric_events, + &orig_metric_events) < 0) + goto out_err; evlist__splice_list_tail(evlist, &tmp_list->core.entries); tmp_list->core.nr_entries = 0; @@ -512,7 +506,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str, out_err: evlist__delete(orig_list); evlist__delete(tmp_list); - rblist__exit(&orig_metric_events); + metricgroup__rblist_exit(&orig_metric_events); release_cgroup_list(); return ret; diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index de8882d6e8d3..7b1bda22878c 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -28,8 +28,7 @@ struct rblist; struct cgroup *cgroup__new(const char *name, bool do_open); struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const 
char *name); -int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups, - struct rblist *metric_events, bool open_cgroup); +int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups, bool open_cgroup); void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup); diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index bffbdd216a6a..e51f0a676a22 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -93,34 +93,6 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) return r; } -/* - * This function splits the buffer by newlines and colors the lines individually. - * - * Returns 0 on success. - */ -int color_fwrite_lines(FILE *fp, const char *color, - size_t count, const char *buf) -{ - if (!*color) - return fwrite(buf, count, 1, fp) != 1; - - while (count) { - char *p = memchr(buf, '\n', count); - - if (p != buf && (fputs(color, fp) < 0 || - fwrite(buf, p ? (size_t)(p - buf) : count, 1, fp) != 1 || - fputs(PERF_COLOR_RESET, fp) < 0)) - return -1; - if (!p) - return 0; - if (fputc('\n', fp) < 0) - return -1; - count -= p + 1 - buf; - buf = p + 1; - } - return 0; -} - const char *get_percent_color(double percent) { const char *color = PERF_COLOR_NORMAL; diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 01f7bed21c9b..0319546decca 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -2,6 +2,7 @@ #ifndef __PERF_COLOR_H #define __PERF_COLOR_H +#include <linux/compiler.h> #include <stdio.h> #include <stdarg.h> @@ -22,27 +23,22 @@ #define MIN_GREEN 0.5 #define MIN_RED 5.0 +#define PERF_COLOR_DELETE_LINE "\033[A\33[2K\r" /* * This variable stores the value of color.ui */ extern int perf_use_color_default; -/* - * Use this instead of perf_default_config if you need the value of color.ui. - */ -int perf_color_default_config(const char *var, const char *value, void *cb); - int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty); int color_vsnprintf(char *bf, size_t size, const char *color, const char *fmt, va_list args); int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); -int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); -int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...); -int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); +int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) __printf(3, 4); +int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...) __printf(4, 5); int value_color_snprintf(char *bf, size_t size, const char *fmt, double value); -int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...); -int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...); +int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...) __printf(3, 4); +int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...) 
__printf(3, 4); int percent_color_fprintf(FILE *fp, const char *fmt, double percent); const char *get_percent_color(double percent); diff --git a/tools/perf/util/color_config.c b/tools/perf/util/color_config.c index dc09ba7cb31e..301031ddc025 100644 --- a/tools/perf/util/color_config.c +++ b/tools/perf/util/color_config.c @@ -35,14 +35,3 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) } return 0; } - -int perf_color_default_config(const char *var, const char *value, - void *cb __maybe_unused) -{ - if (!strcmp(var, "color.ui")) { - perf_use_color_default = perf_config_colorbool(var, value, -1); - return 0; - } - - return 0; -} diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 49b79cf0c5cc..9880247a2c33 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -5,6 +5,8 @@ #include <internal/rc_check.h> #include <linux/refcount.h> #include <linux/zalloc.h> +#include <tools/libc_compat.h> // reallocarray + #include "rwsem.h" DECLARE_RC_STRUCT(comm_str) { @@ -22,6 +24,7 @@ static struct comm_strs { static void comm_strs__remove_if_last(struct comm_str *cs); static void comm_strs__init(void) + NO_THREAD_SAFETY_ANALYSIS /* Inherently single threaded due to pthread_once. */ { init_rwsem(&_comm_strs.lock); _comm_strs.capacity = 16; @@ -117,6 +120,7 @@ static void comm_strs__remove_if_last(struct comm_str *cs) } static struct comm_str *__comm_strs__find(struct comm_strs *comm_strs, const char *str) + SHARED_LOCKS_REQUIRED(comm_strs->lock) { struct comm_str **result; diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h index b29109cd3609..6cfecfca16f2 100644 --- a/tools/perf/util/compress.h +++ b/tools/perf/util/compress.h @@ -4,7 +4,9 @@ #include <stdbool.h> #include <stddef.h> +#include <stdio.h> #include <sys/types.h> +#include <linux/compiler.h> #ifdef HAVE_ZSTD_SUPPORT #include <zstd.h> #endif @@ -15,8 +17,26 @@ bool gzip_is_compressed(const char *input); #endif #ifdef HAVE_LZMA_SUPPORT +int lzma_decompress_stream_to_file(FILE *input, int output_fd); int lzma_decompress_to_file(const char *input, int output_fd); bool lzma_is_compressed(const char *input); +#else +static inline +int lzma_decompress_stream_to_file(FILE *input __maybe_unused, + int output_fd __maybe_unused) +{ + return -1; +} +static inline +int lzma_decompress_to_file(const char *input __maybe_unused, + int output_fd __maybe_unused) +{ + return -1; +} +static inline int lzma_is_compressed(const char *input __maybe_unused) +{ + return false; +} #endif struct zstd_data { diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 7a650de0db83..e0219bc6330a 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -13,12 +13,13 @@ #include <sys/param.h> #include "cache.h" #include "callchain.h" +#include "header.h" #include <subcmd/exec-cmd.h> #include "util/event.h" /* proc_map_timeout */ #include "util/hist.h" /* perf_hist_config */ #include "util/stat.h" /* perf_stat__set_big_num */ #include "util/evsel.h" /* evsel__hw_names, evsel__use_bpf_counters */ -#include "util/srcline.h" /* addr2line_timeout_ms */ +#include "util/addr2line.h" /* addr2line_timeout_ms */ #include "build-id.h" #include "debug.h" #include "config.h" @@ -34,6 +35,23 @@ #define DEBUG_CACHE_DIR ".debug" +#define METRIC_ONLY_LEN 20 + +static struct stats walltime_nsecs_stats; + +struct perf_stat_config stat_config = { + .aggr_mode = AGGR_GLOBAL, + .aggr_level = MAX_CACHE_LVL + 1, + .scale = true, + .unit_width = 4, /* strlen("unit") */ + .run_count = 1, + 
.metric_only_len = METRIC_ONLY_LEN, + .walltime_nsecs_stats = &walltime_nsecs_stats, + .big_num = true, + .ctl_fd = -1, + .ctl_fd_ack = -1, + .iostat_run = false, +}; char buildid_dir[MAXPATHLEN]; /* root dir for buildid, binary cache */ @@ -455,6 +473,16 @@ static int perf_ui_config(const char *var, const char *value) return 0; } +void perf_stat__set_big_num(int set) +{ + stat_config.big_num = (set != 0); +} + +static void perf_stat__set_no_csv_summary(int set) +{ + stat_config.no_csv_summary = (set != 0); +} + static int perf_stat_config(const char *var, const char *value) { if (!strcmp(var, "stat.big-num")) @@ -829,12 +857,6 @@ void perf_config__exit(void) config_set = NULL; } -void perf_config__refresh(void) -{ - perf_config__exit(); - perf_config__init(); -} - static void perf_config_item__delete(struct perf_config_item *item) { zfree(&item->name); @@ -912,6 +934,7 @@ void set_buildid_dir(const char *dir) struct perf_config_scan_data { const char *name; const char *fmt; + const char *value; va_list args; int ret; }; @@ -939,3 +962,24 @@ int perf_config_scan(const char *name, const char *fmt, ...) return d.ret; } + +static int perf_config_get_cb(const char *var, const char *value, void *data) +{ + struct perf_config_scan_data *d = data; + + if (!strcmp(var, d->name)) + d->value = value; + + return 0; +} + +const char *perf_config_get(const char *name) +{ + struct perf_config_scan_data d = { + .name = name, + .value = NULL, + }; + + perf_config(perf_config_get_cb, &d); + return d.value; +} diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index 2e5e808928a5..987b47cf54c3 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -30,6 +30,7 @@ typedef int (*config_fn_t)(const char *, const char *, void *); int perf_default_config(const char *, const char *, void *); int perf_config(config_fn_t fn, void *); int perf_config_scan(const char *name, const char *fmt, ...) __scanf(2, 3); +const char *perf_config_get(const char *name); int perf_config_set(struct perf_config_set *set, config_fn_t fn, void *data); int perf_config_int(int *dest, const char *, const char *); @@ -48,7 +49,7 @@ void perf_config_set__delete(struct perf_config_set *set); int perf_config_set__collect(struct perf_config_set *set, const char *file_name, const char *var, const char *value); void perf_config__exit(void); -void perf_config__refresh(void); +int perf_config__set_variable(const char *var, const char *value); /** * perf_config_sections__for_each - iterate thru all the sections diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 27094211edd8..a80845038a5e 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -67,19 +67,23 @@ static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_m struct perf_cpu_map *map; map = perf_cpu_map__empty_new(data->cpus_data.nr); - if (map) { - unsigned i; - - for (i = 0; i < data->cpus_data.nr; i++) { - /* - * Special treatment for -1, which is not real cpu number, - * and we need to use (int) -1 to initialize map[i], - * otherwise it would become 65535. - */ - if (data->cpus_data.cpu[i] == (u16) -1) - RC_CHK_ACCESS(map)->map[i].cpu = -1; - else - RC_CHK_ACCESS(map)->map[i].cpu = (int) data->cpus_data.cpu[i]; + if (!map) + return NULL; + + for (unsigned int i = 0; i < data->cpus_data.nr; i++) { + /* + * Special treatment for -1, which is not real cpu number, + * and we need to use (int) -1 to initialize map[i], + * otherwise it would become 65535. 
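+		 * (Values at or above INT16_MAX are additionally rejected
+		 * below; this series apparently narrows perf CPU numbers to
+		 * 16 bits, see the (int16_t) casts that follow.)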
+ */ + if (data->cpus_data.cpu[i] == (u16) -1) { + RC_CHK_ACCESS(map)->map[i].cpu = -1; + } else if (data->cpus_data.cpu[i] < INT16_MAX) { + RC_CHK_ACCESS(map)->map[i].cpu = (int16_t) data->cpus_data.cpu[i]; + } else { + pr_err("Invalid cpumap entry %u\n", data->cpus_data.cpu[i]); + perf_cpu_map__put(map); + return NULL; } } @@ -106,8 +110,15 @@ static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_ int cpu; perf_record_cpu_map_data__read_one_mask(data, i, local_copy); - for_each_set_bit(cpu, local_copy, 64) - RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i; + for_each_set_bit(cpu, local_copy, 64) { + if (cpu + cpus_per_i < INT16_MAX) { + RC_CHK_ACCESS(map)->map[j++].cpu = cpu + cpus_per_i; + } else { + pr_err("Invalid cpumap entry %d\n", cpu + cpus_per_i); + perf_cpu_map__put(map); + return NULL; + } + } } return map; @@ -127,8 +138,15 @@ static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map RC_CHK_ACCESS(map)->map[i++].cpu = -1; for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu; - i++, cpu++) - RC_CHK_ACCESS(map)->map[i].cpu = cpu; + i++, cpu++) { + if (cpu < INT16_MAX) { + RC_CHK_ACCESS(map)->map[i].cpu = cpu; + } else { + pr_err("Invalid cpumap entry %d\n", cpu); + perf_cpu_map__put(map); + return NULL; + } + } return map; } @@ -293,7 +311,7 @@ struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data) die = cpu__get_die_id(cpu); /* There is no die_id on legacy system. */ - if (die == -1) + if (die < 0) die = 0; /* @@ -322,7 +340,7 @@ struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data) struct aggr_cpu_id id; /* There is no cluster_id on legacy system. */ - if (cluster == -1) + if (cluster < 0) cluster = 0; id = aggr_cpu_id__die(cpu, data); @@ -427,7 +445,7 @@ static void set_max_cpu_num(void) { const char *mnt; char path[PATH_MAX]; - int ret = -1; + int max, ret = -1; /* set up default */ max_cpu_num.cpu = 4096; @@ -444,10 +462,12 @@ static void set_max_cpu_num(void) goto out; } - ret = get_max_num(path, &max_cpu_num.cpu); + ret = get_max_num(path, &max); if (ret) goto out; + max_cpu_num.cpu = max; + /* get the highest present cpu number for a sparse allocation */ ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt); if (ret >= PATH_MAX) { @@ -455,8 +475,14 @@ static void set_max_cpu_num(void) goto out; } - ret = get_max_num(path, &max_present_cpu_num.cpu); + ret = get_max_num(path, &max); + if (!ret && max > INT16_MAX) { + pr_err("Read out of bounds max cpus of %d\n", max); + ret = -1; + } + if (!ret) + max_present_cpu_num.cpu = (int16_t)max; out: if (ret) pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu); @@ -606,7 +632,7 @@ size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size) #define COMMA first ? 
"" : "," for (i = 0; i < perf_cpu_map__nr(map) + 1; i++) { - struct perf_cpu cpu = { .cpu = INT_MAX }; + struct perf_cpu cpu = { .cpu = INT16_MAX }; bool last = i == perf_cpu_map__nr(map); if (!last) @@ -658,16 +684,21 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size) unsigned char *bitmap; struct perf_cpu c, last_cpu = perf_cpu_map__max(map); - if (buf == NULL) + if (buf == NULL || size == 0) return 0; + if (last_cpu.cpu < 0) { + buf[0] = '\0'; + return 0; + } + bitmap = zalloc(last_cpu.cpu / 8 + 1); if (bitmap == NULL) { buf[0] = '\0'; return 0; } - perf_cpu_map__for_each_cpu(c, idx, map) + perf_cpu_map__for_each_cpu_skip_any(c, idx, map) bitmap[c.cpu / 8] |= 1 << (c.cpu % 8); for (int cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) { @@ -696,7 +727,7 @@ struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ if (!online) online = perf_cpu_map__new_online_cpus(); /* from /sys/devices/system/cpu/online */ - return online; + return perf_cpu_map__get(online); } bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b) diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build index 216cb17a3322..27550db2aa4c 100644 --- a/tools/perf/util/cs-etm-decoder/Build +++ b/tools/perf/util/cs-etm-decoder/Build @@ -1 +1 @@ -perf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o +perf-util-y += cs-etm-decoder.o diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index e917985bbbe6..3050fe212666 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -41,7 +41,7 @@ const u32 INSTR_PER_NS = 10; struct cs_etm_decoder { void *data; - void (*packet_printer)(const char *msg); + void (*packet_printer)(const char *msg, void *data); bool suppress_printing; dcd_tree_handle_t dcd_tree; cs_etm_mem_cb_type mem_access; @@ -202,7 +202,7 @@ static void cs_etm_decoder__print_str_cb(const void *p_context, const struct cs_etm_decoder *decoder = p_context; if (p_context && str_len && !decoder->suppress_printing) - decoder->packet_printer(msg); + decoder->packet_printer(msg, decoder->data); } static int @@ -388,7 +388,8 @@ cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue) } static ocsd_datapath_resp_t -cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue, +cs_etm_decoder__buffer_packet(struct cs_etm_queue *etmq, + struct cs_etm_packet_queue *packet_queue, const u8 trace_chan_id, enum cs_etm_sample_type sample_type) { @@ -398,7 +399,7 @@ cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue, if (packet_queue->packet_count >= CS_ETM_PACKET_MAX_BUFFER - 1) return OCSD_RESP_FATAL_SYS_ERR; - if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0) + if (cs_etm__get_cpu(etmq, trace_chan_id, &cpu) < 0) return OCSD_RESP_FATAL_SYS_ERR; et = packet_queue->tail; @@ -436,7 +437,7 @@ cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq, int ret = 0; struct cs_etm_packet *packet; - ret = cs_etm_decoder__buffer_packet(packet_queue, trace_chan_id, + ret = cs_etm_decoder__buffer_packet(etmq, packet_queue, trace_chan_id, CS_ETM_RANGE); if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) return ret; @@ -496,7 +497,8 @@ out: } static ocsd_datapath_resp_t -cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue, +cs_etm_decoder__buffer_discontinuity(struct cs_etm_queue *etmq, + struct cs_etm_packet_queue *queue, const uint8_t trace_chan_id) { /* @@ -504,18 +506,19 @@ 
cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue, * reset time statistics. */ cs_etm_decoder__reset_timestamp(queue); - return cs_etm_decoder__buffer_packet(queue, trace_chan_id, + return cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id, CS_ETM_DISCONTINUITY); } static ocsd_datapath_resp_t -cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue, +cs_etm_decoder__buffer_exception(struct cs_etm_queue *etmq, + struct cs_etm_packet_queue *queue, const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) { int ret = 0; struct cs_etm_packet *packet; - ret = cs_etm_decoder__buffer_packet(queue, trace_chan_id, + ret = cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id, CS_ETM_EXCEPTION); if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) return ret; @@ -527,10 +530,11 @@ cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue, } static ocsd_datapath_resp_t -cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue, +cs_etm_decoder__buffer_exception_ret(struct cs_etm_queue *etmq, + struct cs_etm_packet_queue *queue, const uint8_t trace_chan_id) { - return cs_etm_decoder__buffer_packet(queue, trace_chan_id, + return cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id, CS_ETM_EXCEPTION_RET); } @@ -584,6 +588,7 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( const ocsd_generic_trace_elem *elem) { ocsd_datapath_resp_t resp = OCSD_RESP_CONT; + ocsd_gen_trc_elem_t type; struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; struct cs_etm_queue *etmq = decoder->data; struct cs_etm_packet_queue *packet_queue; @@ -593,52 +598,29 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( if (!packet_queue) return OCSD_RESP_FATAL_SYS_ERR; - switch (elem->elem_type) { - case OCSD_GEN_TRC_ELEM_UNKNOWN: - break; - case OCSD_GEN_TRC_ELEM_EO_TRACE: - case OCSD_GEN_TRC_ELEM_NO_SYNC: - case OCSD_GEN_TRC_ELEM_TRACE_ON: - resp = cs_etm_decoder__buffer_discontinuity(packet_queue, + type = elem->elem_type; + + if (type == OCSD_GEN_TRC_ELEM_EO_TRACE || + type == OCSD_GEN_TRC_ELEM_NO_SYNC || + type == OCSD_GEN_TRC_ELEM_TRACE_ON) + resp = cs_etm_decoder__buffer_discontinuity(etmq, packet_queue, trace_chan_id); - break; - case OCSD_GEN_TRC_ELEM_INSTR_RANGE: + else if (type == OCSD_GEN_TRC_ELEM_INSTR_RANGE) resp = cs_etm_decoder__buffer_range(etmq, packet_queue, elem, trace_chan_id); - break; - case OCSD_GEN_TRC_ELEM_EXCEPTION: - resp = cs_etm_decoder__buffer_exception(packet_queue, elem, + else if (type == OCSD_GEN_TRC_ELEM_EXCEPTION) + resp = cs_etm_decoder__buffer_exception(etmq, packet_queue, elem, trace_chan_id); - break; - case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: - resp = cs_etm_decoder__buffer_exception_ret(packet_queue, + else if (type == OCSD_GEN_TRC_ELEM_EXCEPTION_RET) + resp = cs_etm_decoder__buffer_exception_ret(etmq, packet_queue, trace_chan_id); - break; - case OCSD_GEN_TRC_ELEM_TIMESTAMP: + else if (type == OCSD_GEN_TRC_ELEM_TIMESTAMP) resp = cs_etm_decoder__do_hard_timestamp(etmq, elem, trace_chan_id, indx); - break; - case OCSD_GEN_TRC_ELEM_PE_CONTEXT: + else if (type == OCSD_GEN_TRC_ELEM_PE_CONTEXT) resp = cs_etm_decoder__set_tid(etmq, packet_queue, elem, trace_chan_id); - break; - /* Unused packet types */ - case OCSD_GEN_TRC_ELEM_I_RANGE_NOPATH: - case OCSD_GEN_TRC_ELEM_ADDR_NACC: - case OCSD_GEN_TRC_ELEM_CYCLE_COUNT: - case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN: - case OCSD_GEN_TRC_ELEM_EVENT: - case OCSD_GEN_TRC_ELEM_SWTRACE: - case OCSD_GEN_TRC_ELEM_CUSTOM: - case 
OCSD_GEN_TRC_ELEM_SYNC_MARKER: - case OCSD_GEN_TRC_ELEM_MEMTRANS: -#if (OCSD_VER_NUM >= 0x010400) - case OCSD_GEN_TRC_ELEM_INSTRUMENTATION: -#endif - default: - break; - } return resp; } @@ -680,14 +662,15 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, return -1; } - /* if the CPU has no trace ID associated, no decoder needed */ - if (csid == CORESIGHT_TRACE_ID_UNUSED_VAL) - return 0; - if (d_params->operation == CS_ETM_OPERATION_DECODE) { + int decode_flags = OCSD_CREATE_FLG_FULL_DECODER; +#ifdef OCSD_OPFLG_N_UNCOND_DIR_BR_CHK + decode_flags |= OCSD_OPFLG_N_UNCOND_DIR_BR_CHK | OCSD_OPFLG_CHK_RANGE_CONTINUE | + ETM4_OPFLG_PKTDEC_AA64_OPCODE_CHK; +#endif if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder->decoder_name, - OCSD_CREATE_FLG_FULL_DECODER, + decode_flags, trace_config, &csid)) return -1; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 272c2efe78ee..12c782fa6db2 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -60,7 +60,7 @@ struct cs_etm_trace_params { struct cs_etm_decoder_params { int operation; - void (*packet_printer)(const char *msg); + void (*packet_printer)(const char *msg, void *data); cs_etm_mem_cb_type mem_acc_cb; bool formatted; bool fsyncs; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 32818bd7cd17..25d56e0f1c07 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -97,28 +97,43 @@ struct cs_etm_traceid_queue { struct cs_etm_packet_queue packet_queue; }; +enum cs_etm_format { + UNSET, + FORMATTED, + UNFORMATTED +}; + struct cs_etm_queue { struct cs_etm_auxtrace *etm; struct cs_etm_decoder *decoder; struct auxtrace_buffer *buffer; unsigned int queue_nr; u8 pending_timestamp_chan_id; + enum cs_etm_format format; u64 offset; const unsigned char *buf; size_t buf_len, buf_used; /* Conversion between traceID and index in traceid_queues array */ struct intlist *traceid_queues_list; struct cs_etm_traceid_queue **traceid_queues; + /* Conversion between traceID and metadata pointers */ + struct intlist *traceid_list; + /* + * Same as traceid_list, but traceid_list may be a reference to another + * queue's which has a matching sink ID. 
+ */ + struct intlist *own_traceid_list; + u32 sink_id; }; -/* RB tree for quick conversion between traceID and metadata pointers */ -static struct intlist *traceid_list; - static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid); static int cs_etm__get_data_block(struct cs_etm_queue *etmq); static int cs_etm__decode_data_block(struct cs_etm_queue *etmq); +static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata); +static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu); +static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata); /* PTMs ETMIDR [11:8] set to b0011 */ #define ETMIDR_PTM_VERSION 0x00000300 @@ -133,6 +148,7 @@ static int cs_etm__decode_data_block(struct cs_etm_queue *etmq); (queue_nr << 16 | trace_chan_id) #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16) #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff) +#define SINK_UNSET ((u32) -1) static u32 cs_etm__get_v7_protocol_version(u32 etmidr) { @@ -144,12 +160,12 @@ static u32 cs_etm__get_v7_protocol_version(u32 etmidr) return CS_ETM_PROTO_ETMV3; } -static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic) +static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic) { struct int_node *inode; u64 *metadata; - inode = intlist__find(traceid_list, trace_chan_id); + inode = intlist__find(etmq->traceid_list, trace_chan_id); if (!inode) return -EINVAL; @@ -158,12 +174,12 @@ static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic) return 0; } -int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) +int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu) { struct int_node *inode; u64 *metadata; - inode = intlist__find(traceid_list, trace_chan_id); + inode = intlist__find(etmq->traceid_list, trace_chan_id); if (!inode) return -EINVAL; @@ -215,26 +231,171 @@ enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq) return etmq->etm->pid_fmt; } -static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata) +static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq, + u8 trace_chan_id, u64 *cpu_metadata) { - struct int_node *inode; - /* Get an RB node for this CPU */ - inode = intlist__findnew(traceid_list, trace_chan_id); + struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id); /* Something went wrong, no need to continue */ if (!inode) return -ENOMEM; + /* Disallow re-mapping a different traceID to metadata pair. */ + if (inode->priv) { + u64 *curr_cpu_data = inode->priv; + u8 curr_chan_id; + int err; + + if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) { + /* + * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs + * are expected (but not supported) in per-thread mode, + * rather than signifying an error. 
+ */ + if (etmq->etm->per_thread_decoding) + pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n"); + else + pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n"); + + return -EINVAL; + } + + /* check that the mapped ID matches */ + err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data); + if (err) + return err; + + if (curr_chan_id != trace_chan_id) { + pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n"); + return -EINVAL; + } + + /* Skip re-adding the same mappings if everything matched */ + return 0; + } + + /* Not one we've seen before, associate the traceID with the metadata pointer */ + inode->priv = cpu_metadata; + + return 0; +} + +static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu) +{ + if (etm->per_thread_decoding) + return etm->queues.queue_array[0].priv; + else + return etm->queues.queue_array[cpu].priv; +} + +static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id, + u64 *cpu_metadata) +{ + struct cs_etm_queue *etmq; + + /* + * If the queue is unformatted then only save one mapping in the + * queue associated with that CPU so only one decoder is made. + */ + etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]); + if (etmq->format == UNFORMATTED) + return cs_etm__insert_trace_id_node(etmq, trace_chan_id, + cpu_metadata); + /* - * The node for that CPU should not be taken. - * Back out if that's the case. + * Otherwise, version 0 trace IDs are global so save them into every + * queue. */ - if (inode->priv) + for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) { + int ret; + + etmq = etm->queues.queue_array[i].priv; + ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id, + cpu_metadata); + if (ret) + return ret; + } + + return 0; +} + +static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu, + u64 hw_id) +{ + int err; + u64 *cpu_data; + u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); + + cpu_data = get_cpu_data(etm, cpu); + if (cpu_data == NULL) return -EINVAL; - /* All good, associate the traceID with the metadata pointer */ - inode->priv = cpu_metadata; + err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data); + if (err) + return err; + + /* + * if we are picking up the association from the packet, need to plug + * the correct trace ID into the metadata for setting up decoders later. + */ + return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data); +} + +static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu, + u64 hw_id) +{ + struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu); + int ret; + u64 *cpu_data; + u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id); + u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); + + /* + * Check sink id hasn't changed in per-cpu mode. In per-thread mode, + * let it pass for now until an actual overlapping trace ID is hit. In + * most cases IDs won't overlap even if the sink changes. 
+ */ + if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET && + etmq->sink_id != sink_id) { + pr_err("CS_ETM: mismatch between sink IDs\n"); + return -EINVAL; + } + + etmq->sink_id = sink_id; + + /* Find which other queues use this sink and link their ID maps */ + for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) { + struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv; + + /* Different sinks, skip */ + if (other_etmq->sink_id != etmq->sink_id) + continue; + + /* Already linked, skip */ + if (other_etmq->traceid_list == etmq->traceid_list) + continue; + + /* At the point of first linking, this one should be empty */ + if (!intlist__empty(etmq->traceid_list)) { + pr_err("CS_ETM: Can't link populated trace ID lists\n"); + return -EINVAL; + } + + etmq->own_traceid_list = NULL; + intlist__delete(etmq->traceid_list); + etmq->traceid_list = other_etmq->traceid_list; + break; + } + + cpu_data = get_cpu_data(etm, cpu); + ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data); + if (ret) + return ret; + + ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data); + if (ret) + return ret; return 0; } @@ -261,7 +422,6 @@ static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata) /* * update metadata trace ID from the value found in the AUX_HW_INFO packet. - * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present. */ static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata) { @@ -322,20 +482,16 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session, { struct cs_etm_auxtrace *etm; struct perf_sample sample; - struct int_node *inode; struct evsel *evsel; - u64 *cpu_data; u64 hw_id; int cpu, version, err; - u8 trace_chan_id, curr_chan_id; /* extract and parse the HW ID */ hw_id = event->aux_output_hw_id.hw_id; - version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id); - trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); + version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id); /* check that we can handle this version */ - if (version > CS_AUX_HW_ID_CURR_VERSION) { + if (version > CS_AUX_HW_ID_MAJOR_VERSION) { pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n", version); return -EINVAL; @@ -350,52 +506,26 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session, evsel = evlist__event2evsel(session->evlist, event); if (!evsel) return -EINVAL; + perf_sample__init(&sample, /*all=*/false); err = evsel__parse_sample(evsel, event, &sample); if (err) - return err; + goto out; cpu = sample.cpu; if (cpu == -1) { /* no CPU in the sample - possibly recorded with an old version of perf */ pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. 
Use compatible perf to record."); - return -EINVAL; + err = -EINVAL; + goto out; } - /* See if the ID is mapped to a CPU, and it matches the current CPU */ - inode = intlist__find(traceid_list, trace_chan_id); - if (inode) { - cpu_data = inode->priv; - if ((int)cpu_data[CS_ETM_CPU] != cpu) { - pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n"); - return -EINVAL; - } - - /* check that the mapped ID matches */ - err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data); - if (err) - return err; - if (curr_chan_id != trace_chan_id) { - pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n"); - return -EINVAL; - } - - /* mapped and matched - return OK */ - return 0; + if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) { + err = cs_etm__process_trace_id_v0(etm, cpu, hw_id); + goto out; } - cpu_data = get_cpu_data(etm, cpu); - if (cpu_data == NULL) - return err; - - /* not one we've seen before - lets map it */ - err = cs_etm__map_trace_id(trace_chan_id, cpu_data); - if (err) - return err; - - /* - * if we are picking up the association from the packet, need to plug - * the correct trace ID into the metadata for setting up decoders later. - */ - err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data); + err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id); +out: + perf_sample__exit(&sample); return err; } @@ -639,94 +769,79 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, } } -static void cs_etm__packet_dump(const char *pkt_string) +static void cs_etm__packet_dump(const char *pkt_string, void *data) { const char *color = PERF_COLOR_BLUE; int len = strlen(pkt_string); + struct cs_etm_queue *etmq = data; + char queue_nr[64]; + + if (verbose) + snprintf(queue_nr, sizeof(queue_nr), "Qnr:%u; ", etmq->queue_nr); + else + queue_nr[0] = '\0'; if (len && (pkt_string[len-1] == '\n')) - color_fprintf(stdout, color, " %s", pkt_string); + color_fprintf(stdout, color, " %s%s", queue_nr, pkt_string); else - color_fprintf(stdout, color, " %s\n", pkt_string); + color_fprintf(stdout, color, " %s%s\n", queue_nr, pkt_string); fflush(stdout); } static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, - struct cs_etm_auxtrace *etm, int t_idx, - int m_idx, u32 etmidr) + u64 *metadata, u32 etmidr) { - u64 **metadata = etm->metadata; - - t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr); - t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR]; - t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR]; + t_params->protocol = cs_etm__get_v7_protocol_version(etmidr); + t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR]; + t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR]; } static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, - struct cs_etm_auxtrace *etm, int t_idx, - int m_idx) + u64 *metadata) { - u64 **metadata = etm->metadata; - - t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i; - t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0]; - t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1]; - t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2]; - t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8]; - t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR]; - t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR]; + t_params->protocol = CS_ETM_PROTO_ETMV4i; + t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0]; + t_params->etmv4.reg_idr1 = 
metadata[CS_ETMV4_TRCIDR1]; + t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2]; + t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8]; + t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR]; + t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR]; } static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, - struct cs_etm_auxtrace *etm, int t_idx, - int m_idx) + u64 *metadata) { - u64 **metadata = etm->metadata; - - t_params[t_idx].protocol = CS_ETM_PROTO_ETE; - t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0]; - t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1]; - t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2]; - t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8]; - t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR]; - t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR]; - t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH]; + t_params->protocol = CS_ETM_PROTO_ETE; + t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0]; + t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1]; + t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2]; + t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8]; + t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR]; + t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR]; + t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH]; } static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, - struct cs_etm_auxtrace *etm, - bool formatted, - int sample_cpu, - int decoders) -{ - int t_idx, m_idx; - u32 etmidr; - u64 architecture; - - for (t_idx = 0; t_idx < decoders; t_idx++) { - if (formatted) - m_idx = t_idx; - else { - m_idx = get_cpu_data_idx(etm, sample_cpu); - if (m_idx == -1) { - pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n"); - m_idx = 0; - } - } + struct cs_etm_queue *etmq) +{ + struct int_node *inode; - architecture = etm->metadata[m_idx][CS_ETM_MAGIC]; + intlist__for_each_entry(inode, etmq->traceid_list) { + u64 *metadata = inode->priv; + u64 architecture = metadata[CS_ETM_MAGIC]; + u32 etmidr; switch (architecture) { case __perf_cs_etmv3_magic: - etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR]; - cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr); + etmidr = metadata[CS_ETM_ETMIDR]; + cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr); break; case __perf_cs_etmv4_magic: - cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx); + cs_etm__set_trace_param_etmv4(t_params++, metadata); break; case __perf_cs_ete_magic: - cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx); + cs_etm__set_trace_param_ete(t_params++, metadata); break; default: return -EINVAL; @@ -738,8 +853,7 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, struct cs_etm_queue *etmq, - enum cs_etm_decoder_operation mode, - bool formatted) + enum cs_etm_decoder_operation mode) { int ret = -EINVAL; @@ -749,7 +863,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, d_params->packet_printer = cs_etm__packet_dump; d_params->operation = mode; d_params->data = etmq; - d_params->formatted = formatted; + d_params->formatted = etmq->format == FORMATTED; d_params->fsyncs = false; d_params->hsyncs = false; d_params->frame_aligned = true; @@ -788,7 +902,7 @@ static void cs_etm__dump_event(struct cs_etm_queue *etmq, } static int cs_etm__flush_events(struct 
perf_session *session, - struct perf_tool *tool) + const struct perf_tool *tool) { struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -850,6 +964,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) static void cs_etm__free_queue(void *priv) { + struct int_node *inode, *tmp; struct cs_etm_queue *etmq = priv; if (!etmq) @@ -857,6 +972,16 @@ static void cs_etm__free_queue(void *priv) cs_etm_decoder__free(etmq->decoder); cs_etm__free_traceid_queues(etmq); + + if (etmq->own_traceid_list) { + /* First remove all traceID/metadata nodes for the RB tree */ + intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list) + intlist__remove(etmq->own_traceid_list, inode); + + /* Then the RB tree itself */ + intlist__delete(etmq->own_traceid_list); + } + free(etmq); } @@ -879,19 +1004,12 @@ static void cs_etm__free_events(struct perf_session *session) static void cs_etm__free(struct perf_session *session) { int i; - struct int_node *inode, *tmp; struct cs_etm_auxtrace *aux = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); cs_etm__free_events(session); session->auxtrace = NULL; - /* First remove all traceID/metadata nodes for the RB tree */ - intlist__for_each_entry_safe(inode, tmp, traceid_list) - intlist__remove(traceid_list, inode); - /* Then the RB tree itself */ - intlist__delete(traceid_list); - for (i = 0; i < aux->num_cpu; i++) zfree(&aux->metadata[i]); @@ -1013,7 +1131,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, if (!dso) goto out; - if (dso->data.status == DSO_DATA_STATUS_ERROR && + if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR && dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) goto out; @@ -1027,11 +1145,11 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, if (len <= 0) { ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n" " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n"); - if (!dso->auxtrace_warned) { + if (!dso__auxtrace_warned(dso)) { pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n", - address, - dso->long_name ? dso->long_name : "Unknown"); - dso->auxtrace_warned = true; + address, + dso__long_name(dso) ? dso__long_name(dso) : "Unknown"); + dso__set_auxtrace_warned(dso); } goto out; } @@ -1041,19 +1159,9 @@ out: return ret; } -static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, - bool formatted, int sample_cpu) +static struct cs_etm_queue *cs_etm__alloc_queue(void) { - struct cs_etm_decoder_params d_params; - struct cs_etm_trace_params *t_params = NULL; - struct cs_etm_queue *etmq; - /* - * Each queue can only contain data from one CPU when unformatted, so only one decoder is - * needed. - */ - int decoders = formatted ? etm->num_cpu : 1; - - etmq = zalloc(sizeof(*etmq)); + struct cs_etm_queue *etmq = zalloc(sizeof(*etmq)); if (!etmq) return NULL; @@ -1061,42 +1169,17 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, if (!etmq->traceid_queues_list) goto out_free; - /* Use metadata to fill in trace parameters for trace decoder */ - t_params = zalloc(sizeof(*t_params) * decoders); - - if (!t_params) - goto out_free; - - if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, decoders)) - goto out_free; - - /* Set decoder parameters to decode trace packets */ - if (cs_etm__init_decoder_params(&d_params, etmq, - dump_trace ? 
CS_ETM_OPERATION_PRINT : - CS_ETM_OPERATION_DECODE, - formatted)) - goto out_free; - - etmq->decoder = cs_etm_decoder__new(decoders, &d_params, - t_params); - - if (!etmq->decoder) - goto out_free; - /* - * Register a function to handle all memory accesses required by - * the trace decoder library. + * Create an RB tree for traceID-metadata tuple. Since the conversion + * has to be made for each packet that gets decoded, optimizing access + * in anything other than a sequential array is worth doing. */ - if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, - 0x0L, ((u64) -1L), - cs_etm__mem_access)) - goto out_free_decoder; + etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL); + if (!etmq->traceid_list) + goto out_free; - zfree(&t_params); return etmq; -out_free_decoder: - cs_etm_decoder__free(etmq->decoder); out_free: intlist__delete(etmq->traceid_queues_list); free(etmq); @@ -1106,16 +1189,14 @@ out_free: static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue, - unsigned int queue_nr, - bool formatted, - int sample_cpu) + unsigned int queue_nr) { struct cs_etm_queue *etmq = queue->priv; - if (list_empty(&queue->head) || etmq) + if (etmq) return 0; - etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu); + etmq = cs_etm__alloc_queue(); if (!etmq) return -ENOMEM; @@ -1123,7 +1204,9 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, queue->priv = etmq; etmq->etm = etm; etmq->queue_nr = queue_nr; + queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */ etmq->offset = 0; + etmq->sink_id = SINK_UNSET; return 0; } @@ -1267,8 +1350,12 @@ static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) { - /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ - if (packet->sample_type == CS_ETM_DISCONTINUITY) + /* + * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't + * appear in samples. 
+ */ + if (packet->sample_type == CS_ETM_DISCONTINUITY || + packet->sample_type == CS_ETM_EXCEPTION) return 0; return packet->start_addr; @@ -1480,8 +1567,9 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, int ret = 0; struct cs_etm_auxtrace *etm = etmq->etm; union perf_event *event = tidq->event_buf; - struct perf_sample sample = {.ip = 0,}; + struct perf_sample sample; + perf_sample__init(&sample, /*all=*/true); event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el); event->sample.header.size = sizeof(struct perf_event_header); @@ -1518,6 +1606,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, "CS ETM Trace: failed to deliver instruction event, error %d\n", ret); + perf_sample__exit(&sample); return ret; } @@ -1595,35 +1684,6 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, return ret; } -struct cs_etm_synth { - struct perf_tool dummy_tool; - struct perf_session *session; -}; - -static int cs_etm__event_synth(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample __maybe_unused, - struct machine *machine __maybe_unused) -{ - struct cs_etm_synth *cs_etm_synth = - container_of(tool, struct cs_etm_synth, dummy_tool); - - return perf_session__deliver_synth_event(cs_etm_synth->session, - event, NULL); -} - -static int cs_etm__synth_event(struct perf_session *session, - struct perf_event_attr *attr, u64 id) -{ - struct cs_etm_synth cs_etm_synth; - - memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth)); - cs_etm_synth.session = session; - - return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1, - &id, cs_etm__event_synth); -} - static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, struct perf_session *session) { @@ -1666,16 +1726,13 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, attr.read_format = evsel->core.attr.read_format; /* create new id val to be a fixed offset from evsel id */ - id = evsel->core.id[0] + 1000000000; - - if (!id) - id = 1; + id = auxtrace_synth_id_range_start(evsel); if (etm->synth_opts.branches) { attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; attr.sample_period = 1; attr.sample_type |= PERF_SAMPLE_ADDR; - err = cs_etm__synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; etm->branches_sample_type = attr.sample_type; @@ -1698,7 +1755,7 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, attr.config = PERF_COUNT_HW_INSTRUCTIONS; attr.sample_period = etm->synth_opts.period; etm->instructions_sample_period = attr.sample_period; - err = cs_etm__synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; etm->instructions_sample_type = attr.sample_type; @@ -2252,7 +2309,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, PERF_IP_FLAG_TRACE_END; break; case CS_ETM_EXCEPTION: - ret = cs_etm__get_magic(packet->trace_chan_id, &magic); + ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic); if (ret) return ret; @@ -2439,12 +2496,6 @@ static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq) /* Ignore return value */ cs_etm__process_traceid_queue(etmq, tidq); - - /* - * Generate an instruction sample with the remaining - * branchstack entries. 
- */ - cs_etm__flush(etmq, tidq); } } @@ -2587,7 +2638,7 @@ static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm) while (1) { if (!etm->heap.heap_cnt) - goto out; + break; /* Take the entry at the top of the min heap */ cs_queue_nr = etm->heap.heap_array[0].queue_nr; @@ -2670,6 +2721,23 @@ refetch: ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); } + for (i = 0; i < etm->queues.nr_queues; i++) { + struct int_node *inode; + + etmq = etm->queues.queue_array[i].priv; + if (!etmq) + continue; + + intlist__for_each_entry(inode, etmq->traceid_queues_list) { + int idx = (int)(intptr_t)inode->priv; + + /* Flush any remaining branch stack entries */ + tidq = etmq->traceid_queues[idx]; + ret = cs_etm__end_block(etmq, tidq); + if (ret) + return ret; + } + } out: return ret; } @@ -2740,7 +2808,7 @@ static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm, static int cs_etm__process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, - struct perf_tool *tool) + const struct perf_tool *tool) { struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -2810,7 +2878,7 @@ static void dump_queued_data(struct cs_etm_auxtrace *etm, static int cs_etm__process_auxtrace_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -2836,17 +2904,6 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, if (err) return err; - /* - * Knowing if the trace is formatted or not requires a lookup of - * the aux record so only works in non-piped mode where data is - * queued in cs_etm__queue_aux_records(). Always assume - * formatted in piped mode (true). - */ - err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], - idx, true, -1); - if (err) - return err; - if (dump_trace) if (auxtrace_buffer__get_data(buffer, fd)) { cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer); @@ -2963,8 +3020,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o struct perf_record_auxtrace *auxtrace_event; union perf_event auxtrace_fragment; __u64 aux_offset, aux_size; - __u32 idx; - bool formatted; + enum cs_etm_format format; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -3030,6 +3086,8 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o if (aux_offset >= auxtrace_event->offset && aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) { + struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv; + /* * If this AUX event was inside this buffer somewhere, create a new auxtrace event * based on the sizes of the aux event, and queue that fragment. @@ -3046,10 +3104,14 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o if (err) return err; - idx = auxtrace_event->idx; - formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW); - return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], - idx, formatted, sample->cpu); + format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ? 
+ UNFORMATTED : FORMATTED; + if (etmq->format != UNSET && format != etmq->format) { + pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n"); + return -EINVAL; + } + etmq->format = format; + return 0; } /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ @@ -3095,9 +3157,10 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf evsel = evlist__event2evsel(session->evlist, event); if (!evsel) return -EINVAL; + perf_sample__init(&sample, /*all=*/false); ret = evsel__parse_sample(evsel, event, &sample); if (ret) - return ret; + goto out; /* * Loop through the auxtrace index to find the buffer that matches up with this aux event. @@ -3112,7 +3175,7 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf * 1 ('not found') */ if (ret != 1) - return ret; + goto out; } } @@ -3122,7 +3185,10 @@ static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf */ pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64 " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu); - return 0; + ret = 0; +out: + perf_sample__exit(&sample); + return ret; } static int cs_etm__queue_aux_records(struct perf_session *session) @@ -3175,7 +3241,8 @@ static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu) } /* map trace ids to correct metadata block, from information in metadata */ -static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata) +static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu, + u64 **metadata) { u64 cs_etm_magic; u8 trace_chan_id; @@ -3197,7 +3264,7 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata) /* unknown magic number */ return -EINVAL; } - err = cs_etm__map_trace_id(trace_chan_id, metadata[i]); + err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]); if (err) return err; } @@ -3205,30 +3272,85 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata) } /* - * If we found AUX_HW_ID packets, then set any metadata marked as unused to the - * unused value to reduce the number of unneeded decoders created. + * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX + * (formatted or not) packets to create the decoders. */ -static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata) +static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq) { - u64 cs_etm_magic; - int i; + struct cs_etm_decoder_params d_params; + struct cs_etm_trace_params *t_params; + int decoders = intlist__nr_entries(etmq->traceid_list); - for (i = 0; i < num_cpu; i++) { - cs_etm_magic = metadata[i][CS_ETM_MAGIC]; - switch (cs_etm_magic) { - case __perf_cs_etmv3_magic: - if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG) - metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL; - break; - case __perf_cs_etmv4_magic: - case __perf_cs_ete_magic: - if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG) - metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL; - break; - default: - /* unknown magic number */ - return -EINVAL; - } + if (decoders == 0) + return 0; + + /* + * Each queue can only contain data from one CPU when unformatted, so only one decoder is + * needed. 
+ */ + if (etmq->format == UNFORMATTED) + assert(decoders == 1); + + /* Use metadata to fill in trace parameters for trace decoder */ + t_params = zalloc(sizeof(*t_params) * decoders); + + if (!t_params) + goto out_free; + + if (cs_etm__init_trace_params(t_params, etmq)) + goto out_free; + + /* Set decoder parameters to decode trace packets */ + if (cs_etm__init_decoder_params(&d_params, etmq, + dump_trace ? CS_ETM_OPERATION_PRINT : + CS_ETM_OPERATION_DECODE)) + goto out_free; + + etmq->decoder = cs_etm_decoder__new(decoders, &d_params, + t_params); + + if (!etmq->decoder) + goto out_free; + + /* + * Register a function to handle all memory accesses required by + * the trace decoder library. + */ + if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, + 0x0L, ((u64) -1L), + cs_etm__mem_access)) + goto out_free_decoder; + + zfree(&t_params); + return 0; + +out_free_decoder: + cs_etm_decoder__free(etmq->decoder); +out_free: + zfree(&t_params); + return -EINVAL; +} + +static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm) +{ + struct auxtrace_queues *queues = &etm->queues; + + for (unsigned int i = 0; i < queues->nr_queues; i++) { + bool empty = list_empty(&queues->queue_array[i].head); + struct cs_etm_queue *etmq = queues->queue_array[i].priv; + int ret; + + /* + * Don't create decoders for empty queues, mainly because + * etmq->format is unknown for empty queues. + */ + assert(empty || etmq->format != UNSET); + if (empty) + continue; + + ret = cs_etm__create_queue_decoders(etmq); + if (ret) + return ret; } return 0; } @@ -3242,30 +3364,19 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, int event_header_size = sizeof(struct perf_event_header); int total_size = auxtrace_info->header.size; int priv_size = 0; - int num_cpu; + int num_cpu, max_cpu = 0; int err = 0; int aux_hw_id_found; - int i, j; + int i; u64 *ptr = NULL; u64 **metadata = NULL; - /* - * Create an RB tree for traceID-metadata tuple. Since the conversion - * has to be made for each packet that gets decoded, optimizing access - * in anything other than a sequential array is worth doing. - */ - traceid_list = intlist__new(NULL); - if (!traceid_list) - return -ENOMEM; - /* First the global part */ ptr = (u64 *) auxtrace_info->priv; num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff; metadata = zalloc(sizeof(*metadata) * num_cpu); - if (!metadata) { - err = -ENOMEM; - goto err_free_traceid_list; - } + if (!metadata) + return -ENOMEM; /* Start parsing after the common part of the header */ i = CS_HEADER_VERSION_MAX; @@ -3276,7 +3387,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, * required by the trace decoder to properly decode the trace due * to its highly compressed nature. 
*/ - for (j = 0; j < num_cpu; j++) { + for (int j = 0; j < num_cpu; j++) { if (ptr[i] == __perf_cs_etmv3_magic) { metadata[j] = cs_etm__create_meta_blk(ptr, &i, @@ -3300,6 +3411,9 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, err = -ENOMEM; goto err_free_metadata; } + + if ((int) metadata[j][CS_ETM_CPU] > max_cpu) + max_cpu = metadata[j][CS_ETM_CPU]; } /* @@ -3329,10 +3443,16 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, */ etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]); - err = auxtrace_queues__init(&etm->queues); + err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1); if (err) goto err_free_etm; + for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) { + err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j); + if (err) + goto err_free_queues; + } + if (session->itrace_synth_opts->set) { etm->synth_opts = *session->itrace_synth_opts; } else { @@ -3396,12 +3516,16 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, if (err) goto err_free_queues; + err = cs_etm__queue_aux_records(session); + if (err) + goto err_free_queues; + /* * Map Trace ID values to CPU metadata. * - * Trace metadata will always contain Trace ID values from the legacy algorithm. If the - * files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata - * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set. + * Trace metadata will always contain Trace ID values from the legacy algorithm + * in case it's read by a version of Perf that doesn't know about HW_ID packets + * or the kernel doesn't emit them. * * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use * the same IDs as the old algorithm as far as is possible, unless there are clashes @@ -3410,15 +3534,14 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, * * For a perf able to interpret AUX_HW_ID packets we first check for the presence of * those packets. If they are there then the values will be mapped and plugged into - * the metadata. We then set any remaining metadata values with the used flag to a - * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required. + * the metadata and decoders are only created for each mapping received. * * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel - * then we map Trace ID values to CPU directly from the metadata - clearing any unused - * flags if present. + * then we map Trace ID values to CPU directly from the metadata and create decoders + * for all mappings. 
*/ - /* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */ + /* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */ aux_hw_id_found = 0; err = perf_session__peek_events(session, session->header.data_offset, session->header.data_size, @@ -3426,17 +3549,14 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event, if (err) goto err_free_queues; - /* if HW ID found then clear any unused metadata ID values */ - if (aux_hw_id_found) - err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata); - /* otherwise, this is a file with metadata values only, map from metadata */ - else - err = cs_etm__map_trace_ids_metadata(num_cpu, metadata); - - if (err) - goto err_free_queues; + /* if no HW ID found this is a file with metadata values only, map from metadata */ + if (!aux_hw_id_found) { + err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata); + if (err) + goto err_free_queues; + } - err = cs_etm__queue_aux_records(session); + err = cs_etm__create_decoders(etm); if (err) goto err_free_queues; @@ -3450,10 +3570,8 @@ err_free_etm: zfree(&etm); err_free_metadata: /* No need to check @metadata[j], free(NULL) is supported */ - for (j = 0; j < num_cpu; j++) + for (int j = 0; j < num_cpu; j++) zfree(&metadata[j]); zfree(&metadata); -err_free_traceid_list: - intlist__delete(traceid_list); return err; } diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 4696267a32f0..a8caeea720aa 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -230,16 +230,6 @@ struct cs_etm_packet_queue { /* CoreSight trace ID is currently the bottom 7 bits of the value */ #define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0) -/* - * perf record will set the legacy meta data values as unused initially. - * This allows perf report to manage the decoders created when dynamic - * allocation in operation. - */ -#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31) - -/* Value to set for unused trace ID values */ -#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F - int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); void cs_etm_get_default_config(const struct perf_pmu *pmu, struct perf_event_attr *attr); @@ -252,7 +242,7 @@ enum cs_etm_pid_fmt { #ifdef HAVE_CSTRACE_SUPPORT #include <opencsd/ocsd_if_types.h> -int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); +int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu); enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq); int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid, u8 trace_chan_id, ocsd_ex_level el); diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 2b732bccabad..3d2e437e1354 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -36,7 +36,7 @@ #include "util/sample.h" #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #define pr_N(n, fmt, ...) 
\ @@ -426,8 +426,9 @@ static int add_tracepoint_values(struct ctf_writer *cw, struct evsel *evsel, struct perf_sample *sample) { - struct tep_format_field *common_fields = evsel->tp_format->format.common_fields; - struct tep_format_field *fields = evsel->tp_format->format.fields; + const struct tep_event *tp_format = evsel__tp_format(evsel); + struct tep_format_field *common_fields = tp_format->format.common_fields; + struct tep_format_field *fields = tp_format->format.fields; int ret; ret = add_tracepoint_fields_values(cw, event_class, event, @@ -792,7 +793,7 @@ static bool is_flush_needed(struct ctf_stream *cs) return cs->count >= STREAM_FLUSH_COUNT; } -static int process_sample_event(struct perf_tool *tool, +static int process_sample_event(const struct perf_tool *tool, union perf_event *_event, struct perf_sample *sample, struct evsel *evsel, @@ -871,7 +872,7 @@ do { \ } while(0) #define __FUNC_PROCESS_NON_SAMPLE(_name, body) \ -static int process_##_name##_event(struct perf_tool *tool, \ +static int process_##_name##_event(const struct perf_tool *tool, \ union perf_event *_event, \ struct perf_sample *sample, \ struct machine *machine) \ @@ -1064,8 +1065,9 @@ static int add_tracepoint_types(struct ctf_writer *cw, struct evsel *evsel, struct bt_ctf_event_class *class) { - struct tep_format_field *common_fields = evsel->tp_format->format.common_fields; - struct tep_format_field *fields = evsel->tp_format->format.fields; + const struct tep_event *tp_format = evsel__tp_format(evsel); + struct tep_format_field *common_fields = tp_format ? tp_format->format.common_fields : NULL; + struct tep_format_field *fields = tp_format ? tp_format->format.fields : NULL; int ret; ret = add_tracepoint_fields_types(cw, common_fields, class); @@ -1336,14 +1338,14 @@ static void cleanup_events(struct perf_session *session) static int setup_streams(struct ctf_writer *cw, struct perf_session *session) { struct ctf_stream **stream; - struct perf_header *ph = &session->header; + struct perf_env *env = perf_session__env(session); int ncpus; /* * Try to get the number of cpus used in the data file, * if not present fallback to the MAX_CPUS. */ - ncpus = ph->env.nr_cpus_avail ?: MAX_CPUS; + ncpus = env->nr_cpus_avail ?: MAX_CPUS; stream = zalloc(sizeof(*stream) * ncpus); if (!stream) { @@ -1369,7 +1371,7 @@ static void free_streams(struct ctf_writer *cw) static int ctf_writer__setup_env(struct ctf_writer *cw, struct perf_session *session) { - struct perf_header *header = &session->header; + struct perf_env *env = perf_session__env(session); struct bt_ctf_writer *writer = cw->writer; #define ADD(__n, __v) \ @@ -1378,11 +1380,11 @@ do { \ return -1; \ } while (0) - ADD("host", header->env.hostname); + ADD("host", env->hostname); ADD("sysname", "Linux"); - ADD("release", header->env.os_release); - ADD("version", header->env.version); - ADD("machine", header->env.arch); + ADD("release", env->os_release); + ADD("version", env->version); + ADD("machine", env->arch); ADD("domain", "kernel"); ADD("tracer_name", "perf"); @@ -1399,7 +1401,7 @@ static int ctf_writer__setup_clock(struct ctf_writer *cw, int64_t offset = 0; if (tod) { - struct perf_env *env = &session->header.env; + struct perf_env *env = perf_session__env(session); if (!env->clock.enabled) { pr_err("Can't provide --tod time, missing clock data. 
" @@ -1607,25 +1609,23 @@ int bt_convert__perf2ctf(const char *input, const char *path, .mode = PERF_DATA_MODE_READ, .force = opts->force, }; - struct convert c = { - .tool = { - .sample = process_sample_event, - .mmap = perf_event__process_mmap, - .mmap2 = perf_event__process_mmap2, - .comm = perf_event__process_comm, - .exit = perf_event__process_exit, - .fork = perf_event__process_fork, - .lost = perf_event__process_lost, - .tracing_data = perf_event__process_tracing_data, - .build_id = perf_event__process_build_id, - .namespaces = perf_event__process_namespaces, - .ordered_events = true, - .ordering_requires_timestamps = true, - }, - }; + struct convert c = {}; struct ctf_writer *cw = &c.writer; int err; + perf_tool__init(&c.tool, /*ordered_events=*/true); + c.tool.sample = process_sample_event; + c.tool.mmap = perf_event__process_mmap; + c.tool.mmap2 = perf_event__process_mmap2; + c.tool.comm = perf_event__process_comm; + c.tool.exit = perf_event__process_exit; + c.tool.fork = perf_event__process_fork; + c.tool.lost = perf_event__process_lost; + c.tool.tracing_data = perf_event__process_tracing_data; + c.tool.build_id = perf_event__process_build_id; + c.tool.namespaces = perf_event__process_namespaces; + c.tool.ordering_requires_timestamps = true; + if (opts->all) { c.tool.comm = process_comm_event; c.tool.exit = process_exit_event; diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c index 3cf64f5b23ee..9dc1e184cf3c 100644 --- a/tools/perf/util/data-convert-json.c +++ b/tools/perf/util/data-convert-json.c @@ -28,7 +28,7 @@ #include "util/tool.h" #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif struct convert_json { @@ -118,7 +118,7 @@ static void output_json_key_format(FILE *out, bool comma, int depth, va_end(args); } -static void output_sample_callchain_entry(struct perf_tool *tool, +static void output_sample_callchain_entry(const struct perf_tool *tool, u64 ip, struct addr_location *al) { struct convert_json *c = container_of(tool, struct convert_json, tool); @@ -146,7 +146,7 @@ static void output_sample_callchain_entry(struct perf_tool *tool, output_json_format(out, false, 4, "}"); } -static int process_sample_event(struct perf_tool *tool, +static int process_sample_event(const struct perf_tool *tool, union perf_event *event __maybe_unused, struct perf_sample *sample, struct evsel *evsel __maybe_unused, @@ -230,12 +230,12 @@ static int process_sample_event(struct perf_tool *tool, #ifdef HAVE_LIBTRACEEVENT if (sample->raw_data) { - int i; - struct tep_format_field **fields; + struct tep_event *tp_format = evsel__tp_format(evsel); + struct tep_format_field **fields = tp_format ? 
tep_event_fields(tp_format) : NULL; - fields = tep_event_fields(evsel->tp_format); if (fields) { - i = 0; + int i = 0; + while (fields[i]) { struct trace_seq s; @@ -257,7 +257,8 @@ static int process_sample_event(struct perf_tool *tool, static void output_headers(struct perf_session *session, struct convert_json *c) { struct stat st; - struct perf_header *header = &session->header; + const struct perf_header *header = &session->header; + const struct perf_env *env = perf_session__env(session); int ret; int fd = perf_data__fd(session->data); int i; @@ -280,32 +281,32 @@ static void output_headers(struct perf_session *session, struct convert_json *c) output_json_key_format(out, true, 2, "data-size", "%" PRIu64, header->data_size); output_json_key_format(out, true, 2, "feat-offset", "%" PRIu64, header->feat_offset); - output_json_key_string(out, true, 2, "hostname", header->env.hostname); - output_json_key_string(out, true, 2, "os-release", header->env.os_release); - output_json_key_string(out, true, 2, "arch", header->env.arch); + output_json_key_string(out, true, 2, "hostname", env->hostname); + output_json_key_string(out, true, 2, "os-release", env->os_release); + output_json_key_string(out, true, 2, "arch", env->arch); - if (header->env.cpu_desc) - output_json_key_string(out, true, 2, "cpu-desc", header->env.cpu_desc); + if (env->cpu_desc) + output_json_key_string(out, true, 2, "cpu-desc", env->cpu_desc); - output_json_key_string(out, true, 2, "cpuid", header->env.cpuid); - output_json_key_format(out, true, 2, "nrcpus-online", "%u", header->env.nr_cpus_online); - output_json_key_format(out, true, 2, "nrcpus-avail", "%u", header->env.nr_cpus_avail); + output_json_key_string(out, true, 2, "cpuid", env->cpuid); + output_json_key_format(out, true, 2, "nrcpus-online", "%u", env->nr_cpus_online); + output_json_key_format(out, true, 2, "nrcpus-avail", "%u", env->nr_cpus_avail); - if (header->env.clock.enabled) { + if (env->clock.enabled) { output_json_key_format(out, true, 2, "clockid", - "%u", header->env.clock.clockid); + "%u", env->clock.clockid); output_json_key_format(out, true, 2, "clock-time", - "%" PRIu64, header->env.clock.clockid_ns); + "%" PRIu64, env->clock.clockid_ns); output_json_key_format(out, true, 2, "real-time", - "%" PRIu64, header->env.clock.tod_ns); + "%" PRIu64, env->clock.tod_ns); } - output_json_key_string(out, true, 2, "perf-version", header->env.version); + output_json_key_string(out, true, 2, "perf-version", env->version); output_json_key_format(out, true, 2, "cmdline", "["); - for (i = 0; i < header->env.nr_cmdline; i++) { + for (i = 0; i < env->nr_cmdline; i++) { output_json_delimiters(out, i != 0, 3); - output_json_string(c->out, header->env.cmdline_argv[i]); + output_json_string(c->out, env->cmdline_argv[i]); } output_json_format(out, false, 2, "]"); } @@ -316,39 +317,36 @@ int bt_convert__perf2json(const char *input_name, const char *output_name, struct perf_session *session; int fd; int ret = -1; - struct convert_json c = { - .tool = { - .sample = process_sample_event, - .mmap = perf_event__process_mmap, - .mmap2 = perf_event__process_mmap2, - .comm = perf_event__process_comm, - .namespaces = perf_event__process_namespaces, - .cgroup = perf_event__process_cgroup, - .exit = perf_event__process_exit, - .fork = perf_event__process_fork, - .lost = perf_event__process_lost, -#ifdef HAVE_LIBTRACEEVENT - .tracing_data = perf_event__process_tracing_data, -#endif - .build_id = perf_event__process_build_id, - .id_index = perf_event__process_id_index, - .auxtrace_info = 
perf_event__process_auxtrace_info, - .auxtrace = perf_event__process_auxtrace, - .event_update = perf_event__process_event_update, - .ordered_events = true, - .ordering_requires_timestamps = true, - }, .first = true, .events_count = 0, }; - struct perf_data data = { .mode = PERF_DATA_MODE_READ, .path = input_name, .force = opts->force, }; + perf_tool__init(&c.tool, /*ordered_events=*/true); + c.tool.sample = process_sample_event; + c.tool.mmap = perf_event__process_mmap; + c.tool.mmap2 = perf_event__process_mmap2; + c.tool.comm = perf_event__process_comm; + c.tool.namespaces = perf_event__process_namespaces; + c.tool.cgroup = perf_event__process_cgroup; + c.tool.exit = perf_event__process_exit; + c.tool.fork = perf_event__process_fork; + c.tool.lost = perf_event__process_lost; +#ifdef HAVE_LIBTRACEEVENT + c.tool.tracing_data = perf_event__process_tracing_data; +#endif + c.tool.build_id = perf_event__process_build_id; + c.tool.id_index = perf_event__process_id_index; + c.tool.auxtrace_info = perf_event__process_auxtrace_info; + c.tool.auxtrace = perf_event__process_auxtrace; + c.tool.event_update = perf_event__process_event_update; + c.tool.ordering_requires_timestamps = true; + if (opts->all) { pr_err("--all is currently unsupported for JSON output.\n"); goto err; @@ -379,8 +377,7 @@ int bt_convert__perf2json(const char *input_name, const char *output_name, fprintf(stderr, "Error creating perf session!\n"); goto err_fclose; } - - if (symbol__init(&session->header.env) < 0) { + if (symbol__init(perf_session__env(session)) < 0) { fprintf(stderr, "Symbol init error!\n"); goto err_session_delete; } diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 08c4bfbd817f..164eb45a0b36 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -158,26 +158,6 @@ out_err: return ret; } -int perf_data__update_dir(struct perf_data *data) -{ - int i; - - if (WARN_ON(!data->is_dir)) - return -EINVAL; - - for (i = 0; i < data->dir.nr; i++) { - struct perf_data_file *file = &data->dir.files[i]; - struct stat st; - - if (fstat(file->fd, &st)) - return -1; - - file->size = st.st_size; - } - - return 0; -} - static bool check_pipe(struct perf_data *data) { struct stat st; @@ -204,7 +184,12 @@ static bool check_pipe(struct perf_data *data) data->file.fd = fd; data->use_stdio = false; } - } else { + + /* + * When is_pipe and data->file.fd is given, use given fd + * instead of STDIN_FILENO or STDOUT_FILENO + */ + } else if (data->file.fd <= 0) { data->file.fd = fd; } } diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 110f3ebde30f..1438e32e0451 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -97,7 +97,6 @@ int perf_data__switch(struct perf_data *data, int perf_data__create_dir(struct perf_data *data, int nr); int perf_data__open_dir(struct perf_data *data); void perf_data__close_dir(struct perf_data *data); -int perf_data__update_dir(struct perf_data *data); unsigned long perf_data__size(struct perf_data *data); int perf_data__make_kcore_dir(struct perf_data *data, char *buf, size_t buf_sz); bool has_kcore_dir(const char *path); diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 50f916374d87..8f52e8cefcf3 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -181,7 +181,7 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al, if (al->map) { struct dso *dso = map__dso(al->map); - err = db_export__dso(dbe, dso, maps__machine(al->maps)); + err = db_export__dso(dbe, dso, 
maps__machine(thread__maps(al->thread))); if (err) return err; *dso_db_id = dso__db_id(dso); @@ -256,6 +256,7 @@ static struct call_path *call_path_from_sample(struct db_export *dbe, al.map = map__get(node->ms.map); al.maps = maps__get(thread__maps(thread)); al.addr = node->ip; + al.thread = thread__get(thread); if (al.map && !al.sym) al.sym = dso__find_symbol(map__dso(al.map), al.addr); @@ -358,14 +359,18 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, }; struct thread *main_thread; struct comm *comm = NULL; - struct machine *machine; + struct machine *machine = NULL; int err; + if (thread__maps(thread)) + machine = maps__machine(thread__maps(thread)); + if (!machine) + return -1; + err = db_export__evsel(dbe, evsel); if (err) return err; - machine = maps__machine(al->maps); err = db_export__machine(dbe, machine); if (err) return err; diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index d633d15329fa..1dfa4d0eec4d 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -14,11 +14,19 @@ #ifdef HAVE_BACKTRACE_SUPPORT #include <execinfo.h> #endif +#include "addr_location.h" #include "color.h" -#include "event.h" #include "debug.h" +#include "env.h" +#include "event.h" +#include "machine.h" +#include "map.h" #include "print_binary.h" +#include "srcline.h" +#include "symbol.h" +#include "synthetic-events.h" #include "target.h" +#include "thread.h" #include "trace-event.h" #include "ui/helpline.h" #include "ui/ui.h" @@ -27,7 +35,7 @@ #include <linux/ctype.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #else #define LIBTRACEEVENT_VERSION 0 #endif @@ -46,8 +54,8 @@ int debug_type_profile; FILE *debug_file(void) { if (!_debug_file) { - pr_warning_once("debug_file not set"); debug_set_file(stderr); + pr_warning_once("debug_file not set"); } return _debug_file; } @@ -298,21 +306,66 @@ void perf_debug_setup(void) libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper); } +void __dump_stack(FILE *file, void **stackdump, size_t stackdump_size) +{ + /* TODO: async safety. printf, malloc, etc. aren't safe inside a signal handler. */ + pid_t pid = getpid(); + struct machine *machine; + struct thread *thread = NULL; + struct perf_env host_env; + + perf_env__init(&host_env); + machine = machine__new_live(&host_env, /*kernel_maps=*/false, pid); + + if (machine) + thread = machine__find_thread(machine, pid, pid); + +#ifdef HAVE_BACKTRACE_SUPPORT + if (!machine || !thread) { + /* + * Backtrace functions are async signal safe. Fall back on them + * if machine/thread creation fails. + */ + backtrace_symbols_fd(stackdump, stackdump_size, fileno(file)); + machine__delete(machine); + perf_env__exit(&host_env); + return; + } +#endif + + for (size_t i = 0; i < stackdump_size; i++) { + struct addr_location al; + u64 addr = (u64)(uintptr_t)stackdump[i]; + bool printed = false; + + addr_location__init(&al); + if (thread && thread__find_map(thread, PERF_RECORD_MISC_USER, addr, &al)) { + al.sym = map__find_symbol(al.map, al.addr); + if (al.sym) { + fprintf(file, " #%zd %p in %s ", i, stackdump[i], al.sym->name); + printed = true; + } + } + if (!printed) + fprintf(file, " #%zd %p ", i, stackdump[i]); + + map__fprintf_srcline(al.map, al.addr, "", file); + fprintf(file, "\n"); + addr_location__exit(&al); + } + thread__put(thread); + machine__delete(machine); + perf_env__exit(&host_env); +} + /* Obtain a backtrace and print it to stdout. 
*/ #ifdef HAVE_BACKTRACE_SUPPORT void dump_stack(void) { - void *array[16]; - size_t size = backtrace(array, ARRAY_SIZE(array)); - char **strings = backtrace_symbols(array, size); - size_t i; - - printf("Obtained %zd stack frames.\n", size); - - for (i = 0; i < size; i++) - printf("%s\n", strings[i]); + void *stackdump[32]; + size_t size = backtrace(stackdump, ARRAY_SIZE(stackdump)); - free(strings); + __dump_stack(stdout, stackdump, size); } #else void dump_stack(void) {} diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index a4026d1fd6a3..6b737e195ce1 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -85,6 +85,7 @@ void debug_set_display_time(bool set); void perf_debug_setup(void); int perf_quiet_option(void); +void __dump_stack(FILE *file, void **stackdump, size_t stackdump_size); void dump_stack(void); void sighandler_dump_stack(int sig); diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c index 19acf4775d35..4a559b3e8cdc 100644 --- a/tools/perf/util/debuginfo.c +++ b/tools/perf/util/debuginfo.c @@ -103,15 +103,19 @@ struct debuginfo *debuginfo__new(const char *path) char buf[PATH_MAX], nil = '\0'; struct dso *dso; struct debuginfo *dinfo = NULL; - struct build_id bid; + struct build_id bid = { .size = 0}; /* Try to open distro debuginfo files */ dso = dso__new(path); if (!dso) goto out; - /* Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO */ - if (is_regular_file(path) && filename__read_build_id(path, &bid) > 0) + /* + * Set the build id for DSO_BINARY_TYPE__BUILDID_DEBUGINFO. Don't block + * in case the path isn't for a regular file. + */ + assert(!dso__has_build_id(dso)); + if (filename__read_build_id(path, &bid) > 0) dso__set_build_id(dso, &bid); for (type = distro_dwarf_types; @@ -125,8 +129,12 @@ struct debuginfo *debuginfo__new(const char *path) dso__put(dso); out: + if (dinfo) + return dinfo; + /* if failed to open all distro debuginfo, open given binary */ - return dinfo ?
: __debuginfo__new(path); + symbol__join_symfs(buf, path); + return __debuginfo__new(buf); } void debuginfo__delete(struct debuginfo *dbg) diff --git a/tools/perf/util/debuginfo.h b/tools/perf/util/debuginfo.h index 4d65b8c605fc..a52d69932815 100644 --- a/tools/perf/util/debuginfo.h +++ b/tools/perf/util/debuginfo.h @@ -5,7 +5,7 @@ #include <errno.h> #include <linux/compiler.h> -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include "dwarf-aux.h" @@ -25,7 +25,7 @@ void debuginfo__delete(struct debuginfo *dbg); int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, bool adjust_offset); -#else /* HAVE_DWARF_SUPPORT */ +#else /* HAVE_LIBDW_SUPPORT */ /* dummy debug information structure */ struct debuginfo { @@ -40,6 +40,8 @@ static inline void debuginfo__delete(struct debuginfo *dbg __maybe_unused) { } +typedef void Dwarf_Addr; + static inline int debuginfo__get_text_offset(struct debuginfo *dbg __maybe_unused, Dwarf_Addr *offs __maybe_unused, bool adjust_offset __maybe_unused) @@ -47,7 +49,7 @@ static inline int debuginfo__get_text_offset(struct debuginfo *dbg __maybe_unuse return -EINVAL; } -#endif /* HAVE_DWARF_SUPPORT */ +#endif /* HAVE_LIBDW_SUPPORT */ #ifdef HAVE_DEBUGINFOD_SUPPORT int get_source_from_debuginfod(const char *raw_path, const char *sbuild_id, diff --git a/tools/perf/util/demangle-cxx.h b/tools/perf/util/demangle-cxx.h index 26b5b66c0b4e..9359937a881a 100644 --- a/tools/perf/util/demangle-cxx.h +++ b/tools/perf/util/demangle-cxx.h @@ -2,6 +2,8 @@ #ifndef __PERF_DEMANGLE_CXX #define __PERF_DEMANGLE_CXX 1 +#include <stdbool.h> + #ifdef __cplusplus extern "C" { #endif diff --git a/tools/perf/util/demangle-rust-v0.c b/tools/perf/util/demangle-rust-v0.c new file mode 100644 index 000000000000..19924d85407d --- /dev/null +++ b/tools/perf/util/demangle-rust-v0.c @@ -0,0 +1,2042 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// The contents of this file come from the Rust rustc-demangle library, hosted +// in the <https://github.com/rust-lang/rustc-demangle> repository, licensed +// under "Apache-2.0 OR MIT". For copyright details, see +// <https://github.com/rust-lang/rustc-demangle/blob/main/README.md>. +// Please note that the file should be kept as close as possible to upstream. + +// Code for demangling Rust symbols. This code is mostly +// a line-by-line translation of the Rust code in `rustc-demangle`. 
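For orientation, the two mangling schemes this new file handles, as sample strings. The legacy sample reuses the hash from the example in the removed demangle-rust.c further below; the v0 sample is a hypothetical minimal symbol constructed for illustration, not taken from the sources.

/* Legacy scheme: "_ZN" (<len><ident>)+ "E", with $-escape sequences and a
 * trailing ::h<16 hex digits> hash element.
 */
static const char sample_legacy[] =
	"_ZN4core3ops4Drop4drop17hc68340e1baa4987aE"; /* => core::ops::Drop::drop */

/* v0 scheme (RFC 2603): "_R" prefix and a tagged path grammar with
 * base-62 integers and back references.
 */
static const char sample_v0[] =
	"_RNvNtC4core3ops4drop"; /* => core::ops::drop */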
+ +// you can find the latest version of this code in https://github.com/rust-lang/rustc-demangle + +#include <stdint.h> +#include <stddef.h> +#include <string.h> +#include <stdbool.h> +#include <sys/param.h> +#include <stdio.h> + +#include "demangle-rust-v0.h" + +#if defined(__GNUC__) || defined(__clang__) +#define NODISCARD __attribute__((warn_unused_result)) +#else +#define NODISCARD +#endif + +#define MAX_DEPTH 500 + +typedef enum { + DemangleOk, + DemangleInvalid, + DemangleRecursed, + DemangleBug, +} demangle_status; + +struct demangle_v0 { + const char *mangled; + size_t mangled_len; +}; + +struct demangle_legacy { + const char *mangled; + size_t mangled_len; + size_t elements; +}; + +// private version of memrchr to avoid _GNU_SOURCE +static void *demangle_memrchr(const void *s, int c, size_t n) { + const uint8_t *s_ = s; + for (; n != 0; n--) { + if (s_[n-1] == c) { + return (void*)&s_[n-1]; + } + } + return NULL; +} + + +static bool unicode_iscontrol(uint32_t ch) { + // this is *technically* a unicode table, but + // some unicode properties are simpler than you might think + return ch < 0x20 || (ch >= 0x7f && ch < 0xa0); +} + +// "good enough" tables, the only consequence is that when printing +// *constant strings*, some characters are printed as `\u{abcd}` rather than themselves. +// +// I'm leaving these here to allow easily replacing them with actual +// tables if desired. +static bool unicode_isprint(uint32_t ch) { + if (ch < 0x20) { + return false; + } + if (ch < 0x7f) { + return true; + } + return false; +} + +static bool unicode_isgraphemextend(uint32_t ch) { + (void)ch; + return false; +} + +static bool str_isascii(const char *s, size_t s_len) { + for (size_t i = 0; i < s_len; i++) { + if (s[i] & 0x80) { + return false; + } + } + + return true; +} + +typedef enum { + PunycodeOk, + PunycodeError +} punycode_status; + +struct parser { + // the parser assumes that `sym` has a safe "terminating byte". It might be NUL, + // but it might also be something else if a symbol is "truncated". + const char *sym; + size_t sym_len; + size_t next; + uint32_t depth; +}; + +struct printer { + demangle_status status; // if status == 0 parser is valid + struct parser parser; + char *out; // NULL for no output [in which case out_len is not decremented] + size_t out_len; + uint32_t bound_lifetime_depth; + bool alternate; +}; + +static NODISCARD overflow_status printer_print_path(struct printer *printer, bool in_value); +static NODISCARD overflow_status printer_print_type(struct printer *printer); +static NODISCARD overflow_status printer_print_const(struct printer *printer, bool in_value); + +static NODISCARD demangle_status try_parse_path(struct parser *parser) { + struct printer printer = { + DemangleOk, + *parser, + NULL, + SIZE_MAX, + 0, + false + }; + overflow_status ignore = printer_print_path(&printer, false); // can't fail since no output + (void)ignore; + *parser = printer.parser; + return printer.status; +} + +NODISCARD static demangle_status rust_demangle_v0_demangle(const char *s, size_t s_len, struct demangle_v0 *res, const char **rest) { + if (s_len > strlen(s)) { + // s_len only exists to shorten the string, this is not a buffer API + return DemangleInvalid; + } + + const char *inner; + size_t inner_len; + if (s_len >= 2 && !strncmp(s, "_R", strlen("_R"))) { + inner = s+2; + inner_len = s_len - 2; + } else if (s_len >= 1 && !strncmp(s, "R", strlen("R"))) { + // On Windows, dbghelp strips leading underscores, so we accept "R..." + // form too. 
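Since all three accepted prefixes wrap the same payload, here is one hypothetical symbol in each spelling:

/* hypothetical example; all three forms decode to "foo::bar" */
static const char *v0_prefix_forms[] = {
	"_RNvC3foo3bar",	/* regular ELF spelling */
	"RNvC3foo3bar",		/* Windows, after dbghelp strips the '_' */
	"__RNvC3foo3bar",	/* OSX, with the extra leading '_' */
};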
+ inner = s+1; + inner_len = s_len - 1; + } else if (s_len >= 3 && !strncmp(s, "__R", strlen("__R"))) { + // On OSX, symbols are prefixed with an extra _ + inner = s+3; + inner_len = s_len - 3; + } else { + return DemangleInvalid; + } + + // Paths always start with uppercase characters. + if (*inner < 'A' || *inner > 'Z') { + return DemangleInvalid; + } + + if (!str_isascii(inner, inner_len)) { + return DemangleInvalid; + } + + struct parser parser = { inner, inner_len, 0, 0 }; + + demangle_status status = try_parse_path(&parser); + if (status != DemangleOk) return status; + char next = parser.sym[parser.next]; + + // Instantiating crate (paths always start with uppercase characters). + if (parser.next < parser.sym_len && next >= 'A' && next <= 'Z') { + status = try_parse_path(&parser); + if (status != DemangleOk) return status; + } + + res->mangled = inner; + res->mangled_len = inner_len; + if (rest) { + *rest = parser.sym + parser.next; + } + + return DemangleOk; +} + +// This might require `len` to be up to 3 characters bigger than the real output len in case of utf-8 +NODISCARD static overflow_status rust_demangle_v0_display_demangle(struct demangle_v0 res, char *out, size_t len, bool alternate) { + struct printer printer = { + DemangleOk, + { + res.mangled, + res.mangled_len, + 0, + 0 + }, + out, + len, + 0, + alternate + }; + if (printer_print_path(&printer, true) == OverflowOverflow) { + return OverflowOverflow; + } + if (printer.out_len < OVERFLOW_MARGIN) { + return OverflowOverflow; + } + *printer.out = '\0'; + return OverflowOk; +} + +static size_t code_to_utf8(unsigned char *buffer, uint32_t code) +{ + if (code <= 0x7F) { + buffer[0] = code; + return 1; + } + if (code <= 0x7FF) { + buffer[0] = 0xC0 | (code >> 6); /* 110xxxxx */ + buffer[1] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + return 2; + } + if (code <= 0xFFFF) { + buffer[0] = 0xE0 | (code >> 12); /* 1110xxxx */ + buffer[1] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ + buffer[2] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + return 3; + } + if (code <= 0x10FFFF) { + buffer[0] = 0xF0 | (code >> 18); /* 11110xxx */ + buffer[1] = 0x80 | ((code >> 12) & 0x3F); /* 10xxxxxx */ + buffer[2] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ + buffer[3] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + return 4; + } + return 0; +} + + +// return length of char at byte, or SIZE_MAX if invalid. 
buf should have 4 valid characters +static NODISCARD size_t utf8_next_char(uint8_t *s, uint32_t *ch) { + uint8_t byte = *s; + // UTF8-1 = %x00-7F + // UTF8-2 = %xC2-DF UTF8-tail + // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / + // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) + // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / + // %xF4 %x80-8F 2( UTF8-tail ) + if (byte < 0x80) { + *ch = byte; + return 1; + } else if (byte < 0xc2) { + return SIZE_MAX; + } else if (byte < 0xe0) { + if (s[1] >= 0x80 && s[1] < 0xc0) { + *ch = ((byte&0x1f)<<6) + (s[1] & 0x3f); + return 2; + } + return SIZE_MAX; + } if (byte < 0xf0) { + if (!(s[1] >= 0x80 && s[1] < 0xc0) || !(s[2] >= 0x80 && s[2] < 0xc0)) { + return SIZE_MAX; // basic validation + } + if (byte == 0xe0 && s[1] < 0xa0) { + return SIZE_MAX; // overshort + } + if (byte == 0xed && s[1] >= 0xa0) { + return SIZE_MAX; // surrogate + } + *ch = ((byte&0x0f)<<12) + ((s[1] & 0x3f)<<6) + (s[2] & 0x3f); + return 3; + } else if (byte < 0xf5) { + if (!(s[1] >= 0x80 && s[1] < 0xc0) || !(s[2] >= 0x80 && s[2] < 0xc0) || !(s[3] >= 0x80 && s[3] < 0xc0)) { + return SIZE_MAX; // basic validation + } + if (byte == 0xf0 && s[1] < 0x90) { + return SIZE_MAX; // overshort + } + if (byte == 0xf4 && s[1] >= 0x90) { + return SIZE_MAX; // over max + } + *ch = ((byte&0x07)<<18) + ((s[1] & 0x3f)<<12) + ((s[2] & 0x3f)<<6) + (s[3]&0x3f); + return 4; + } else { + return SIZE_MAX; + } +} + +static NODISCARD bool validate_char(uint32_t n) { + return ((n ^ 0xd800) - 0x800) < 0x110000 - 0x800; +} + +#define SMALL_PUNYCODE_LEN 128 + +static NODISCARD punycode_status punycode_decode(const char *start, size_t ascii_len, const char *punycode_start, size_t punycode_len, uint32_t (*out_)[SMALL_PUNYCODE_LEN], size_t *out_len) { + uint32_t *out = *out_; + + if (punycode_len == 0) { + return PunycodeError; + } + + if (ascii_len > SMALL_PUNYCODE_LEN) { + return PunycodeError; + } + for (size_t i = 0; i < ascii_len; i++) { + out[i] = start[i]; + } + size_t len = ascii_len; + + size_t base = 36, t_min = 1, t_max = 26, skew = 38, damp = 700, bias = 72, i = 0, n = 0x80; + for (;;) { + size_t delta = 0, w = 1, k = 0; + for (;;) { + k += base; + size_t biased = k < bias ? 0 : k - bias; + size_t t = MIN(MAX(biased, t_min), t_max); + size_t d; + if (punycode_len == 0) { + return PunycodeError; + } + char nx = *punycode_start++; + punycode_len--; + if ('a' <= nx && nx <= 'z') { + d = nx - 'a'; + } else if ('0' <= nx && nx <= '9') { + d = 26 + (nx - '0'); + } else { + return PunycodeError; + } + if (w == 0 || d > SIZE_MAX / w || d*w > SIZE_MAX - delta) { + return PunycodeError; + } + delta += d * w; + if (d < t) { + break; + } + if (base < t || w == 0 || (base - t) > SIZE_MAX / w) { + return PunycodeError; + } + w *= (base - t); + } + + len += 1; + if (i > SIZE_MAX - delta) { + return PunycodeError; + } + i += delta; + if (n > SIZE_MAX - i / len) { + return PunycodeError; + } + n += i / len; + i %= len; + + // char validation + if (n > UINT32_MAX || !validate_char((uint32_t)n)) { + return PunycodeError; + } + + // insert new character + if (len > SMALL_PUNYCODE_LEN) { + return PunycodeError; + } + memmove(out + i + 1, out + i, (len - i - 1) * sizeof(uint32_t)); + out[i] = (uint32_t)n; + + // start i index at incremented position + i++; + + // If there are no more deltas, decoding is complete. + if (punycode_len == 0) { + *out_len = len; + return PunycodeOk; + } + + // Perform bias adaptation. 
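To make the decoder concrete: a Unicode identifier such as münchen arrives split into an ASCII skeleton plus a delta string, the same encoding IDN displays as xn--mnchen-3ya. A hypothetical driver, assuming the static helpers in this file are in scope:

/* hypothetical driver: decode the punycode pair for "münchen" */
static bool punycode_decode_example(void)
{
	uint32_t wide[SMALL_PUNYCODE_LEN];
	size_t wide_len;

	if (punycode_decode("mnchen", strlen("mnchen"), "3ya", strlen("3ya"),
			    &wide, &wide_len) != PunycodeOk)
		return false;
	/* seven code points; the inserted one is U+00FC 'ü' at index 1 */
	return wide_len == 7 && wide[1] == 0xfc;
}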
+ delta /= damp; + damp = 2; + + delta += delta / len; + k = 0; + while (delta > ((base - t_min) * t_max) / 2) { + delta /= base - t_min; + k += base; + } + bias = k + ((base - t_min + 1) * delta) / (delta + skew); + } +} + +struct ident { + const char *ascii_start; + size_t ascii_len; + const char *punycode_start; + size_t punycode_len; +}; + +static NODISCARD overflow_status display_ident(const char *ascii_start, size_t ascii_len, const char *punycode_start, size_t punycode_len, uint8_t *out, size_t *out_len) { + uint32_t outbuf[SMALL_PUNYCODE_LEN]; + + size_t wide_len; + size_t out_buflen = *out_len; + + if (punycode_len == 0) { + if (ascii_len > out_buflen) { + return OverflowOverflow; + } + memcpy(out, ascii_start, ascii_len); + *out_len = ascii_len; + } else if (punycode_decode(ascii_start, ascii_len, punycode_start, punycode_len, &outbuf, &wide_len) == PunycodeOk) { + size_t narrow_len = 0; + for (size_t i = 0; i < wide_len; i++) { + if (out_buflen - narrow_len < 4) { + return OverflowOverflow; + } + unsigned char *pos = &out[narrow_len]; + narrow_len += code_to_utf8(pos, outbuf[i]); + } + *out_len = narrow_len; + } else { + size_t narrow_len = 0; + if (out_buflen < strlen("punycode{")) { + return OverflowOverflow; + } + memcpy(out, "punycode{", strlen("punycode{")); + narrow_len = strlen("punycode{"); + if (ascii_len > 0) { + if (out_buflen - narrow_len < ascii_len || out_buflen - narrow_len - ascii_len < 1) { + return OverflowOverflow; + } + memcpy(out + narrow_len, ascii_start, ascii_len); + narrow_len += ascii_len; + out[narrow_len] = '-'; + narrow_len++; + } + if (out_buflen - narrow_len < punycode_len || out_buflen - narrow_len - punycode_len < 1) { + return OverflowOverflow; + } + memcpy(out + narrow_len, punycode_start, punycode_len); + narrow_len += punycode_len; + out[narrow_len] = '}'; + narrow_len++; + *out_len = narrow_len; + } + + return OverflowOk; +} + +static NODISCARD bool try_parse_uint(const char *buf, size_t len, uint64_t *result) { + size_t cur = 0; + for(;cur < len && buf[cur] == '0';cur++); + uint64_t result_val = 0; + if (len - cur > 16) return false; + for(;cur < len;cur++) { + char c = buf[cur]; + result_val <<= 4; + if ('0' <= c && c <= '9') { + result_val += c - '0'; + } else if ('a' <= c && c <= 'f') { + result_val += 10 + (c - 'a'); + } else { + return false; + } + } + *result = result_val; + return true; +} + +static NODISCARD bool dinibble2int(const char *buf, uint8_t *result) { + uint8_t result_val = 0; + for (int i = 0; i < 2; i++) { + char c = buf[i]; + result_val <<= 4; + if ('0' <= c && c <= '9') { + result_val += c - '0'; + } else if ('a' <= c && c <= 'f') { + result_val += 10 + (c - 'a'); + } else { + return false; + } + } + *result = result_val; + return true; +} + + +typedef enum { + NtsOk = 0, + NtsOverflow = 1, + NtsInvalid = 2 +} nibbles_to_string_status; + +// '\u{10ffff}', +margin +#define ESCAPED_SIZE 12 + +static NODISCARD size_t char_to_string(uint32_t ch, uint8_t quote, bool first, char (*buf)[ESCAPED_SIZE]) { + // encode the character + char *escaped_buf = *buf; + escaped_buf[0] = '\\'; + size_t escaped_len = 2; + switch (ch) { + case '\0': + escaped_buf[1] = '0'; + break; + case '\t': + escaped_buf[1] = 't'; + break; + case '\r': + escaped_buf[1] = 'r'; + break; + case '\n': + escaped_buf[1] = 'n'; + break; + case '\\': + escaped_buf[1] = '\\'; + break; + default: + if (ch == quote) { + escaped_buf[1] = ch; + } else if (!unicode_isprint(ch) || (first && unicode_isgraphemextend(ch))) { + int hexlen = snprintf(escaped_buf, 
ESCAPED_SIZE, "\\u{%x}", (unsigned int)ch); + if (hexlen < 0) { + return 0; // (snprintf shouldn't fail!) + } + escaped_len = hexlen; + } else { + // printable character + escaped_buf[0] = ch; + escaped_len = 1; + } + break; + } + + return escaped_len; +} + +// convert nibbles to a single/double-quoted string +static NODISCARD nibbles_to_string_status nibbles_to_string(const char *buf, size_t len, uint8_t *out, size_t *out_len) { + uint8_t quote = '"'; + bool first = true; + + if ((len % 2) != 0) { + return NtsInvalid; // odd number of nibbles + } + + size_t cur_out_len = 0; + + // write starting quote + if (out != NULL) { + cur_out_len = *out_len; + if (cur_out_len == 0) { + return NtsOverflow; + } + *out++ = quote; + cur_out_len--; + } + + uint8_t conv_buf[4] = {0}; + size_t conv_buf_len = 0; + while (len > 1 || conv_buf_len > 0) { + while (len > 1 && conv_buf_len < sizeof(conv_buf)) { + if (!dinibble2int(buf, &conv_buf[conv_buf_len])) { + return NtsInvalid; + } + conv_buf_len++; + buf += 2; + len -= 2; + } + + // conv_buf is full here if possible, process 1 UTF-8 character + uint32_t ch = 0; + size_t consumed = utf8_next_char(conv_buf, &ch); + if (consumed > conv_buf_len) { + // either SIZE_MAX (invalid UTF-8) or finished input buffer and + // there are still bytes remaining, in both cases invalid + return NtsInvalid; + } + + // "consume" the character + memmove(conv_buf, conv_buf+consumed, conv_buf_len-consumed); + conv_buf_len -= consumed; + + char escaped_buf[ESCAPED_SIZE]; + size_t escaped_len = char_to_string(ch, '"', first, &escaped_buf); + if (out != NULL) { + if (cur_out_len < escaped_len) { + return NtsOverflow; + } + memcpy(out, escaped_buf, escaped_len); + out += escaped_len; + cur_out_len -= escaped_len; + } + first = false; + } + + // write ending quote + if (out != NULL) { + if (cur_out_len == 0) { + return NtsOverflow; + } + *out++ = quote; + cur_out_len--; + *out_len -= cur_out_len; // subtract remaining space to get used space + } + + return NtsOk; +} + +static const char* basic_type(uint8_t tag) { + switch(tag) { + case 'b': + return "bool"; + case 'c': + return "char"; + case 'e': + return "str"; + case 'u': + return "()"; + case 'a': + return "i8"; + case 's': + return "i16"; + case 'l': + return "i32"; + case 'x': + return "i64"; + case 'n': + return "i128"; + case 'i': + return "isize"; + case 'h': + return "u8"; + case 't': + return "u16"; + case 'm': + return "u32"; + case 'y': + return "u64"; + case 'o': + return "u128"; + case 'j': + return "usize"; + case 'f': + return "f32"; + case 'd': + return "f64"; + case 'z': + return "!"; + case 'p': + return "_"; + case 'v': + return "..."; + default: + return NULL; + } +} + +static NODISCARD demangle_status parser_push_depth(struct parser *parser) { + parser->depth++; + if (parser->depth > MAX_DEPTH) { + return DemangleRecursed; + } else { + return DemangleOk; + } +} + +static demangle_status parser_pop_depth(struct parser *parser) { + parser->depth--; + return DemangleOk; +} + +static uint8_t parser_peek(struct parser const *parser) { + if (parser->next == parser->sym_len) { + return 0; // add a "pseudo nul terminator" to avoid peeking past the end of a symbol + } else { + return parser->sym[parser->next]; + } +} + +static bool parser_eat(struct parser *parser, uint8_t ch) { + if (parser_peek(parser) == ch) { + if (ch != 0) { // safety: make sure we don't skip past the NUL terminator + parser->next++; + } + return true; + } else { + return false; + } +} + +static uint8_t parser_next(struct parser *parser) { + // 
don't advance after end of input, and return an imaginary NUL terminator + if (parser->next == parser->sym_len) { + return 0; + } else { + return parser->sym[parser->next++]; + } +} + +static NODISCARD demangle_status parser_ch(struct parser *parser, uint8_t *next) { + // don't advance after end of input + if (parser->next == parser->sym_len) { + return DemangleInvalid; + } else { + *next = parser->sym[parser->next++]; + return DemangleOk; + } +} + +struct buf { + const char *start; + size_t len; +}; + +static NODISCARD demangle_status parser_hex_nibbles(struct parser *parser, struct buf *buf) { + size_t start = parser->next; + for (;;) { + uint8_t ch = parser_next(parser); + if (ch == '_') { + break; + } + if (!(('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f'))) { + return DemangleInvalid; + } + } + buf->start = parser->sym + start; + buf->len = parser->next - start - 1; // skip final _ + return DemangleOk; +} + +static NODISCARD demangle_status parser_digit_10(struct parser *parser, uint8_t *out) { + uint8_t ch = parser_peek(parser); + if ('0' <= ch && ch <= '9') { + *out = ch - '0'; + parser->next++; + return DemangleOk; + } else { + return DemangleInvalid; + } +} + +static NODISCARD demangle_status parser_digit_62(struct parser *parser, uint64_t *out) { + uint8_t ch = parser_peek(parser); + if ('0' <= ch && ch <= '9') { + *out = ch - '0'; + parser->next++; + return DemangleOk; + } else if ('a' <= ch && ch <= 'z') { + *out = 10 + (ch - 'a'); + parser->next++; + return DemangleOk; + } else if ('A' <= ch && ch <= 'Z') { + *out = 10 + 26 + (ch - 'A'); + parser->next++; + return DemangleOk; + } else { + return DemangleInvalid; + } +} + +static NODISCARD demangle_status parser_integer_62(struct parser *parser, uint64_t *out) { + if (parser_eat(parser, '_')) { + *out = 0; + return DemangleOk; + } + + uint64_t x = 0; + demangle_status status; + while (!parser_eat(parser, '_')) { + uint64_t d; + if ((status = parser_digit_62(parser, &d)) != DemangleOk) { + return status; + } + if (x > UINT64_MAX / 62) { + return DemangleInvalid; + } + x *= 62; + if (x > UINT64_MAX - d) { + return DemangleInvalid; + } + x += d; + } + if (x == UINT64_MAX) { + return DemangleInvalid; + } + *out = x + 1; + return DemangleOk; +} + +static NODISCARD demangle_status parser_opt_integer_62(struct parser *parser, uint8_t tag, uint64_t *out) { + if (!parser_eat(parser, tag)) { + *out = 0; + return DemangleOk; + } + + demangle_status status; + if ((status = parser_integer_62(parser, out)) != DemangleOk) { + return status; + } + if (*out == UINT64_MAX) { + return DemangleInvalid; + } + *out = *out + 1; + return DemangleOk; +} + +static NODISCARD demangle_status parser_disambiguator(struct parser *parser, uint64_t *out) { + return parser_opt_integer_62(parser, 's', out); +} + +typedef uint8_t parser_namespace_type; + +static NODISCARD demangle_status parser_namespace(struct parser *parser, parser_namespace_type *out) { + uint8_t next = parser_next(parser); + if ('A' <= next && next <= 'Z') { + *out = next; + return DemangleOk; + } else if ('a' <= next && next <= 'z') { + *out = 0; + return DemangleOk; + } else { + return DemangleInvalid; + } +} + +static NODISCARD demangle_status parser_backref(struct parser *parser, struct parser *out) { + size_t start = parser->next; + if (start == 0) { + return DemangleBug; + } + size_t s_start = start - 1; + uint64_t i; + demangle_status status = parser_integer_62(parser, &i); + if (status != DemangleOk) { + return status; + } + if (i >= s_start) { + return DemangleInvalid; + } + 
struct parser res = { + .sym = parser->sym, + .sym_len = parser->sym_len, + .next = (size_t)i, + .depth = parser->depth + }; + status = parser_push_depth(&res); + if (status != DemangleOk) { + return status; + } + *out = res; + return DemangleOk; +} + +static NODISCARD demangle_status parser_ident(struct parser *parser, struct ident *out) { + bool is_punycode = parser_eat(parser, 'u'); + size_t len; + uint8_t d; + demangle_status status = parser_digit_10(parser, &d); + len = d; + if (status != DemangleOk) { + return status; + } + if (len) { + for (;;) { + status = parser_digit_10(parser, &d); + if (status != DemangleOk) { + break; + } + if (len > SIZE_MAX / 10) { + return DemangleInvalid; + } + len *= 10; + if (len > SIZE_MAX - d) { + return DemangleInvalid; + } + len += d; + } + } + + // Skip past the optional `_` separator. + parser_eat(parser, '_'); + + size_t start = parser->next; + if (parser->sym_len - parser->next < len) { + return DemangleInvalid; + } + parser->next += len; + + const char *ident = &parser->sym[start]; + + if (is_punycode) { + const char *underscore = demangle_memrchr(ident, '_', (size_t)len); + if (underscore == NULL) { + *out = (struct ident){ + .ascii_start="", + .ascii_len=0, + .punycode_start=ident, + .punycode_len=len + }; + } else { + size_t ascii_len = underscore - ident; + // ascii_len <= len - 1 since `_` is in the first len bytes + size_t punycode_len = len - 1 - ascii_len; + *out = (struct ident){ + .ascii_start=ident, + .ascii_len=ascii_len, + .punycode_start=underscore + 1, + .punycode_len=punycode_len + }; + } + if (out->punycode_len == 0) { + return DemangleInvalid; + } + return DemangleOk; + } else { + *out = (struct ident) { + .ascii_start=ident, + .ascii_len=(size_t)len, + .punycode_start="", + .punycode_len=0, + }; + return DemangleOk; + } +} + +#define INVALID_SYNTAX "{invalid syntax}" + +static const char *demangle_error_message(demangle_status status) { + switch (status) { + case DemangleInvalid: + return INVALID_SYNTAX; + case DemangleBug: + return "{bug}"; + case DemangleRecursed: + return "{recursion limit reached}"; + default: + return "{unknown error}"; + } +} + +#define PRINT(print_fn) \ + do { \ + if ((print_fn) == OverflowOverflow) { \ + return OverflowOverflow; \ + } \ + } while(0) + +#define PRINT_CH(printer, s) PRINT(printer_print_ch((printer), (s))) +#define PRINT_STR(printer, s) PRINT(printer_print_str((printer), (s))) +#define PRINT_U64(printer, s) PRINT(printer_print_u64((printer), (s))) +#define PRINT_IDENT(printer, s) PRINT(printer_print_ident((printer), (s))) + +#define INVALID(printer) \ + do { \ + PRINT_STR((printer), INVALID_SYNTAX); \ + (printer)->status = DemangleInvalid; \ + return OverflowOk; \ + } while(0) + +#define PARSE(printer, method, ...) 
\ + do { \ + if ((printer)->status != DemangleOk) { \ + PRINT_STR((printer), "?"); \ + return OverflowOk; \ + } else { \ + demangle_status _parse_status = method(&(printer)->parser, ## __VA_ARGS__); \ + if (_parse_status != DemangleOk) { \ + PRINT_STR((printer), demangle_error_message(_parse_status)); \ + (printer)->status = _parse_status; \ + return OverflowOk; \ + } \ + } \ + } while(0) + +#define PRINT_SEP_LIST(printer, body, sep) \ + do { \ + size_t _sep_list_i; \ + PRINT_SEP_LIST_COUNT(printer, _sep_list_i, body, sep); \ + } while(0) + +#define PRINT_SEP_LIST_COUNT(printer, count, body, sep) \ + do { \ + count = 0; \ + while ((printer)->status == DemangleOk && !printer_eat((printer), 'E')) { \ + if (count > 0) { PRINT_STR(printer, sep); } \ + body; \ + count++; \ + } \ + } while(0) + +static bool printer_eat(struct printer *printer, uint8_t b) { + if (printer->status != DemangleOk) { + return false; + } + + return parser_eat(&printer->parser, b); +} + +static void printer_pop_depth(struct printer *printer) { + if (printer->status == DemangleOk) { + parser_pop_depth(&printer->parser); + } +} + +static NODISCARD overflow_status printer_print_buf(struct printer *printer, const char *start, size_t len) { + if (printer->out == NULL) { + return OverflowOk; + } + if (printer->out_len < len) { + return OverflowOverflow; + } + + memcpy(printer->out, start, len); + printer->out += len; + printer->out_len -= len; + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_str(struct printer *printer, const char *buf) { + return printer_print_buf(printer, buf, strlen(buf)); +} + +static NODISCARD overflow_status printer_print_ch(struct printer *printer, char ch) { + return printer_print_buf(printer, &ch, 1); +} + +static NODISCARD overflow_status printer_print_u64(struct printer *printer, uint64_t n) { + char buf[32] = {0}; + sprintf(buf, "%llu", (unsigned long long)n); // printing uint64 uses 21 < 32 chars + return printer_print_str(printer, buf); +} + +static NODISCARD overflow_status printer_print_ident(struct printer *printer, struct ident *ident) { + if (printer->out == NULL) { + return OverflowOk; + } + + size_t out_len = printer->out_len; + overflow_status status; + if ((status = display_ident(ident->ascii_start, ident->ascii_len, ident->punycode_start, ident->punycode_len, (uint8_t*)printer->out, &out_len)) != OverflowOk) { + return status; + } + printer->out += out_len; + printer->out_len -= out_len; + return OverflowOk; +} + +typedef overflow_status (*printer_fn)(struct printer *printer); +typedef overflow_status (*backref_fn)(struct printer *printer, bool *arg); + +static NODISCARD overflow_status printer_print_backref(struct printer *printer, backref_fn func, bool *arg) { + struct parser backref; + PARSE(printer, parser_backref, &backref); + + if (printer->out == NULL) { + return OverflowOk; + } + + struct parser orig_parser = printer->parser; + demangle_status orig_status = printer->status; // fixme not sure this is needed match for Ok on the Rust side + printer->parser = backref; + printer->status = DemangleOk; + overflow_status status = func(printer, arg); + printer->parser = orig_parser; + printer->status = orig_status; + + return status; +} + +static NODISCARD overflow_status printer_print_lifetime_from_index(struct printer *printer, uint64_t lt) { + // Bound lifetimes aren't tracked when skipping printing. 
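The skipping mode mentioned here is the printer's out == NULL convention: with no output buffer every print call is a no-op, so a full print pass degenerates into pure validation, which is how try_parse_path() above uses it. A hypothetical sketch of that pattern:

/* hypothetical: validate a mangled path without producing any output */
static demangle_status validate_path_only(const char *sym, size_t sym_len)
{
	struct printer p = {
		.status = DemangleOk,
		.parser = { .sym = sym, .sym_len = sym_len, .next = 0, .depth = 0 },
		.out = NULL,		/* printer_print_buf() becomes a no-op */
		.out_len = SIZE_MAX,
		.bound_lifetime_depth = 0,
		.alternate = false,
	};

	if (printer_print_path(&p, false) == OverflowOverflow)
		return DemangleBug;	/* cannot overflow with out == NULL */
	return p.status;
}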
+ if (printer->out == NULL) { + return OverflowOk; + } + + PRINT_STR(printer, "'"); + if (lt == 0) { + PRINT_STR(printer, "_"); + return OverflowOk; + } + + if (printer->bound_lifetime_depth < lt) { + INVALID(printer); + } else { + uint64_t depth = printer->bound_lifetime_depth - lt; + if (depth < 26) { + PRINT_CH(printer, 'a' + depth); + } else { + PRINT_STR(printer, "_"); + PRINT_U64(printer, depth); + } + + return OverflowOk; + } +} + +static NODISCARD overflow_status printer_in_binder(struct printer *printer, printer_fn func) { + uint64_t bound_lifetimes; + PARSE(printer, parser_opt_integer_62, 'G', &bound_lifetimes); + + // Don't track bound lifetimes when skipping printing. + if (printer->out == NULL) { + return func(printer); + } + + if (bound_lifetimes > 0) { + PRINT_STR(printer, "for<"); + for (uint64_t i = 0; i < bound_lifetimes; i++) { + if (i > 0) { + PRINT_STR(printer, ", "); + } + printer->bound_lifetime_depth++; + PRINT(printer_print_lifetime_from_index(printer, 1)); + } + PRINT_STR(printer, "> "); + } + + overflow_status r = func(printer); + printer->bound_lifetime_depth -= bound_lifetimes; + + return r; +} + +static NODISCARD overflow_status printer_print_generic_arg(struct printer *printer) { + if (printer_eat(printer, 'L')) { + uint64_t lt; + PARSE(printer, parser_integer_62, <); + return printer_print_lifetime_from_index(printer, lt); + } else if (printer_eat(printer, 'K')) { + return printer_print_const(printer, false); + } else { + return printer_print_type(printer); + } +} + +static NODISCARD overflow_status printer_print_generic_args(struct printer *printer) { + PRINT_STR(printer, "<"); + PRINT_SEP_LIST(printer, PRINT(printer_print_generic_arg(printer)), ", "); + PRINT_STR(printer, ">"); + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_path_out_of_value(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_path(printer, false); +} + +static NODISCARD overflow_status printer_print_path_in_value(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_path(printer, true); +} + +static NODISCARD overflow_status printer_print_path(struct printer *printer, bool in_value) { + PARSE(printer, parser_push_depth); + uint8_t tag; + PARSE(printer, parser_ch, &tag); + + overflow_status st; + uint64_t dis; + struct ident name; + parser_namespace_type ns; + char *orig_out; + + switch(tag) { + case 'C': + PARSE(printer, parser_disambiguator, &dis); + PARSE(printer, parser_ident, &name); + + PRINT_IDENT(printer, &name); + + if (printer->out != NULL && !printer->alternate && dis != 0) { + PRINT_STR(printer, "["); + char buf[24] = {0}; + sprintf(buf, "%llx", (unsigned long long)dis); + PRINT_STR(printer, buf); + PRINT_STR(printer, "]"); + } + break; + case 'N': + PARSE(printer, parser_namespace, &ns); + if ((st = printer_print_path(printer, in_value)) != OverflowOk) { + return st; + } + + // HACK(eddyb) if the parser is already marked as having errored, + // `parse!` below will print a `?` without its preceding `::` + // (because printing the `::` is skipped in certain conditions, + // i.e. a lowercase namespace with an empty identifier), + // so in order to get `::?`, the `::` has to be printed here. 
+ if (printer->status != DemangleOk) { + PRINT_STR(printer, "::"); + } + + PARSE(printer, parser_disambiguator, &dis); + PARSE(printer, parser_ident, &name); + // Special namespace, like closures and shims + if (ns) { + PRINT_STR(printer, "::{"); + if (ns == 'C') { + PRINT_STR(printer, "closure"); + } else if (ns == 'S') { + PRINT_STR(printer, "shim"); + } else { + PRINT_CH(printer, ns); + } + if (name.ascii_len != 0 || name.punycode_len != 0) { + PRINT_STR(printer, ":"); + PRINT_IDENT(printer, &name); + } + PRINT_STR(printer, "#"); + PRINT_U64(printer, dis); + PRINT_STR(printer, "}"); + } else { + // Implementation-specific/unspecified namespaces + if (name.ascii_len != 0 || name.punycode_len != 0) { + PRINT_STR(printer, "::"); + PRINT_IDENT(printer, &name); + } + } + break; + case 'M': + case 'X': + // for impls, ignore the impls own path + PARSE(printer, parser_disambiguator, &dis); + orig_out = printer->out; + printer->out = NULL; + PRINT(printer_print_path(printer, false)); + printer->out = orig_out; + + // fallthru + case 'Y': + PRINT_STR(printer, "<"); + PRINT(printer_print_type(printer)); + if (tag != 'M') { + PRINT_STR(printer, " as "); + PRINT(printer_print_path(printer, false)); + } + PRINT_STR(printer, ">"); + break; + case 'I': + PRINT(printer_print_path(printer, in_value)); + if (in_value) { + PRINT_STR(printer, "::"); + } + PRINT(printer_print_generic_args(printer)); + break; + case 'B': + PRINT(printer_print_backref(printer, in_value ? printer_print_path_in_value : printer_print_path_out_of_value, NULL)); + break; + default: + INVALID(printer); + break; + } + + printer_pop_depth(printer); + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_const_uint(struct printer *printer, uint8_t tag) { + struct buf hex; + PARSE(printer, parser_hex_nibbles, &hex); + + uint64_t val; + if (try_parse_uint(hex.start, hex.len, &val)) { + PRINT_U64(printer, val); + } else { + PRINT_STR(printer, "0x"); + PRINT(printer_print_buf(printer, hex.start, hex.len)); + } + + if (printer->out != NULL && !printer->alternate) { + const char *ty = basic_type(tag); + if (/* safety */ ty != NULL) { + PRINT_STR(printer, ty); + } + } + + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_const_str_literal(struct printer *printer) { + struct buf hex; + PARSE(printer, parser_hex_nibbles, &hex); + + size_t out_len = SIZE_MAX; + nibbles_to_string_status nts_status = nibbles_to_string(hex.start, hex.len, NULL, &out_len); + switch (nts_status) { + case NtsOk: + if (printer->out != NULL) { + out_len = printer->out_len; + nts_status = nibbles_to_string(hex.start, hex.len, (uint8_t*)printer->out, &out_len); + if (nts_status != NtsOk) { + return OverflowOverflow; + } + printer->out += out_len; + printer->out_len -= out_len; + } + return OverflowOk; + case NtsOverflow: + // technically if there is a string of size `SIZE_MAX/6` whose escaped version overflows + // SIZE_MAX but has an invalid char, this will be a "fake" overflow. In practice, + // that is not going to happen and a fuzzer will not generate strings of this length. 
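To make the happy path concrete, a worked example of what the nibble decoding above produces, again assuming the static helpers in this file are in scope:

/* hypothetical: the nibbles "68695f21" are the UTF-8 bytes of "hi_!" */
static bool nibbles_example(void)
{
	uint8_t out[16];
	size_t out_len = sizeof(out);

	if (nibbles_to_string("68695f21", 8, out, &out_len) != NtsOk)
		return false;
	/* out holds "hi_!" with both quote characters, six bytes in all */
	return out_len == 6;
}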
+ return OverflowOverflow; + case NtsInvalid: + default: + INVALID(printer); + } +} + +static NODISCARD overflow_status printer_print_const_struct(struct printer *printer) { + uint64_t dis; + struct ident name; + PARSE(printer, parser_disambiguator, &dis); + PARSE(printer, parser_ident, &name); + PRINT_IDENT(printer, &name); + PRINT_STR(printer, ": "); + return printer_print_const(printer, true); +} + +static NODISCARD overflow_status printer_print_const_out_of_value(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_const(printer, false); +} + +static NODISCARD overflow_status printer_print_const_in_value(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_const(printer, true); +} + +static NODISCARD overflow_status printer_print_const(struct printer *printer, bool in_value) { + uint8_t tag; + + PARSE(printer, parser_ch, &tag); + PARSE(printer, parser_push_depth); + + struct buf hex; + uint64_t val; + size_t count; + + bool opened_brace = false; +#define OPEN_BRACE_IF_OUTSIDE_EXPR \ + do { if (!in_value) { \ + opened_brace = true; \ + PRINT_STR(printer, "{"); \ + } } while(0) + + switch(tag) { + case 'p': + PRINT_STR(printer, "_"); + break; + // Primitive leaves with hex-encoded values (see `basic_type`). + case 'a': + case 's': + case 'l': + case 'x': + case 'n': + case 'i': + if (printer_eat(printer, 'n')) { + PRINT_STR(printer, "-"); + } + /* fallthrough */ + case 'h': + case 't': + case 'm': + case 'y': + case 'o': + case 'j': + PRINT(printer_print_const_uint(printer, tag)); + break; + case 'b': + PARSE(printer, parser_hex_nibbles, &hex); + if (try_parse_uint(hex.start, hex.len, &val)) { + if (val == 0) { + PRINT_STR(printer, "false"); + } else if (val == 1) { + PRINT_STR(printer, "true"); + } else { + INVALID(printer); + } + } else { + INVALID(printer); + } + break; + case 'c': + PARSE(printer, parser_hex_nibbles, &hex); + if (try_parse_uint(hex.start, hex.len, &val) + && val < UINT32_MAX + && validate_char((uint32_t)val)) + { + char escaped_buf[ESCAPED_SIZE]; + size_t escaped_size = char_to_string((uint32_t)val, '\'', true, &escaped_buf); + + PRINT_STR(printer, "'"); + PRINT(printer_print_buf(printer, escaped_buf, escaped_size)); + PRINT_STR(printer, "'"); + } else { + INVALID(printer); + } + break; + case 'e': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "*"); + PRINT(printer_print_const_str_literal(printer)); + break; + case 'R': + case 'Q': + if (tag == 'R' && printer_eat(printer, 'e')) { + PRINT(printer_print_const_str_literal(printer)); + } else { + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "&"); + if (tag != 'R') { + PRINT_STR(printer, "mut "); + } + PRINT(printer_print_const(printer, true)); + } + break; + case 'A': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "["); + PRINT_SEP_LIST(printer, PRINT(printer_print_const(printer, true)), ", "); + PRINT_STR(printer, "]"); + break; + case 'T': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "("); + PRINT_SEP_LIST_COUNT(printer, count, PRINT(printer_print_const(printer, true)), ", "); + if (count == 1) { + PRINT_STR(printer, ","); + } + PRINT_STR(printer, ")"); + break; + case 'V': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT(printer_print_path(printer, true)); + PARSE(printer, parser_ch, &tag); + switch(tag) { + case 'U': + break; + case 'T': + PRINT_STR(printer, "("); + PRINT_SEP_LIST(printer, PRINT(printer_print_const(printer, true)), ", "); + PRINT_STR(printer, ")"); + break; + case 'S': + PRINT_STR(printer, " { "); + PRINT_SEP_LIST(printer, 
PRINT(printer_print_const_struct(printer)), ", "); + PRINT_STR(printer, " }"); + break; + default: + INVALID(printer); + } + break; + case 'B': + PRINT(printer_print_backref(printer, in_value ? printer_print_const_in_value : printer_print_const_out_of_value, NULL)); + break; + default: + INVALID(printer); + } +#undef OPEN_BRACE_IF_OUTSIDE_EXPR + + if (opened_brace) { + PRINT_STR(printer, "}"); + } + printer_pop_depth(printer); + + return OverflowOk; +} + +/// A trait in a trait object may have some "existential projections" +/// (i.e. associated type bindings) after it, which should be printed +/// in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`. +/// To this end, this method will keep the `<...>` of an 'I' path +/// open, by omitting the `>`, and return `Ok(true)` in that case. +static NODISCARD overflow_status printer_print_maybe_open_generics(struct printer *printer, bool *open) { + if (printer_eat(printer, 'B')) { + // NOTE(eddyb) the closure may not run if printing is being skipped, + // but in that case the returned boolean doesn't matter. + *open = false; + return printer_print_backref(printer, printer_print_maybe_open_generics, open); + } else if(printer_eat(printer, 'I')) { + PRINT(printer_print_path(printer, false)); + PRINT_STR(printer, "<"); + PRINT_SEP_LIST(printer, PRINT(printer_print_generic_arg(printer)), ", "); + *open = true; + return OverflowOk; + } else { + PRINT(printer_print_path(printer, false)); + *open = false; + return OverflowOk; + } +} + +static NODISCARD overflow_status printer_print_dyn_trait(struct printer *printer) { + bool open; + PRINT(printer_print_maybe_open_generics(printer, &open)); + + while (printer_eat(printer, 'p')) { + if (!open) { + PRINT_STR(printer, "<"); + open = true; + } else { + PRINT_STR(printer, ", "); + } + + struct ident name; + PARSE(printer, parser_ident, &name); + + PRINT_IDENT(printer, &name); + PRINT_STR(printer, " = "); + PRINT(printer_print_type(printer)); + } + + if (open) { + PRINT_STR(printer, ">"); + } + + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_object_bounds(struct printer *printer) { + PRINT_SEP_LIST(printer, PRINT(printer_print_dyn_trait(printer)), " + "); + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_function_type(struct printer *printer) { + bool is_unsafe = printer_eat(printer, 'U'); + const char *abi; + size_t abi_len; + if (printer_eat(printer, 'K')) { + if (printer_eat(printer, 'C')) { + abi = "C"; + abi_len = 1; + } else { + struct ident abi_ident; + PARSE(printer, parser_ident, &abi_ident); + if (abi_ident.ascii_len == 0 || abi_ident.punycode_len != 0) { + INVALID(printer); + } + abi = abi_ident.ascii_start; + abi_len = abi_ident.ascii_len; + } + } else { + abi = NULL; + abi_len = 0; + } + + if (is_unsafe) { + PRINT_STR(printer, "unsafe "); + } + + if (abi != NULL) { + PRINT_STR(printer, "extern \""); + + // replace _ with - + while (abi_len > 0) { + const char *minus = memchr(abi, '_', abi_len); + if (minus == NULL) { + PRINT(printer_print_buf(printer, (const char*)abi, abi_len)); + break; + } else { + size_t space_to_minus = minus - abi; + PRINT(printer_print_buf(printer, (const char*)abi, space_to_minus)); + PRINT_STR(printer, "-"); + abi = minus + 1; + abi_len -= (space_to_minus + 1); + } + } + + PRINT_STR(printer, "\" "); + } + + PRINT_STR(printer, "fn("); + PRINT_SEP_LIST(printer, PRINT(printer_print_type(printer)), ", "); + PRINT_STR(printer, ")"); + + if (printer_eat(printer, 'u')) { + // Skip printing the return type if it's 'u', i.e. 
`()`. + } else { + PRINT_STR(printer, " -> "); + PRINT(printer_print_type(printer)); + } + + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_type_backref(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_type(printer); +} + +static NODISCARD overflow_status printer_print_type(struct printer *printer) { + uint8_t tag; + PARSE(printer, parser_ch, &tag); + + const char *basic_ty = basic_type(tag); + if (basic_ty) { + return printer_print_str(printer, basic_ty); + } + + uint64_t count; + uint64_t lt; + + PARSE(printer, parser_push_depth); + switch (tag) { + case 'R': + case 'Q': + PRINT_STR(printer, "&"); + if (printer_eat(printer, 'L')) { + PARSE(printer, parser_integer_62, <); + if (lt != 0) { + PRINT(printer_print_lifetime_from_index(printer, lt)); + PRINT_STR(printer, " "); + } + } + if (tag != 'R') { + PRINT_STR(printer, "mut "); + } + PRINT(printer_print_type(printer)); + break; + case 'P': + case 'O': + PRINT_STR(printer, "*"); + if (tag != 'P') { + PRINT_STR(printer, "mut "); + } else { + PRINT_STR(printer, "const "); + } + PRINT(printer_print_type(printer)); + break; + case 'A': + case 'S': + PRINT_STR(printer, "["); + PRINT(printer_print_type(printer)); + if (tag == 'A') { + PRINT_STR(printer, "; "); + PRINT(printer_print_const(printer, true)); + } + PRINT_STR(printer, "]"); + break; + case 'T': + PRINT_STR(printer, "("); + PRINT_SEP_LIST_COUNT(printer, count, PRINT(printer_print_type(printer)), ", "); + if (count == 1) { + PRINT_STR(printer, ","); + } + PRINT_STR(printer, ")"); + break; + case 'F': + PRINT(printer_in_binder(printer, printer_print_function_type)); + break; + case 'D': + PRINT_STR(printer, "dyn "); + PRINT(printer_in_binder(printer, printer_print_object_bounds)); + + if (!printer_eat(printer, 'L')) { + INVALID(printer); + } + PARSE(printer, parser_integer_62, <); + + if (lt != 0) { + PRINT_STR(printer, " + "); + PRINT(printer_print_lifetime_from_index(printer, lt)); + } + break; + case 'B': + PRINT(printer_print_backref(printer, printer_print_type_backref, NULL)); + break; + default: + // Go back to the tag, so `print_path` also sees it. + if (printer->status == DemangleOk && /* safety */ printer->parser.next > 0) { + printer->parser.next--; + } + PRINT(printer_print_path(printer, false)); + } + + printer_pop_depth(printer); + return OverflowOk; +} + +NODISCARD static demangle_status rust_demangle_legacy_demangle(const char *s, size_t s_len, struct demangle_legacy *res, const char **rest) +{ + if (s_len > strlen(s)) { + // s_len only exists to shorten the string, this is not a buffer API + return DemangleInvalid; + } + + const char *inner; + size_t inner_len; + if (s_len >= 3 && !strncmp(s, "_ZN", 3)) { + inner = s + 3; + inner_len = s_len - 3; + } else if (s_len >= 2 && !strncmp(s, "ZN", 2)) { + // On Windows, dbghelp strips leading underscores, so we accept "ZN...E" + // form too. 
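For the legacy sample used earlier, the element scan below then sees:

/*
 * _ZN 4core 3ops 4Drop 4drop 17hc68340e1baa4987a E
 *
 * inner points just past the prefix, elements ends up as 5, and *rest
 * is whatever follows the final 'E' (here, the empty string).
 */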
+ inner = s + 2; + inner_len = s_len - 2; + } else if (s_len >= 4 && !strncmp(s, "__ZN", 4)) { + // On OSX, symbols are prefixed with an extra _ + inner = s + 4; + inner_len = s_len - 4; + } else { + return DemangleInvalid; + } + + if (!str_isascii(inner, inner_len)) { + return DemangleInvalid; + } + + size_t elements = 0; + const char *chars = inner; + size_t chars_len = inner_len; + if (chars_len == 0) { + return DemangleInvalid; + } + char c; + while ((c = *chars) != 'E') { + // Decode an identifier element's length + if (c < '0' || c > '9') { + return DemangleInvalid; + } + size_t len = 0; + while (c >= '0' && c <= '9') { + size_t d = c - '0'; + if (len > SIZE_MAX / 10) { + return DemangleInvalid; + } + len *= 10; + if (len > SIZE_MAX - d) { + return DemangleInvalid; + } + len += d; + + chars++; + chars_len--; + if (chars_len == 0) { + return DemangleInvalid; + } + c = *chars; + } + + // Advance by the length + if (chars_len <= len) { + return DemangleInvalid; + } + chars += len; + chars_len -= len; + elements++; + } + *res = (struct demangle_legacy) { inner, inner_len, elements }; + *rest = chars + 1; + return DemangleOk; +} + +static bool is_rust_hash(const char *s, size_t len) { + if (len == 0 || s[0] != 'h') { + return false; + } + + for (size_t i = 1; i < len; i++) { + if (!((s[i] >= '0' && s[i] <= '9') || (s[i] >= 'a' && s[i] <= 'f') || (s[i] >= 'A' && s[i] <= 'F'))) { + return false; + } + } + + return true; +} + +NODISCARD static overflow_status rust_demangle_legacy_display_demangle(struct demangle_legacy res, char *out, size_t len, bool alternate) +{ + struct printer printer = { + // not actually using the parser part of the printer, just keeping it to share the format functions + DemangleOk, + { NULL }, + out, + len, + 0, + alternate + }; + const char *inner = res.mangled; + for (size_t element = 0; element < res.elements; element++) { + size_t i = 0; + const char *rest; + for (rest = inner; rest < res.mangled + res.mangled_len && *rest >= '0' && *rest <= '9'; rest++) { + i *= 10; + i += *rest - '0'; + } + if ((size_t)(res.mangled + res.mangled_len - rest) < i) { + // safety: shouldn't reach this place if the input string is validated. bail out. 
+ // safety: we knwo rest <= res.mangled + res.mangled_len from the for-loop above + break; + } + + size_t len = i; + inner = rest + len; + + // From here on, inner contains a pointer to the next element, rest[:len] to the current one + if (alternate && element + 1 == res.elements && is_rust_hash(rest, i)) { + break; + } + if (element != 0) { + PRINT_STR(&printer, "::"); + } + + if (len >= 2 && !strncmp(rest, "_$", 2)) { + rest++; + len--; + } + + while (len > 0) { + if (rest[0] == '.') { + if (len >= 2 && rest[1] == '.') { + PRINT_STR(&printer, "::"); + rest += 2; + len -= 2; + } else { + PRINT_STR(&printer, "."); + rest += 1; + len -= 1; + } + } else if (rest[0] == '$') { + const char *escape = memchr(rest + 1, '$', len - 1); + if (escape == NULL) { + break; + } + const char *escape_start = rest + 1; + size_t escape_len = escape - (rest + 1); + + size_t next_len = len - (escape + 1 - rest); + const char *next_rest = escape + 1; + + char ch; + if ((escape_len == 2 && escape_start[0] == 'S' && escape_start[1] == 'P')) { + ch = '@'; + } else if ((escape_len == 2 && escape_start[0] == 'B' && escape_start[1] == 'P')) { + ch = '*'; + } else if ((escape_len == 2 && escape_start[0] == 'R' && escape_start[1] == 'F')) { + ch = '&'; + } else if ((escape_len == 2 && escape_start[0] == 'L' && escape_start[1] == 'T')) { + ch = '<'; + } else if ((escape_len == 2 && escape_start[0] == 'G' && escape_start[1] == 'T')) { + ch = '>'; + } else if ((escape_len == 2 && escape_start[0] == 'L' && escape_start[1] == 'P')) { + ch = '('; + } else if ((escape_len == 2 && escape_start[0] == 'R' && escape_start[1] == 'P')) { + ch = ')'; + } else if ((escape_len == 1 && escape_start[0] == 'C')) { + ch = ','; + } else { + if (escape_len > 1 && escape_start[0] == 'u') { + escape_start++; + escape_len--; + uint64_t val; + if (try_parse_uint(escape_start, escape_len, &val) + && val < UINT32_MAX + && validate_char((uint32_t)val)) + { + if (!unicode_iscontrol(val)) { + uint8_t wchr[4]; + size_t wchr_len = code_to_utf8(wchr, (uint32_t)val); + PRINT(printer_print_buf(&printer, (const char*)wchr, wchr_len)); + len = next_len; + rest = next_rest; + continue; + } + } + } + break; // print the rest of this element raw + } + PRINT_CH(&printer, ch); + len = next_len; + rest = next_rest; + } else { + size_t j = 0; + for (;j < len && rest[j] != '$' && rest[j] != '.';j++); + if (j == len) { + break; + } + PRINT(printer_print_buf(&printer, rest, j)); + rest += j; + len -= j; + } + } + PRINT(printer_print_buf(&printer, rest, len)); + } + + if (printer.out_len < OVERFLOW_MARGIN) { + return OverflowOverflow; + } + *printer.out = '\0'; + return OverflowOk; +} + +static bool is_symbol_like(const char *s, size_t len) { + // rust-demangle definition of symbol like: control characters and space are not symbol-like, all else is + for (size_t i = 0; i < len; i++) { + char ch = s[i]; + if (!(ch >= 0x21 && ch <= 0x7e)) { + return false; + } + } + return true; +} + +void rust_demangle_demangle(const char *s, struct demangle *res) +{ + // During ThinLTO LLVM may import and rename internal symbols, so strip out + // those endings first as they're one of the last manglings applied to symbol + // names. 
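For example (the suffix shape is the one documented in demangle-rust-v0.h below):

/*
 * a ThinLTO-renamed symbol such as
 *
 *   _ZN3foo3barE.llvm.9D1C9369@@16
 *
 * is demangled as if it were plain "_ZN3foo3barE"; the loop below only
 * strips the suffix when everything after ".llvm." is hex digits
 * (0-9, A-F) or '@'.
 */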
+ const char *llvm = ".llvm."; + const char *found_llvm = strstr(s, llvm); + size_t s_len = strlen(s); + if (found_llvm) { + const char *all_hex_ptr = found_llvm + strlen(".llvm."); + bool all_hex = true; + for (;*all_hex_ptr;all_hex_ptr++) { + if (!(('0' <= *all_hex_ptr && *all_hex_ptr <= '9') || + ('A' <= *all_hex_ptr && *all_hex_ptr <= 'F') || + *all_hex_ptr == '@')) { + all_hex = false; + break; + } + } + + if (all_hex) { + s_len = found_llvm - s; + } + } + + const char *suffix; + struct demangle_legacy legacy; + demangle_status st = rust_demangle_legacy_demangle(s, s_len, &legacy, &suffix); + if (st == DemangleOk) { + *res = (struct demangle) { + .style=DemangleStyleLegacy, + .mangled=legacy.mangled, + .mangled_len=legacy.mangled_len, + .elements=legacy.elements, + .original=s, + .original_len=s_len, + .suffix=suffix, + .suffix_len=s_len - (suffix - s), + }; + } else { + struct demangle_v0 v0; + st = rust_demangle_v0_demangle(s, s_len, &v0, &suffix); + if (st == DemangleOk) { + *res = (struct demangle) { + .style=DemangleStyleV0, + .mangled=v0.mangled, + .mangled_len=v0.mangled_len, + .elements=0, + .original=s, + .original_len=s_len, + .suffix=suffix, + .suffix_len=s_len - (suffix - s), + }; + } else { + *res = (struct demangle) { + .style=DemangleStyleUnknown, + .mangled=NULL, + .mangled_len=0, + .elements=0, + .original=s, + .original_len=s_len, + .suffix=s, + .suffix_len=0, + }; + } + } + + // Output like LLVM IR adds extra period-delimited words. See if + // we are in that case and save the trailing words if so. + if (res->suffix_len) { + if (res->suffix[0] == '.' && is_symbol_like(res->suffix, res->suffix_len)) { + // Keep the suffix + } else { + // Reset the suffix and invalidate the demangling + res->style = DemangleStyleUnknown; + res->suffix_len = 0; + } + } +} + +bool rust_demangle_is_known(struct demangle *res) { + return res->style != DemangleStyleUnknown; +} + +overflow_status rust_demangle_display_demangle(struct demangle const *res, char *out, size_t len, bool alternate) { + size_t original_len = res->original_len; + size_t out_len; + switch (res->style) { + case DemangleStyleUnknown: + if (len < original_len) { + return OverflowOverflow; + } else { + memcpy(out, res->original, original_len); + out += original_len; + len -= original_len; + break; + } + break; + case DemangleStyleLegacy: { + struct demangle_legacy legacy = { + res->mangled, + res->mangled_len, + res->elements + }; + if (rust_demangle_legacy_display_demangle(legacy, out, len, alternate) == OverflowOverflow) { + return OverflowOverflow; + } + out_len = strlen(out); + out += out_len; + len -= out_len; + break; + } + case DemangleStyleV0: { + struct demangle_v0 v0 = { + res->mangled, + res->mangled_len + }; + if (rust_demangle_v0_display_demangle(v0, out, len, alternate) == OverflowOverflow) { + return OverflowOverflow; + } + out_len = strlen(out); + out += out_len; + len -= out_len; + break; + } + } + size_t suffix_len = res->suffix_len; + if (len < suffix_len || len - suffix_len < OVERFLOW_MARGIN) { + return OverflowOverflow; + } + memcpy(out, res->suffix, suffix_len); + out[suffix_len] = 0; + return OverflowOk; +} diff --git a/tools/perf/util/demangle-rust-v0.h b/tools/perf/util/demangle-rust-v0.h new file mode 100644 index 000000000000..d0092818610a --- /dev/null +++ b/tools/perf/util/demangle-rust-v0.h @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// The contents of this file come from the Rust rustc-demangle library, hosted +// in the <https://github.com/rust-lang/rustc-demangle> 
repository, licensed +// under "Apache-2.0 OR MIT". For copyright details, see +// <https://github.com/rust-lang/rustc-demangle/blob/main/README.md>. +// Please note that the file should be kept as close as possible to upstream. + +#ifndef _H_DEMANGLE_V0_H +#define _H_DEMANGLE_V0_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stddef.h> + +#if defined(__GNUC__) || defined(__clang__) +#define DEMANGLE_NODISCARD __attribute__((warn_unused_result)) +#else +#define DEMANGLE_NODISCARD +#endif + +typedef enum { + OverflowOk, + OverflowOverflow +} overflow_status; + +enum demangle_style { + DemangleStyleUnknown = 0, + DemangleStyleLegacy, + DemangleStyleV0, +}; + +// Not using a union here to make the struct easier to copy-paste if needed. +struct demangle { + enum demangle_style style; + // points to the "mangled" part of the name, + // not including `ZN` or `R` prefixes. + const char *mangled; + size_t mangled_len; + // In DemangleStyleLegacy, is the number of path elements + size_t elements; + // while it's called "original", it will not contain `.llvm.9D1C9369@@16` suffixes + // that are to be ignored. + const char *original; + size_t original_len; + // Contains the part after the mangled name that is to be outputted, + // which can be `.exit.i.i` suffixes LLVM sometimes adds. + const char *suffix; + size_t suffix_len; +}; + +// if the length of the output buffer is less than `output_len-OVERFLOW_MARGIN`, +// the demangler will return `OverflowOverflow` even if there is no overflow. +#define OVERFLOW_MARGIN 4 + +/// Demangle a C string that refers to a Rust symbol and put the demangle intermediate result in `res`. +/// Beware that `res` contains references into `s`. If `s` is modified (or free'd) before calling +/// `rust_demangle_display_demangle` behavior is undefined. +/// +/// Use `rust_demangle_display_demangle` to convert it to an actual string. +void rust_demangle_demangle(const char *s, struct demangle *res); + +/// Write the string in a `struct demangle` into a buffer. +/// +/// Return `OverflowOk` if the output buffer was sufficiently big, `OverflowOverflow` if it wasn't. +/// This function is `O(n)` in the length of the input + *output* [$], but the demangled output of demangling a symbol can +/// be exponentially[$$] large, therefore it is recommended to have a sane bound (`rust-demangle` +/// uses 1,000,000 bytes) on `len`. +/// +/// `alternate`, if true, uses the less verbose alternate formatting (Rust `{:#}`) is used, which does not show +/// symbol hashes and types of constant ints. +/// +/// [$] It's `O(n * MAX_DEPTH)`, but `MAX_DEPTH` is a constant 300 and therefore it's `O(n)` +/// [$$] Technically, bounded by `O(n^MAX_DEPTH)`, but this is practically exponential. +DEMANGLE_NODISCARD overflow_status rust_demangle_display_demangle(struct demangle const *res, char *out, size_t len, bool alternate); + +/// Returns true if `res` refers to a known valid Rust demangling style, false if it's an unknown style. 
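Putting the API above together, a minimal caller might look like this sketch (the symbol is the legacy example from the removed demangle-rust.c; the buffer size is arbitrary):

#include <stdbool.h>
#include <stdio.h>
#include "demangle-rust-v0.h"

int main(void)
{
	const char *sym = "_ZN4core3ops4Drop4drop17hc68340e1baa4987aE";
	struct demangle dem;
	char buf[1024];

	rust_demangle_demangle(sym, &dem);
	if (!rust_demangle_is_known(&dem))
		return 1;	/* not a recognized Rust mangling */
	if (rust_demangle_display_demangle(&dem, buf, sizeof(buf),
					   /*alternate=*/true) != OverflowOk)
		return 1;	/* buffer too small */
	printf("%s\n", buf);	/* prints "core::ops::Drop::drop" */
	return 0;
}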
diff --git a/tools/perf/util/demangle-rust.c b/tools/perf/util/demangle-rust.c
deleted file mode 100644
index a659fc69f73a..000000000000
--- a/tools/perf/util/demangle-rust.c
+++ /dev/null
@@ -1,269 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <string.h>
-#include "debug.h"
-
-#include "demangle-rust.h"
-
-/*
- * Mangled Rust symbols look like this:
- *
- *     _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
- *
- * The original symbol is:
- *
- *     <std::sys::fd::FileDesc as core::ops::Drop>::drop
- *
- * The last component of the path is a 64-bit hash in lowercase hex, prefixed
- * with "h". Rust does not have a global namespace between crates, an illusion
- * which Rust maintains by using the hash to distinguish things that would
- * otherwise have the same symbol.
- *
- * Any path component not starting with a XID_Start character is prefixed with
- * "_".
- *
- * The following escape sequences are used:
- *
- *     ","  =>  $C$
- *     "@"  =>  $SP$
- *     "*"  =>  $BP$
- *     "&"  =>  $RF$
- *     "<"  =>  $LT$
- *     ">"  =>  $GT$
- *     "("  =>  $LP$
- *     ")"  =>  $RP$
- *     " "  =>  $u20$
- *     "'"  =>  $u27$
- *     "["  =>  $u5b$
- *     "]"  =>  $u5d$
- *     "~"  =>  $u7e$
- *
- * A double ".." means "::" and a single "." means "-".
- *
- * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
- */
-
-static const char *hash_prefix = "::h";
-static const size_t hash_prefix_len = 3;
-static const size_t hash_len = 16;
-
-static bool is_prefixed_hash(const char *start);
-static bool looks_like_rust(const char *sym, size_t len);
-static bool unescape(const char **in, char **out, const char *seq, char value);
-
-/*
- * INPUT:
- *     sym: symbol that has been through BFD-demangling
- *
- * This function looks for the following indicators:
- *
- *  1. The hash must consist of "h" followed by 16 lowercase hex digits.
- *
- *  2. As a sanity check, the hash must use between 5 and 15 of the 16 possible
- *     hex digits. This is true of 99.9998% of hashes so once in your life you
- *     may see a false negative. The point is to notice path components that
- *     could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In
- *     this case a false positive (non-Rust symbol has an important path
- *     component removed because it looks like a Rust hash) is worse than a
- *     false negative (the rare Rust symbol is not demangled) so this sets the
- *     balance in favor of false negatives.
- *
- *  3. There must be no characters other than a-zA-Z0-9 and _.:$
- *
- *  4. There must be no unrecognized $-sign sequences.
- *
- *  5. There must be no sequence of three or more dots in a row ("...").
- */
-bool
-rust_is_mangled(const char *sym)
-{
-	size_t len, len_without_hash;
-
-	if (!sym)
-		return false;
-
-	len = strlen(sym);
-	if (len <= hash_prefix_len + hash_len)
-		/* Not long enough to contain "::h" + hash + something else */
-		return false;
-
-	len_without_hash = len - (hash_prefix_len + hash_len);
-	if (!is_prefixed_hash(sym + len_without_hash))
-		return false;
-
-	return looks_like_rust(sym, len_without_hash);
-}
-
-/*
- * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex
- * digits must comprise between 5 and 15 (inclusive) distinct digits.
- */ -static bool is_prefixed_hash(const char *str) -{ - const char *end; - bool seen[16]; - size_t i; - int count; - - if (strncmp(str, hash_prefix, hash_prefix_len)) - return false; - str += hash_prefix_len; - - memset(seen, false, sizeof(seen)); - for (end = str + hash_len; str < end; str++) - if (*str >= '0' && *str <= '9') - seen[*str - '0'] = true; - else if (*str >= 'a' && *str <= 'f') - seen[*str - 'a' + 10] = true; - else - return false; - - /* Count how many distinct digits seen */ - count = 0; - for (i = 0; i < 16; i++) - if (seen[i]) - count++; - - return count >= 5 && count <= 15; -} - -static bool looks_like_rust(const char *str, size_t len) -{ - const char *end = str + len; - - while (str < end) - switch (*str) { - case '$': - if (!strncmp(str, "$C$", 3)) - str += 3; - else if (!strncmp(str, "$SP$", 4) - || !strncmp(str, "$BP$", 4) - || !strncmp(str, "$RF$", 4) - || !strncmp(str, "$LT$", 4) - || !strncmp(str, "$GT$", 4) - || !strncmp(str, "$LP$", 4) - || !strncmp(str, "$RP$", 4)) - str += 4; - else if (!strncmp(str, "$u20$", 5) - || !strncmp(str, "$u27$", 5) - || !strncmp(str, "$u5b$", 5) - || !strncmp(str, "$u5d$", 5) - || !strncmp(str, "$u7e$", 5)) - str += 5; - else - return false; - break; - case '.': - /* Do not allow three or more consecutive dots */ - if (!strncmp(str, "...", 3)) - return false; - /* Fall through */ - case 'a' ... 'z': - case 'A' ... 'Z': - case '0' ... '9': - case '_': - case ':': - str++; - break; - default: - return false; - } - - return true; -} - -/* - * INPUT: - * sym: symbol for which rust_is_mangled(sym) returns true - * - * The input is demangled in-place because the mangled name is always longer - * than the demangled one. - */ -void -rust_demangle_sym(char *sym) -{ - const char *in; - char *out; - const char *end; - - if (!sym) - return; - - in = sym; - out = sym; - end = sym + strlen(sym) - (hash_prefix_len + hash_len); - - while (in < end) - switch (*in) { - case '$': - if (!(unescape(&in, &out, "$C$", ',') - || unescape(&in, &out, "$SP$", '@') - || unescape(&in, &out, "$BP$", '*') - || unescape(&in, &out, "$RF$", '&') - || unescape(&in, &out, "$LT$", '<') - || unescape(&in, &out, "$GT$", '>') - || unescape(&in, &out, "$LP$", '(') - || unescape(&in, &out, "$RP$", ')') - || unescape(&in, &out, "$u20$", ' ') - || unescape(&in, &out, "$u27$", '\'') - || unescape(&in, &out, "$u5b$", '[') - || unescape(&in, &out, "$u5d$", ']') - || unescape(&in, &out, "$u7e$", '~'))) { - pr_err("demangle-rust: unexpected escape sequence"); - goto done; - } - break; - case '_': - /* - * If this is the start of a path component and the next - * character is an escape sequence, ignore the - * underscore. The mangler inserts an underscore to make - * sure the path component begins with a XID_Start - * character. - */ - if ((in == sym || in[-1] == ':') && in[1] == '$') - in++; - else - *out++ = *in++; - break; - case '.': - if (in[1] == '.') { - /* ".." becomes "::" */ - *out++ = ':'; - *out++ = ':'; - in += 2; - } else { - /* "." becomes "-" */ - *out++ = '-'; - in++; - } - break; - case 'a' ... 'z': - case 'A' ... 'Z': - case '0' ... 
'9': - case ':': - *out++ = *in++; - break; - default: - pr_err("demangle-rust: unexpected character '%c' in symbol\n", - *in); - goto done; - } - -done: - *out = '\0'; -} - -static bool unescape(const char **in, char **out, const char *seq, char value) -{ - size_t len = strlen(seq); - - if (strncmp(*in, seq, len)) - return false; - - **out = value; - - *in += len; - *out += 1; - - return true; -} diff --git a/tools/perf/util/demangle-rust.h b/tools/perf/util/demangle-rust.h deleted file mode 100644 index 2fca618b1aa5..000000000000 --- a/tools/perf/util/demangle-rust.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PERF_DEMANGLE_RUST -#define __PERF_DEMANGLE_RUST 1 - -bool rust_is_mangled(const char *str); -void rust_demangle_sym(char *str); - -#endif /* __PERF_DEMANGLE_RUST */ diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index 72aec8f61b94..50b9433f3f8e 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -12,12 +12,17 @@ #include <subcmd/run-command.h> #include "annotate.h" +#include "annotate-data.h" #include "build-id.h" +#include "capstone.h" #include "debug.h" #include "disasm.h" #include "dso.h" +#include "dwarf-regs.h" #include "env.h" #include "evsel.h" +#include "libbfd.h" +#include "llvm.h" #include "map.h" #include "maps.h" #include "namespaces.h" @@ -35,6 +40,8 @@ static struct ins_ops mov_ops; static struct ins_ops nop_ops; static struct ins_ops lock_ops; static struct ins_ops ret_ops; +static struct ins_ops load_store_ops; +static struct ins_ops arithmetic_ops; static int jump__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops, int max_ins_name); @@ -43,6 +50,7 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size, static void ins__sort(struct arch *arch); static int disasm_line__parse(char *line, const char **namep, char **rawp); +static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args); static __attribute__((constructor)) void symbol__init_regexpr(void) { @@ -145,10 +153,16 @@ static struct arch architectures[] = { .memory_ref_char = '(', .imm_char = '$', }, +#ifdef HAVE_LIBDW_SUPPORT + .update_insn_state = update_insn_state_x86, +#endif }, { .name = "powerpc", .init = powerpc__annotate_init, +#ifdef HAVE_LIBDW_SUPPORT + .update_insn_state = update_insn_state_powerpc, +#endif }, { .name = "riscv64", @@ -233,8 +247,8 @@ static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size, return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->raw); } -int ins__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops, int max_ins_name) +static int ins__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) { if (ins->ops->scnprintf) return ins->ops->scnprintf(ins, bf, size, ops, max_ins_name); @@ -250,7 +264,8 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2) return arch->ins_is_fused(arch, ins1, ins2); } -static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms) +static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, + struct disasm_line *dl __maybe_unused) { char *endptr, *tok, *name; struct map *map = ms->map; @@ -345,7 +360,8 @@ static inline const char *validate_comma(const char *c, struct ins_operands *ops return c; } -static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms) +static int jump__parse(struct arch *arch, struct 
ins_operands *ops, struct map_symbol *ms, + struct disasm_line *dl __maybe_unused) { struct map *map = ms->map; struct symbol *sym = ms->sym; @@ -375,13 +391,16 @@ static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_s * skip over possible up to 2 operands to get to address, e.g.: * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0> */ - if (c++ != NULL) { + if (c != NULL) { + c++; ops->target.addr = strtoull(c, NULL, 16); if (!ops->target.addr) { c = strchr(c, ','); c = validate_comma(c, ops); - if (c++ != NULL) + if (c != NULL) { + c++; ops->target.addr = strtoull(c, NULL, 16); + } } } else { ops->target.addr = strtoull(ops->raw, NULL, 16); @@ -504,7 +523,8 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) return 0; } -static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms) +static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms, + struct disasm_line *dl __maybe_unused) { ops->locked.ops = zalloc(sizeof(*ops->locked.ops)); if (ops->locked.ops == NULL) @@ -513,13 +533,13 @@ static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_s if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0) goto out_free_ops; - ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name); + ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name, 0); if (ops->locked.ins.ops == NULL) goto out_free_ops; if (ops->locked.ins.ops->parse && - ops->locked.ins.ops->parse(arch, ops->locked.ops, ms) < 0) + ops->locked.ins.ops->parse(arch, ops->locked.ops, ms, NULL) < 0) goto out_free_ops; return 0; @@ -552,6 +572,7 @@ static void lock__delete(struct ins_operands *ops) ins_ops__delete(ops->locked.ops); zfree(&ops->locked.ops); + zfree(&ops->locked.ins.name); zfree(&ops->target.raw); zfree(&ops->target.name); } @@ -590,7 +611,8 @@ static bool check_multi_regs(struct arch *arch, const char *op) return count > 1; } -static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused) +static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused, + struct disasm_line *dl __maybe_unused) { char *s = strchr(ops->raw, ','), *target, *comment, prev; @@ -668,7 +690,92 @@ static struct ins_ops mov_ops = { .scnprintf = mov__scnprintf, }; -static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused) +#define PPC_22_30(R) (((R) >> 1) & 0x1ff) +#define MINUS_EXT_XO_FORM 234 +#define SUB_EXT_XO_FORM 232 +#define ADD_ZERO_EXT_XO_FORM 202 +#define SUB_ZERO_EXT_XO_FORM 200 + +static int arithmetic__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name) +{ + return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, + ops->raw); +} + +/* + * Sets the fields: multi_regs and "mem_ref". + * "mem_ref" is set for ops->source which is later used to + * fill the objdump->memory_ref-char field. This ops is currently + * used by powerpc and since binary instruction code is used to + * extract opcode, regs and offset, no other parsing is needed here. 
+ *
+ * Don't set multi_regs for these 4 cases, since they have only one source
+ * operand:
+ * - Add to Minus One Extended XO-form ( Ex: addme, addmeo )
+ * - Subtract From Minus One Extended XO-form ( Ex: subfme )
+ * - Add to Zero Extended XO-form ( Ex: addze, addzeo )
+ * - Subtract From Zero Extended XO-form ( Ex: subfze )
+ */
+static int arithmetic__parse(struct arch *arch __maybe_unused, struct ins_operands *ops,
+		struct map_symbol *ms __maybe_unused, struct disasm_line *dl)
+{
+	int opcode = PPC_OP(dl->raw.raw_insn);
+
+	ops->source.mem_ref = false;
+	if (opcode == 31) {
+		/*
+		 * Compare the extended-opcode (XO) field here: the primary
+		 * opcode is already known to be 31, so testing it against
+		 * the XO-form values would always succeed.
+		 */
+		int ext_opcode = PPC_22_30(dl->raw.raw_insn);
+
+		if ((ext_opcode != MINUS_EXT_XO_FORM) && (ext_opcode != SUB_EXT_XO_FORM)
+		    && (ext_opcode != ADD_ZERO_EXT_XO_FORM) && (ext_opcode != SUB_ZERO_EXT_XO_FORM))
+			ops->source.multi_regs = true;
+	}
+
+	ops->target.mem_ref = false;
+	ops->target.multi_regs = false;
+
+	return 0;
+}
+
+static struct ins_ops arithmetic_ops = {
+	.parse	   = arithmetic__parse,
+	.scnprintf = arithmetic__scnprintf,
+};
+
+static int load_store__scnprintf(struct ins *ins, char *bf, size_t size,
+				 struct ins_operands *ops, int max_ins_name)
+{
+	return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name,
+			ops->raw);
+}
+
+/*
+ * Sets the fields: multi_regs and "mem_ref".
+ * "mem_ref" is set for ops->source which is later used to
+ * fill the objdump->memory_ref-char field. This ops is currently
+ * used by powerpc and since binary instruction code is used to
+ * extract opcode, regs and offset, no other parsing is needed here.
+ */
+static int load_store__parse(struct arch *arch __maybe_unused, struct ins_operands *ops,
+			     struct map_symbol *ms __maybe_unused, struct disasm_line *dl)
+{
+	ops->source.mem_ref = true;
+	ops->source.multi_regs = false;
+	/* opcode 31 is of X form */
+	if (PPC_OP(dl->raw.raw_insn) == 31)
+		ops->source.multi_regs = true;
+
+	ops->target.mem_ref = false;
+	ops->target.multi_regs = false;
+
+	return 0;
+}
+
+static struct ins_ops load_store_ops = {
+	.parse	   = load_store__parse,
+	.scnprintf = load_store__scnprintf,
+};
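To make the opcode macros concrete, here is a small standalone worked example of how the primary and extended opcode fields fall out of a raw 32-bit PowerPC instruction (an editor's illustration based on Power ISA encodings, not part of the patch):

#include <assert.h>
#include <stdint.h>

#define PPC_OP(op)	(((op) >> 26) & 0x3F)	/* primary opcode, bits 0-5  */
#define PPC_22_30(R)	(((R) >> 1) & 0x1ff)	/* XO field of XO-form insns */

int main(void)
{
	uint32_t ld_insn  = 0xe8810138;	/* ld  r4,304(r1), DS-form */
	uint32_t add_insn = 0x7c642a14;	/* add r3,r4,r5,   XO-form */

	assert(PPC_OP(ld_insn)     == 58);	/* primary opcode of "ld"      */
	assert(PPC_OP(add_insn)    == 31);	/* fixed-point extended opcodes */
	assert(PPC_22_30(add_insn) == 266);	/* XO field that selects "add"  */
	return 0;
}

Note that 0xe8810138 is the very raw_insn value used as the example in the disasm_line__parse_powerpc() comment below.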
+
+static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused,
+		      struct disasm_line *dl __maybe_unused)
 {
 	char *target, *comment, *s, prev;
 
@@ -721,7 +828,7 @@ static struct ins_ops ret_ops = {
 	.scnprintf = ins__raw_scnprintf,
 };
 
-bool ins__is_nop(const struct ins *ins)
+static bool ins__is_nop(const struct ins *ins)
 {
 	return ins->ops == &nop_ops;
 }
@@ -758,11 +865,23 @@ static void ins__sort(struct arch *arch)
 	qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp);
 }
 
-static struct ins_ops *__ins__find(struct arch *arch, const char *name)
+static struct ins_ops *__ins__find(struct arch *arch, const char *name, struct disasm_line *dl)
 {
 	struct ins *ins;
 	const int nmemb = arch->nr_instructions;
 
+	if (arch__is(arch, "powerpc")) {
+		/*
+		 * For powerpc, identify the instruction ops
+		 * from the opcode using raw_insn.
+		 */
+		struct ins_ops *ops;
+
+		ops = check_ppc_insn(dl);
+		if (ops)
+			return ops;
+	}
+
 	if (!arch->sorted_instructions) {
 		ins__sort(arch);
 		arch->sorted_instructions = true;
@@ -792,9 +911,9 @@ static struct ins_ops *__ins__find(struct arch *arch, const char *name)
 
 	return ins ? ins->ops : NULL;
 }
 
-struct ins_ops *ins__find(struct arch *arch, const char *name)
+struct ins_ops *ins__find(struct arch *arch, const char *name, struct disasm_line *dl)
 {
-	struct ins_ops *ops = __ins__find(arch, name);
+	struct ins_ops *ops = __ins__find(arch, name, dl);
 
 	if (!ops && arch->associate_instruction_ops)
 		ops = arch->associate_instruction_ops(arch, name);
@@ -804,12 +923,12 @@ struct ins_ops *ins__find(struct arch *arch, const char *name)
 static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch,
				  struct map_symbol *ms)
 {
-	dl->ins.ops = ins__find(arch, dl->ins.name);
+	dl->ins.ops = ins__find(arch, dl->ins.name, dl);
 
 	if (!dl->ins.ops)
 		return;
 
-	if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms) < 0)
+	if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms, dl) < 0)
 		dl->ins.ops = NULL;
 }
@@ -841,6 +960,52 @@ out:
 	return -1;
 }
 
+/*
+ * Parses the result captured from symbol__disassemble_*
+ * Example, line read from DSO file in powerpc:
+ * line:	38 01 81 e8
+ * opcode:	fetched from arch specific get_opcode_insn
+ * raw_insn:	e8810138
+ *
+ * raw_insn is used later to extract the reg/offset fields
+ */
+#define PPC_OP(op)	(((op) >> 26) & 0x3F)
+#define RAW_BYTES	11
+
+static int disasm_line__parse_powerpc(struct disasm_line *dl, struct annotate_args *args)
+{
+	char *line = dl->al.line;
+	const char **namep = &dl->ins.name;
+	char **rawp = &dl->ops.raw;
+	char *tmp_raw_insn, *name_raw_insn = skip_spaces(line);
+	char *name = skip_spaces(name_raw_insn + RAW_BYTES);
+	int disasm = 0;
+	int ret = 0;
+
+	if (args->options->disassembler_used)
+		disasm = 1;
+
+	if (name_raw_insn[0] == '\0')
+		return -1;
+
+	if (disasm)
+		ret = disasm_line__parse(name, namep, rawp);
+	else
+		*namep = "";
+
+	tmp_raw_insn = strndup(name_raw_insn, RAW_BYTES);
+	if (tmp_raw_insn == NULL)
+		return -1;
+
+	remove_spaces(tmp_raw_insn);
+
+	sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn);
+	if (disasm)
+		dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn);
+
+	free(tmp_raw_insn);	/* strndup'ed scratch buffer, don't leak it */
+	return ret;
+}
+
 static void annotation_line__init(struct annotation_line *al,
				  struct annotate_args *args,
				  int nr)
@@ -857,6 +1022,7 @@ static void annotation_line__exit(struct annotation_line *al)
 	zfree_srcline(&al->path);
 	zfree(&al->line);
 	zfree(&al->cycles);
+	zfree(&al->br_cntr);
 }
 
 static size_t disasm_line_size(int nr)
@@ -880,10 +1046,8 @@ struct disasm_line *disasm_line__new(struct annotate_args *args)
 {
 	struct disasm_line *dl = NULL;
-	int nr = 1;
-
-	if (evsel__is_group_event(args->evsel))
-		nr = args->evsel->core.nr_members;
+	struct annotation *notes = symbol__annotation(args->ms.sym);
+	int nr = notes->src->nr_events;
 
 	dl = zalloc(disasm_line_size(nr));
 	if (!dl)
@@ -894,7 +1058,10 @@ struct disasm_line *disasm_line__new(struct annotate_args *args)
 		goto out_delete;
 
 	if (args->offset != -1) {
-		if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
+		if (arch__is(args->arch, "powerpc")) {
+			if (disasm_line__parse_powerpc(dl, args) < 0)
+				goto out_free_line;
+		} else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
 			goto out_free_line;
 
 		disasm_line__init_ins(dl, args->arch, &args->ms);
@@ -1055,7 +1222,7 @@ int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, s
 		char *build_id_msg = NULL;
 
 		if (dso__has_build_id(dso)) {
-			build_id__sprintf(dso__bid(dso), bf + 15);
+			build_id__snprintf(dso__bid(dso), bf + 15, sizeof(bf) - 15);
 			build_id_msg = bf;
 		}
 		scnprintf(buf, buflen,
@@ -1083,6 +1250,9 @@ int
symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, s scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.", dso__long_name(dso)); break; + case SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE: + scnprintf(buf, buflen, "Couldn't determine the file %s type.", dso__long_name(dso)); + break; default: scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum); break; @@ -1164,343 +1334,35 @@ fallback: return 0; } -#if defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) -#define PACKAGE "perf" -#include <bfd.h> -#include <dis-asm.h> -#include <bpf/bpf.h> -#include <bpf/btf.h> -#include <bpf/libbpf.h> -#include <linux/btf.h> -#include <tools/dis-asm-compat.h> - -#include "bpf-event.h" -#include "bpf-utils.h" - -static int symbol__disassemble_bpf(struct symbol *sym, - struct annotate_args *args) -{ - struct annotation *notes = symbol__annotation(sym); - struct bpf_prog_linfo *prog_linfo = NULL; - struct bpf_prog_info_node *info_node; - int len = sym->end - sym->start; - disassembler_ftype disassemble; - struct map *map = args->ms.map; - struct perf_bpil *info_linear; - struct disassemble_info info; - struct dso *dso = map__dso(map); - int pc = 0, count, sub_id; - struct btf *btf = NULL; - char tpath[PATH_MAX]; - size_t buf_size; - int nr_skip = 0; - char *buf; - bfd *bfdf; - int ret; - FILE *s; - - if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO) - return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE; - - pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__, - sym->name, sym->start, sym->end - sym->start); - - memset(tpath, 0, sizeof(tpath)); - perf_exe(tpath, sizeof(tpath)); - - bfdf = bfd_openr(tpath, NULL); - if (bfdf == NULL) - abort(); - - if (!bfd_check_format(bfdf, bfd_object)) - abort(); - - s = open_memstream(&buf, &buf_size); - if (!s) { - ret = errno; - goto out; - } - init_disassemble_info_compat(&info, s, - (fprintf_ftype) fprintf, - fprintf_styled); - info.arch = bfd_get_arch(bfdf); - info.mach = bfd_get_mach(bfdf); - - info_node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, - dso->bpf_prog.id); - if (!info_node) { - ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF; - goto out; - } - info_linear = info_node->info_linear; - sub_id = dso->bpf_prog.sub_id; - - info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns); - info.buffer_length = info_linear->info.jited_prog_len; - - if (info_linear->info.nr_line_info) - prog_linfo = bpf_prog_linfo__new(&info_linear->info); - - if (info_linear->info.btf_id) { - struct btf_node *node; - - node = perf_env__find_btf(dso->bpf_prog.env, - info_linear->info.btf_id); - if (node) - btf = btf__new((__u8 *)(node->data), - node->data_size); - } - - disassemble_init_for_target(&info); - -#ifdef DISASM_FOUR_ARGS_SIGNATURE - disassemble = disassembler(info.arch, - bfd_big_endian(bfdf), - info.mach, - bfdf); -#else - disassemble = disassembler(bfdf); -#endif - if (disassemble == NULL) - abort(); - - fflush(s); - do { - const struct bpf_line_info *linfo = NULL; - struct disasm_line *dl; - size_t prev_buf_size; - const char *srcline; - u64 addr; - - addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id]; - count = disassemble(pc, &info); - - if (prog_linfo) - linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo, - addr, sub_id, - nr_skip); - - if (linfo && btf) { - srcline = btf__name_by_offset(btf, linfo->line_off); - nr_skip++; - } else - srcline = NULL; - - fprintf(s, "\n"); - prev_buf_size = buf_size; - fflush(s); 
-
-		if (!annotate_opts.hide_src_code && srcline) {
-			args->offset = -1;
-			args->line = strdup(srcline);
-			args->line_nr = 0;
-			args->fileloc = NULL;
-			args->ms.sym = sym;
-			dl = disasm_line__new(args);
-			if (dl) {
-				annotation_line__add(&dl->al,
-						     &notes->src->source);
-			}
-		}
-
-		args->offset = pc;
-		args->line = buf + prev_buf_size;
-		args->line_nr = 0;
-		args->fileloc = NULL;
-		args->ms.sym = sym;
-		dl = disasm_line__new(args);
-		if (dl)
-			annotation_line__add(&dl->al, &notes->src->source);
-
-		pc += count;
-	} while (count > 0 && pc < len);
-
-	ret = 0;
-out:
-	free(prog_linfo);
-	btf__free(btf);
-	fclose(s);
-	bfd_close(bfdf);
-	return ret;
-}
-#else // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
-static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused,
-				   struct annotate_args *args __maybe_unused)
-{
-	return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF;
-}
-#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
-
-static int
-symbol__disassemble_bpf_image(struct symbol *sym,
-			      struct annotate_args *args)
-{
-	struct annotation *notes = symbol__annotation(sym);
-	struct disasm_line *dl;
-
-	args->offset = -1;
-	args->line = strdup("to be implemented");
-	args->line_nr = 0;
-	args->fileloc = NULL;
-	dl = disasm_line__new(args);
-	if (dl)
-		annotation_line__add(&dl->al, &notes->src->source);
-
-	zfree(&args->line);
-	return 0;
-}
-
-#ifdef HAVE_LIBCAPSTONE_SUPPORT
-#include <capstone/capstone.h>
-
-static int open_capstone_handle(struct annotate_args *args, bool is_64bit,
-				csh *handle)
-{
-	struct annotation_options *opt = args->options;
-	cs_mode mode = is_64bit ? CS_MODE_64 : CS_MODE_32;
-
-	/* TODO: support more architectures */
-	if (!arch__is(args->arch, "x86"))
-		return -1;
-
-	if (cs_open(CS_ARCH_X86, mode, handle) != CS_ERR_OK)
-		return -1;
-
-	if (!opt->disassembler_style ||
-	    !strcmp(opt->disassembler_style, "att"))
-		cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
-
-	/*
-	 * Resolving address operands to symbols is implemented
-	 * on x86 by investigating instruction details.
-	 */
-	cs_option(*handle, CS_OPT_DETAIL, CS_OPT_ON);
-
-	return 0;
-}
-
-struct find_file_offset_data {
-	u64 ip;
-	u64 offset;
-};
-
-/* This will be called for each PHDR in an ELF binary */
-static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg)
-{
-	struct find_file_offset_data *data = arg;
-
-	if (start <= data->ip && data->ip < start + len) {
-		data->offset = pgoff + data->ip - start;
-		return 1;
-	}
-	return 0;
-}
-
-static void print_capstone_detail(cs_insn *insn, char *buf, size_t len,
-				  struct annotate_args *args, u64 addr)
-{
-	int i;
-	struct map *map = args->ms.map;
-	struct symbol *sym;
-
-	/* TODO: support more architectures */
-	if (!arch__is(args->arch, "x86"))
-		return;
-
-	if (insn->detail == NULL)
-		return;
-
-	for (i = 0; i < insn->detail->x86.op_count; i++) {
-		cs_x86_op *op = &insn->detail->x86.operands[i];
-		u64 orig_addr;
-
-		if (op->type != X86_OP_MEM)
-			continue;
-
-		/* only print RIP-based global symbols for now */
-		if (op->mem.base != X86_REG_RIP)
-			continue;
-
-		/* get the target address */
-		orig_addr = addr + insn->size + op->mem.disp;
-		addr = map__objdump_2mem(map, orig_addr);
-
-		if (dso__kernel(map__dso(map))) {
-			/*
-			 * The kernel maps can be splitted into sections,
-			 * let's find the map first and the search the symbol.
-			 */
-			map = maps__find(map__kmaps(map), addr);
-			if (map == NULL)
-				continue;
-		}
-
-		/* convert it to map-relative address for search */
-		addr = map__map_ip(map, addr);
-
-		sym = map__find_symbol(map, addr);
-		if (sym == NULL)
-			continue;
-
-		if (addr == sym->start) {
-			scnprintf(buf, len, "\t# %"PRIx64" <%s>",
-				  orig_addr, sym->name);
-		} else {
-			scnprintf(buf, len, "\t# %"PRIx64" <%s+%#"PRIx64">",
-				  orig_addr, sym->name, addr - sym->start);
-		}
-		break;
-	}
-}
-
-static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
+static int symbol__disassemble_raw(char *filename, struct symbol *sym,
				   struct annotate_args *args)
 {
 	struct annotation *notes = symbol__annotation(sym);
 	struct map *map = args->ms.map;
 	struct dso *dso = map__dso(map);
-	struct nscookie nsc;
 	u64 start = map__rip_2objdump(map, sym->start);
 	u64 end = map__rip_2objdump(map, sym->end);
 	u64 len = end - start;
 	u64 offset;
-	int i, fd, count;
-	bool is_64bit = false;
-	bool needs_cs_close = false;
+	int i, count;
 	u8 *buf = NULL;
-	struct find_file_offset_data data = {
-		.ip = start,
-	};
-	csh handle;
-	cs_insn *insn;
 	char disasm_buf[512];
 	struct disasm_line *dl;
+	u32 *line;
 
+	/* Return if objdump is specified explicitly */
 	if (args->options->objdump_path)
 		return -1;
 
-	nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
-	fd = open(filename, O_RDONLY);
-	nsinfo__mountns_exit(&nsc);
-	if (fd < 0)
-		return -1;
-
-	if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data,
-			    &is_64bit) == 0)
-		goto err;
-
-	if (open_capstone_handle(args, is_64bit, &handle) < 0)
-		goto err;
-
-	needs_cs_close = true;
+	pr_debug("Reading raw instruction from: %s using dso__data_read_offset\n", filename);
 
 	buf = malloc(len);
 	if (buf == NULL)
 		goto err;
 
-	count = pread(fd, buf, len, data.offset);
-	close(fd);
-	fd = -1;
+	count = dso__data_read_offset(dso, NULL, sym->start, buf, len);
+
+	line = (u32 *)buf;
 
 	if ((u64)count != len)
 		goto err;
@@ -1521,30 +1383,21 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
 
 	annotation_line__add(&dl->al, &notes->src->source);
 
-	count = cs_disasm(handle, buf, len, start, len, &insn);
-	for (i = 0, offset = 0; i < count; i++) {
-		int printed;
-
-		printed = scnprintf(disasm_buf, sizeof(disasm_buf),
-				    " %-7s %s",
-				    insn[i].mnemonic, insn[i].op_str);
-		print_capstone_detail(&insn[i], disasm_buf + printed,
-				      sizeof(disasm_buf) - printed, args,
-				      start + offset);
+	/* Each raw instruction is 4 bytes */
+	count = len/4;
+	for (i = 0, offset = 0; i < count; i++) {
 		args->offset = offset;
-		args->line = disasm_buf;
-
+		sprintf(args->line, "%x", line[i]);
 		dl = disasm_line__new(args);
 		if (dl == NULL)
-			goto err;
+			break;
 
 		annotation_line__add(&dl->al, &notes->src->source);
-
-		offset += insn[i].size;
+		offset += 4;
 	}
 
-	/* It failed in the middle: probably due to unknown instructions */
+	/* It failed in the middle */
 	if (offset != len) {
 		struct list_head *list = &notes->src->source;
 
@@ -1559,37 +1412,20 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym,
 
 out:
-	if (needs_cs_close)
-		cs_close(&handle);
 	free(buf);
 	return count < 0 ? count : 0;
 
 err:
-	if (fd >= 0)
-		close(fd);
-	if (needs_cs_close) {
-		struct disasm_line *tmp;
-
-		/*
-		 * It probably failed in the middle of the above loop.
-		 * Release any resources it might add.
-		 */
-		list_for_each_entry_safe(dl, tmp, &notes->src->source, al.node) {
-			list_del(&dl->al.node);
-			free(dl);
-		}
-	}
 	count = -1;
 	goto out;
 }
-#endif
 
 /*
  * Possibly create a new version of line with tabs expanded. Returns the
  * existing or new line, storage is updated if a new line is allocated. If
  * allocation fails then NULL is returned.
  */
-static char *expand_tabs(char *line, char **storage, size_t *storage_len)
+char *expand_tabs(char *line, char **storage, size_t *storage_len)
 {
 	size_t i, src, dst, len, new_storage_len, num_tabs;
 	char *new_line;
@@ -1644,17 +1480,31 @@ static char *expand_tabs(char *line, char **storage, size_t *storage_len)
 	return new_line;
 }
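With expand_tabs() now exported, its ownership contract deserves a note: per the comment above, the returned pointer is either `line` itself (no tabs found) or the buffer newly stored in `*storage`, so only `*storage` is ever owned by the caller. A hedged caller sketch (use_line is a hypothetical consumer, not a perf API):

static int emit_expanded(char *line)
{
	char *storage = NULL;
	size_t storage_len = 0;
	char *expanded = expand_tabs(line, &storage, &storage_len);

	if (expanded == NULL)
		return -ENOMEM;	/* allocation failed, *storage untouched */
	use_line(expanded);	/* aliases either `line` or `storage` */
	free(storage);		/* NULL, hence a no-op, when no tabs were seen */
	return 0;
}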
-int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
+static int symbol__disassemble_bpf_image(struct symbol *sym, struct annotate_args *args)
+{
+	struct annotation *notes = symbol__annotation(sym);
+	struct disasm_line *dl;
+
+	args->offset = -1;
+	args->line = strdup("to be implemented");
+	args->line_nr = 0;
+	args->fileloc = NULL;
+	dl = disasm_line__new(args);
+	if (dl)
+		annotation_line__add(&dl->al, &notes->src->source);
+
+	zfree(&args->line);
+	return 0;
+}
+
+static int symbol__disassemble_objdump(const char *filename, struct symbol *sym,
+				       struct annotate_args *args)
 {
 	struct annotation_options *opts = &annotate_opts;
 	struct map *map = args->ms.map;
 	struct dso *dso = map__dso(map);
 	char *command;
 	FILE *file;
-	char symfs_filename[PATH_MAX];
-	struct kcore_extract kce;
-	bool delete_extract = false;
-	bool decomp = false;
 	int lineno = 0;
 	char *fileloc = NULL;
 	int nline;
@@ -1669,50 +1519,13 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 		NULL,
 	};
 	struct child_process objdump_process;
-	int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
+	int err;
 
-	if (err)
-		return err;
+	if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO)
+		return symbol__disassemble_bpf_libbfd(sym, args);
 
-	pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
-		 symfs_filename, sym->name, map__unmap_ip(map, sym->start),
-		 map__unmap_ip(map, sym->end));
-
-	pr_debug("annotating [%p] %30s : [%p] %30s\n",
-		 dso, dso__long_name(dso), sym, sym->name);
-
-	if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) {
-		return symbol__disassemble_bpf(sym, args);
-	} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) {
+	if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE)
 		return symbol__disassemble_bpf_image(sym, args);
-	} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
-		return -1;
-	} else if (dso__is_kcore(dso)) {
-		kce.kcore_filename = symfs_filename;
-		kce.addr = map__rip_2objdump(map, sym->start);
-		kce.offs = sym->start;
-		kce.len = sym->end - sym->start;
-		if (!kcore_extract__create(&kce)) {
-			delete_extract = true;
-			strlcpy(symfs_filename, kce.extract_filename,
-				sizeof(symfs_filename));
-		}
-	} else if (dso__needs_decompress(dso)) {
-		char tmp[KMOD_DECOMP_LEN];
-
-		if (dso__decompress_kmodule_path(dso, symfs_filename,
-						 tmp, sizeof(tmp)) < 0)
-			return -1;
-
-		decomp = true;
-		strcpy(symfs_filename, tmp);
-	}
-
-#ifdef HAVE_LIBCAPSTONE_SUPPORT
-	err = symbol__disassemble_capstone(symfs_filename, sym, args);
-	if (err == 0)
-		goto out_remove_tmp;
-#endif
 
 	err = asprintf(&command,
		 "%s %s%s --start-address=0x%016" PRIx64
@@ -1735,13 +1548,13 @@
 
 	if (err < 0) {
 		pr_err("Failure allocating memory for the command to run\n");
-		goto out_remove_tmp;
+		return err;
 	}
 
 	pr_debug("Executing: %s\n", command);
 
 	objdump_argv[2] = command;
-	objdump_argv[4] = symfs_filename;
+	objdump_argv[4] = filename;
 
 	/* Create a pipe to read from for stdout */
 	memset(&objdump_process, 0, sizeof(objdump_process));
@@ -1779,8 +1592,8 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 			break;
 
 		/* Skip lines containing "filename:" */
-		match = strstr(line, symfs_filename);
-		if (match && match[strlen(symfs_filename)] == ':')
+		match = strstr(line, filename);
+		if (match && match[strlen(filename)] == ':')
 			continue;
 
 		expanded_line = strim(line);
@@ -1825,7 +1638,104 @@ out_close_stdout:
 
 out_free_command:
 	free(command);
+	return err;
+}
+
+int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
+{
+	struct annotation_options *options = args->options;
+	struct map *map = args->ms.map;
+	struct dso *dso = map__dso(map);
+	char symfs_filename[PATH_MAX];
+	bool delete_extract = false;
+	struct kcore_extract kce;
+	bool decomp = false;
+	int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
+
+	if (err)
+		return err;
+
+	pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
+		 symfs_filename, sym->name, map__unmap_ip(map, sym->start),
+		 map__unmap_ip(map, sym->end));
+	pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name);
+
+	if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
+		return SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE;
+	} else if (dso__is_kcore(dso)) {
+		kce.addr = map__rip_2objdump(map, sym->start);
+		kce.kcore_filename = symfs_filename;
+		kce.len = sym->end - sym->start;
+		kce.offs = sym->start;
+
+		if (!kcore_extract__create(&kce)) {
+			delete_extract = true;
+			strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename));
+		}
+	} else if (dso__needs_decompress(dso)) {
+		char tmp[KMOD_DECOMP_LEN];
+
+		if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0)
+			return -1;
+
+		decomp = true;
+		strcpy(symfs_filename, tmp);
+	}
+
+	/*
+	 * For powerpc data type profiling, use dso__data_read_offset() to
+	 * read the raw instructions directly and interpret the binary code
+	 * to understand the instructions and register fields. For the sort
+	 * keys "type" and "typeoff", disassembling to mnemonic notation is
+	 * not required on powerpc.
+	 */
+	if (arch__is(args->arch, "powerpc")) {
+		extern const char *sort_order;
+
+		if (sort_order && !strstr(sort_order, "sym")) {
+			err = symbol__disassemble_raw(symfs_filename, sym, args);
+			if (err == 0)
+				goto out_remove_tmp;
+
+			err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args);
+			if (err == 0)
+				goto out_remove_tmp;
+		}
+	}
+
+	/* FIXME: LLVM and CAPSTONE should support source code */
+	if (options->annotate_src && !options->hide_src_code) {
+		err = symbol__disassemble_objdump(symfs_filename, sym, args);
+		if (err == 0)
+			goto out_remove_tmp;
+	}
+
+	err = -1;
+	for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) {
+		enum perf_disassembler dis = options->disassemblers[i];
+
+		switch (dis) {
+		case PERF_DISASM_LLVM:
+			args->options->disassembler_used = PERF_DISASM_LLVM;
+			err = symbol__disassemble_llvm(symfs_filename, sym, args);
+			break;
+		case PERF_DISASM_CAPSTONE:
+			args->options->disassembler_used = PERF_DISASM_CAPSTONE;
+			err = symbol__disassemble_capstone(symfs_filename, sym, args);
+			break;
+		case PERF_DISASM_OBJDUMP:
+			args->options->disassembler_used = PERF_DISASM_OBJDUMP;
+			err = symbol__disassemble_objdump(symfs_filename, sym, args);
+			break;
+		case PERF_DISASM_UNKNOWN: /* End of disassemblers. */
+		default:
+			args->options->disassembler_used = PERF_DISASM_UNKNOWN;
+			goto out_remove_tmp;
+		}
+		if (err == 0)
+			pr_debug("Disassembled with %s\n", perf_disassembler__strs[dis]);
+	}
 
 out_remove_tmp:
 	if (decomp)
 		unlink(symfs_filename);
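The fallback order tried by the loop above comes from options->disassemblers, which is user-configurable; assuming the annotate.disassemblers config key that accompanies this rework (see tools/perf/Documentation/perf-config.txt in the same tree for the authoritative name and values), the order can be pinned like so:

	$ perf config annotate.disassemblers=llvm,capstone,objdump

With that setting, symbol__disassemble() tries LLVM first and only falls back to capstone and then objdump when an earlier disassembler reports an error.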
diff --git a/tools/perf/util/disasm.h b/tools/perf/util/disasm.h
index 3d381a043520..d2cb555e4a3b 100644
--- a/tools/perf/util/disasm.h
+++ b/tools/perf/util/disasm.h
@@ -4,11 +4,18 @@
 
 #include "map_symbol.h"
 
+#ifdef HAVE_LIBDW_SUPPORT
+#include "dwarf-aux.h"
+#endif
+
 struct annotation_options;
 struct disasm_line;
 struct ins;
 struct evsel;
 struct symbol;
+struct data_loc_info;
+struct type_state;
+struct disasm_line;
 
 struct arch {
 	const char *name;
@@ -32,6 +39,15 @@ struct arch {
 		char memory_ref_char;
 		char imm_char;
 	} objdump;
+#ifdef HAVE_LIBDW_SUPPORT
+	void (*update_insn_state)(struct type_state *state,
+				  struct data_loc_info *dloc, Dwarf_Die *cu_die,
+				  struct disasm_line *dl);
+#endif
+	/** @e_machine: ELF machine associated with arch. */
+	unsigned int e_machine;
+	/** @e_flags: Optional ELF flags associated with arch. */
+	unsigned int e_flags;
 };
 
 struct ins {
@@ -50,6 +66,7 @@ struct ins_operands {
 			bool	offset_avail;
 			bool	outside;
 			bool	multi_regs;
+			bool	mem_ref;
 		} target;
 		union {
 			struct {
@@ -57,6 +74,7 @@ struct ins_operands {
 				char	*name;
 				u64	addr;
 				bool	multi_regs;
+				bool	mem_ref;
 			} source;
 			struct {
 				struct ins	    ins;
@@ -71,7 +89,8 @@ struct ins_operands {
 
 struct ins_ops {
 	void (*free)(struct ins_operands *ops);
-	int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms);
+	int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
+		     struct disasm_line *dl);
 	int (*scnprintf)(struct ins *ins, char *bf, size_t size,
			 struct ins_operands *ops, int max_ins_name);
 };
@@ -79,7 +98,6 @@ struct ins_ops {
 struct annotate_args {
 	struct arch		  *arch;
 	struct map_symbol	  ms;
-	struct evsel		  *evsel;
 	struct annotation_options *options;
 	s64			  offset;
 	char			  *line;
@@ -90,14 +108,11 @@ struct annotate_args {
 struct arch *arch__find(const char *name);
 bool arch__is(struct arch *arch, const char *name);
 
-struct ins_ops *ins__find(struct arch *arch, const char *name);
-int ins__scnprintf(struct ins *ins, char *bf, size_t size,
-		   struct ins_operands *ops, int max_ins_name);
+struct ins_ops *ins__find(struct arch *arch, const char *name, struct disasm_line *dl);
 
 bool ins__is_call(const struct ins *ins);
 bool ins__is_jump(const struct ins *ins);
 bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
-bool ins__is_nop(const struct ins *ins);
 bool ins__is_ret(const struct ins *ins);
 bool ins__is_lock(const struct ins *ins);
 
@@ -109,4 +124,6 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size,
 
 int symbol__disassemble(struct symbol *sym, struct annotate_args *args);
 
+char *expand_tabs(char *line, char **storage, size_t *storage_len);
+
 #endif /* __PERF_UTIL_DISASM_H */
diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c
index 7d180bdaedbc..c0afcbd954f8 100644
--- a/tools/perf/util/dlfilter.c
+++ b/tools/perf/util/dlfilter.c
@@ -234,7 +234,8 @@ static const __u8 *dlfilter__insn(void *ctx, __u32 *len)
 			struct machine *machine = maps__machine(thread__maps(al->thread));
 
 			if (machine)
-				script_fetch_insn(d->sample, al->thread, machine);
+				script_fetch_insn(d->sample, al->thread, machine,
+						  /*native_arch=*/true);
 		}
 	}
 
@@ -512,6 +513,7 @@ int dlfilter__do_filter_event(struct dlfilter *d,
 	d->d_addr_al = &d_addr_al;
 
 	d_sample.size = sizeof(d_sample);
+	d_sample.p_stage_cyc =
sample->weight3; d_ip_al.size = 0; /* To indicate d_ip_al is not initialized */ d_addr_al.size = 0; /* To indicate d_addr_al is not initialized */ @@ -525,7 +527,6 @@ int dlfilter__do_filter_event(struct dlfilter *d, ASSIGN(period); ASSIGN(weight); ASSIGN(ins_lat); - ASSIGN(p_stage_cyc); ASSIGN(transaction); ASSIGN(insn_cnt); ASSIGN(cyc_cnt); diff --git a/tools/perf/util/drm_pmu.c b/tools/perf/util/drm_pmu.c new file mode 100644 index 000000000000..b48a375e4584 --- /dev/null +++ b/tools/perf/util/drm_pmu.c @@ -0,0 +1,689 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "drm_pmu.h" +#include "counts.h" +#include "cpumap.h" +#include "debug.h" +#include "evsel.h" +#include "pmu.h" +#include <perf/threadmap.h> +#include <api/fs/fs.h> +#include <api/io.h> +#include <ctype.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> +#include <linux/unistd.h> +#include <linux/kcmp.h> +#include <linux/zalloc.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/sysmacros.h> +#include <sys/types.h> + +enum drm_pmu_unit { + DRM_PMU_UNIT_BYTES, + DRM_PMU_UNIT_CAPACITY, + DRM_PMU_UNIT_CYCLES, + DRM_PMU_UNIT_HZ, + DRM_PMU_UNIT_NS, + + DRM_PMU_UNIT_MAX, +}; + +struct drm_pmu_event { + const char *name; + const char *desc; + enum drm_pmu_unit unit; +}; + +struct drm_pmu { + struct perf_pmu pmu; + struct drm_pmu_event *events; + int num_events; +}; + +static const char * const drm_pmu_unit_strs[DRM_PMU_UNIT_MAX] = { + "bytes", + "capacity", + "cycles", + "hz", + "ns", +}; + +static const char * const drm_pmu_scale_unit_strs[DRM_PMU_UNIT_MAX] = { + "1bytes", + "1capacity", + "1cycles", + "1hz", + "1ns", +}; + +bool perf_pmu__is_drm(const struct perf_pmu *pmu) +{ + return pmu && pmu->type >= PERF_PMU_TYPE_DRM_START && + pmu->type <= PERF_PMU_TYPE_DRM_END; +} + +bool evsel__is_drm(const struct evsel *evsel) +{ + return perf_pmu__is_drm(evsel->pmu); +} + +static struct drm_pmu *add_drm_pmu(struct list_head *pmus, char *line, size_t line_len) +{ + struct drm_pmu *drm; + struct perf_pmu *pmu; + const char *name; + __u32 max_drm_pmu_type = 0, type; + int i = 12; + + if (line[line_len - 1] == '\n') + line[line_len - 1] = '\0'; + while (isspace(line[i])) + i++; + + line[--i] = '_'; + line[--i] = 'm'; + line[--i] = 'r'; + line[--i] = 'd'; + name = &line[i]; + + list_for_each_entry(pmu, pmus, list) { + if (!perf_pmu__is_drm(pmu)) + continue; + if (pmu->type > max_drm_pmu_type) + max_drm_pmu_type = pmu->type; + if (!strcmp(pmu->name, name)) { + /* PMU already exists. 
 */
+			return NULL;
+		}
+	}
+
+	if (max_drm_pmu_type != 0)
+		type = max_drm_pmu_type + 1;
+	else
+		type = PERF_PMU_TYPE_DRM_START;
+
+	if (type > PERF_PMU_TYPE_DRM_END) {
+		/* drm isn't allocated yet, so there is nothing to free here. */
+		pr_err("Unable to encode DRM PMU type for %s\n", name);
+		return NULL;
+	}
+
+	drm = zalloc(sizeof(*drm));
+	if (!drm)
+		return NULL;
+
+	if (perf_pmu__init(&drm->pmu, type, name) != 0) {
+		perf_pmu__delete(&drm->pmu);
+		return NULL;
+	}
+
+	drm->pmu.cpus = perf_cpu_map__new_int(0);
+	if (!drm->pmu.cpus) {
+		perf_pmu__delete(&drm->pmu);
+		return NULL;
+	}
+	return drm;
+}
+
+
+static bool starts_with(const char *str, const char *prefix)
+{
+	return !strncmp(prefix, str, strlen(prefix));
+}
+
+static int add_event(struct drm_pmu_event **events, int *num_events,
+		     const char *line, enum drm_pmu_unit unit, const char *desc)
+{
+	const char *colon = strchr(line, ':');
+	struct drm_pmu_event *tmp;
+
+	if (!colon)
+		return -EINVAL;
+
+	tmp = reallocarray(*events, *num_events + 1, sizeof(struct drm_pmu_event));
+	if (!tmp)
+		return -ENOMEM;
+	/* Publish the reallocated array immediately, the old one may be freed. */
+	*events = tmp;
+	tmp[*num_events].unit = unit;
+	tmp[*num_events].desc = desc;
+	tmp[*num_events].name = strndup(line, colon - line);
+	if (!tmp[*num_events].name)
+		return -ENOMEM;
+	(*num_events)++;
+	return 0;
+}
+
+static int read_drm_pmus_cb(void *args, int fdinfo_dir_fd, const char *fd_name)
+{
+	struct list_head *pmus = args;
+	char buf[640];
+	struct io io;
+	char *line = NULL;
+	size_t line_len;
+	struct drm_pmu *drm = NULL;
+	struct drm_pmu_event *events = NULL;
+	int num_events = 0;
+
+	io__init(&io, openat(fdinfo_dir_fd, fd_name, O_RDONLY), buf, sizeof(buf));
+	if (io.fd == -1) {
+		/* Failed to open file, ignore. */
+		return 0;
+	}
+
+	while (io__getline(&io, &line, &line_len) > 0) {
+		if (starts_with(line, "drm-driver:")) {
+			drm = add_drm_pmu(pmus, line, line_len);
+			if (!drm)
+				break;
+			continue;
+		}
+		/*
+		 * Note the string matching below is alphabetical, with more
+		 * specific matches appearing before less specific.
+		 */
+		if (starts_with(line, "drm-active-")) {
+			add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES,
+				  "Total memory active in one or more engines");
+			continue;
+		}
+		if (starts_with(line, "drm-cycles-")) {
+			add_event(&events, &num_events, line, DRM_PMU_UNIT_CYCLES,
+				  "Busy cycles");
+			continue;
+		}
+		if (starts_with(line, "drm-engine-capacity-")) {
+			add_event(&events, &num_events, line, DRM_PMU_UNIT_CAPACITY,
+				  "Engine capacity");
+			continue;
+		}
+		if (starts_with(line, "drm-engine-")) {
+			add_event(&events, &num_events, line, DRM_PMU_UNIT_NS,
+				  "Utilization in ns");
+			continue;
+		}
+		if (starts_with(line, "drm-maxfreq-")) {
+			add_event(&events, &num_events, line, DRM_PMU_UNIT_HZ,
+				  "Maximum frequency");
+			continue;
+		}
+		if (starts_with(line, "drm-purgeable-")) {
+			add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES,
+				  "Size of resident and purgeable memory buffers");
+			continue;
+		}
+		if (starts_with(line, "drm-resident-")) {
+			add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES,
+				  "Size of resident memory buffers");
+			continue;
+		}
+		if (starts_with(line, "drm-shared-")) {
+			add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES,
+				  "Size of shared memory buffers");
+			continue;
+		}
+		if (starts_with(line, "drm-total-cycles-")) {
+			/* Cycles, not bytes: keep the unit in step with the description. */
+			add_event(&events, &num_events, line, DRM_PMU_UNIT_CYCLES,
+				  "Total busy cycles");
+			continue;
+		}
+		if (starts_with(line, "drm-total-")) {
+			add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES,
+				  "Size of shared and private memory");
+			continue;
+		}
+		if (verbose > 1 && starts_with(line, "drm-") &&
+		    !starts_with(line, "drm-client-id:") &&
+		    !starts_with(line, "drm-pdev:"))
+			pr_debug("Unhandled DRM PMU fdinfo line match '%s'\n", line);
+	}
+	if (drm) {
+		drm->events = events;
+		drm->num_events = num_events;
+		list_add_tail(&drm->pmu.list, pmus);
+	}
+	free(line);
+	if (io.fd != -1)
+		close(io.fd);
+	return 0;
+}
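For orientation, a DRM client's fdinfo file that read_drm_pmus_cb() parses looks roughly like this (illustrative values only; Documentation/gpu/drm-usage-stats.rst is the authoritative format):

	pos:	0
	flags:	02100002
	mnt_id:	24
	drm-driver:	i915
	drm-client-id:	42
	drm-engine-render:	25662044495 ns
	drm-total-system0:	12898304 KiB
	drm-maxfreq-render:	1300 MHz

Each `drm-<name>:` line becomes one event of a synthesized PMU named after the driver (here, hypothetically, `drm_i915`), and the unit suffix decides the scaling later applied by read_count_and_apply_unit().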
+
+void drm_pmu__exit(struct perf_pmu *pmu)
+{
+	struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu);
+
+	free(drm->events);
+}
+
+bool drm_pmu__have_event(const struct perf_pmu *pmu, const char *name)
+{
+	struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu);
+
+	if (!starts_with(name, "drm-"))
+		return false;
+
+	for (int i = 0; i < drm->num_events; i++) {
+		if (!strcasecmp(drm->events[i].name, name))
+			return true;
+	}
+	return false;
+}
+
+int drm_pmu__for_each_event(const struct perf_pmu *pmu, void *state, pmu_event_callback cb)
+{
+	struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu);
+
+	for (int i = 0; i < drm->num_events; i++) {
+		char encoding_buf[128];
+		struct pmu_event_info info = {
+			.pmu = pmu,
+			.name = drm->events[i].name,
+			.alias = NULL,
+			.scale_unit = drm_pmu_scale_unit_strs[drm->events[i].unit],
+			.desc = drm->events[i].desc,
+			.long_desc = NULL,
+			.encoding_desc = encoding_buf,
+			.topic = "drm",
+			.pmu_name = pmu->name,
+			.event_type_desc = "DRM event",
+		};
+		int ret;
+
+		snprintf(encoding_buf, sizeof(encoding_buf), "%s/config=0x%x/", pmu->name, i);
+
+		ret = cb(state, &info);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+size_t drm_pmu__num_events(const struct perf_pmu *pmu)
+{
+	const struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu);
+
+	return drm->num_events;
+}
+
+static int drm_pmu__index_for_event(const struct drm_pmu *drm, const char *name)
+{
+	for (int i = 0; i < drm->num_events; i++) {
+		if (!strcmp(drm->events[i].name, name))
+			return i;
+	}
+	return -1;
+}
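An event's attr.config is thus simply its index in the per-PMU events array. Continuing the hypothetical drm_i915 PMU from the sample above, the third event parsed from fdinfo would be advertised by drm_pmu__for_each_event() with an encoding like

	drm_i915/config=0x2/

and drm_pmu__config_term() below performs the reverse, name-to-index, lookup when an event string is parsed.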
+
+static int drm_pmu__config_term(const struct drm_pmu *drm,
+				struct perf_event_attr *attr,
+				struct parse_events_term *term,
+				struct parse_events_error *err)
+{
+	if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) {
+		int i = drm_pmu__index_for_event(drm, term->config);
+
+		if (i >= 0) {
+			attr->config = i;
+			return 0;
+		}
+	}
+	if (err) {
+		char *err_str;
+
+		parse_events_error__handle(err, term->err_val,
+					asprintf(&err_str,
+						"unexpected drm event term (%s) %s",
+						parse_events__term_type_str(term->type_term),
+						term->config) < 0
+					? strdup("unexpected drm event term")
+					: err_str,
+					NULL);
+	}
+	return -EINVAL;
+}
+
+int drm_pmu__config_terms(const struct perf_pmu *pmu,
+			  struct perf_event_attr *attr,
+			  struct parse_events_terms *terms,
+			  struct parse_events_error *err)
+{
+	struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu);
+	struct parse_events_term *term;
+
+	list_for_each_entry(term, &terms->terms, list) {
+		if (drm_pmu__config_term(drm, attr, term, err))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+int drm_pmu__check_alias(const struct perf_pmu *pmu, struct parse_events_terms *terms,
+			 struct perf_pmu_info *info, struct parse_events_error *err)
+{
+	struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu);
+	struct parse_events_term *term =
+		list_first_entry(&terms->terms, struct parse_events_term, list);
+
+	if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) {
+		int i = drm_pmu__index_for_event(drm, term->config);
+
+		if (i >= 0) {
+			info->unit = drm_pmu_unit_strs[drm->events[i].unit];
+			info->scale = 1;
+			return 0;
+		}
+	}
+	if (err) {
+		char *err_str;
+
+		parse_events_error__handle(err, term->err_val,
+					asprintf(&err_str,
+						"unexpected drm event term (%s) %s",
+						parse_events__term_type_str(term->type_term),
+						term->config) < 0
+					? strdup("unexpected drm event term")
+					: err_str,
+					NULL);
+	}
+	return -EINVAL;
+}
+
+struct minor_info {
+	unsigned int *minors;
+	int minors_num, minors_len;
+};
+
+static int for_each_drm_fdinfo_in_dir(int (*cb)(void *args, int fdinfo_dir_fd, const char *fd_name),
+				      void *args, int proc_dir, const char *pid_name,
+				      struct minor_info *minors)
+{
+	char buf[256];
+	DIR *fd_dir;
+	struct dirent *fd_entry;
+	int fd_dir_fd, fdinfo_dir_fd = -1;
+
+
+	scnprintf(buf, sizeof(buf), "%s/fd", pid_name);
+	fd_dir_fd = openat(proc_dir, buf, O_DIRECTORY);
+	if (fd_dir_fd == -1)
+		return 0; /* Presumably lost race to open. */
+	fd_dir = fdopendir(fd_dir_fd);
+	if (!fd_dir) {
+		close(fd_dir_fd);
+		return -ENOMEM;
+	}
+	while ((fd_entry = readdir(fd_dir)) != NULL) {
+		struct stat stat;
+		unsigned int minor;
+		bool is_dup = false;
+		int ret;
+
+		if (fd_entry->d_type != DT_LNK)
+			continue;
+
+		if (fstatat(fd_dir_fd, fd_entry->d_name, &stat, 0) != 0)
+			continue;
+
+		/* DRM character devices have major number 226. */
+		if ((stat.st_mode & S_IFMT) != S_IFCHR || major(stat.st_rdev) != 226)
+			continue;
+
+		minor = minor(stat.st_rdev);
+		for (int i = 0; i < minors->minors_num; i++) {
+			if (minor == minors->minors[i]) {
+				is_dup = true;
+				break;
+			}
+		}
+		if (is_dup)
+			continue;
+
+		if (minors->minors_num == minors->minors_len) {
+			unsigned int *tmp = reallocarray(minors->minors, minors->minors_len + 4,
+							 sizeof(unsigned int));
+
+			if (tmp) {
+				minors->minors = tmp;
+				minors->minors_len += 4;
+			} else {
+				/* Couldn't grow the array, skip rather than write past it. */
+				continue;
+			}
+		}
+		minors->minors[minors->minors_num++] = minor;
+		if (fdinfo_dir_fd == -1) {
+			/* Open fdinfo dir if we have a DRM fd. */
+			scnprintf(buf, sizeof(buf), "%s/fdinfo", pid_name);
+			fdinfo_dir_fd = openat(proc_dir, buf, O_DIRECTORY);
+			if (fdinfo_dir_fd == -1)
+				continue;
+		}
+		ret = cb(args, fdinfo_dir_fd, fd_entry->d_name);
+		if (ret)
+			goto close_fdinfo;
+	}
+
+close_fdinfo:
+	if (fdinfo_dir_fd != -1)
+		close(fdinfo_dir_fd);
+	closedir(fd_dir);
+	return 0;
+}
+
+static int for_each_drm_fdinfo(bool skip_all_duplicates,
+			       int (*cb)(void *args, int fdinfo_dir_fd, const char *fd_name),
+			       void *args)
+{
+	DIR *proc_dir;
+	struct dirent *proc_entry;
+	int ret = 0;	/* Stays 0 when no /proc entry qualifies. */
+	/*
+	 * minors maintains an array of DRM minor device numbers seen for a pid,
+	 * or for all pids if skip_all_duplicates is true, so that duplicates
+	 * are ignored.
+	 */
+	struct minor_info minors = {
+		.minors = NULL,
+		.minors_num = 0,
+		.minors_len = 0,
+	};
+
+	proc_dir = opendir(procfs__mountpoint());
+	if (!proc_dir)
+		return 0;
+
+	/* Walk through the /proc directory. */
+	while ((proc_entry = readdir(proc_dir)) != NULL) {
+		if (proc_entry->d_type != DT_DIR ||
+		    !isdigit(proc_entry->d_name[0]))
+			continue;
+		if (!skip_all_duplicates) {
+			/* Reset the seen minor numbers for each pid. */
+			minors.minors_num = 0;
+		}
+		ret = for_each_drm_fdinfo_in_dir(cb, args,
+						 dirfd(proc_dir), proc_entry->d_name,
+						 &minors);
+		if (ret)
+			break;
+	}
+	free(minors.minors);
+	closedir(proc_dir);
+	return ret;
+}
+
+int perf_pmus__read_drm_pmus(struct list_head *pmus)
+{
+	return for_each_drm_fdinfo(/*skip_all_duplicates=*/true, read_drm_pmus_cb, pmus);
+}
+
+int evsel__drm_pmu_open(struct evsel *evsel,
+			struct perf_thread_map *threads,
+			int start_cpu_map_idx, int end_cpu_map_idx)
+{
+	(void)evsel;
+	(void)threads;
+	(void)start_cpu_map_idx;
+	(void)end_cpu_map_idx;
+	return 0;
+}
+
+static uint64_t read_count_and_apply_unit(const char *count_and_unit, enum drm_pmu_unit unit)
+{
+	char *unit_ptr = NULL;
+	uint64_t count = strtoul(count_and_unit, &unit_ptr, 10);
+
+	if (!unit_ptr)
+		return 0;
+
+	while (isblank(*unit_ptr))
+		unit_ptr++;
+
+	switch (unit) {
+	case DRM_PMU_UNIT_BYTES:
+		if (*unit_ptr == '\0')
+			assert(count == 0); /* Generally undocumented, happens for 0. */
+		else if (!strcmp(unit_ptr, "KiB"))
+			count *= 1024;
+		else if (!strcmp(unit_ptr, "MiB"))
+			count *= 1024 * 1024;
+		else
+			pr_err("Unexpected bytes unit '%s'\n", unit_ptr);
+		break;
+	case DRM_PMU_UNIT_CAPACITY:
+		/* No units expected. */
+		break;
+	case DRM_PMU_UNIT_CYCLES:
+		/* No units expected. */
+		break;
+	case DRM_PMU_UNIT_HZ:
+		if (!strcmp(unit_ptr, "Hz"))
+			count *= 1;
+		else if (!strcmp(unit_ptr, "KHz"))
+			count *= 1000;
+		else if (!strcmp(unit_ptr, "MHz"))
+			count *= 1000000;
+		else
+			pr_err("Unexpected hz unit '%s'\n", unit_ptr);
+		break;
+	case DRM_PMU_UNIT_NS:
+		/* Only unit ns expected. */
+		break;
+	case DRM_PMU_UNIT_MAX:
+	default:
+		break;
+	}
+	return count;
+}
+
+static uint64_t read_drm_event(int fdinfo_dir_fd, const char *fd_name,
+			       const char *match, enum drm_pmu_unit unit)
+{
+	char buf[640];
+	struct io io;
+	char *line = NULL;
+	size_t line_len;
+	uint64_t count = 0;
+
+	io__init(&io, openat(fdinfo_dir_fd, fd_name, O_RDONLY), buf, sizeof(buf));
+	if (io.fd == -1) {
+		/* Failed to open file, ignore.
*/ + return 0; + } + while (io__getline(&io, &line, &line_len) > 0) { + size_t i = strlen(match); + + if (strncmp(line, match, i)) + continue; + if (line[i] != ':') + continue; + while (isblank(line[++i])) + ; + if (line[line_len - 1] == '\n') + line[line_len - 1] = '\0'; + count = read_count_and_apply_unit(&line[i], unit); + break; + } + free(line); + close(io.fd); + return count; +} + +struct read_drm_event_cb_args { + const char *match; + uint64_t count; + enum drm_pmu_unit unit; +}; + +static int read_drm_event_cb(void *vargs, int fdinfo_dir_fd, const char *fd_name) +{ + struct read_drm_event_cb_args *args = vargs; + + args->count += read_drm_event(fdinfo_dir_fd, fd_name, args->match, args->unit); + return 0; +} + +static uint64_t drm_pmu__read_system_wide(struct drm_pmu *drm, struct evsel *evsel) +{ + struct read_drm_event_cb_args args = { + .count = 0, + .match = drm->events[evsel->core.attr.config].name, + .unit = drm->events[evsel->core.attr.config].unit, + }; + + for_each_drm_fdinfo(/*skip_all_duplicates=*/false, read_drm_event_cb, &args); + return args.count; +} + +static uint64_t drm_pmu__read_for_pid(struct drm_pmu *drm, struct evsel *evsel, int pid) +{ + struct read_drm_event_cb_args args = { + .count = 0, + .match = drm->events[evsel->core.attr.config].name, + .unit = drm->events[evsel->core.attr.config].unit, + }; + struct minor_info minors = { + .minors = NULL, + .minors_num = 0, + .minors_len = 0, + }; + int proc_dir = open(procfs__mountpoint(), O_DIRECTORY); + char pid_name[12]; + int ret; + + if (proc_dir < 0) + return 0; + + snprintf(pid_name, sizeof(pid_name), "%d", pid); + ret = for_each_drm_fdinfo_in_dir(read_drm_event_cb, &args, proc_dir, pid_name, &minors); + free(minors.minors); + close(proc_dir); + return ret == 0 ? args.count : 0; +} + +int evsel__drm_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) +{ + struct drm_pmu *drm = container_of(evsel->pmu, struct drm_pmu, pmu); + struct perf_counts_values *count, *old_count = NULL; + int pid = perf_thread_map__pid(evsel->core.threads, thread); + uint64_t counter; + + if (pid != -1) + counter = drm_pmu__read_for_pid(drm, evsel, pid); + else + counter = drm_pmu__read_system_wide(drm, evsel); + + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + + count = perf_counts(evsel->counts, cpu_map_idx, thread); + if (old_count) { + count->val = old_count->val + counter; + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + } else { + count->val = counter; + count->run++; + count->ena++; + } + return 0; +} diff --git a/tools/perf/util/drm_pmu.h b/tools/perf/util/drm_pmu.h new file mode 100644 index 000000000000..e7f366fca8a4 --- /dev/null +++ b/tools/perf/util/drm_pmu.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __DRM_PMU_H +#define __DRM_PMU_H +/* + * Linux DRM clients expose information through usage stats as documented in + * Documentation/gpu/drm-usage-stats.rst (available online at + * https://docs.kernel.org/gpu/drm-usage-stats.html). This is a tool like PMU + * that exposes DRM information. 
+ */ + +#include "pmu.h" +#include <stdbool.h> + +struct list_head; +struct perf_thread_map; + +void drm_pmu__exit(struct perf_pmu *pmu); +bool drm_pmu__have_event(const struct perf_pmu *pmu, const char *name); +int drm_pmu__for_each_event(const struct perf_pmu *pmu, void *state, pmu_event_callback cb); +size_t drm_pmu__num_events(const struct perf_pmu *pmu); +int drm_pmu__config_terms(const struct perf_pmu *pmu, + struct perf_event_attr *attr, + struct parse_events_terms *terms, + struct parse_events_error *err); +int drm_pmu__check_alias(const struct perf_pmu *pmu, struct parse_events_terms *terms, + struct perf_pmu_info *info, struct parse_events_error *err); + + +bool perf_pmu__is_drm(const struct perf_pmu *pmu); +bool evsel__is_drm(const struct evsel *evsel); + +int perf_pmus__read_drm_pmus(struct list_head *pmus); + +int evsel__drm_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx); +int evsel__drm_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread); + +#endif /* __DRM_PMU_H */ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index dde706b71da7..344e689567ee 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -67,6 +67,7 @@ char dso__symtab_origin(const struct dso *dso) [DSO_BINARY_TYPE__GUEST_KMODULE] = 'G', [DSO_BINARY_TYPE__GUEST_KMODULE_COMP] = 'M', [DSO_BINARY_TYPE__GUEST_VMLINUX] = 'V', + [DSO_BINARY_TYPE__GNU_DEBUGDATA] = 'n', }; if (dso == NULL || dso__symtab_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) @@ -93,6 +94,7 @@ bool dso__is_object_file(const struct dso *dso) case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: + case DSO_BINARY_TYPE__GNU_DEBUGDATA: case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: case DSO_BINARY_TYPE__GUEST_KMODULE: case DSO_BINARY_TYPE__GUEST_KMODULE_COMP: @@ -215,7 +217,7 @@ int dso__read_binary_type_filename(const struct dso *dso, break; } - build_id__sprintf(dso__bid_const(dso), build_id_hex); + build_id__snprintf(dso__bid(dso), build_id_hex, sizeof(build_id_hex)); len = __symbol__join_symfs(filename, size, "/usr/lib/debug/.build-id/"); snprintf(filename + len, size - len, "%.2s/%s.debug", build_id_hex, build_id_hex + 2); @@ -224,6 +226,7 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__VMLINUX: case DSO_BINARY_TYPE__GUEST_VMLINUX: case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: + case DSO_BINARY_TYPE__GNU_DEBUGDATA: __symbol__join_symfs(filename, size, dso__long_name(dso)); break; @@ -490,11 +493,25 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m, /* * Global list of open DSOs and the counter. 
*/ +struct mutex _dso__data_open_lock; static LIST_HEAD(dso__data_open); -static long dso__data_open_cnt; -static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER; +static long dso__data_open_cnt GUARDED_BY(_dso__data_open_lock); -static void dso__list_add(struct dso *dso) +static void dso__data_open_lock_init(void) +{ + mutex_init(&_dso__data_open_lock); +} + +static struct mutex *dso__data_open_lock(void) LOCK_RETURNED(_dso__data_open_lock) +{ + static pthread_once_t data_open_lock_once = PTHREAD_ONCE_INIT; + + pthread_once(&data_open_lock_once, dso__data_open_lock_init); + + return &_dso__data_open_lock; +} + +static void dso__list_add(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { list_add_tail(&dso__data(dso)->open_entry, &dso__data_open); #ifdef REFCNT_CHECKING @@ -505,11 +522,13 @@ static void dso__list_add(struct dso *dso) dso__data_open_cnt++; } -static void dso__list_del(struct dso *dso) +static void dso__list_del(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { list_del_init(&dso__data(dso)->open_entry); #ifdef REFCNT_CHECKING + mutex_unlock(dso__data_open_lock()); dso__put(dso__data(dso)->dso); + mutex_lock(dso__data_open_lock()); #endif WARN_ONCE(dso__data_open_cnt <= 0, "DSO data fd counter out of bounds."); @@ -518,7 +537,7 @@ static void dso__list_del(struct dso *dso) static void close_first_dso(void); -static int do_open(char *name) +static int do_open(char *name) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { int fd; char sbuf[STRERR_BUFSIZE]; @@ -545,6 +564,7 @@ char *dso__filename_with_chroot(const struct dso *dso, const char *filename) } static int __open_dso(struct dso *dso, struct machine *machine) + EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { int fd = -EINVAL; char *root_dir = (char *)""; @@ -610,6 +630,7 @@ static void check_data_close(void); * list/count of open DSO objects. */ static int open_dso(struct dso *dso, struct machine *machine) + EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { int fd; struct nscookie nsc; @@ -635,7 +656,7 @@ static int open_dso(struct dso *dso, struct machine *machine) return fd; } -static void close_data_fd(struct dso *dso) +static void close_data_fd(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { if (dso__data(dso)->fd >= 0) { close(dso__data(dso)->fd); @@ -652,12 +673,12 @@ static void close_data_fd(struct dso *dso) * Close @dso's data file descriptor and updates * list/count of open DSO objects. */ -static void close_dso(struct dso *dso) +static void close_dso(struct dso *dso) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { close_data_fd(dso); } -static void close_first_dso(void) +static void close_first_dso(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { struct dso_data *dso_data; struct dso *dso; @@ -702,7 +723,7 @@ void reset_fd_limit(void) fd_limit = 0; } -static bool may_cache_fd(void) +static bool may_cache_fd(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { if (!fd_limit) fd_limit = get_fd_limit(); @@ -718,7 +739,7 @@ static bool may_cache_fd(void) * for opened dso file descriptors. The limit is half * of the RLIMIT_NOFILE files opened. 
*/ -static void check_data_close(void) +static void check_data_close(void) EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { bool cache_fd = may_cache_fd(); @@ -734,12 +755,13 @@ static void check_data_close(void) */ void dso__data_close(struct dso *dso) { - pthread_mutex_lock(&dso__data_open_lock); + mutex_lock(dso__data_open_lock()); close_dso(dso); - pthread_mutex_unlock(&dso__data_open_lock); + mutex_unlock(dso__data_open_lock()); } static void try_to_open_dso(struct dso *dso, struct machine *machine) + EXCLUSIVE_LOCKS_REQUIRED(_dso__data_open_lock) { enum dso_binary_type binary_type_data[] = { DSO_BINARY_TYPE__BUILD_ID_CACHE, @@ -781,25 +803,27 @@ out: * returns file descriptor. It should be paired with * dso__data_put_fd() if it returns non-negative value. */ -int dso__data_get_fd(struct dso *dso, struct machine *machine) +bool dso__data_get_fd(struct dso *dso, struct machine *machine, int *fd) { + *fd = -1; if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR) - return -1; + return false; - if (pthread_mutex_lock(&dso__data_open_lock) < 0) - return -1; + mutex_lock(dso__data_open_lock()); try_to_open_dso(dso, machine); - if (dso__data(dso)->fd < 0) - pthread_mutex_unlock(&dso__data_open_lock); + *fd = dso__data(dso)->fd; + if (*fd >= 0) + return true; - return dso__data(dso)->fd; + mutex_unlock(dso__data_open_lock()); + return false; } void dso__data_put_fd(struct dso *dso __maybe_unused) { - pthread_mutex_unlock(&dso__data_open_lock); + mutex_unlock(dso__data_open_lock()); } bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by) @@ -951,7 +975,7 @@ static ssize_t file_read(struct dso *dso, struct machine *machine, { ssize_t ret; - pthread_mutex_lock(&dso__data_open_lock); + mutex_lock(dso__data_open_lock()); /* * dso__data(dso)->fd might be closed if other thread opened another @@ -967,7 +991,7 @@ static ssize_t file_read(struct dso *dso, struct machine *machine, ret = pread(dso__data(dso)->fd, data, DSO__DATA_CACHE_SIZE, offset); out: - pthread_mutex_unlock(&dso__data_open_lock); + mutex_unlock(dso__data_open_lock()); return ret; } @@ -1075,7 +1099,7 @@ static int file_size(struct dso *dso, struct machine *machine) struct stat st; char sbuf[STRERR_BUFSIZE]; - pthread_mutex_lock(&dso__data_open_lock); + mutex_lock(dso__data_open_lock()); /* * dso__data(dso)->fd might be closed if other thread opened another @@ -1099,7 +1123,7 @@ static int file_size(struct dso *dso, struct machine *machine) dso__data(dso)->file_size = st.st_size; out: - pthread_mutex_unlock(&dso__data_open_lock); + mutex_unlock(dso__data_open_lock()); return ret; } @@ -1170,6 +1194,68 @@ ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, return data_read_write_offset(dso, machine, offset, data, size, true); } +uint16_t dso__e_machine(struct dso *dso, struct machine *machine) +{ + uint16_t e_machine = EM_NONE; + int fd; + + switch (dso__binary_type(dso)) { + case DSO_BINARY_TYPE__KALLSYMS: + case DSO_BINARY_TYPE__GUEST_KALLSYMS: + case DSO_BINARY_TYPE__VMLINUX: + case DSO_BINARY_TYPE__GUEST_VMLINUX: + case DSO_BINARY_TYPE__GUEST_KMODULE: + case DSO_BINARY_TYPE__GUEST_KMODULE_COMP: + case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: + case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: + case DSO_BINARY_TYPE__KCORE: + case DSO_BINARY_TYPE__GUEST_KCORE: + case DSO_BINARY_TYPE__BPF_PROG_INFO: + case DSO_BINARY_TYPE__BPF_IMAGE: + case DSO_BINARY_TYPE__OOL: + case DSO_BINARY_TYPE__JAVA_JIT: + return EM_HOST; + case DSO_BINARY_TYPE__DEBUGLINK: + case DSO_BINARY_TYPE__BUILD_ID_CACHE: + case 
DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: + case DSO_BINARY_TYPE__GNU_DEBUGDATA: + case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: + case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: + case DSO_BINARY_TYPE__FEDORA_DEBUGINFO: + case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: + case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: + case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: + break; + case DSO_BINARY_TYPE__NOT_FOUND: + default: + return EM_NONE; + } + + mutex_lock(dso__data_open_lock()); + + /* + * dso__data(dso)->fd might be closed if other thread opened another + * file (dso) due to open file limit (RLIMIT_NOFILE). + */ + try_to_open_dso(dso, machine); + fd = dso__data(dso)->fd; + if (fd >= 0) { + _Static_assert(offsetof(Elf32_Ehdr, e_machine) == 18, "Unexpected offset"); + _Static_assert(offsetof(Elf64_Ehdr, e_machine) == 18, "Unexpected offset"); + if (dso__needs_swap(dso) == DSO_SWAP__UNSET) { + unsigned char eidata; + + if (pread(fd, &eidata, sizeof(eidata), EI_DATA) == sizeof(eidata)) + dso__swap_init(dso, eidata); + } + if (dso__needs_swap(dso) != DSO_SWAP__UNSET && + pread(fd, &e_machine, sizeof(e_machine), 18) == sizeof(e_machine)) + e_machine = DSO__SWAP(dso, uint16_t, e_machine); + } + mutex_unlock(dso__data_open_lock()); + return e_machine; +} + /** * dso__data_read_addr - Read data from dso address * @dso: dso object @@ -1263,6 +1349,16 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name, return dso; } +static void __dso__set_long_name_id(struct dso *dso, const char *name, bool name_allocated) +{ + if (dso__long_name_allocated(dso)) + free((char *)dso__long_name(dso)); + + RC_CHK_ACCESS(dso)->long_name = name; + RC_CHK_ACCESS(dso)->long_name_len = strlen(name); + dso__set_long_name_allocated(dso, name_allocated); +} + static void dso__set_long_name_id(struct dso *dso, const char *name, bool name_allocated) { struct dsos *dsos = dso__dsos(dso); @@ -1276,81 +1372,86 @@ static void dso__set_long_name_id(struct dso *dso, const char *name, bool name_a * renaming the dso. */ down_write(&dsos->lock); - } - - if (dso__long_name_allocated(dso)) - free((char *)dso__long_name(dso)); - - RC_CHK_ACCESS(dso)->long_name = name; - RC_CHK_ACCESS(dso)->long_name_len = strlen(name); - dso__set_long_name_allocated(dso, name_allocated); - - if (dsos) { + __dso__set_long_name_id(dso, name, name_allocated); dsos->sorted = false; up_write(&dsos->lock); + } else { + __dso__set_long_name_id(dso, name, name_allocated); } } static int __dso_id__cmp(const struct dso_id *a, const struct dso_id *b) { - if (a->maj > b->maj) return -1; - if (a->maj < b->maj) return 1; + if (a->mmap2_valid && b->mmap2_valid) { + if (a->maj > b->maj) return -1; + if (a->maj < b->maj) return 1; - if (a->min > b->min) return -1; - if (a->min < b->min) return 1; + if (a->min > b->min) return -1; + if (a->min < b->min) return 1; - if (a->ino > b->ino) return -1; - if (a->ino < b->ino) return 1; - - /* - * Synthesized MMAP events have zero ino_generation, avoid comparing - * them with MMAP events with actual ino_generation. - * - * I found it harmful because the mismatch resulted in a new - * dso that did not have a build ID whereas the original dso did have a - * build ID. The build ID was essential because the object was not found - * otherwise. 
- Adrian - */ - if (a->ino_generation && b->ino_generation) { + if (a->ino > b->ino) return -1; + if (a->ino < b->ino) return 1; + } + if (a->mmap2_ino_generation_valid && b->mmap2_ino_generation_valid) { if (a->ino_generation > b->ino_generation) return -1; if (a->ino_generation < b->ino_generation) return 1; } - + if (build_id__is_defined(&a->build_id) && build_id__is_defined(&b->build_id)) { + if (a->build_id.size != b->build_id.size) + return a->build_id.size < b->build_id.size ? -1 : 1; + return memcmp(a->build_id.data, b->build_id.data, a->build_id.size); + } return 0; } -bool dso_id__empty(const struct dso_id *id) -{ - if (!id) - return true; - - return !id->maj && !id->min && !id->ino && !id->ino_generation; -} +const struct dso_id dso_id_empty = { + { + .maj = 0, + .min = 0, + .ino = 0, + .ino_generation = 0, + }, + .mmap2_valid = false, + .mmap2_ino_generation_valid = false, + { + .size = 0, + } +}; -void __dso__inject_id(struct dso *dso, struct dso_id *id) +void __dso__improve_id(struct dso *dso, const struct dso_id *id) { struct dsos *dsos = dso__dsos(dso); struct dso_id *dso_id = dso__id(dso); + bool changed = false; /* dsos write lock held by caller. */ - dso_id->maj = id->maj; - dso_id->min = id->min; - dso_id->ino = id->ino; - dso_id->ino_generation = id->ino_generation; - - if (dsos) + if (id->mmap2_valid && !dso_id->mmap2_valid) { + dso_id->maj = id->maj; + dso_id->min = id->min; + dso_id->ino = id->ino; + dso_id->mmap2_valid = true; + changed = true; + } + if (id->mmap2_ino_generation_valid && !dso_id->mmap2_ino_generation_valid) { + dso_id->ino_generation = id->ino_generation; + dso_id->mmap2_ino_generation_valid = true; + changed = true; + } + if (build_id__is_defined(&id->build_id) && !build_id__is_defined(&dso_id->build_id)) { + dso_id->build_id = id->build_id; + changed = true; + } + if (changed && dsos) dsos->sorted = false; } int dso_id__cmp(const struct dso_id *a, const struct dso_id *b) { - /* - * The second is always dso->id, so zeroes if not set, assume passing - * NULL for a means a zeroed id - */ - if (dso_id__empty(a) || dso_id__empty(b)) + if (a == &dso_id_empty || b == &dso_id_empty) { + /* There is no valid data to compare so the comparison always returns identical. */ return 0; + } return __dso_id__cmp(a, b); } @@ -1365,6 +1466,16 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) dso__set_long_name_id(dso, name, name_allocated); } +static void __dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) +{ + if (dso__short_name_allocated(dso)) + free((char *)dso__short_name(dso)); + + RC_CHK_ACCESS(dso)->short_name = name; + RC_CHK_ACCESS(dso)->short_name_len = strlen(name); + dso__set_short_name_allocated(dso, name_allocated); +} + void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) { struct dsos *dsos = dso__dsos(dso); @@ -1378,17 +1489,11 @@ void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) * renaming the dso. 
*/ down_write(&dsos->lock); - } - if (dso__short_name_allocated(dso)) - free((char *)dso__short_name(dso)); - - RC_CHK_ACCESS(dso)->short_name = name; - RC_CHK_ACCESS(dso)->short_name_len = strlen(name); - dso__set_short_name_allocated(dso, name_allocated); - - if (dsos) { + __dso__set_short_name(dso, name, name_allocated); dsos->sorted = false; up_write(&dsos->lock); + } else { + __dso__set_short_name(dso, name, name_allocated); } } @@ -1417,7 +1522,7 @@ void dso__set_sorted_by_name(struct dso *dso) RC_CHK_ACCESS(dso)->sorted_by_name = true; } -struct dso *dso__new_id(const char *name, struct dso_id *id) +struct dso *dso__new_id(const char *name, const struct dso_id *id) { RC_STRUCT(dso) *dso = zalloc(sizeof(*dso) + strlen(name) + 1); struct dso *res; @@ -1447,7 +1552,6 @@ struct dso *dso__new_id(const char *name, struct dso_id *id) dso->loaded = 0; dso->rel = 0; dso->sorted_by_name = 0; - dso->has_build_id = 0; dso->has_srcline = 1; dso->a2l_fails = 1; dso->kernel = DSO_SPACE__USER; @@ -1501,7 +1605,7 @@ void dso__delete(struct dso *dso) auxtrace_cache__free(RC_CHK_ACCESS(dso)->auxtrace_cache); dso_cache__free(dso); dso__free_a2l(dso); - zfree(&RC_CHK_ACCESS(dso)->symsrc_filename); + dso__free_symsrc_filename(dso); nsinfo__zput(RC_CHK_ACCESS(dso)->nsinfo); mutex_destroy(dso__lock(dso)); RC_CHK_FREE(dso); @@ -1519,21 +1623,51 @@ struct dso *dso__get(struct dso *dso) void dso__put(struct dso *dso) { +#ifdef REFCNT_CHECKING + if (dso && dso__data(dso) && refcount_read(&RC_CHK_ACCESS(dso)->refcnt) == 2) + dso__data_close(dso); +#endif if (dso && refcount_dec_and_test(&RC_CHK_ACCESS(dso)->refcnt)) dso__delete(dso); else RC_CHK_PUT(dso); } -void dso__set_build_id(struct dso *dso, struct build_id *bid) +int dso__swap_init(struct dso *dso, unsigned char eidata) { - RC_CHK_ACCESS(dso)->bid = *bid; - RC_CHK_ACCESS(dso)->has_build_id = 1; + static unsigned int const endian = 1; + + dso__set_needs_swap(dso, DSO_SWAP__NO); + + switch (eidata) { + case ELFDATA2LSB: + /* We are big endian, DSO is little endian. */ + if (*(unsigned char const *)&endian != 1) + dso__set_needs_swap(dso, DSO_SWAP__YES); + break; + + case ELFDATA2MSB: + /* We are little endian, DSO is big endian. 
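On a little-endian host the first byte of the static "endian" probe above is non-zero, so swapping is enabled for a big-endian DSO.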
*/ + if (*(unsigned char const *)&endian != 0) + dso__set_needs_swap(dso, DSO_SWAP__YES); + break; + + default: + pr_err("unrecognized DSO data encoding %d\n", eidata); + return -EINVAL; + } + + return 0; } -bool dso__build_id_equal(const struct dso *dso, struct build_id *bid) +void dso__set_build_id(struct dso *dso, const struct build_id *bid) { - const struct build_id *dso_bid = dso__bid_const(dso); + dso__id(dso)->build_id = *bid; +} + +bool dso__build_id_equal(const struct dso *dso, const struct build_id *bid) +{ + const struct build_id *dso_bid = dso__bid(dso); if (dso_bid->size > bid->size && dso_bid->size == BUILD_ID_SIZE) { /* @@ -1552,18 +1686,20 @@ bool dso__build_id_equal(const struct dso *dso, struct build_id *bid) void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine) { char path[PATH_MAX]; + struct build_id bid = { .size = 0, }; if (machine__is_default_guest(machine)) return; sprintf(path, "%s/sys/kernel/notes", machine->root_dir); - if (sysfs__read_build_id(path, dso__bid(dso)) == 0) - dso__set_has_build_id(dso); + sysfs__read_build_id(path, &bid); + dso__set_build_id(dso, &bid); } int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir) { char filename[PATH_MAX]; + struct build_id bid = { .size = 0, }; /* * kernel module short names are of the form "[module]" and * we need just "module" here. @@ -1574,9 +1710,8 @@ int dso__kernel_module_get_build_id(struct dso *dso, "%s/sys/module/%.*s/notes/.note.gnu.build-id", root_dir, (int)strlen(name) - 1, name); - if (sysfs__read_build_id(filename, dso__bid(dso)) == 0) - dso__set_has_build_id(dso); - + sysfs__read_build_id(filename, &bid); + dso__set_build_id(dso, &bid); return 0; } @@ -1584,7 +1719,7 @@ static size_t dso__fprintf_buildid(struct dso *dso, FILE *fp) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); return fprintf(fp, "%s", sbuild_id); } @@ -1608,11 +1743,10 @@ size_t dso__fprintf(struct dso *dso, FILE *fp) enum dso_type dso__type(struct dso *dso, struct machine *machine) { - int fd; + int fd = -1; enum dso_type type = DSO__TYPE_UNKNOWN; - fd = dso__data_get_fd(dso, machine); - if (fd >= 0) { + if (dso__data_get_fd(dso, machine, &fd)) { type = dso__type_fd(fd); dso__data_put_fd(dso); } @@ -1652,3 +1786,127 @@ int dso__strerror_load(struct dso *dso, char *buf, size_t buflen) scnprintf(buf, buflen, "%s", dso_load__error_str[idx]); return 0; } + +bool perf_pid_map_tid(const char *dso_name, int *tid) +{ + return sscanf(dso_name, "/tmp/perf-%d.map", tid) == 1; +} + +bool is_perf_pid_map_name(const char *dso_name) +{ + int tid; + + return perf_pid_map_tid(dso_name, &tid); +} + +struct find_file_offset_data { + u64 ip; + u64 offset; +}; + +/* This will be called for each PHDR in an ELF binary */ +static int find_file_offset(u64 start, u64 len, u64 pgoff, void *arg) +{ + struct find_file_offset_data *data = arg; + + if (start <= data->ip && data->ip < start + len) { + data->offset = pgoff + data->ip - start; + return 1; + } + return 0; +} + +static const u8 *__dso__read_symbol(struct dso *dso, const char *symfs_filename, + u64 start, size_t len, + u8 **out_buf, u64 *out_buf_len, bool *is_64bit) +{ + struct nscookie nsc; + int fd; + ssize_t count; + struct find_file_offset_data data = { + .ip = start, + }; + u8 *code_buf = NULL; + int saved_errno; + + nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); + fd = open(symfs_filename, O_RDONLY); + saved_errno = errno; + nsinfo__mountns_exit(&nsc); + 
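/* open(2)'s errno is saved above because nsinfo__mountns_exit() can clobber it; it is restored below before returning. */ +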
if (fd < 0) { + errno = saved_errno; + return NULL; + } + if (file__read_maps(fd, /*exe=*/true, find_file_offset, &data, is_64bit) <= 0) { + close(fd); + errno = ENOENT; + return NULL; + } + code_buf = malloc(len); + if (code_buf == NULL) { + close(fd); + errno = ENOMEM; + return NULL; + } + count = pread(fd, code_buf, len, data.offset); + saved_errno = errno; + close(fd); + if ((u64)count != len) { + free(code_buf); + errno = saved_errno; + return NULL; + } + *out_buf = code_buf; + *out_buf_len = len; + return code_buf; +} + +/* + * Read a symbol into memory for disassembly by a library like capstone or + * libLLVM. If memory is allocated, out_buf holds it. + */ +const u8 *dso__read_symbol(struct dso *dso, const char *symfs_filename, + const struct map *map, const struct symbol *sym, + u8 **out_buf, u64 *out_buf_len, bool *is_64bit) +{ + u64 start = map__rip_2objdump(map, sym->start); + u64 end = map__rip_2objdump(map, sym->end); + size_t len = end - start; + + *out_buf = NULL; + *out_buf_len = 0; + *is_64bit = false; + + if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) { + /* + * Note, there is fallback BPF image disassembly in the objdump + * version but it currently does nothing. + */ + errno = EOPNOTSUPP; + return NULL; + } + if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) { +#ifdef HAVE_LIBBPF_SUPPORT + struct bpf_prog_info_node *info_node; + struct perf_bpil *info_linear; + + *is_64bit = sizeof(void *) == sizeof(u64); + info_node = perf_env__find_bpf_prog_info(dso__bpf_prog(dso)->env, + dso__bpf_prog(dso)->id); + if (!info_node) { + errno = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF; + return NULL; + } + info_linear = info_node->info_linear; + assert(len <= info_linear->info.jited_prog_len); + *out_buf_len = len; + return (const u8 *)(uintptr_t)(info_linear->info.jited_prog_insns); +#else + pr_debug("No BPF program disassembly support\n"); + errno = EOPNOTSUPP; + return NULL; +#endif + } + return __dso__read_symbol(dso, symfs_filename, start, len, + out_buf, out_buf_len, is_64bit); +} diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index df2c98402af3..f8ccb9816b89 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -10,6 +10,7 @@ #include <stdio.h> #include <linux/bitops.h> #include "build-id.h" +#include "debuginfo.h" #include "mutex.h" #include <internal/rc_check.h> @@ -20,30 +21,88 @@ struct perf_env; #define DSO__NAME_KALLSYMS "[kernel.kallsyms]" #define DSO__NAME_KCORE "[kernel.kcore]" +/** + * enum dso_binary_type - The kind of DSO generally associated with a memory + * region (struct map). + */ enum dso_binary_type { + /** @DSO_BINARY_TYPE__KALLSYMS: Symbols from /proc/kallsyms file. */ DSO_BINARY_TYPE__KALLSYMS = 0, + /** @DSO_BINARY_TYPE__GUEST_KALLSYMS: Guest /proc/kallsyms file. */ DSO_BINARY_TYPE__GUEST_KALLSYMS, + /** @DSO_BINARY_TYPE__VMLINUX: Path to kernel /boot/vmlinux file. */ DSO_BINARY_TYPE__VMLINUX, + /** @DSO_BINARY_TYPE__GUEST_VMLINUX: Path to guest kernel /boot/vmlinux file. */ DSO_BINARY_TYPE__GUEST_VMLINUX, + /** @DSO_BINARY_TYPE__JAVA_JIT: Symbols from /tmp/perf.map file. */ DSO_BINARY_TYPE__JAVA_JIT, + /** + * @DSO_BINARY_TYPE__DEBUGLINK: Debug file readable from the file path + * in the .gnu_debuglink ELF section of the dso. + */ DSO_BINARY_TYPE__DEBUGLINK, + /** + * @DSO_BINARY_TYPE__BUILD_ID_CACHE: File named after buildid located in + * the buildid cache with an elf filename.
+ */ DSO_BINARY_TYPE__BUILD_ID_CACHE, + /** + * @DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: File named after buildid + * located in the buildid cache with a debug filename. + */ DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO, + /** + * @DSO_BINARY_TYPE__FEDORA_DEBUGINFO: Debug file in /usr/lib/debug + * with .debug suffix. + */ DSO_BINARY_TYPE__FEDORA_DEBUGINFO, + /** @DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: Debug file in /usr/lib/debug. */ DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, + /** + * @DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: dso__long_name debuginfo + * file in /usr/lib/debug/lib rather than the expected + * /usr/lib/debug/usr/lib. + */ DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO, + /** + * @DSO_BINARY_TYPE__BUILDID_DEBUGINFO: File named after buildid located + * in /usr/lib/debug/.build-id/. + */ DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + /** + * @DSO_BINARY_TYPE__GNU_DEBUGDATA: MiniDebuginfo where a compressed + * ELF file is placed in a .gnu_debugdata section. + */ + DSO_BINARY_TYPE__GNU_DEBUGDATA, + /** @DSO_BINARY_TYPE__SYSTEM_PATH_DSO: A regular executable/shared-object file. */ DSO_BINARY_TYPE__SYSTEM_PATH_DSO, + /** @DSO_BINARY_TYPE__GUEST_KMODULE: Guest kernel module .ko file. */ DSO_BINARY_TYPE__GUEST_KMODULE, + /** @DSO_BINARY_TYPE__GUEST_KMODULE_COMP: Guest kernel module .ko.gz file. */ DSO_BINARY_TYPE__GUEST_KMODULE_COMP, + /** @DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: Kernel module .ko file. */ DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE, + /** @DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: Kernel module .ko.gz file. */ DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP, + /** @DSO_BINARY_TYPE__KCORE: /proc/kcore file. */ DSO_BINARY_TYPE__KCORE, + /** @DSO_BINARY_TYPE__GUEST_KCORE: Guest /proc/kcore file. */ DSO_BINARY_TYPE__GUEST_KCORE, + /** + * @DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: Openembedded/Yocto -dbg + * package debug info. + */ DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, + /** @DSO_BINARY_TYPE__BPF_PROG_INFO: jitted BPF code. */ DSO_BINARY_TYPE__BPF_PROG_INFO, + /** @DSO_BINARY_TYPE__BPF_IMAGE: jitted BPF trampoline or dispatcher code. */ DSO_BINARY_TYPE__BPF_IMAGE, + /** + * @DSO_BINARY_TYPE__OOL: out of line code such as kprobe-replaced + * instructions or optimized kprobes or ftrace trampolines. + */ DSO_BINARY_TYPE__OOL, + /** @DSO_BINARY_TYPE__NOT_FOUND: Unknown DSO kind. */ DSO_BINARY_TYPE__NOT_FOUND, }; @@ -127,14 +186,33 @@ enum dso_load_errno { #define DSO__DATA_CACHE_SIZE 4096 #define DSO__DATA_CACHE_MASK ~(DSO__DATA_CACHE_SIZE - 1) -/* - * Data about backing storage DSO, comes from PERF_RECORD_MMAP2 meta events +/** + * struct dso_id + * + * Data about backing storage DSO, comes from PERF_RECORD_MMAP2 meta events, + * reading from /proc/pid/maps or synthesis of build_ids from DSOs. Possibly + * incomplete at any particular use. */ struct dso_id { - u32 maj; - u32 min; - u64 ino; - u64 ino_generation; + /* Data related to the mmap2 event or read from /proc/pid/maps. */ + struct { + u32 maj; + u32 min; + u64 ino; + u64 ino_generation; + }; + /** @mmap2_valid: Are the maj, min and ino fields valid? */ + bool mmap2_valid; + /** + * @mmap2_ino_generation_valid: Is the ino_generation valid? Generally + * false for /proc/pid/maps mmap event. + */ + bool mmap2_ino_generation_valid; + /** + * @build_id: A possibly populated build_id. build_id__is_defined checks + * whether it is populated. 
+ */ + struct build_id build_id; }; struct dso_cache { @@ -154,10 +232,12 @@ struct dso_data { int status; u32 status_seen; u64 file_size; +#ifdef HAVE_LIBUNWIND_SUPPORT u64 elf_base_addr; u64 debug_frame_offset; u64 eh_frame_hdr_addr; u64 eh_frame_hdr_offset; +#endif }; struct dso_bpf_prog { @@ -183,7 +263,6 @@ DECLARE_RC_STRUCT(dso) { u64 addr; struct symbol *symbol; } last_find_result; - struct build_id bid; u64 text_offset; u64 text_end; const char *short_name; @@ -216,12 +295,12 @@ DECLARE_RC_STRUCT(dso) { enum dso_swap_type needs_swap:2; bool is_kmod:1; u8 adjust_symbols:1; - u8 has_build_id:1; u8 header_build_id:1; u8 has_srcline:1; u8 hit:1; u8 annotate_warned:1; u8 auxtrace_warned:1; + u8 debuginfo_warned:1; u8 short_name_allocated:1; u8 long_name_allocated:1; u8 is_64_bit:1; @@ -231,6 +310,11 @@ DECLARE_RC_STRUCT(dso) { char name[]; }; +extern struct mutex _dso__data_open_lock; +extern const struct dso_id dso_id_empty; + +int dso_id__cmp(const struct dso_id *a, const struct dso_id *b); + /* dso__for_each_symbol - iterate over the symbols of given type * * @dso: the 'struct dso *' in which symbols are iterated @@ -280,39 +364,39 @@ static inline void dso__set_annotate_warned(struct dso *dso) RC_CHK_ACCESS(dso)->annotate_warned = 1; } -static inline struct auxtrace_cache *dso__auxtrace_cache(struct dso *dso) +static inline bool dso__debuginfo_warned(const struct dso *dso) { - return RC_CHK_ACCESS(dso)->auxtrace_cache; + return RC_CHK_ACCESS(dso)->debuginfo_warned; } -static inline void dso__set_auxtrace_cache(struct dso *dso, struct auxtrace_cache *cache) +static inline void dso__set_debuginfo_warned(struct dso *dso) { - RC_CHK_ACCESS(dso)->auxtrace_cache = cache; + RC_CHK_ACCESS(dso)->debuginfo_warned = 1; } -static inline struct build_id *dso__bid(struct dso *dso) +static inline bool dso__auxtrace_warned(const struct dso *dso) { - return &RC_CHK_ACCESS(dso)->bid; + return RC_CHK_ACCESS(dso)->auxtrace_warned; } -static inline const struct build_id *dso__bid_const(const struct dso *dso) +static inline void dso__set_auxtrace_warned(struct dso *dso) { - return &RC_CHK_ACCESS(dso)->bid; + RC_CHK_ACCESS(dso)->auxtrace_warned = 1; } -static inline struct dso_bpf_prog *dso__bpf_prog(struct dso *dso) +static inline struct auxtrace_cache *dso__auxtrace_cache(struct dso *dso) { - return &RC_CHK_ACCESS(dso)->bpf_prog; + return RC_CHK_ACCESS(dso)->auxtrace_cache; } -static inline bool dso__has_build_id(const struct dso *dso) +static inline void dso__set_auxtrace_cache(struct dso *dso, struct auxtrace_cache *cache) { - return RC_CHK_ACCESS(dso)->has_build_id; + RC_CHK_ACCESS(dso)->auxtrace_cache = cache; } -static inline void dso__set_has_build_id(struct dso *dso) +static inline struct dso_bpf_prog *dso__bpf_prog(struct dso *dso) { - RC_CHK_ACCESS(dso)->has_build_id = true; + return &RC_CHK_ACCESS(dso)->bpf_prog; } static inline bool dso__has_srcline(const struct dso *dso) @@ -390,6 +474,16 @@ static inline const struct dso_id *dso__id_const(const struct dso *dso) return &RC_CHK_ACCESS(dso)->id; } +static inline const struct build_id *dso__bid(const struct dso *dso) +{ + return &dso__id_const(dso)->build_id; +} + +static inline bool dso__has_build_id(const struct dso *dso) +{ + return build_id__is_defined(dso__bid(dso)); +} + static inline struct rb_root_cached *dso__inlined_nodes(struct dso *dso) { return &RC_CHK_ACCESS(dso)->inlined_nodes; @@ -592,6 +686,11 @@ static inline void dso__set_symsrc_filename(struct dso *dso, char *val) RC_CHK_ACCESS(dso)->symsrc_filename = val; } +static inline 
void dso__free_symsrc_filename(struct dso *dso) +{ + zfree(&RC_CHK_ACCESS(dso)->symsrc_filename); +} + static inline enum dso_binary_type dso__symtab_type(const struct dso *dso) { return RC_CHK_ACCESS(dso)->symtab_type; @@ -622,22 +721,19 @@ static inline void dso__set_text_offset(struct dso *dso, u64 val) RC_CHK_ACCESS(dso)->text_offset = val; } -int dso_id__cmp(const struct dso_id *a, const struct dso_id *b); -bool dso_id__empty(const struct dso_id *id); - -struct dso *dso__new_id(const char *name, struct dso_id *id); +struct dso *dso__new_id(const char *name, const struct dso_id *id); struct dso *dso__new(const char *name); void dso__delete(struct dso *dso); int dso__cmp_id(struct dso *a, struct dso *b); void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated); void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated); -void __dso__inject_id(struct dso *dso, struct dso_id *id); +void __dso__improve_id(struct dso *dso, const struct dso_id *id); int dso__name_len(const struct dso *dso); struct dso *dso__get(struct dso *dso); -void dso__put(struct dso *dso); +void dso__put(struct dso *dso) LOCKS_EXCLUDED(_dso__data_open_lock); static inline void __dso__zput(struct dso **dso) { @@ -660,8 +756,10 @@ bool dso__sorted_by_name(const struct dso *dso); void dso__set_sorted_by_name(struct dso *dso); void dso__sort_by_name(struct dso *dso); -void dso__set_build_id(struct dso *dso, struct build_id *bid); -bool dso__build_id_equal(const struct dso *dso, struct build_id *bid); +int dso__swap_init(struct dso *dso, unsigned char eidata); + +void dso__set_build_id(struct dso *dso, const struct build_id *bid); +bool dso__build_id_equal(const struct dso *dso, const struct build_id *bid); void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine); int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir); @@ -717,8 +815,8 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m, * The current usage of the dso__data_* interface is as follows: * * Get DSO's fd: - * int fd = dso__data_get_fd(dso, machine); - * if (fd >= 0) { + * int fd; + * if (dso__data_get_fd(dso, machine, &fd)) { * USE 'fd' SOMEHOW * dso__data_put_fd(dso); * } @@ -740,14 +838,16 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m, * * TODO */ -int dso__data_get_fd(struct dso *dso, struct machine *machine); -void dso__data_put_fd(struct dso *dso); -void dso__data_close(struct dso *dso); +bool dso__data_get_fd(struct dso *dso, struct machine *machine, int *fd) + EXCLUSIVE_TRYLOCK_FUNCTION(true, _dso__data_open_lock); +void dso__data_put_fd(struct dso *dso) UNLOCK_FUNCTION(_dso__data_open_lock); +void dso__data_close(struct dso *dso) LOCKS_EXCLUDED(_dso__data_open_lock); int dso__data_file_size(struct dso *dso, struct machine *machine); off_t dso__data_size(struct dso *dso, struct machine *machine); ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, u64 offset, u8 *data, ssize_t size); +uint16_t dso__e_machine(struct dso *dso, struct machine *machine); ssize_t dso__data_read_addr(struct dso *dso, struct map *map, struct machine *machine, u64 addr, u8 *data, ssize_t size); @@ -793,7 +893,9 @@ static inline bool dso__is_kcore(const struct dso *dso) static inline bool dso__is_kallsyms(const struct dso *dso) { - return RC_CHK_ACCESS(dso)->kernel && RC_CHK_ACCESS(dso)->long_name[0] != '/'; + enum dso_binary_type bt = dso__binary_type(dso); + + return bt == DSO_BINARY_TYPE__KALLSYMS || bt == 
DSO_BINARY_TYPE__GUEST_KALLSYMS; } bool dso__is_object_file(const struct dso *dso); @@ -809,4 +911,21 @@ void reset_fd_limit(void); u64 dso__find_global_type(struct dso *dso, u64 addr); u64 dso__findnew_global_type(struct dso *dso, u64 addr, u64 offset); +/* Check if dso name is of format "/tmp/perf-%d.map" */ +bool perf_pid_map_tid(const char *dso_name, int *tid); +bool is_perf_pid_map_name(const char *dso_name); + +/* + * In the future, we may get debuginfo using build-ID (w/o path). + * This helper is added for a smooth conversion. + */ +static inline struct debuginfo *dso__debuginfo(struct dso *dso) +{ + return debuginfo__new(dso__long_name(dso)); +} + +const u8 *dso__read_symbol(struct dso *dso, const char *symfs_filename, + const struct map *map, const struct symbol *sym, + u8 **out_buf, u64 *out_buf_len, bool *is_64bit); + #endif /* __PERF_DSO */ diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index a69a9c661200..0a7645c7fae7 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -72,6 +72,7 @@ static int dsos__read_build_ids_cb(struct dso *dso, void *data) { struct dsos__read_build_ids_cb_args *args = data; struct nscookie nsc; + struct build_id bid = { .size = 0, }; if (args->with_hits && !dso__hit(dso) && !dso__is_vdso(dso)) return 0; @@ -80,15 +81,15 @@ static int dsos__read_build_ids_cb(struct dso *dso, void *data) return 0; } nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); - if (filename__read_build_id(dso__long_name(dso), dso__bid(dso)) > 0) { + if (filename__read_build_id(dso__long_name(dso), &bid) > 0) { + dso__set_build_id(dso, &bid); args->have_build_id = true; - dso__set_has_build_id(dso); } else if (errno == ENOENT && dso__nsinfo(dso)) { char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso)); - if (new_name && filename__read_build_id(new_name, dso__bid(dso)) > 0) { + if (new_name && filename__read_build_id(new_name, &bid) > 0) { + dso__set_build_id(dso, &bid); args->have_build_id = true; - dso__set_has_build_id(dso); } free(new_name); } @@ -155,8 +156,9 @@ static int dsos__cmp_key_long_name_id(const void *vkey, const void *vdso) */ static struct dso *__dsos__find_by_longname_id(struct dsos *dsos, const char *name, - struct dso_id *id, + const struct dso_id *id, bool write_locked) + SHARED_LOCKS_REQUIRED(dsos->lock) { struct dsos__key key = { .long_name = name, @@ -164,6 +166,9 @@ static struct dso *__dsos__find_by_longname_id(struct dsos *dsos, }; struct dso **res; + if (dsos->dsos == NULL) + return NULL; + if (!dsos->sorted) { if (!write_locked) { struct dso *dso; @@ -241,7 +246,7 @@ int dsos__add(struct dsos *dsos, struct dso *dso) struct dsos__find_id_cb_args { const char *name; - struct dso_id *id; + const struct dso_id *id; struct dso *res; }; @@ -257,8 +262,9 @@ static int dsos__find_id_cb(struct dso *dso, void *data) } -static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, struct dso_id *id, +static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, const struct dso_id *id, bool cmp_short, bool write_locked) + SHARED_LOCKS_REQUIRED(dsos->lock) { struct dso *res; @@ -281,7 +287,7 @@ struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short) { struct dso *res; down_read(&dsos->lock); - res = __dsos__find_id(dsos, name, NULL, cmp_short, /*write_locked=*/false); + res = __dsos__find_id(dsos, name, &dso_id_empty, cmp_short, /*write_locked=*/false); up_read(&dsos->lock); return res; } @@ -291,7 +297,7 @@ static void dso__set_basename(struct dso *dso) char *base, *lname; int tid; - if
(sscanf(dso__long_name(dso), "/tmp/perf-%d.map", &tid) == 1) { + if (perf_pid_map_tid(dso__long_name(dso), &tid)) { if (asprintf(&base, "[JIT] tid %d", tid) < 0) return; } else { @@ -318,7 +324,7 @@ static void dso__set_basename(struct dso *dso) dso__set_short_name(dso, base, true); } -static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, struct dso_id *id) +static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, const struct dso_id *id) { struct dso *dso = dso__new_id(name, id); @@ -334,17 +340,18 @@ static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, struct return dso; } -static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id) +static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, const struct dso_id *id) + SHARED_LOCKS_REQUIRED(dsos->lock) { struct dso *dso = __dsos__find_id(dsos, name, id, false, /*write_locked=*/true); - if (dso && dso_id__empty(dso__id(dso)) && !dso_id__empty(id)) - __dso__inject_id(dso, id); + if (dso) + __dso__improve_id(dso, id); return dso ? dso : __dsos__addnew_id(dsos, name, id); } -struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id) +struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, const struct dso_id *id) { struct dso *dso; down_write(&dsos->lock); @@ -367,7 +374,7 @@ static int dsos__fprintf_buildid_cb(struct dso *dso, void *data) if (args->skip && args->skip(dso, args->parm)) return 0; - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); args->ret += fprintf(args->fp, "%-40s %s\n", sbuild_id, dso__long_name(dso)); return 0; } @@ -430,7 +437,8 @@ struct dso *dsos__findnew_module_dso(struct dsos *dsos, down_write(&dsos->lock); - dso = __dsos__find_id(dsos, m->name, NULL, /*cmp_short=*/true, /*write_locked=*/true); + dso = __dsos__find_id(dsos, m->name, &dso_id_empty, /*cmp_short=*/true, + /*write_locked=*/true); if (dso) { up_write(&dsos->lock); return dso; diff --git a/tools/perf/util/dsos.h b/tools/perf/util/dsos.h index 6c13b65648bc..a26774950866 100644 --- a/tools/perf/util/dsos.h +++ b/tools/perf/util/dsos.h @@ -32,7 +32,7 @@ int __dsos__add(struct dsos *dsos, struct dso *dso); int dsos__add(struct dsos *dsos, struct dso *dso); struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short); -struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id); +struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, const struct dso_id *id); bool dsos__read_build_ids(struct dsos *dsos, bool with_hits); diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c index 2bd8585db93c..c1cc0ade48d0 100644 --- a/tools/perf/util/dump-insn.c +++ b/tools/perf/util/dump-insn.c @@ -15,7 +15,7 @@ const char *dump_insn(struct perf_insn *x __maybe_unused, } __weak -int arch_is_branch(const unsigned char *buf __maybe_unused, +int arch_is_uncond_branch(const unsigned char *buf __maybe_unused, size_t len __maybe_unused, int x86_64 __maybe_unused) { diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h index 4a7797dd6d09..20d4d7bb5275 100644 --- a/tools/perf/util/dump-insn.h +++ b/tools/perf/util/dump-insn.h @@ -21,6 +21,6 @@ struct perf_insn { const char *dump_insn(struct perf_insn *x, u64 ip, u8 *inbuf, int inlen, int *lenp); -int arch_is_branch(const unsigned char *buf, size_t len, int x86_64); +int arch_is_uncond_branch(const unsigned char *buf, size_t len, int x86_64); #endif 
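A minimal caller sketch (an illustration, not part of the patch) for the reworked fd API in the dso.h hunk above: dso__data_get_fd() now returns true with *fd valid and _dso__data_open_lock held, and dso__data_put_fd() releases the lock. The hypothetical example_dso_type() below mirrors the dso__type() hunk in dso.c:

static enum dso_type example_dso_type(struct dso *dso, struct machine *machine)
{
	enum dso_type type = DSO__TYPE_UNKNOWN;
	int fd = -1;

	if (dso__data_get_fd(dso, machine, &fd)) {
		/* Success: the data-open lock is held and fd stays valid. */
		type = dso__type_fd(fd);
		/* Releases _dso__data_open_lock; fd may be closed by another thread afterwards. */
		dso__data_put_fd(dso);
	}
	return type;
}

example_dso_type() is hypothetical; dso__data_get_fd(), dso__data_put_fd() and dso__type_fd() are the functions shown in this diff.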
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 44ef968a7ad3..9267af204c7d 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -267,7 +267,7 @@ Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) } /* Get a type die, but skip qualifiers */ -static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) +Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem) { int tag; @@ -1182,7 +1182,6 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); } -#if defined(HAVE_DWARF_GETLOCATIONS_SUPPORT) || defined(HAVE_DWARF_CFI_SUPPORT) static int reg_from_dwarf_op(Dwarf_Op *op) { switch (op->atom) { @@ -1245,9 +1244,7 @@ static bool check_allowed_ops(Dwarf_Op *ops, size_t nops) } return true; } -#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT || HAVE_DWARF_CFI_SUPPORT */ -#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT /** * die_get_var_innermost_scope - Get innermost scope range of given variable DIE * @sp_die: a subprogram DIE @@ -1391,18 +1388,19 @@ struct find_var_data { #define DWARF_OP_DIRECT_REGS 32 static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data, - u64 addr_offset, u64 addr_type, bool is_pointer) + s64 addr_offset, s64 addr_type, bool is_pointer) { Dwarf_Die type_die; Dwarf_Word size; + s64 offset = addr_offset - addr_type; - if (addr_offset == addr_type) { + if (offset == 0) { /* Update offset relative to the start of the variable */ data->offset = 0; return true; } - if (addr_offset < addr_type) + if (offset < 0) return false; if (die_get_real_type(die_mem, &type_die) == NULL) @@ -1417,14 +1415,42 @@ static bool match_var_offset(Dwarf_Die *die_mem, struct find_var_data *data, if (dwarf_aggregate_size(&type_die, &size) < 0) return false; - if (addr_offset >= addr_type + size) + if ((u64)offset >= size) return false; /* Update offset relative to the start of the variable */ - data->offset = addr_offset - addr_type; + data->offset = offset; return true; } +/** + * is_breg_access_indirect - Check if breg based access implies type + * dereference + * @ops: DWARF operations array + * @nops: Number of operations in @ops + * + * Returns true if the DWARF expression evaluates to the variable's + * value, so the memory access on that register needs type dereference. + * Returns false if the expression evaluates to the variable's address. + * This is called after check_allowed_ops. + */ +static bool is_breg_access_indirect(Dwarf_Op *ops, size_t nops) +{ + /* only the base register */ + if (nops == 1) + return false; + + if (nops == 2 && ops[1].atom == DW_OP_stack_value) + return true; + + if (nops == 3 && (ops[1].atom == DW_OP_deref || + ops[1].atom == DW_OP_deref_size) && + ops[2].atom == DW_OP_stack_value) + return false; + /* unreachable, OP not supported */ + return false; +} + /* Only checks direct child DIEs in the given scope. 
*/ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg) { @@ -1444,7 +1470,7 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg) while ((off = dwarf_getlocations(&attr, off, &base, &start, &end, &ops, &nops)) > 0) { /* Assuming the location list is sorted by address */ - if (end < data->pc) + if (end <= data->pc) continue; if (start > data->pc) break; @@ -1453,7 +1479,7 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg) if (data->is_fbreg && ops->atom == DW_OP_fbreg && check_allowed_ops(ops, nops) && match_var_offset(die_mem, data, data->offset, ops->number, - /*is_pointer=*/false)) + is_breg_access_indirect(ops, nops))) return DIE_FIND_CB_END; /* Only match with a simple case */ @@ -1465,11 +1491,11 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg) /*is_pointer=*/true)) return DIE_FIND_CB_END; - /* Local variables accessed by a register + offset */ + /* variables accessed by a register + offset */ if (ops->atom == (DW_OP_breg0 + data->reg) && check_allowed_ops(ops, nops) && match_var_offset(die_mem, data, data->offset, ops->number, - /*is_pointer=*/false)) + is_breg_access_indirect(ops, nops))) return DIE_FIND_CB_END; } else { /* pointer variables saved in a register 32 or above */ @@ -1479,11 +1505,11 @@ static int __die_find_var_reg_cb(Dwarf_Die *die_mem, void *arg) /*is_pointer=*/true)) return DIE_FIND_CB_END; - /* Local variables accessed by a register + offset */ + /* variables accessed by a register + offset */ if (ops->atom == DW_OP_bregx && data->reg == ops->number && check_allowed_ops(ops, nops) && match_var_offset(die_mem, data, data->offset, ops->number2, - /*is_poitner=*/false)) + is_breg_access_indirect(ops, nops))) return DIE_FIND_CB_END; } } @@ -1598,13 +1624,25 @@ static int __die_collect_vars_cb(Dwarf_Die *die_mem, void *arg) if (dwarf_getlocations(&attr, 0, &base, &start, &end, &ops, &nops) <= 0) return DIE_FIND_CB_SIBLING; - if (die_get_real_type(die_mem, &type_die) == NULL) + if (!check_allowed_ops(ops, nops)) + return DIE_FIND_CB_SIBLING; + + if (__die_get_real_type(die_mem, &type_die) == NULL) return DIE_FIND_CB_SIBLING; vt = malloc(sizeof(*vt)); if (vt == NULL) return DIE_FIND_CB_END; + /* Usually a register holds the value of a variable */ + vt->is_reg_var_addr = false; + + if (((ops->atom >= DW_OP_breg0 && ops->atom <= DW_OP_breg31) || + ops->atom == DW_OP_bregx || ops->atom == DW_OP_fbreg) && + !is_breg_access_indirect(ops, nops)) + /* The register contains an address of the variable. 
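A direct DW_OP_breg<n>/DW_OP_fbreg expression with no DW_OP_stack_value evaluates to the variable's address rather than its value; see is_breg_access_indirect() above.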
+ */ + vt->is_reg_var_addr = true; + vt->die_off = dwarf_dieoffset(&type_die); vt->addr = start; vt->reg = reg_from_dwarf_op(ops); @@ -1694,9 +1732,7 @@ void die_collect_global_vars(Dwarf_Die *cu_die, struct die_var_type **var_types) die_find_child(cu_die, __die_collect_global_vars_cb, (void *)var_types, &die_mem); } -#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ -#ifdef HAVE_DWARF_CFI_SUPPORT /** * die_get_cfa - Get frame base information * @dwarf: a Dwarf info @@ -1729,7 +1765,6 @@ int die_get_cfa(Dwarf *dwarf, u64 pc, int *preg, int *poffset) } return -1; } -#endif /* HAVE_DWARF_CFI_SUPPORT */ /* * die_has_loclist - Check if DW_AT_location of @vr_die is a location list @@ -1923,6 +1958,7 @@ struct find_scope_data { static int __die_find_scope_cb(Dwarf_Die *die_mem, void *arg) { struct find_scope_data *data = arg; + int tag = dwarf_tag(die_mem); if (dwarf_haspc(die_mem, data->pc)) { Dwarf_Die *tmp; @@ -1936,6 +1972,14 @@ static int __die_find_scope_cb(Dwarf_Die *die_mem, void *arg) data->nr++; return DIE_FIND_CB_CHILD; } + + /* + * If the DIE doesn't have the PC, we still need to check its children + * and siblings if it's a container like a namespace. + */ + if (tag == DW_TAG_namespace) + return DIE_FIND_CB_CONTINUE; + return DIE_FIND_CB_SIBLING; } @@ -1974,8 +2018,15 @@ static int __die_find_member_offset_cb(Dwarf_Die *die_mem, void *arg) return DIE_FIND_CB_SIBLING; /* Unions might not have location */ - if (die_get_data_member_location(die_mem, &loc) < 0) - loc = 0; + if (die_get_data_member_location(die_mem, &loc) < 0) { + Dwarf_Attribute attr; + + if (dwarf_attr_integrate(die_mem, DW_AT_data_bit_offset, &attr) && + dwarf_formudata(&attr, &loc) == 0) + loc /= 8; + else + loc = 0; + } if (offset == loc) return DIE_FIND_CB_END; diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 24446412b869..cd481ec9c5a1 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -56,6 +56,8 @@ const char *die_get_decl_file(Dwarf_Die *dw_die); /* Get type die */ Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); +/* Get a type die, but skip qualifiers */ +Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); /* Get a type die, but skip qualifiers and typedef */ Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); @@ -146,6 +148,8 @@ struct die_var_type { u64 addr; int reg; int offset; + /* Whether the register holds an address to the type */ + bool is_reg_var_addr; }; /* Return type info of a member at offset */ @@ -154,8 +158,6 @@ Dwarf_Die *die_get_member_type(Dwarf_Die *type_die, int offset, Dwarf_Die *die_m /* Return type info where the pointer and offset point to */ Dwarf_Die *die_deref_ptr_type(Dwarf_Die *ptr_die, int offset, Dwarf_Die *die_mem); -#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT - /* Get byte offset range of given variable DIE */ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); @@ -174,58 +176,7 @@ void die_collect_vars(Dwarf_Die *sc_die, struct die_var_type **var_types); /* Save all global variables in this CU */ void die_collect_global_vars(Dwarf_Die *cu_die, struct die_var_type **var_types); -#else /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ - -static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, - Dwarf_Die *vr_die __maybe_unused, - struct strbuf *buf __maybe_unused) -{ - return -ENOTSUP; -} - -static inline Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die __maybe_unused, - Dwarf_Addr pc __maybe_unused, - int reg __maybe_unused, - int *poffset
__maybe_unused, - bool is_fbreg __maybe_unused, - Dwarf_Die *die_mem __maybe_unused) -{ - return NULL; -} - -static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unused, - Dwarf_Addr addr __maybe_unused, - Dwarf_Die *die_mem __maybe_unused, - int *offset __maybe_unused) -{ - return NULL; -} - -static inline void die_collect_vars(Dwarf_Die *sc_die __maybe_unused, - struct die_var_type **var_types __maybe_unused) -{ -} - -static inline void die_collect_global_vars(Dwarf_Die *cu_die __maybe_unused, - struct die_var_type **var_types __maybe_unused) -{ -} - -#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ - -#ifdef HAVE_DWARF_CFI_SUPPORT - /* Get the frame base information from CFA */ int die_get_cfa(Dwarf *dwarf, u64 pc, int *preg, int *poffset); -#else /* HAVE_DWARF_CFI_SUPPORT */ - -static inline int die_get_cfa(Dwarf *dwarf __maybe_unused, u64 pc __maybe_unused, - int *preg __maybe_unused, int *poffset __maybe_unused) -{ - return -1; -} - -#endif /* HAVE_DWARF_CFI_SUPPORT */ - #endif /* _DWARF_AUX_H */ diff --git a/tools/perf/util/dwarf-regs-csky.c b/tools/perf/util/dwarf-regs-csky.c new file mode 100644 index 000000000000..d38ef1f07f3e --- /dev/null +++ b/tools/perf/util/dwarf-regs-csky.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. +// Mapping of DWARF debug register numbers into register names. + +#include <stddef.h> +#include <dwarf-regs.h> + +#define CSKY_ABIV2_MAX_REGS 73 +const char *csky_dwarf_regs_table_abiv2[CSKY_ABIV2_MAX_REGS] = { + /* r0 ~ r8 */ + "%a0", "%a1", "%a2", "%a3", "%regs0", "%regs1", "%regs2", "%regs3", + /* r9 ~ r15 */ + "%regs4", "%regs5", "%regs6", "%regs7", "%regs8", "%regs9", "%sp", + "%lr", + /* r16 ~ r23 */ + "%exregs0", "%exregs1", "%exregs2", "%exregs3", "%exregs4", + "%exregs5", "%exregs6", "%exregs7", + /* r24 ~ r31 */ + "%exregs8", "%exregs9", "%exregs10", "%exregs11", "%exregs12", + "%exregs13", "%exregs14", "%tls", + "%pc", NULL, NULL, NULL, "%hi", "%lo", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "%epc", +}; + +#define CSKY_ABIV1_MAX_REGS 57 +const char *csky_dwarf_regs_table_abiv1[CSKY_ABIV1_MAX_REGS] = { + /* r0 ~ r8 */ + "%sp", "%regs9", "%a0", "%a1", "%a2", "%a3", "%regs0", "%regs1", + /* r9 ~ r15 */ + "%regs2", "%regs3", "%regs4", "%regs5", "%regs6", "%regs7", "%regs8", + "%lr", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "%epc", +}; + +const char *get_csky_regstr(unsigned int n, unsigned int flags) +{ + if (flags & EF_CSKY_ABIV2) + return (n < CSKY_ABIV2_MAX_REGS) ? csky_dwarf_regs_table_abiv2[n] : NULL; + + return (n < CSKY_ABIV1_MAX_REGS) ? csky_dwarf_regs_table_abiv1[n] : NULL; +} diff --git a/tools/perf/util/dwarf-regs-powerpc.c b/tools/perf/util/dwarf-regs-powerpc.c new file mode 100644 index 000000000000..caf77a234c78 --- /dev/null +++ b/tools/perf/util/dwarf-regs-powerpc.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Mapping of DWARF debug register numbers into register names. + * + * Copyright (C) 2010 Ian Munsie, IBM Corporation. 
+ */ + +#include <dwarf-regs.h> + +#define PPC_OP(op) (((op) >> 26) & 0x3F) +#define PPC_RA(a) (((a) >> 16) & 0x1f) +#define PPC_RT(t) (((t) >> 21) & 0x1f) +#define PPC_RB(b) (((b) >> 11) & 0x1f) +#define PPC_D(D) ((D) & 0xfffe) +#define PPC_DS(DS) ((DS) & 0xfffc) +#define OP_LD 58 +#define OP_STD 62 + +static int get_source_reg(u32 raw_insn) +{ + return PPC_RA(raw_insn); +} + +static int get_target_reg(u32 raw_insn) +{ + return PPC_RT(raw_insn); +} + +static int get_offset_opcode(u32 raw_insn) +{ + int opcode = PPC_OP(raw_insn); + + /* DS-form */ + if ((opcode == OP_LD) || (opcode == OP_STD)) + return PPC_DS(raw_insn); + else + return PPC_D(raw_insn); +} + +/* + * Fills the required fields for op_loc depending on whether it + * is a source or a target. + * D form: ins RT,D(RA) -> src_reg1 = RA, offset = D, dst_reg1 = RT + * DS form: ins RT,DS(RA) -> src_reg1 = RA, offset = DS, dst_reg1 = RT + * X form: ins RT,RA,RB -> src_reg1 = RA, src_reg2 = RB, dst_reg1 = RT + */ +void get_powerpc_regs(u32 raw_insn, int is_source, + struct annotated_op_loc *op_loc) +{ + if (is_source) + op_loc->reg1 = get_source_reg(raw_insn); + else + op_loc->reg1 = get_target_reg(raw_insn); + + if (op_loc->multi_regs) + op_loc->reg2 = PPC_RB(raw_insn); + + /* TODO: Implement offset handling for X form */ + if ((op_loc->mem_ref) && (PPC_OP(raw_insn) != 31)) + op_loc->offset = get_offset_opcode(raw_insn); +} diff --git a/tools/perf/util/dwarf-regs-x86.c b/tools/perf/util/dwarf-regs-x86.c new file mode 100644 index 000000000000..7a55c65e8da6 --- /dev/null +++ b/tools/perf/util/dwarf-regs-x86.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. + * Extracted from probe-finder.c + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + */ + +#include <errno.h> /* for EINVAL */ +#include <string.h> /* for strcmp */ +#include <linux/kernel.h> /* for ARRAY_SIZE */ +#include <dwarf-regs.h> + +struct dwarf_regs_idx { + const char *name; + int idx; +}; + +static const struct dwarf_regs_idx x86_regidx_table[] = { + { "rax", 0 }, { "eax", 0 }, { "ax", 0 }, { "al", 0 }, + { "rdx", 1 }, { "edx", 1 }, { "dx", 1 }, { "dl", 1 }, + { "rcx", 2 }, { "ecx", 2 }, { "cx", 2 }, { "cl", 2 }, + { "rbx", 3 }, { "ebx", 3 }, { "bx", 3 }, { "bl", 3 }, + { "rsi", 4 }, { "esi", 4 }, { "si", 4 }, { "sil", 4 }, + { "rdi", 5 }, { "edi", 5 }, { "di", 5 }, { "dil", 5 }, + { "rbp", 6 }, { "ebp", 6 }, { "bp", 6 }, { "bpl", 6 }, + { "rsp", 7 }, { "esp", 7 }, { "sp", 7 }, { "spl", 7 }, + { "r8", 8 }, { "r8d", 8 }, { "r8w", 8 }, { "r8b", 8 }, + { "r9", 9 }, { "r9d", 9 }, { "r9w", 9 }, { "r9b", 9 }, + { "r10", 10 }, { "r10d", 10 }, { "r10w", 10 }, { "r10b", 10 }, + { "r11", 11 }, { "r11d", 11 }, { "r11w", 11 }, { "r11b", 11 }, + { "r12", 12 }, { "r12d", 12 }, { "r12w", 12 }, { "r12b", 12 }, + { "r13", 13 }, { "r13d", 13 }, { "r13w", 13 }, { "r13b", 13 }, + { "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 }, + { "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 }, + { "rip", DWARF_REG_PC }, +}; + +int get_x86_regnum(const char *name) +{ + unsigned int i; + + if (*name != '%') + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(x86_regidx_table); i++) + if (!strcmp(x86_regidx_table[i].name, name + 1)) + return x86_regidx_table[i].idx; + return -ENOENT; +} diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 5b7f86c0063f..28a1cfdf26d4 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -13,14 +13,6 @@
#include <errno.h> #include <linux/kernel.h> -#ifndef EM_AARCH64 -#define EM_AARCH64 183 /* ARM 64 bit */ -#endif - -#ifndef EM_LOONGARCH -#define EM_LOONGARCH 258 /* LoongArch */ -#endif - /* Define const char * {arch}_register_tbl[] */ #define DEFINE_DWARF_REGSTR_TABLE #include "../arch/x86/include/dwarf-regs-table.h" @@ -28,6 +20,7 @@ #include "../arch/arm64/include/dwarf-regs-table.h" #include "../arch/sh/include/dwarf-regs-table.h" #include "../arch/powerpc/include/dwarf-regs-table.h" +#include "../arch/riscv/include/dwarf-regs-table.h" #include "../arch/s390/include/dwarf-regs-table.h" #include "../arch/sparc/include/dwarf-regs-table.h" #include "../arch/xtensa/include/dwarf-regs-table.h" @@ -37,11 +30,13 @@ #define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL) /* Return architecture dependent register string (for kprobe-tracer) */ -const char *get_dwarf_regstr(unsigned int n, unsigned int machine) +const char *get_dwarf_regstr(unsigned int n, unsigned int machine, unsigned int flags) { + if (machine == EM_NONE) { + /* Generic arch - use host arch */ + machine = EM_HOST; + } switch (machine) { - case EM_NONE: /* Generic arch - use host arch */ - return get_arch_regstr(n); case EM_386: return __get_dwarf_regstr(x86_32_regstr_tbl, n); case EM_X86_64: @@ -50,6 +45,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) return __get_dwarf_regstr(arm_regstr_tbl, n); case EM_AARCH64: return __get_dwarf_regstr(aarch64_regstr_tbl, n); + case EM_CSKY: + return get_csky_regstr(n, flags); case EM_SH: return __get_dwarf_regstr(sh_regstr_tbl, n); case EM_S390: @@ -57,6 +54,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) case EM_PPC: case EM_PPC64: return __get_dwarf_regstr(powerpc_regstr_tbl, n); + case EM_RISCV: + return __get_dwarf_regstr(riscv_regstr_tbl, n); case EM_SPARC: case EM_SPARCV9: return __get_dwarf_regstr(sparc_regstr_tbl, n); @@ -72,13 +71,15 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) return NULL; } +#if EM_HOST != EM_X86_64 && EM_HOST != EM_386 __weak int get_arch_regnum(const char *name __maybe_unused) { return -ENOTSUP; } +#endif /* Return DWARF register number from architecture register name */ -int get_dwarf_regnum(const char *name, unsigned int machine) +int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags __maybe_unused) { char *regname = strdup(name); int reg = -1; @@ -92,10 +93,21 @@ int get_dwarf_regnum(const char *name, unsigned int machine) if (p) *p = '\0'; + if (machine == EM_NONE) { + /* Generic arch - use host arch */ + machine = EM_HOST; + } switch (machine) { - case EM_NONE: /* Generic arch - use host arch */ +#if EM_HOST != EM_X86_64 && EM_HOST != EM_386 + case EM_HOST: reg = get_arch_regnum(regname); break; +#endif + case EM_X86_64: + fallthrough; + case EM_386: + reg = get_x86_regnum(regname); + break; default: pr_err("ELF MACHINE %x is not supported.\n", machine); } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index a459374d0a1a..f1626d2032cd 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -3,34 +3,40 @@ #include "debug.h" #include "env.h" #include "util/header.h" -#include "linux/compiler.h" +#include "util/rwsem.h" +#include <linux/compiler.h> #include <linux/ctype.h> +#include <linux/rbtree.h> +#include <linux/string.h> #include <linux/zalloc.h> #include "cgroup.h" #include <errno.h> #include <sys/utsname.h> #include <stdlib.h> #include <string.h> +#include "pmu.h" #include "pmus.h" #include "strbuf.h" 
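A minimal usage sketch for the reworked dwarf-regs API above (illustrative only, not part of the patch): callers now pass an ELF machine and a flags word. EM_NONE is promoted to EM_HOST internally, and flags is only consulted for EM_CSKY (to choose between the ABIv1 and ABIv2 tables), so passing 0 is fine everywhere else.

#include <stdio.h>
#include <dwarf-regs.h>

static void dwarf_regs_example(void)
{
	/* DWARF register 0 on x86-64 is %rax in the SysV ABI numbering */
	const char *name = get_dwarf_regstr(0, EM_X86_64, /*flags=*/0);
	/* the reverse mapping expects the "%"-prefixed assembler form */
	int regnum = get_dwarf_regnum("%rax", EM_X86_64, /*flags=*/0);

	printf("reg0=%s, %%rax=%d\n", name, regnum); /* reg0=%rax, %rax=0 */
}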
#include "trace/beauty/beauty.h" -struct perf_env perf_env; - #ifdef HAVE_LIBBPF_SUPPORT #include "bpf-event.h" #include "bpf-utils.h" #include <bpf/libbpf.h> -void perf_env__insert_bpf_prog_info(struct perf_env *env, +bool perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) { + bool ret; + down_write(&env->bpf_progs.lock); - __perf_env__insert_bpf_prog_info(env, info_node); + ret = __perf_env__insert_bpf_prog_info(env, info_node); up_write(&env->bpf_progs.lock); + + return ret; } -void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) +bool __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) { __u32 prog_id = info_node->info_linear->info.id; struct bpf_prog_info_node *node; @@ -48,13 +54,14 @@ void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info p = &(*p)->rb_right; } else { pr_debug("duplicated bpf prog info %u\n", prog_id); - return; + return false; } } rb_link_node(&info_node->rb_node, parent, p); rb_insert_color(&info_node->rb_node, &env->bpf_progs.infos); env->bpf_progs.infos_cnt++; + return true; } struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, @@ -82,6 +89,20 @@ out: return node; } +void perf_env__iterate_bpf_prog_info(struct perf_env *env, + void (*cb)(struct bpf_prog_info_node *node, + void *data), + void *data) +{ + struct rb_node *first; + + down_read(&env->bpf_progs.lock); + first = rb_first(&env->bpf_progs.infos); + for (struct rb_node *node = first; node != NULL; node = rb_next(node)) + (*cb)(rb_entry(node, struct bpf_prog_info_node, rb_node), data); + up_read(&env->bpf_progs.lock); +} + bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) { bool ret; @@ -167,6 +188,7 @@ static void perf_env__purge_bpf(struct perf_env *env) next = rb_next(&node->rb_node); rb_erase(&node->rb_node, root); zfree(&node->info_linear); + bpf_metadata_free(node->metadata); free(node); } @@ -247,6 +269,7 @@ void perf_env__exit(struct perf_env *env) void perf_env__init(struct perf_env *env) { + memset(env, 0, sizeof(*env)); #ifdef HAVE_LIBBPF_SUPPORT env->bpf_progs.infos = RB_ROOT; env->bpf_progs.btfs = RB_ROOT; @@ -324,10 +347,13 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) for (idx = 0; idx < nr_cpus; ++idx) { struct perf_cpu cpu = { .cpu = idx }; + int core_id = cpu__get_core_id(cpu); + int socket_id = cpu__get_socket_id(cpu); + int die_id = cpu__get_die_id(cpu); - env->cpu[idx].core_id = cpu__get_core_id(cpu); - env->cpu[idx].socket_id = cpu__get_socket_id(cpu); - env->cpu[idx].die_id = cpu__get_die_id(cpu); + env->cpu[idx].core_id = core_id >= 0 ? core_id : -1; + env->cpu[idx].socket_id = socket_id >= 0 ? socket_id : -1; + env->cpu[idx].die_id = die_id >= 0 ? die_id : -1; } env->nr_cpus_avail = nr_cpus; @@ -372,7 +398,8 @@ error: int perf_env__read_cpuid(struct perf_env *env) { char cpuid[128]; - int err = get_cpuid(cpuid, sizeof(cpuid)); + struct perf_cpu cpu = {-1}; + int err = get_cpuid(cpuid, sizeof(cpuid), cpu); if (err) return err; @@ -405,6 +432,116 @@ static int perf_env__read_nr_cpus_avail(struct perf_env *env) return env->nr_cpus_avail ? 
0 : -ENOENT; } +static int __perf_env__read_core_pmu_caps(const struct perf_pmu *pmu, + int *nr_caps, char ***caps, + unsigned int *max_branches, + unsigned int *br_cntr_nr, + unsigned int *br_cntr_width) +{ + struct perf_pmu_caps *pcaps = NULL; + char *ptr, **tmp; + int ret = 0; + + *nr_caps = 0; + *caps = NULL; + + if (!pmu->nr_caps) + return 0; + + *caps = calloc(pmu->nr_caps, sizeof(char *)); + if (!*caps) + return -ENOMEM; + + tmp = *caps; + list_for_each_entry(pcaps, &pmu->caps, list) { + if (asprintf(&ptr, "%s=%s", pcaps->name, pcaps->value) < 0) { + ret = -ENOMEM; + goto error; + } + + *tmp++ = ptr; + + if (!strcmp(pcaps->name, "branches")) + *max_branches = atoi(pcaps->value); + else if (!strcmp(pcaps->name, "branch_counter_nr")) + *br_cntr_nr = atoi(pcaps->value); + else if (!strcmp(pcaps->name, "branch_counter_width")) + *br_cntr_width = atoi(pcaps->value); + } + *nr_caps = pmu->nr_caps; + return 0; +error: + while (tmp-- != *caps) + zfree(tmp); + zfree(caps); + *nr_caps = 0; + return ret; +} + +int perf_env__read_core_pmu_caps(struct perf_env *env) +{ + struct pmu_caps *pmu_caps; + struct perf_pmu *pmu = NULL; + int nr_pmu, i = 0, j; + int ret; + + nr_pmu = perf_pmus__num_core_pmus(); + + if (!nr_pmu) + return -ENODEV; + + if (nr_pmu == 1) { + pmu = perf_pmus__find_core_pmu(); + if (!pmu) + return -ENODEV; + ret = perf_pmu__caps_parse(pmu); + if (ret < 0) + return ret; + return __perf_env__read_core_pmu_caps(pmu, &env->nr_cpu_pmu_caps, + &env->cpu_pmu_caps, + &env->max_branches, + &env->br_cntr_nr, + &env->br_cntr_width); + } + + pmu_caps = calloc(nr_pmu, sizeof(*pmu_caps)); + if (!pmu_caps) + return -ENOMEM; + + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + if (perf_pmu__caps_parse(pmu) <= 0) + continue; + ret = __perf_env__read_core_pmu_caps(pmu, &pmu_caps[i].nr_caps, + &pmu_caps[i].caps, + &pmu_caps[i].max_branches, + &pmu_caps[i].br_cntr_nr, + &pmu_caps[i].br_cntr_width); + if (ret) + goto error; + + pmu_caps[i].pmu_name = strdup(pmu->name); + if (!pmu_caps[i].pmu_name) { + ret = -ENOMEM; + goto error; + } + i++; + } + + env->nr_pmus_with_caps = nr_pmu; + env->pmu_caps = pmu_caps; + + return 0; +error: + for (i = 0; i < nr_pmu; i++) { + for (j = 0; j < pmu_caps[i].nr_caps; j++) + zfree(&pmu_caps[i].caps[j]); + zfree(&pmu_caps[i].caps); + zfree(&pmu_caps[i].pmu_name); + } + zfree(&pmu_caps); + return ret; +} + const char *perf_env__raw_arch(struct perf_env *env) { return env && !perf_env__read_arch(env) ? env->arch : "unknown"; @@ -469,15 +606,19 @@ const char *perf_env__arch(struct perf_env *env) return normalize_arch(arch_name); } +#if defined(HAVE_LIBTRACEEVENT) +#include "trace/beauty/arch_errno_names.c" +#endif + const char *perf_env__arch_strerrno(struct perf_env *env __maybe_unused, int err __maybe_unused) { -#if defined(HAVE_SYSCALL_TABLE_SUPPORT) && defined(HAVE_LIBTRACEEVENT) +#if defined(HAVE_LIBTRACEEVENT) if (env->arch_strerrno == NULL) env->arch_strerrno = arch_syscalls__strerrno_function(perf_env__arch(env)); return env->arch_strerrno ? 
env->arch_strerrno(err) : "no arch specific strerrno function"; #else - return "!(HAVE_SYSCALL_TABLE_SUPPORT && HAVE_LIBTRACEEVENT)"; + return "!HAVE_LIBTRACEEVENT"; #endif } @@ -528,7 +669,7 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu) for (i = 0; i < env->nr_numa_nodes; i++) { nn = &env->numa_nodes[i]; - nr = max(nr, perf_cpu_map__max(nn->map).cpu); + nr = max(nr, (int)perf_cpu_map__max(nn->map).cpu); } nr++; @@ -624,3 +765,62 @@ out: free(cap_eq); return NULL; } + +void perf_env__find_br_cntr_info(struct perf_env *env, + unsigned int *nr, + unsigned int *width) +{ + if (nr) { + *nr = env->cpu_pmu_caps ? env->br_cntr_nr : + env->pmu_caps->br_cntr_nr; + } + + if (width) { + *width = env->cpu_pmu_caps ? env->br_cntr_width : + env->pmu_caps->br_cntr_width; + } +} + +bool perf_env__is_x86_amd_cpu(struct perf_env *env) +{ + static int is_amd; /* 0: Uninitialized, 1: Yes, -1: No */ + + if (is_amd == 0) + is_amd = env->cpuid && strstarts(env->cpuid, "AuthenticAMD") ? 1 : -1; + + return is_amd >= 1 ? true : false; +} + +bool x86__is_amd_cpu(void) +{ + struct perf_env env = { .total_mem = 0, }; + bool is_amd; + + perf_env__cpuid(&env); + is_amd = perf_env__is_x86_amd_cpu(&env); + perf_env__exit(&env); + + return is_amd; +} + +bool perf_env__is_x86_intel_cpu(struct perf_env *env) +{ + static int is_intel; /* 0: Uninitialized, 1: Yes, -1: No */ + + if (is_intel == 0) + is_intel = env->cpuid && strstarts(env->cpuid, "GenuineIntel") ? 1 : -1; + + return is_intel >= 1 ? true : false; +} + +bool x86__is_intel_cpu(void) +{ + struct perf_env env = { .total_mem = 0, }; + bool is_intel; + + perf_env__cpuid(&env); + is_intel = perf_env__is_x86_intel_cpu(&env); + perf_env__exit(&env); + + return is_intel; +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 2a2c37cc40b7..9977b85523a8 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -56,8 +56,6 @@ struct pmu_caps { typedef const char *(arch_syscalls__strerrno_t)(int err); -arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch); - struct perf_env { char *hostname; char *os_release; @@ -152,8 +150,7 @@ enum perf_compress_type { struct bpf_prog_info_node; struct btf_node; -extern struct perf_env perf_env; - +int perf_env__read_core_pmu_caps(struct perf_env *env); void perf_env__exit(struct perf_env *env); int perf_env__kernel_is_64_bit(struct perf_env *env); @@ -176,20 +173,35 @@ const char *perf_env__raw_arch(struct perf_env *env); int perf_env__nr_cpus_avail(struct perf_env *env); void perf_env__init(struct perf_env *env); -void __perf_env__insert_bpf_prog_info(struct perf_env *env, +#ifdef HAVE_LIBBPF_SUPPORT +bool __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node); -void perf_env__insert_bpf_prog_info(struct perf_env *env, +bool perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node); struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, __u32 prog_id); +void perf_env__iterate_bpf_prog_info(struct perf_env *env, + void (*cb)(struct bpf_prog_info_node *node, + void *data), + void *data); bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id); +#endif // HAVE_LIBBPF_SUPPORT int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu); 
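A sketch of the new BPF program iterator declared above, assuming HAVE_LIBBPF_SUPPORT; the counting callback is invented for illustration. The callback runs with env->bpf_progs.lock held for reading, so it must not re-enter functions that take that lock:

#include <linux/compiler.h>
#include "util/env.h"

/* illustrative callback: tally the BPF programs recorded in the env */
static void count_prog(struct bpf_prog_info_node *node __maybe_unused,
		       void *data)
{
	(*(int *)data)++;
}

static int nr_bpf_progs(struct perf_env *env)
{
	int nr = 0;

	perf_env__iterate_bpf_prog_info(env, count_prog, &nr);
	return nr;
}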
char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, const char *cap); bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name); +void perf_env__find_br_cntr_info(struct perf_env *env, + unsigned int *nr, + unsigned int *width); + +bool x86__is_amd_cpu(void); +bool perf_env__is_x86_amd_cpu(struct perf_env *env); +bool x86__is_intel_cpu(void); +bool perf_env__is_x86_intel_cpu(struct perf_env *env); + #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f32f9abf6344..4c92cc1a952c 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,9 +1,12 @@ #include <errno.h> #include <fcntl.h> #include <inttypes.h> +#include <linux/compiler.h> #include <linux/kernel.h> #include <linux/types.h> #include <perf/cpumap.h> +#include <perf/event.h> +#include <stdio.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> @@ -58,6 +61,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_CGROUP] = "CGROUP", [PERF_RECORD_TEXT_POKE] = "TEXT_POKE", [PERF_RECORD_AUX_OUTPUT_HW_ID] = "AUX_OUTPUT_HW_ID", + [PERF_RECORD_CALLCHAIN_DEFERRED] = "CALLCHAIN_DEFERRED", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -77,6 +81,8 @@ static const char *perf_event__names[] = { [PERF_RECORD_HEADER_FEATURE] = "FEATURE", [PERF_RECORD_COMPRESSED] = "COMPRESSED", [PERF_RECORD_FINISHED_INIT] = "FINISHED_INIT", + [PERF_RECORD_COMPRESSED2] = "COMPRESSED2", + [PERF_RECORD_BPF_METADATA] = "BPF_METADATA", }; const char *perf_event__name(unsigned int id) @@ -216,7 +222,7 @@ size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp) event->cgroup.id, event->cgroup.path); } -int perf_event__process_comm(struct perf_tool *tool __maybe_unused, +int perf_event__process_comm(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -224,7 +230,7 @@ int perf_event__process_comm(struct perf_tool *tool __maybe_unused, return machine__process_comm_event(machine, event, sample); } -int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused, +int perf_event__process_namespaces(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -232,7 +238,7 @@ int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused, return machine__process_namespaces_event(machine, event, sample); } -int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused, +int perf_event__process_cgroup(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -240,7 +246,7 @@ int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused, return machine__process_cgroup_event(machine, event, sample); } -int perf_event__process_lost(struct perf_tool *tool __maybe_unused, +int perf_event__process_lost(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -248,7 +254,7 @@ int perf_event__process_lost(struct perf_tool *tool __maybe_unused, return machine__process_lost_event(machine, event, sample); } -int perf_event__process_aux(struct perf_tool *tool __maybe_unused, +int perf_event__process_aux(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine) @@ -256,7 +262,7 @@ int 
perf_event__process_aux(struct perf_tool *tool __maybe_unused, return machine__process_aux_event(machine, event); } -int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused, +int perf_event__process_itrace_start(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine) @@ -264,7 +270,7 @@ int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused, return machine__process_itrace_start_event(machine, event); } -int perf_event__process_aux_output_hw_id(struct perf_tool *tool __maybe_unused, +int perf_event__process_aux_output_hw_id(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine) @@ -272,7 +278,7 @@ int perf_event__process_aux_output_hw_id(struct perf_tool *tool __maybe_unused, return machine__process_aux_output_hw_id_event(machine, event); } -int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused, +int perf_event__process_lost_samples(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -280,7 +286,7 @@ int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused, return machine__process_lost_samples_event(machine, event, sample); } -int perf_event__process_switch(struct perf_tool *tool __maybe_unused, +int perf_event__process_switch(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine) @@ -288,7 +294,7 @@ int perf_event__process_switch(struct perf_tool *tool __maybe_unused, return machine__process_switch_event(machine, event); } -int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused, +int perf_event__process_ksymbol(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, struct machine *machine) @@ -296,7 +302,7 @@ int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused, return machine__process_ksymbol(machine, event, sample); } -int perf_event__process_bpf(struct perf_tool *tool __maybe_unused, +int perf_event__process_bpf(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -304,7 +310,7 @@ int perf_event__process_bpf(struct perf_tool *tool __maybe_unused, return machine__process_bpf(machine, event, sample); } -int perf_event__process_text_poke(struct perf_tool *tool __maybe_unused, +int perf_event__process_text_poke(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -329,7 +335,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp) build_id__init(&bid, event->mmap2.build_id, event->mmap2.build_id_size); - build_id__sprintf(&bid, sbuild_id); + build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id)); return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 " <%s>]: %c%c%c%c %s\n", @@ -387,7 +393,7 @@ size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp) return ret; } -int perf_event__process_mmap(struct perf_tool *tool __maybe_unused, +int perf_event__process_mmap(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -395,7 +401,7 @@ int perf_event__process_mmap(struct perf_tool *tool __maybe_unused, return machine__process_mmap_event(machine, event, 
sample); } -int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused, +int perf_event__process_mmap2(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -410,7 +416,7 @@ size_t perf_event__fprintf_task(union perf_event *event, FILE *fp) event->fork.ppid, event->fork.ptid); } -int perf_event__process_fork(struct perf_tool *tool __maybe_unused, +int perf_event__process_fork(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -418,7 +424,7 @@ int perf_event__process_fork(struct perf_tool *tool __maybe_unused, return machine__process_fork_event(machine, event, sample); } -int perf_event__process_exit(struct perf_tool *tool __maybe_unused, +int perf_event__process_exit(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -426,14 +432,35 @@ int perf_event__process_exit(struct perf_tool *tool __maybe_unused, return machine__process_exit_event(machine, event, sample); } +int perf_event__exit_del_thread(const struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + struct thread *thread = machine__findnew_thread(machine, + event->fork.pid, + event->fork.tid); + + dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, + event->fork.ppid, event->fork.ptid); + + if (thread) { + machine__remove_thread(machine, thread); + thread__put(thread); + } + + return 0; +} + size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp) { - return fprintf(fp, " offset: %#"PRI_lx64" size: %#"PRI_lx64" flags: %#"PRI_lx64" [%s%s%s]\n", + return fprintf(fp, " offset: %#"PRI_lx64" size: %#"PRI_lx64" flags: %#"PRI_lx64" [%s%s%s%s]\n", event->aux.aux_offset, event->aux.aux_size, event->aux.flags, event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "", event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "", - event->aux.flags & PERF_AUX_FLAG_PARTIAL ? "P" : ""); + event->aux.flags & PERF_AUX_FLAG_PARTIAL ? "P" : "", + event->aux.flags & PERF_AUX_FLAG_COLLISION ? 
"C" : ""); } size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) @@ -483,6 +510,20 @@ size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp) event->bpf.type, event->bpf.flags, event->bpf.id); } +size_t perf_event__fprintf_bpf_metadata(union perf_event *event, FILE *fp) +{ + struct perf_record_bpf_metadata *metadata = &event->bpf_metadata; + size_t ret; + + ret = fprintf(fp, " prog %s\n", metadata->prog_name); + for (__u32 i = 0; i < metadata->nr_entries; i++) { + ret += fprintf(fp, " entry %d: %20s = %s\n", i, + metadata->entries[i].key, + metadata->entries[i].value); + } + return ret; +} + static int text_poke_printer(enum binary_printer_ops op, unsigned int val, void *extra, FILE *fp) { @@ -580,6 +621,9 @@ size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FIL case PERF_RECORD_AUX_OUTPUT_HW_ID: ret += perf_event__fprintf_aux_output_hw_id(event, fp); break; + case PERF_RECORD_BPF_METADATA: + ret += perf_event__fprintf_bpf_metadata(event, fp); + break; default: ret += fprintf(fp, "\n"); } @@ -587,7 +631,7 @@ size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FIL return ret; } -int perf_event__process(struct perf_tool *tool __maybe_unused, +int perf_event__process(const struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) @@ -747,6 +791,17 @@ int machine__resolve(struct machine *machine, struct addr_location *al, al->socket = env->cpu[al->cpu].socket_id; } + /* Account for possible out-of-order switch events. */ + al->parallelism = max(1, min(machine->parallelism, machine__nr_cpus_avail(machine))); + if (test_bit(al->parallelism, symbol_conf.parallelism_filter)) + al->filtered |= (1 << HIST_FILTER__PARALLELISM); + /* + * Multiply it by some const to avoid precision loss or dealing + * with floats. The multiplier does not matter otherwise since + * we only print it as percents. 
+ */ + al->latency = sample->period * 1000 / al->parallelism; + if (al->map) { if (symbol_conf.dso_list && (!dso || !(strlist__has_entry(symbol_conf.dso_list, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index d8bcee2e9b93..64c63b59d617 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -66,9 +66,16 @@ enum { PERF_IP_FLAG_VMEXIT = 1ULL << 12, PERF_IP_FLAG_INTR_DISABLE = 1ULL << 13, PERF_IP_FLAG_INTR_TOGGLE = 1ULL << 14, + PERF_IP_FLAG_BRANCH_MISS = 1ULL << 15, + PERF_IP_FLAG_NOT_TAKEN = 1ULL << 16, }; -#define PERF_IP_FLAG_CHARS "bcrosyiABExghDt" +#define PERF_IP_FLAG_CHARS "bcrosyiABExghDtmn" + +#define PERF_ADDITIONAL_STATE_MASK \ + (PERF_IP_FLAG_IN_TX | \ + PERF_IP_FLAG_INTR_DISABLE | \ + PERF_IP_FLAG_INTR_TOGGLE) #define PERF_BRANCH_MASK (\ PERF_IP_FLAG_BRANCH |\ @@ -84,6 +91,10 @@ enum { PERF_IP_FLAG_VMENTRY |\ PERF_IP_FLAG_VMEXIT) +#define PERF_IP_FLAG_BRANCH_EVENT_MASK \ + (PERF_IP_FLAG_BRANCH_MISS | \ + PERF_IP_FLAG_NOT_TAKEN) + #define PERF_MEM_DATA_SRC_NONE \ (PERF_MEM_S(OP, NA) |\ PERF_MEM_S(LVL, NA) |\ @@ -106,6 +117,7 @@ enum perf_synth_id { PERF_SYNTH_INTEL_PSB, PERF_SYNTH_INTEL_EVT, PERF_SYNTH_INTEL_IFLAG_CHG, + PERF_SYNTH_POWERPC_VPA_DTL, }; /* @@ -243,6 +255,25 @@ struct perf_synth_intel_iflag_chg { u64 branch_ip; /* If via_branch */ }; +/* + * The powerpc VPA DTL entries are of below format + */ +struct powerpc_vpadtl_entry { + u8 dispatch_reason; + u8 preempt_reason; + u16 processor_id; + u32 enqueue_to_dispatch_time; + u32 ready_to_enqueue_time; + u32 waiting_to_ready_time; + u64 timebase; + u64 fault_addr; + u64 srr0; + u64 srr1; +}; + +extern const char *dispatch_reasons[11]; +extern const char *preempt_reasons[10]; + static inline void *perf_synth__raw_data(void *p) { return p + 4; @@ -267,71 +298,75 @@ struct perf_tool; void perf_event__read_stat_config(struct perf_stat_config *config, struct perf_record_stat_config *event); -int perf_event__process_comm(struct perf_tool *tool, +int perf_event__process_comm(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_lost(struct perf_tool *tool, +int perf_event__process_lost(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_lost_samples(struct perf_tool *tool, +int perf_event__process_lost_samples(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_aux(struct perf_tool *tool, +int perf_event__process_aux(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_itrace_start(struct perf_tool *tool, +int perf_event__process_itrace_start(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_aux_output_hw_id(struct perf_tool *tool, +int perf_event__process_aux_output_hw_id(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_switch(struct perf_tool *tool, +int perf_event__process_switch(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_namespaces(struct perf_tool *tool, +int perf_event__process_namespaces(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int 
perf_event__process_cgroup(struct perf_tool *tool, +int perf_event__process_cgroup(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_mmap(struct perf_tool *tool, +int perf_event__process_mmap(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_mmap2(struct perf_tool *tool, +int perf_event__process_mmap2(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_fork(struct perf_tool *tool, +int perf_event__process_fork(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_exit(struct perf_tool *tool, +int perf_event__process_exit(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_ksymbol(struct perf_tool *tool, +int perf_event__exit_del_thread(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_bpf(struct perf_tool *tool, +int perf_event__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); +int perf_event__process_bpf(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process_text_poke(struct perf_tool *tool, +int perf_event__process_text_poke(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__process(struct perf_tool *tool, +int perf_event__process(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); @@ -355,6 +390,7 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp); size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp); size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_bpf_metadata(union perf_event *event, FILE *fp); size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine,FILE *fp); size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp); @@ -375,11 +411,6 @@ extern unsigned int proc_map_timeout; #define PAGE_SIZE_NAME_LEN 32 char *get_page_size_name(u64 size, char *str); -void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type); -void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type); -const char *arch_perf_header_entry(const char *se_header); -int arch_support_sort_key(const char *sort_key); - static inline bool perf_event_header__cpumode_is_guest(u8 cpumode) { return cpumode == PERF_RECORD_MISC_GUEST_KERNEL || diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h index 8fecc9fbaecc..dcff697ed252 100644 --- a/tools/perf/util/events_stats.h +++ b/tools/perf/util/events_stats.h @@ -18,7 +18,18 @@ * PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored * in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells * exactly how many samples the kernel in fact dropped, i.e. it is the sum of - * all struct perf_record_lost_samples.lost fields reported. 
+ * all struct perf_record_lost_samples.lost fields reported without setting the + * misc field in the header. + * + * The BPF program can discard samples according to the filter expressions given + * by the user. This number is kept in a BPF map and dumped at the end of perf + * record in a PERF_RECORD_LOST_SAMPLES event. To differentiate it from other + * lost samples, perf tools sets PERF_RECORD_MISC_LOST_SAMPLES_BPF flag in the + * header.misc field. The number of dropped-samples events is stored in + * .nr_events[PERF_RECORD_LOST_SAMPLES] while total_dropped_samples tells + * exactly how many samples the BPF program in fact dropped, i.e. it is the sum + * of all struct perf_record_lost_samples.lost fields reported with the misc + * field set in the header. * * The total_period is needed because by default auto-freq is used, so * multiplying nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get @@ -28,6 +39,7 @@ struct events_stats { u64 total_lost; u64 total_lost_samples; + u64 total_dropped_samples; u64 total_aux_lost; u64 total_aux_partial; u64 total_aux_collision; @@ -45,14 +57,16 @@ struct events_stats { struct hists_stats { u64 total_period; u64 total_non_filtered_period; + u64 total_latency; + u64 total_non_filtered_latency; u32 nr_samples; u32 nr_non_filtered_samples; u32 nr_lost_samples; + u32 nr_dropped_samples; }; void events_stats__inc(struct events_stats *stats, u32 type); -size_t events_stats__fprintf(struct events_stats *stats, FILE *fp, - bool skip_empty); +size_t events_stats__fprintf(struct events_stats *stats, FILE *fp); #endif /* __PERF_EVENTS_STATS_ */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3a719edafc7a..03674d2cbd01 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -33,6 +33,10 @@ #include "util/bpf-filter.h" #include "util/stat.h" #include "util/util.h" +#include "util/env.h" +#include "util/intel-tpebs.h" +#include "util/metricgroup.h" +#include "util/strbuf.h" #include <signal.h> #include <unistd.h> #include <sched.h> @@ -46,6 +50,7 @@ #include <sys/mman.h> #include <sys/prctl.h> #include <sys/timerfd.h> +#include <sys/wait.h> #include <linux/bitops.h> #include <linux/hash.h> @@ -78,6 +83,9 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, evlist->ctl_fd.fd = -1; evlist->ctl_fd.ack = -1; evlist->ctl_fd.pos = -1; + evlist->nr_br_cntr = -1; + metricgroup__rblist_init(&evlist->metric_events); + INIT_LIST_HEAD(&evlist->deferred_samples); } struct evlist *evlist__new(void) @@ -94,16 +102,24 @@ struct evlist *evlist__new_default(void) { struct evlist *evlist = evlist__new(); bool can_profile_kernel; - int err; + struct perf_pmu *pmu = NULL; if (!evlist) return NULL; can_profile_kernel = perf_event_paranoid_check(1); - err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu"); - if (err) { - evlist__delete(evlist); - return NULL; + + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + char buf[256]; + int err; + + snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name, + can_profile_kernel ? 
"P" : "Pu"); + err = parse_event(evlist, buf); + if (err) { + evlist__delete(evlist); + return NULL; + } } if (evlist->core.nr_entries > 1) { @@ -168,6 +184,7 @@ static void evlist__purge(struct evlist *evlist) void evlist__exit(struct evlist *evlist) { + metricgroup__rblist_exit(&evlist->metric_events); event_enable_timer__exit(&evlist->eet); zfree(&evlist->mmap); zfree(&evlist->overwrite_mmap); @@ -315,62 +332,6 @@ struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide) } #endif -int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) -{ - struct evsel *evsel, *n; - LIST_HEAD(head); - size_t i; - - for (i = 0; i < nr_attrs; i++) { - evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i); - if (evsel == NULL) - goto out_delete_partial_list; - list_add_tail(&evsel->core.node, &head); - } - - evlist__splice_list_tail(evlist, &head); - - return 0; - -out_delete_partial_list: - __evlist__for_each_entry_safe(&head, n, evsel) - evsel__delete(evsel); - return -1; -} - -int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) -{ - size_t i; - - for (i = 0; i < nr_attrs; i++) - event_attr_init(attrs + i); - - return evlist__add_attrs(evlist, attrs, nr_attrs); -} - -__weak int arch_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs) -{ - if (!nr_attrs) - return 0; - - return __evlist__add_default_attrs(evlist, attrs, nr_attrs); -} - -struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id) -{ - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) { - if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT && - (int)evsel->core.attr.config == id) - return evsel; - } - - return NULL; -} - struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name) { struct evsel *evsel; @@ -1057,13 +1018,12 @@ int evlist__create_maps(struct evlist *evlist, struct target *target) * per-thread data. thread_map__new_str will call * thread_map__new_all_cpus to enumerate all threads. */ - threads = thread_map__new_str(target->pid, target->tid, target->uid, - all_threads); + threads = thread_map__new_str(target->pid, target->tid, all_threads); if (!threads) return -1; - if (target__uses_dummy_map(target)) + if (target__uses_dummy_map(target) && !evlist__has_bpf_output(evlist)) cpus = perf_cpu_map__new_any_cpu(); else cpus = perf_cpu_map__new(target->cpu_list); @@ -1086,7 +1046,8 @@ out_delete_threads: return -1; } -int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel) +int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel, + struct target *target) { struct evsel *evsel; int err = 0; @@ -1108,7 +1069,7 @@ int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel) * non-tracepoint events can have BPF filters. 
*/ if (!list_empty(&evsel->bpf_filters)) { - err = perf_bpf_filter__prepare(evsel); + err = perf_bpf_filter__prepare(evsel, target); if (err) { *err_evsel = evsel; break; @@ -1194,11 +1155,6 @@ int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) return ret; } -int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid) -{ - return evlist__set_tp_filter_pids(evlist, 1, &pid); -} - int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) { char *filter = asprintf__tp_filter_pids(npids, pids); @@ -1261,6 +1217,72 @@ u64 evlist__combined_branch_type(struct evlist *evlist) return branch_type; } +static struct evsel * +evlist__find_dup_event_from_prev(struct evlist *evlist, struct evsel *event) +{ + struct evsel *pos; + + evlist__for_each_entry(evlist, pos) { + if (event == pos) + break; + if ((pos->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) && + !strcmp(pos->name, event->name)) + return pos; + } + return NULL; +} + +#define MAX_NR_ABBR_NAME (26 * 11) + +/* + * The abbr name is from A to Z9. If the number of event + * which requires the branch counter > MAX_NR_ABBR_NAME, + * return NA. + */ +static void evlist__new_abbr_name(char *name) +{ + static int idx; + int i = idx / 26; + + if (idx >= MAX_NR_ABBR_NAME) { + name[0] = 'N'; + name[1] = 'A'; + name[2] = '\0'; + return; + } + + name[0] = 'A' + (idx % 26); + + if (!i) + name[1] = '\0'; + else { + name[1] = '0' + i - 1; + name[2] = '\0'; + } + + idx++; +} + +void evlist__update_br_cntr(struct evlist *evlist) +{ + struct evsel *evsel, *dup; + int i = 0; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + evsel->br_cntr_idx = i++; + evsel__leader(evsel)->br_cntr_nr++; + + dup = evlist__find_dup_event_from_prev(evlist, evsel); + if (dup) + memcpy(evsel->abbr_name, dup->abbr_name, 3 * sizeof(char)); + else + evlist__new_abbr_name(evsel->abbr_name); + } + } + evlist->nr_br_cntr = i; +} + bool evlist__valid_read_format(struct evlist *evlist) { struct evsel *first = evlist__first(evlist), *pos = first; @@ -1362,19 +1384,18 @@ static int evlist__create_syswide_maps(struct evlist *evlist) */ cpus = perf_cpu_map__new_online_cpus(); if (!cpus) - goto out; + return -ENOMEM; threads = perf_thread_map__new_dummy(); - if (!threads) - goto out_put; + if (!threads) { + perf_cpu_map__put(cpus); + return -ENOMEM; + } perf_evlist__set_maps(&evlist->core, cpus, threads); - perf_thread_map__put(threads); -out_put: perf_cpu_map__put(cpus); -out: - return -ENOMEM; + return 0; } int evlist__open(struct evlist *evlist) @@ -1413,6 +1434,8 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const int child_ready_pipe[2], go_pipe[2]; char bf; + evlist->workload.cork_fd = -1; + if (pipe(child_ready_pipe) < 0) { perror("failed to create 'ready' pipe"); return -1; @@ -1465,7 +1488,7 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const * For cancelling the workload without actually running it, * the parent will just close workload.cork_fd, without writing * anything, i.e. read will return zero and we just exit() - * here. + * here (See evlist__cancel_workload()). 
*/ if (ret != 1) { if (ret == -1) @@ -1529,7 +1552,7 @@ out_close_ready_pipe: int evlist__start_workload(struct evlist *evlist) { - if (evlist->workload.cork_fd > 0) { + if (evlist->workload.cork_fd >= 0) { char bf = 0; int ret; /* @@ -1540,12 +1563,24 @@ int evlist__start_workload(struct evlist *evlist) perror("unable to write to pipe"); close(evlist->workload.cork_fd); + evlist->workload.cork_fd = -1; return ret; } return 0; } +void evlist__cancel_workload(struct evlist *evlist) +{ + int status; + + if (evlist->workload.cork_fd >= 0) { + close(evlist->workload.cork_fd); + evlist->workload.cork_fd = -1; + waitpid(evlist->workload.pid, &status, WNOHANG); + } +} + int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) { struct evsel *evsel = evlist__event2evsel(evlist, event); @@ -2444,23 +2479,36 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx) return NULL; } -int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf) +void evlist__format_evsels(struct evlist *evlist, struct strbuf *sb, size_t max_length) { - struct evsel *evsel; - int printed = 0; + struct evsel *evsel, *leader = NULL; + bool first = true; evlist__for_each_entry(evlist, evsel) { + struct evsel *new_leader = evsel__leader(evsel); + if (evsel__is_dummy_event(evsel)) continue; - if (size > (strlen(evsel__name(evsel)) + (printed ? 2 : 1))) { - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "," : "", evsel__name(evsel)); - } else { - printed += scnprintf(bf + printed, size - printed, "%s...", printed ? "," : ""); - break; + + if (leader != new_leader && leader && leader->core.nr_members > 1) + strbuf_addch(sb, '}'); + + if (!first) + strbuf_addch(sb, ','); + + if (sb->len > max_length) { + strbuf_addstr(sb, "..."); + return; } - } + if (leader != new_leader && new_leader->core.nr_members > 1) + strbuf_addch(sb, '{'); - return printed; + strbuf_addstr(sb, evsel__name(evsel)); + first = false; + leader = new_leader; + } + if (leader && leader->core.nr_members > 1) + strbuf_addch(sb, '}'); } void evlist__check_mem_load_aux(struct evlist *evlist) @@ -2510,49 +2558,86 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis return; evlist__for_each_entry(evlist, pos) { - struct perf_cpu_map *intersect, *to_test; - const struct perf_pmu *pmu = evsel__find_pmu(pos); + evsel__warn_user_requested_cpus(pos, user_requested_cpus); + } + perf_cpu_map__put(user_requested_cpus); +} - to_test = pmu && pmu->is_core ? pmu->cpus : cpu_map__online(); - intersect = perf_cpu_map__intersect(to_test, user_requested_cpus); - if (!perf_cpu_map__equal(intersect, user_requested_cpus)) { - char buf[128]; +/* Should uniquify be disabled for the evlist? */ +static bool evlist__disable_uniquify(const struct evlist *evlist) +{ + struct evsel *counter; + struct perf_pmu *last_pmu = NULL; + bool first = true; - cpu_map__snprint(to_test, buf, sizeof(buf)); - pr_warning("WARNING: A requested CPU in '%s' is not supported by PMU '%s' (CPUs %s) for event '%s'\n", - cpu_list, pmu ? pmu->name : "cpu", buf, evsel__name(pos)); + evlist__for_each_entry(evlist, counter) { + /* If PMUs vary then uniquify can be useful. */ + if (!first && counter->pmu != last_pmu) + return false; + first = false; + if (counter->pmu) { + /* Allow uniquify for uncore PMUs. */ + if (!counter->pmu->is_core) + return false; + /* Keep hybrid event names uniquified for clarity. 
*/ + if (perf_pmus__num_core_pmus() > 1) + return false; } - perf_cpu_map__put(intersect); + last_pmu = counter->pmu; } - perf_cpu_map__put(user_requested_cpus); + return true; } -void evlist__uniquify_name(struct evlist *evlist) +static bool evlist__set_needs_uniquify(struct evlist *evlist, const struct perf_stat_config *config) { - char *new_name, empty_attributes[2] = ":", *attributes; - struct evsel *pos; + struct evsel *counter; + bool needs_uniquify = false; - if (perf_pmus__num_core_pmus() == 1) - return; + if (evlist__disable_uniquify(evlist)) { + evlist__for_each_entry(evlist, counter) + counter->uniquified_name = true; + return false; + } - evlist__for_each_entry(evlist, pos) { - if (!evsel__is_hybrid(pos)) - continue; + evlist__for_each_entry(evlist, counter) { + if (evsel__set_needs_uniquify(counter, config)) + needs_uniquify = true; + } + return needs_uniquify; +} - if (strchr(pos->name, '/')) - continue; +void evlist__uniquify_evsel_names(struct evlist *evlist, const struct perf_stat_config *config) +{ + if (evlist__set_needs_uniquify(evlist, config)) { + struct evsel *pos; - attributes = strchr(pos->name, ':'); - if (attributes) - *attributes = '\0'; - else - attributes = empty_attributes; + evlist__for_each_entry(evlist, pos) + evsel__uniquify_counter(pos); + } +} - if (asprintf(&new_name, "%s/%s/%s", pos->pmu_name, pos->name, attributes + 1)) { - free(pos->name); - pos->name = new_name; - } else { - *attributes = ':'; - } +bool evlist__has_bpf_output(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel__is_bpf_output(evsel)) + return true; + } + + return false; +} + +bool evlist__needs_bpf_sb_event(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel__is_dummy_event(evsel)) + continue; + if (!evsel->core.attr.exclude_kernel) + return true; } + + return false; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index cb91dc9117a2..911834ae7c2a 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -12,6 +12,7 @@ #include <perf/evlist.h> #include "events_stats.h" #include "evsel.h" +#include "rblist.h" #include <pthread.h> #include <signal.h> #include <unistd.h> @@ -19,7 +20,10 @@ struct pollfd; struct thread_map; struct perf_cpu_map; +struct perf_stat_config; struct record_opts; +struct strbuf; +struct target; /* * State machine of bkw_mmap_state: @@ -56,6 +60,7 @@ struct evlist { bool enabled; int id_pos; int is_pos; + int nr_br_cntr; u64 combined_sample_type; enum bkw_mmap_state bkw_mmap_state; struct { @@ -66,7 +71,7 @@ struct evlist { struct mmap *overwrite_mmap; struct evsel *selected; struct events_stats stats; - struct perf_env *env; + struct perf_session *session; void (*trace_event_sample_raw)(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); @@ -82,6 +87,13 @@ struct evlist { int pos; /* index at evlist core object to check signals */ } ctl_fd; struct event_enable_timer *eet; + /** + * @metric_events: A list of struct metric_event which each have a list + * of struct metric_expr. + */ + struct rblist metric_events; + /* samples with deferred_callchain would wait here. 
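+ * They are flushed once the matching PERF_RECORD_CALLCHAIN_DEFERRED
+ * event is seen.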
*/ + struct list_head deferred_samples; }; struct evsel_str_handler { @@ -100,19 +112,8 @@ void evlist__delete(struct evlist *evlist); void evlist__add(struct evlist *evlist, struct evsel *entry); void evlist__remove(struct evlist *evlist, struct evsel *evsel); -int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); - -int __evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, size_t nr_attrs); - -int arch_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs); - -#define evlist__add_default_attrs(evlist, array) \ - arch_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) - int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs); +int arch_evlist__add_required_events(struct list_head *list); int evlist__add_dummy(struct evlist *evlist); struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide); @@ -142,7 +143,6 @@ int __evlist__set_tracepoints_handlers(struct evlist *evlist, __evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) int evlist__set_tp_filter(struct evlist *evlist, const char *filter); -int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid); int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); int evlist__append_tp_filter(struct evlist *evlist, const char *filter); @@ -150,7 +150,6 @@ int evlist__append_tp_filter(struct evlist *evlist, const char *filter); int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid); int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); -struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id); struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name); int evlist__add_pollfd(struct evlist *evlist, int fd); @@ -184,6 +183,7 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const char *argv[], bool pipe_output, void (*exec_error)(int signo, siginfo_t *info, void *ucontext)); int evlist__start_workload(struct evlist *evlist); +void evlist__cancel_workload(struct evlist *evlist); struct option; @@ -212,11 +212,13 @@ void evlist__enable_non_dummy(struct evlist *evlist); void evlist__set_selected(struct evlist *evlist, struct evsel *evsel); int evlist__create_maps(struct evlist *evlist, struct target *target); -int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel); +int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel, + struct target *target); u64 __evlist__combined_sample_type(struct evlist *evlist); u64 evlist__combined_sample_type(struct evlist *evlist); u64 evlist__combined_branch_type(struct evlist *evlist); +void evlist__update_br_cntr(struct evlist *evlist); bool evlist__sample_id_all(struct evlist *evlist); u16 evlist__id_hdr_size(struct evlist *evlist); @@ -439,9 +441,11 @@ int event_enable_timer__process(struct event_enable_timer *eet); struct evsel *evlist__find_evsel(struct evlist *evlist, int idx); -int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf); +void evlist__format_evsels(struct evlist *evlist, struct strbuf *sb, size_t max_length); void evlist__check_mem_load_aux(struct evlist *evlist); void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list); -void evlist__uniquify_name(struct evlist *evlist); +void evlist__uniquify_evsel_names(struct evlist *evlist, const struct perf_stat_config *config); +bool evlist__has_bpf_output(struct 
evlist *evlist); +bool evlist__needs_bpf_sb_event(struct evlist *evlist); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4f818ab6b662..9cd706f62793 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -5,6 +5,11 @@ * Parts came from builtin-{top,stat,record}.c, see those files for further * copyright notes. */ +/* + * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select + * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu. + */ +#define __SANE_USERSPACE_TYPES__ #include <byteswap.h> #include <errno.h> @@ -19,6 +24,7 @@ #include <linux/zalloc.h> #include <sys/ioctl.h> #include <sys/resource.h> +#include <sys/syscall.h> #include <sys/types.h> #include <dirent.h> #include <stdlib.h> @@ -30,6 +36,7 @@ #include "counts.h" #include "event.h" #include "evsel.h" +#include "time-utils.h" #include "util/env.h" #include "util/evsel_config.h" #include "util/evsel_fprintf.h" @@ -41,6 +48,7 @@ #include "record.h" #include "debug.h" #include "trace-event.h" +#include "session.h" #include "stat.h" #include "string2.h" #include "memswap.h" @@ -49,57 +57,152 @@ #include "off_cpu.h" #include "pmu.h" #include "pmus.h" +#include "drm_pmu.h" +#include "hwmon_pmu.h" +#include "tool_pmu.h" +#include "tp_pmu.h" #include "rlimit.h" #include "../perf-sys.h" #include "util/parse-branch-options.h" #include "util/bpf-filter.h" +#include "util/hist.h" #include <internal/xyarray.h> #include <internal/lib.h> #include <internal/threadmap.h> +#include "util/intel-tpebs.h" #include <linux/ctype.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif struct perf_missing_features perf_missing_features; static clockid_t clockid; -static const char *const perf_tool_event__tool_names[PERF_TOOL_MAX] = { - NULL, - "duration_time", - "user_time", - "system_time", -}; - -const char *perf_tool_event__to_str(enum perf_tool_event ev) +static int evsel__no_extra_init(struct evsel *evsel __maybe_unused) { - if (ev > PERF_TOOL_NONE && ev < PERF_TOOL_MAX) - return perf_tool_event__tool_names[ev]; - - return NULL; + return 0; } -enum perf_tool_event perf_tool_event__from_str(const char *str) +static bool test_attr__enabled(void) { - int i; + static bool test_attr__enabled; + static bool test_attr__enabled_tested; + + if (!test_attr__enabled_tested) { + char *dir = getenv("PERF_TEST_ATTR"); - perf_tool_event__for_each_event(i) { - if (!strcmp(str, perf_tool_event__tool_names[i])) - return i; + test_attr__enabled = (dir != NULL); + test_attr__enabled_tested = true; } - return PERF_TOOL_NONE; + return test_attr__enabled; } +#define __WRITE_ASS(str, fmt, data) \ +do { \ + if (fprintf(file, #str "=%"fmt "\n", data) < 0) { \ + perror("test attr - failed to write event file"); \ + fclose(file); \ + return -1; \ + } \ +} while (0) -static int evsel__no_extra_init(struct evsel *evsel __maybe_unused) +#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field) + +static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, + int fd, int group_fd, unsigned long flags) { + FILE *file; + char path[PATH_MAX]; + char *dir = getenv("PERF_TEST_ATTR"); + + snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir, + attr->type, attr->config, fd); + + file = fopen(path, "w+"); + if (!file) { + perror("test attr - failed to open event file"); + return -1; + } + + if (fprintf(file, "[event-%d-%llu-%d]\n", + attr->type, attr->config, fd) < 0) { + perror("test attr - failed to write event 
file"); + fclose(file); + return -1; + } + + /* syscall arguments */ + __WRITE_ASS(fd, "d", fd); + __WRITE_ASS(group_fd, "d", group_fd); + __WRITE_ASS(cpu, "d", cpu.cpu); + __WRITE_ASS(pid, "d", pid); + __WRITE_ASS(flags, "lu", flags); + + /* struct perf_event_attr */ + WRITE_ASS(type, PRIu32); + WRITE_ASS(size, PRIu32); + WRITE_ASS(config, "llu"); + WRITE_ASS(sample_period, "llu"); + WRITE_ASS(sample_type, "llu"); + WRITE_ASS(read_format, "llu"); + WRITE_ASS(disabled, "d"); + WRITE_ASS(inherit, "d"); + WRITE_ASS(pinned, "d"); + WRITE_ASS(exclusive, "d"); + WRITE_ASS(exclude_user, "d"); + WRITE_ASS(exclude_kernel, "d"); + WRITE_ASS(exclude_hv, "d"); + WRITE_ASS(exclude_idle, "d"); + WRITE_ASS(mmap, "d"); + WRITE_ASS(comm, "d"); + WRITE_ASS(freq, "d"); + WRITE_ASS(inherit_stat, "d"); + WRITE_ASS(enable_on_exec, "d"); + WRITE_ASS(task, "d"); + WRITE_ASS(watermark, "d"); + WRITE_ASS(precise_ip, "d"); + WRITE_ASS(mmap_data, "d"); + WRITE_ASS(sample_id_all, "d"); + WRITE_ASS(exclude_host, "d"); + WRITE_ASS(exclude_guest, "d"); + WRITE_ASS(exclude_callchain_kernel, "d"); + WRITE_ASS(exclude_callchain_user, "d"); + WRITE_ASS(mmap2, "d"); + WRITE_ASS(comm_exec, "d"); + WRITE_ASS(context_switch, "d"); + WRITE_ASS(write_backward, "d"); + WRITE_ASS(namespaces, "d"); + WRITE_ASS(use_clockid, "d"); + WRITE_ASS(wakeup_events, PRIu32); + WRITE_ASS(bp_type, PRIu32); + WRITE_ASS(config1, "llu"); + WRITE_ASS(config2, "llu"); + WRITE_ASS(branch_sample_type, "llu"); + WRITE_ASS(sample_regs_user, "llu"); + WRITE_ASS(sample_stack_user, PRIu32); + + fclose(file); return 0; } -void __weak test_attr__ready(void) { } +#undef __WRITE_ASS +#undef WRITE_ASS + +static void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, + int fd, int group_fd, unsigned long flags) +{ + int errno_saved = errno; + + if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) { + pr_err("test attr FAILED"); + exit(128); + } + + errno = errno_saved; +} static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused) { @@ -137,6 +240,16 @@ set_methods: return 0; } +const char *evsel__pmu_name(const struct evsel *evsel) +{ + struct perf_pmu *pmu = evsel__find_pmu(evsel); + + if (pmu) + return pmu->name; + + return event_type(evsel->core.attr.type); +} + #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) int __evsel__sample_size(u64 sample_type) @@ -289,12 +402,13 @@ void evsel__init(struct evsel *evsel, evsel->sample_size = __evsel__sample_size(attr->sample_type); evsel__calc_id_pos(evsel); evsel->cmdline_group_boundary = false; - evsel->metric_events = NULL; evsel->per_pkg_mask = NULL; evsel->collect_stat = false; - evsel->pmu_name = NULL; evsel->group_pmu_name = NULL; evsel->skippable = false; + evsel->supported = true; + evsel->alternate_hw_config = PERF_COUNT_HW_MAX; + evsel->script_output_type = -1; // FIXME: OUTPUT_TYPE_UNSET, see builtin-script.c } struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) @@ -354,7 +468,7 @@ static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src) * The assumption is that @orig is not configured nor opened yet. * So we only care about the attributes that can be set while it's parsed. 
*/ -struct evsel *evsel__clone(struct evsel *orig) +struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig) { struct evsel *evsel; @@ -367,12 +481,16 @@ struct evsel *evsel__clone(struct evsel *orig) if (orig->bpf_obj) return NULL; - evsel = evsel__new(&orig->core.attr); + if (dest) + evsel = dest; + else + evsel = evsel__new(&orig->core.attr); + if (evsel == NULL) return NULL; evsel->core.cpus = perf_cpu_map__get(orig->core.cpus); - evsel->core.own_cpus = perf_cpu_map__get(orig->core.own_cpus); + evsel->core.pmu_cpus = perf_cpu_map__get(orig->core.pmu_cpus); evsel->core.threads = perf_thread_map__get(orig->core.threads); evsel->core.nr_members = orig->core.nr_members; evsel->core.system_wide = orig->core.system_wide; @@ -389,11 +507,6 @@ struct evsel *evsel__clone(struct evsel *orig) if (evsel->group_name == NULL) goto out_err; } - if (orig->pmu_name) { - evsel->pmu_name = strdup(orig->pmu_name); - if (evsel->pmu_name == NULL) - goto out_err; - } if (orig->group_pmu_name) { evsel->group_pmu_name = strdup(orig->group_pmu_name); if (evsel->group_pmu_name == NULL) @@ -411,18 +524,29 @@ struct evsel *evsel__clone(struct evsel *orig) } evsel->cgrp = cgroup__get(orig->cgrp); #ifdef HAVE_LIBTRACEEVENT + if (orig->tp_sys) { + evsel->tp_sys = strdup(orig->tp_sys); + if (evsel->tp_sys == NULL) + goto out_err; + } + if (orig->tp_name) { + evsel->tp_name = strdup(orig->tp_name); + if (evsel->tp_name == NULL) + goto out_err; + } evsel->tp_format = orig->tp_format; #endif evsel->handler = orig->handler; evsel->core.leader = orig->core.leader; + evsel->metric_leader = orig->metric_leader; evsel->max_events = orig->max_events; - evsel->tool_event = orig->tool_event; - free((char *)evsel->unit); - evsel->unit = strdup(orig->unit); - if (evsel->unit == NULL) - goto out_err; - + zfree(&evsel->unit); + if (orig->unit) { + evsel->unit = strdup(orig->unit); + if (evsel->unit == NULL) + goto out_err; + } evsel->scale = orig->scale; evsel->snapshot = orig->snapshot; evsel->per_pkg = orig->per_pkg; @@ -432,15 +556,17 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->exclude_GH = orig->exclude_GH; evsel->sample_read = orig->sample_read; - evsel->auto_merge_stats = orig->auto_merge_stats; evsel->collect_stat = orig->collect_stat; evsel->weak_group = orig->weak_group; evsel->use_config_name = orig->use_config_name; evsel->pmu = orig->pmu; + evsel->first_wildcard_match = orig->first_wildcard_match; if (evsel__copy_config_terms(evsel, orig) < 0) goto out_err; + evsel->alternate_hw_config = orig->alternate_hw_config; + return evsel; out_err: @@ -451,50 +577,84 @@ out_err: /* * Returns pointer with encoded error via <linux/err.h> interface. 
*/ -#ifdef HAVE_LIBTRACEEVENT struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format) { + struct perf_event_attr attr = { + .type = PERF_TYPE_TRACEPOINT, + .sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | + PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), + }; struct evsel *evsel = zalloc(perf_evsel__object.size); - int err = -ENOMEM; + int err = -ENOMEM, id = -1; - if (evsel == NULL) { + if (evsel == NULL) goto out_err; - } else { - struct perf_event_attr attr = { - .type = PERF_TYPE_TRACEPOINT, - .sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | - PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), - }; - if (asprintf(&evsel->name, "%s:%s", sys, name) < 0) - goto out_free; - event_attr_init(&attr); + if (asprintf(&evsel->name, "%s:%s", sys, name) < 0) + goto out_free; - if (format) { - evsel->tp_format = trace_event__tp_format(sys, name); - if (IS_ERR(evsel->tp_format)) { - err = PTR_ERR(evsel->tp_format); - goto out_free; - } - attr.config = evsel->tp_format->id; - } else { - attr.config = (__u64) -1; - } +#ifdef HAVE_LIBTRACEEVENT + evsel->tp_sys = strdup(sys); + if (!evsel->tp_sys) + goto out_free; + evsel->tp_name = strdup(name); + if (!evsel->tp_name) + goto out_free; +#endif - attr.sample_period = 1; - evsel__init(evsel, &attr, idx); - } + event_attr_init(&attr); + if (format) { + id = tp_pmu__id(sys, name); + if (id < 0) { + err = id; + goto out_free; + } + } + attr.config = (__u64)id; + attr.sample_period = 1; + evsel__init(evsel, &attr, idx); return evsel; out_free: zfree(&evsel->name); +#ifdef HAVE_LIBTRACEEVENT + zfree(&evsel->tp_sys); + zfree(&evsel->tp_name); +#endif free(evsel); out_err: return ERR_PTR(err); } + +#ifdef HAVE_LIBTRACEEVENT +struct tep_event *evsel__tp_format(struct evsel *evsel) +{ + struct tep_event *tp_format = evsel->tp_format; + + if (tp_format) + return tp_format; + + if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) + return NULL; + + if (!evsel->tp_sys) + tp_format = trace_event__tp_format_id(evsel->core.attr.config); + else + tp_format = trace_event__tp_format(evsel->tp_sys, evsel->tp_name); + + if (IS_ERR(tp_format)) { + int err = -PTR_ERR(evsel->tp_format); + + pr_err("Error getting tracepoint format '%s' '%s'(%d)\n", + evsel__name(evsel), strerror(err), err); + return NULL; + } + evsel->tp_format = tp_format; + return evsel->tp_format; +} #endif const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = { @@ -544,7 +704,6 @@ static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) { int colon = 0, r = 0; struct perf_event_attr *attr = &evsel->core.attr; - bool exclude_guest_default = false; #define MOD_PRINT(context, mod) do { \ if (!attr->exclude_##context) { \ @@ -556,17 +715,15 @@ static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) MOD_PRINT(kernel, 'k'); MOD_PRINT(user, 'u'); MOD_PRINT(hv, 'h'); - exclude_guest_default = true; } if (attr->precise_ip) { if (!colon) colon = ++r; r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp"); - exclude_guest_default = true; } - if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) { + if (attr->exclude_host || attr->exclude_guest) { MOD_PRINT(host, 'H'); MOD_PRINT(guest, 'G'); } @@ -613,11 +770,6 @@ static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size) return r + evsel__add_modifiers(evsel, bf + r, size - r); } -static int evsel__tool_name(enum perf_tool_event ev, char *bf, size_t size) -{ - return scnprintf(bf, size, "%s", perf_tool_event__to_str(ev)); -} - static int __evsel__bp_name(char *bf, size_t 
size, u64 addr, u64 type) { int r; @@ -768,10 +920,7 @@ const char *evsel__name(struct evsel *evsel) break; case PERF_TYPE_SOFTWARE: - if (evsel__is_tool(evsel)) - evsel__tool_name(evsel->tool_event, bf, sizeof(bf)); - else - evsel__sw_name(evsel, bf, sizeof(bf)); + evsel__sw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_TRACEPOINT: @@ -782,6 +931,10 @@ const char *evsel__name(struct evsel *evsel) evsel__bp_name(evsel, bf, sizeof(bf)); break; + case PERF_PMU_TYPE_TOOL: + scnprintf(bf, sizeof(bf), "%s", evsel__tool_pmu_event_name(evsel)); + break; + default: scnprintf(bf, sizeof(bf), "unknown attr type: %d", evsel->core.attr.type); @@ -807,7 +960,7 @@ const char *evsel__metric_id(const struct evsel *evsel) return evsel->metric_id; if (evsel__is_tool(evsel)) - return perf_tool_event__to_str(evsel->tool_event); + return evsel__tool_pmu_event_name(evsel); return "unknown"; } @@ -830,16 +983,22 @@ const char *evsel__group_name(struct evsel *evsel) int evsel__group_desc(struct evsel *evsel, char *buf, size_t size) { int ret = 0; + bool first = true; struct evsel *pos; const char *group_name = evsel__group_name(evsel); if (!evsel->forced_leader) ret = scnprintf(buf, size, "%s { ", group_name); - ret += scnprintf(buf + ret, size - ret, "%s", evsel__name(evsel)); + for_each_group_evsel(pos, evsel) { + if (symbol_conf.skip_empty && + evsel__hists(pos)->stats.nr_samples == 0) + continue; - for_each_group_member(pos, evsel) - ret += scnprintf(buf + ret, size - ret, ", %s", evsel__name(pos)); + ret += scnprintf(buf + ret, size - ret, "%s%s", + first ? "" : ", ", evsel__name(pos)); + first = false; + } if (!evsel->forced_leader) ret += scnprintf(buf + ret, size - ret, " }"); @@ -852,7 +1011,6 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o { bool function = evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->core.attr; - const char *arch = perf_env__arch(evsel__env(evsel)); evsel__set_sample_bit(evsel, CALLCHAIN); @@ -883,6 +1041,8 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o if (param->record_mode == CALLCHAIN_DWARF) { if (!function) { + const char *arch = perf_env__arch(evsel__env(evsel)); + evsel__set_sample_bit(evsel, REGS_USER); evsel__set_sample_bit(evsel, STACK_USER); if (opts->sample_user_regs && @@ -906,6 +1066,9 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o pr_info("Disabling user space callchains for function trace event.\n"); attr->exclude_callchain_user = 1; } + + if (param->defer && !attr->exclude_callchain_user) + attr->defer_callchain = 1; } void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts, @@ -932,6 +1095,71 @@ static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param * } } +static void evsel__apply_ratio_to_prev(struct evsel *evsel, + struct perf_event_attr *attr, + struct record_opts *opts, + const char *buf) +{ + struct perf_event_attr *prev_attr = NULL; + struct evsel *evsel_prev = NULL; + u64 type = evsel->core.attr.sample_type; + u64 prev_type = 0; + double rtp; + + rtp = strtod(buf, NULL); + if (rtp <= 0) { + pr_err("Invalid ratio-to-prev value %lf\n", rtp); + return; + } + if (evsel == evsel__leader(evsel)) { + pr_err("Invalid use of ratio-to-prev term without preceding element in group\n"); + return; + } + if (!evsel->pmu->is_core) { + pr_err("Event using ratio-to-prev term must have a core PMU\n"); + return; + } + + evsel_prev = evsel__prev(evsel); + if (!evsel_prev) { + pr_err("Previous event 
does not exist.\n"); + return; + } + + if (evsel_prev->pmu->type != evsel->pmu->type) { + pr_err("Compared events (\"%s\", \"%s\") must have same PMU\n", + evsel->name, evsel_prev->name); + return; + } + + prev_attr = &evsel_prev->core.attr; + prev_type = evsel_prev->core.attr.sample_type; + + if (!(prev_type & PERF_SAMPLE_PERIOD)) { + attr->sample_period = prev_attr->sample_period * rtp; + attr->freq = 0; + evsel__reset_sample_bit(evsel, PERIOD); + } else if (!(type & PERF_SAMPLE_PERIOD)) { + prev_attr->sample_period = attr->sample_period / rtp; + prev_attr->freq = 0; + evsel__reset_sample_bit(evsel_prev, PERIOD); + } else { + if (opts->user_interval != ULLONG_MAX) { + prev_attr->sample_period = opts->user_interval; + attr->sample_period = prev_attr->sample_period * rtp; + prev_attr->freq = 0; + attr->freq = 0; + evsel__reset_sample_bit(evsel_prev, PERIOD); + evsel__reset_sample_bit(evsel, PERIOD); + } else { + pr_err("Event period term or count (-c) must be set when using ratio-to-prev term.\n"); + return; + } + } + + arch_evsel__apply_ratio_to_prev(evsel, attr); +} + static void evsel__apply_config_terms(struct evsel *evsel, struct record_opts *opts, bool track) { @@ -945,6 +1173,7 @@ static void evsel__apply_config_terms(struct evsel *evsel, u32 dump_size = 0; int max_stack = 0; const char *callgraph_buf = NULL; + const char *rtp_buf = NULL; list_for_each_entry(term, config_terms, list) { switch (term->type) { @@ -1007,11 +1236,17 @@ static void evsel__apply_config_terms(struct evsel *evsel, case EVSEL__CONFIG_TERM_AUX_OUTPUT: attr->aux_output = term->val.aux_output ? 1 : 0; break; + case EVSEL__CONFIG_TERM_AUX_ACTION: + /* Already applied by auxtrace */ + break; case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE: /* Already applied by auxtrace */ break; case EVSEL__CONFIG_TERM_CFG_CHG: break; + case EVSEL__CONFIG_TERM_RATIO_TO_PREV: + rtp_buf = term->val.str; + break; default: break; } @@ -1063,6 +1298,8 @@ static void evsel__apply_config_terms(struct evsel *evsel, evsel__config_callchain(evsel, opts, ¶m); } } + if (rtp_buf) + evsel__apply_ratio_to_prev(evsel, attr, opts, rtp_buf); } struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type) @@ -1087,6 +1324,11 @@ void __weak arch__post_evsel_config(struct evsel *evsel __maybe_unused, { } +void __weak arch_evsel__apply_ratio_to_prev(struct evsel *evsel __maybe_unused, + struct perf_event_attr *attr __maybe_unused) +{ +} + static void evsel__set_default_freq_period(struct record_opts *opts, struct perf_event_attr *attr) { @@ -1098,9 +1340,10 @@ static void evsel__set_default_freq_period(struct record_opts *opts, } } -static bool evsel__is_offcpu_event(struct evsel *evsel) +bool evsel__is_offcpu_event(struct evsel *evsel) { - return evsel__is_bpf_output(evsel) && evsel__name_is(evsel, OFFCPU_EVENT); + return evsel__is_bpf_output(evsel) && evsel__name_is(evsel, OFFCPU_EVENT) && + evsel->core.attr.sample_type & PERF_SAMPLE_RAW; } /* @@ -1140,7 +1383,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread; attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1; - attr->inherit = !opts->no_inherit; + attr->inherit = target__has_cpu(&opts->target) ? 0 : !opts->no_inherit; attr->write_backward = opts->overwrite ? 
1 : 0; attr->read_format = PERF_FORMAT_LOST; @@ -1162,7 +1405,15 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, */ if (leader->core.nr_members > 1) { attr->read_format |= PERF_FORMAT_GROUP; - attr->inherit = 0; + } + + /* + * Inherit + SAMPLE_READ requires SAMPLE_TID in the read_format + */ + if (attr->inherit) { + evsel__set_sample_bit(evsel, TID); + evsel->core.attr.read_format |= + PERF_FORMAT_ID; } } @@ -1240,7 +1491,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, evsel__set_sample_bit(evsel, CPU); } - if (opts->sample_address) + if (opts->sample_data_src) evsel__set_sample_bit(evsel, DATA_SRC); if (opts->sample_phys_addr) @@ -1255,14 +1506,16 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, attr->branch_sample_type = opts->branch_stack; } - if (opts->sample_weight) + if (opts->sample_weight || evsel->retire_lat) { arch_evsel__set_sample_weight(evsel); - + evsel->retire_lat = false; + } attr->task = track; attr->mmap = track; attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; attr->build_id = track && opts->build_id; + attr->defer_output = track && callchain && callchain->defer; /* * ksymbol is tracked separately with text poke because it needs to be @@ -1339,7 +1592,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, attr->exclude_user = 1; } - if (evsel->core.own_cpus || evsel->unit) + if (evsel->core.pmu_cpus || evsel->unit) evsel->core.attr.read_format |= PERF_FORMAT_ID; /* @@ -1369,8 +1622,10 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, if (evsel__is_dummy_event(evsel)) evsel__reset_sample_bit(evsel, BRANCH_STACK); - if (evsel__is_offcpu_event(evsel)) + if (evsel__is_offcpu_event(evsel)) { evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES; + attr->inherit = 0; + } arch__post_evsel_config(evsel, attr); } @@ -1467,10 +1722,21 @@ static void evsel__free_config_terms(struct evsel *evsel) free_config_terms(&evsel->config_terms); } +static void (*evsel__priv_destructor)(void *priv); + +void evsel__set_priv_destructor(void (*destructor)(void *priv)) +{ + assert(evsel__priv_destructor == NULL); + + evsel__priv_destructor = destructor; +} + void evsel__exit(struct evsel *evsel) { assert(list_empty(&evsel->core.node)); assert(evsel->evlist == NULL); + if (evsel__is_retire_lat(evsel)) + evsel__tpebs_close(evsel); bpf_counter__destroy(evsel); perf_bpf_filter__destroy(evsel); evsel__free_counts(evsel); @@ -1478,21 +1744,26 @@ void evsel__exit(struct evsel *evsel) perf_evsel__free_id(&evsel->core); evsel__free_config_terms(evsel); cgroup__put(evsel->cgrp); - perf_cpu_map__put(evsel->core.cpus); - perf_cpu_map__put(evsel->core.own_cpus); - perf_thread_map__put(evsel->core.threads); + perf_evsel__exit(&evsel->core); zfree(&evsel->group_name); zfree(&evsel->name); +#ifdef HAVE_LIBTRACEEVENT + zfree(&evsel->tp_sys); + zfree(&evsel->tp_name); +#endif zfree(&evsel->filter); - zfree(&evsel->pmu_name); zfree(&evsel->group_pmu_name); zfree(&evsel->unit); zfree(&evsel->metric_id); evsel__zero_per_pkg(evsel); hashmap__free(evsel->per_pkg_mask); evsel->per_pkg_mask = NULL; - zfree(&evsel->metric_events); + if (evsel__priv_destructor) + evsel__priv_destructor(evsel->priv); perf_evsel__object.fini(evsel); + if (evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME || + evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) + xyarray__delete(evsel->start_times); } void evsel__delete(struct evsel *evsel) @@ -1534,6 +1805,12 @@ static void evsel__set_count(struct evsel *counter, 
int cpu_map_idx, int thread, count = perf_counts(counter->counts, cpu_map_idx, thread); + if (evsel__is_retire_lat(counter)) { + evsel__tpebs_read(counter, cpu_map_idx, thread); + perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true); + return; + } + count->val = val; count->ena = ena; count->run = run; @@ -1542,6 +1819,60 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread, perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true); } +static bool evsel__group_has_tpebs(struct evsel *leader) +{ + struct evsel *evsel; + + for_each_group_evsel(evsel, leader) { + if (evsel__is_retire_lat(evsel)) + return true; + } + return false; +} + +static u64 evsel__group_read_nr_members(struct evsel *leader) +{ + u64 nr = leader->core.nr_members; + struct evsel *evsel; + + for_each_group_evsel(evsel, leader) { + if (evsel__is_retire_lat(evsel)) + nr--; + } + return nr; +} + +static u64 evsel__group_read_size(struct evsel *leader) +{ + u64 read_format = leader->core.attr.read_format; + int entry = sizeof(u64); /* value */ + int size = 0; + int nr = 1; + + if (!evsel__group_has_tpebs(leader)) + return perf_evsel__read_size(&leader->core); + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + size += sizeof(u64); + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + size += sizeof(u64); + + if (read_format & PERF_FORMAT_ID) + entry += sizeof(u64); + + if (read_format & PERF_FORMAT_LOST) + entry += sizeof(u64); + + if (read_format & PERF_FORMAT_GROUP) { + nr = evsel__group_read_nr_members(leader); + size += sizeof(u64); + } + + size += entry * nr; + return size; +} + static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data) { u64 read_format = leader->core.attr.read_format; @@ -1550,7 +1881,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int nr = *data++; - if (nr != (u64) leader->core.nr_members) + if (nr != evsel__group_read_nr_members(leader)) return -EINVAL; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -1580,7 +1911,7 @@ static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread) { struct perf_stat_evsel *ps = leader->stats; u64 read_format = leader->core.attr.read_format; - int size = perf_evsel__read_size(&leader->core); + int size = evsel__group_read_size(leader); u64 *data = ps->group_data; if (!(read_format & PERF_FORMAT_ID)) @@ -1606,11 +1937,42 @@ static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread) return evsel__process_group_data(leader, cpu_map_idx, thread, data); } +bool __evsel__match(const struct evsel *evsel, u32 type, u64 config) +{ + + u32 e_type = evsel->core.attr.type; + u64 e_config = evsel->core.attr.config; + + if (e_type == type && e_config == config) + return true; + if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE) + return false; + if ((e_type == PERF_TYPE_HARDWARE || e_type == PERF_TYPE_HW_CACHE) && + perf_pmus__supports_extended_type()) + e_config &= PERF_HW_EVENT_MASK; + if (e_type == type && e_config == config) + return true; + if (type == PERF_TYPE_HARDWARE && evsel->pmu && evsel->pmu->is_core && + evsel->alternate_hw_config == config) + return true; + return false; +} + int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread) { - u64 read_format = evsel->core.attr.read_format; + if (evsel__is_tool(evsel)) + return evsel__tool_pmu_read(evsel, cpu_map_idx, thread); + + if (evsel__is_hwmon(evsel)) + return evsel__hwmon_pmu_read(evsel, cpu_map_idx, thread); - if 
(read_format & PERF_FORMAT_GROUP) + if (evsel__is_drm(evsel)) + return evsel__drm_pmu_read(evsel, cpu_map_idx, thread); + + if (evsel__is_retire_lat(evsel)) + return evsel__tpebs_read(evsel, cpu_map_idx, thread); + + if (evsel->core.attr.read_format & PERF_FORMAT_GROUP) return evsel__read_group(evsel, cpu_map_idx, thread); return evsel__read_one(evsel, cpu_map_idx, thread); @@ -1662,7 +2024,7 @@ static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread) struct evsel *leader = evsel__leader(evsel); int fd; - if (evsel__is_group_leader(evsel)) + if (!evsel->supported || evsel__is_group_leader(evsel)) return -1; /* @@ -1676,7 +2038,7 @@ static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread) return -1; fd = FD(leader, cpu_map_idx, thread); - BUG_ON(fd == -1 && !leader->skippable); + BUG_ON(fd == -1 && leader->supported); /* * When the leader has been skipped, return -2 to distinguish from no @@ -1799,6 +2161,7 @@ static struct perf_thread_map *empty_thread_map; static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { + int ret = 0; int nthreads = perf_thread_map__nr(threads); if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) || @@ -1829,15 +2192,25 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0) return -ENOMEM; + if (evsel__is_tool(evsel)) + ret = evsel__tool_pmu_prepare_open(evsel, cpus, nthreads); + evsel->open_flags = PERF_FLAG_FD_CLOEXEC; if (evsel->cgrp) evsel->open_flags |= PERF_FLAG_PID_CGROUP; - return 0; + return ret; } static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_callchain) + evsel->core.attr.defer_callchain = 0; + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_output) + evsel->core.attr.defer_output = 0; + if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit && + (evsel->core.attr.sample_type & PERF_SAMPLE_READ)) + evsel->core.attr.inherit = 0; if (perf_missing_features.branch_counters) evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS; if (perf_missing_features.read_lost) @@ -1887,120 +2260,406 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, return err; } -bool evsel__detect_missing_features(struct evsel *evsel) +static bool __has_attr_feature(struct perf_event_attr *attr, + struct perf_cpu cpu, unsigned long flags) +{ + int fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu, + /*group_fd=*/-1, flags); + close(fd); + + if (fd < 0) { + attr->exclude_kernel = 1; + + fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu, + /*group_fd=*/-1, flags); + close(fd); + } + + if (fd < 0) { + attr->exclude_hv = 1; + + fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu, + /*group_fd=*/-1, flags); + close(fd); + } + + if (fd < 0) { + attr->exclude_guest = 1; + + fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu, + /*group_fd=*/-1, flags); + close(fd); + } + + attr->exclude_kernel = 0; + attr->exclude_guest = 0; + attr->exclude_hv = 0; + + return fd >= 0; +} + +static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags) { + struct perf_cpu cpu = {.cpu = -1}; + + return __has_attr_feature(attr, cpu, flags); +} + +static void evsel__detect_missing_pmu_features(struct evsel *evsel) +{ + struct perf_event_attr attr = { + .type = 
evsel->core.attr.type, + .config = evsel->core.attr.config, + .disabled = 1, + }; + struct perf_pmu *pmu = evsel->pmu; + int old_errno; + + old_errno = errno; + + if (pmu == NULL) + pmu = evsel->pmu = evsel__find_pmu(evsel); + + if (pmu == NULL || pmu->missing_features.checked) + goto out; + /* * Must probe features in the order they were added to the - * perf_event_attr interface. + * perf_event_attr interface. These are kernel core limitation but + * specific to PMUs with branch stack. So we can detect with the given + * hardware event and stop on the first one succeeded. */ - if (!perf_missing_features.branch_counters && - (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) { - perf_missing_features.branch_counters = true; - pr_debug2("switching off branch counters support\n"); + + /* Please add new feature detection here. */ + + attr.exclude_guest = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + pmu->missing_features.exclude_guest = true; + pr_debug2("switching off exclude_guest for PMU %s\n", pmu->name); + +found: + pmu->missing_features.checked = true; +out: + errno = old_errno; +} + +static void evsel__detect_missing_brstack_features(struct evsel *evsel) +{ + static bool detection_done = false; + struct perf_event_attr attr = { + .type = evsel->core.attr.type, + .config = evsel->core.attr.config, + .disabled = 1, + .sample_type = PERF_SAMPLE_BRANCH_STACK, + .sample_period = 1000, + }; + int old_errno; + + if (detection_done) + return; + + old_errno = errno; + + /* + * Must probe features in the order they were added to the + * perf_event_attr interface. These are PMU specific limitation + * so we can detect with the given hardware event and stop on the + * first one succeeded. + */ + + /* Please add new feature detection here. */ + + attr.branch_sample_type = PERF_SAMPLE_BRANCH_COUNTERS; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.branch_counters = true; + pr_debug2("switching off branch counters support\n"); + + attr.branch_sample_type = PERF_SAMPLE_BRANCH_HW_INDEX; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.branch_hw_idx = true; + pr_debug2("switching off branch HW index support\n"); + + attr.branch_sample_type = PERF_SAMPLE_BRANCH_NO_CYCLES | PERF_SAMPLE_BRANCH_NO_FLAGS; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.lbr_flags = true; + pr_debug2_peo("switching off branch sample type no (cycles/flags)\n"); + +found: + detection_done = true; + errno = old_errno; +} + +static bool evsel__probe_aux_action(struct evsel *evsel, struct perf_cpu cpu) +{ + struct perf_event_attr attr = evsel->core.attr; + int old_errno = errno; + + attr.disabled = 1; + attr.aux_start_paused = 1; + + if (__has_attr_feature(&attr, cpu, /*flags=*/0)) { + errno = old_errno; return true; - } else if (!perf_missing_features.read_lost && - (evsel->core.attr.read_format & PERF_FORMAT_LOST)) { - perf_missing_features.read_lost = true; - pr_debug2("switching off PERF_FORMAT_LOST support\n"); + } + + /* + * EOPNOTSUPP means the kernel supports the feature but the PMU does + * not, so keep that distinction if possible. + */ + if (errno != EOPNOTSUPP) + errno = old_errno; + + return false; +} + +static void evsel__detect_missing_aux_action_feature(struct evsel *evsel, struct perf_cpu cpu) +{ + static bool detection_done; + struct evsel *leader; + + /* + * Don't bother probing aux_action if it is not being used or has been + * probed before. 
+ */ + if (!evsel->core.attr.aux_action || detection_done) + return; + + detection_done = true; + + /* + * The leader is an AUX area event. If it has failed, assume the feature + * is not supported. + */ + leader = evsel__leader(evsel); + if (evsel == leader) { + perf_missing_features.aux_action = true; + return; + } + + /* + * AUX area event with aux_action must have been opened successfully + * already, so feature is supported. + */ + if (leader->core.attr.aux_action) + return; + + if (!evsel__probe_aux_action(leader, cpu)) + perf_missing_features.aux_action = true; +} + +static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu cpu) +{ + static bool detection_done = false; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_TASK_CLOCK, + .disabled = 1, + }; + int old_errno; + + evsel__detect_missing_aux_action_feature(evsel, cpu); + + evsel__detect_missing_pmu_features(evsel); + + if (evsel__has_br_stack(evsel)) + evsel__detect_missing_brstack_features(evsel); + + if (detection_done) + goto check; + + old_errno = errno; + + /* + * Must probe features in the order they were added to the + * perf_event_attr interface. These are kernel core limitation + * not PMU-specific so we can detect with a software event and + * stop on the first one succeeded. + */ + + /* Please add new feature detection here. */ + + attr.defer_callchain = true; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.defer_callchain = true; + pr_debug2("switching off deferred callchain support\n"); + attr.defer_callchain = false; + + attr.inherit = true; + attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.inherit_sample_read = true; + pr_debug2("Using PERF_SAMPLE_READ / :S modifier is not compatible with inherit, falling back to no-inherit.\n"); + attr.inherit = false; + attr.sample_type = 0; + + attr.read_format = PERF_FORMAT_LOST; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.read_lost = true; + pr_debug2("switching off PERF_FORMAT_LOST support\n"); + attr.read_format = 0; + + attr.sample_type = PERF_SAMPLE_WEIGHT_STRUCT; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.weight_struct = true; + pr_debug2("switching off weight struct support\n"); + attr.sample_type = 0; + + attr.sample_type = PERF_SAMPLE_CODE_PAGE_SIZE; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.code_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support\n"); + attr.sample_type = 0; + + attr.sample_type = PERF_SAMPLE_DATA_PAGE_SIZE; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.data_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support\n"); + attr.sample_type = 0; + + attr.cgroup = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.cgroup = true; + pr_debug2_peo("Kernel has no cgroup sampling support\n"); + attr.cgroup = 0; + + attr.aux_output = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.aux_output = true; + pr_debug2_peo("Kernel has no attr.aux_output support\n"); + attr.aux_output = 0; + + attr.bpf_event = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.bpf = true; + pr_debug2_peo("switching off bpf_event\n"); + attr.bpf_event = 0; + + attr.ksymbol = 1; + if 
(has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.ksymbol = true; + pr_debug2_peo("switching off ksymbol\n"); + attr.ksymbol = 0; + + attr.write_backward = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.write_backward = true; + pr_debug2_peo("switching off write_backward\n"); + attr.write_backward = 0; + + attr.use_clockid = 1; + attr.clockid = CLOCK_MONOTONIC; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.clockid = true; + pr_debug2_peo("switching off clockid\n"); + attr.use_clockid = 0; + attr.clockid = 0; + + if (has_attr_feature(&attr, /*flags=*/PERF_FLAG_FD_CLOEXEC)) + goto found; + perf_missing_features.cloexec = true; + pr_debug2_peo("switching off cloexec flag\n"); + + attr.mmap2 = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.mmap2 = true; + pr_debug2_peo("switching off mmap2\n"); + attr.mmap2 = 0; + + /* set this unconditionally? */ + perf_missing_features.sample_id_all = true; + pr_debug2_peo("switching off sample_id_all\n"); + + attr.inherit = 1; + attr.read_format = PERF_FORMAT_GROUP; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.group_read = true; + pr_debug2_peo("switching off group read\n"); + attr.inherit = 0; + attr.read_format = 0; + +found: + detection_done = true; + errno = old_errno; + +check: + if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) && + perf_missing_features.defer_callchain) return true; - } else if (!perf_missing_features.weight_struct && - (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) { - perf_missing_features.weight_struct = true; - pr_debug2("switching off weight struct support\n"); + + if (evsel->core.attr.inherit && + (evsel->core.attr.sample_type & PERF_SAMPLE_READ) && + perf_missing_features.inherit_sample_read) return true; - } else if (!perf_missing_features.code_page_size && - (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) { - perf_missing_features.code_page_size = true; - pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n"); - return false; - } else if (!perf_missing_features.data_page_size && - (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) { - perf_missing_features.data_page_size = true; - pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n"); - return false; - } else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { - perf_missing_features.cgroup = true; - pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n"); - return false; - } else if (!perf_missing_features.branch_hw_idx && - (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { - perf_missing_features.branch_hw_idx = true; - pr_debug2("switching off branch HW index support\n"); + + if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) && + perf_missing_features.branch_counters) return true; - } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { - perf_missing_features.aux_output = true; - pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n"); - return false; - } else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) { - perf_missing_features.bpf = true; - pr_debug2_peo("switching off bpf_event\n"); + + if ((evsel->core.attr.read_format & PERF_FORMAT_LOST) && + perf_missing_features.read_lost) return true; - } else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) { 
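/*
 * A minimal standalone sketch of the probe pattern the rewritten
 * evsel__detect_missing_features() above relies on: each candidate
 * perf_event_attr feature bit is tried once on a trivial software
 * event via perf_event_open(), and a failed open marks the feature
 * missing. The helper name probe_attr() is assumed for illustration;
 * this is not the patch's code.
 *
 *	#include <linux/perf_event.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	static int probe_attr(struct perf_event_attr *attr)
 *	{
 *		// pid = 0, cpu = -1: count for the current task on any CPU
 *		int fd = syscall(SYS_perf_event_open, attr, 0, -1, -1, 0);
 *
 *		if (fd < 0)
 *			return 0;	// kernel rejected this attr bit
 *		close(fd);
 *		return 1;
 *	}
 *
 *	// Usage: set one candidate bit at a time, newest feature first,
 *	// on a trivial software event, and cache the result.
 *	struct perf_event_attr attr = {
 *		.size = sizeof(attr),
 *		.type = PERF_TYPE_SOFTWARE,
 *		.config = PERF_COUNT_SW_TASK_CLOCK,
 *		.disabled = 1,
 *		.ksymbol = 1,
 *	};
 *	int have_ksymbol = probe_attr(&attr);
 */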
- perf_missing_features.ksymbol = true; - pr_debug2_peo("switching off ksymbol\n"); + + if ((evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && + perf_missing_features.weight_struct) return true; - } else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) { - perf_missing_features.write_backward = true; - pr_debug2_peo("switching off write_backward\n"); - return false; - } else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) { + + if (evsel->core.attr.use_clockid && evsel->core.attr.clockid != CLOCK_MONOTONIC && + !perf_missing_features.clockid) { perf_missing_features.clockid_wrong = true; - pr_debug2_peo("switching off clockid\n"); return true; - } else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) { - perf_missing_features.clockid = true; - pr_debug2_peo("switching off use_clockid\n"); + } + + if (evsel->core.attr.use_clockid && perf_missing_features.clockid) return true; - } else if (!perf_missing_features.cloexec && (evsel->open_flags & PERF_FLAG_FD_CLOEXEC)) { - perf_missing_features.cloexec = true; - pr_debug2_peo("switching off cloexec flag\n"); + + if ((evsel->open_flags & PERF_FLAG_FD_CLOEXEC) && + perf_missing_features.cloexec) return true; - } else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) { - perf_missing_features.mmap2 = true; - pr_debug2_peo("switching off mmap2\n"); + + if (evsel->core.attr.mmap2 && perf_missing_features.mmap2) return true; - } else if (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) { - if (evsel->pmu == NULL) - evsel->pmu = evsel__find_pmu(evsel); - - if (evsel->pmu) - evsel->pmu->missing_features.exclude_guest = true; - else { - /* we cannot find PMU, disable attrs now */ - evsel->core.attr.exclude_host = false; - evsel->core.attr.exclude_guest = false; - } - if (evsel->exclude_GH) { - pr_debug2_peo("PMU has no exclude_host/guest support, bailing out\n"); - return false; - } - if (!perf_missing_features.exclude_guest) { - perf_missing_features.exclude_guest = true; - pr_debug2_peo("switching off exclude_guest, exclude_host\n"); - } + if ((evsel->core.attr.branch_sample_type & (PERF_SAMPLE_BRANCH_NO_FLAGS | + PERF_SAMPLE_BRANCH_NO_CYCLES)) && + perf_missing_features.lbr_flags) return true; - } else if (!perf_missing_features.sample_id_all) { - perf_missing_features.sample_id_all = true; - pr_debug2_peo("switching off sample_id_all\n"); + + if (evsel->core.attr.inherit && (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && + perf_missing_features.group_read) return true; - } else if (!perf_missing_features.lbr_flags && - (evsel->core.attr.branch_sample_type & - (PERF_SAMPLE_BRANCH_NO_CYCLES | - PERF_SAMPLE_BRANCH_NO_FLAGS))) { - perf_missing_features.lbr_flags = true; - pr_debug2_peo("switching off branch sample type no (cycles/flags)\n"); + + if (evsel->core.attr.ksymbol && perf_missing_features.ksymbol) return true; - } else if (!perf_missing_features.group_read && - evsel->core.attr.inherit && - (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && - evsel__is_group_leader(evsel)) { - perf_missing_features.group_read = true; - pr_debug2_peo("switching off group read\n"); + + if (evsel->core.attr.bpf_event && perf_missing_features.bpf) return true; - } else { - return false; - } + + if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) && + perf_missing_features.branch_hw_idx) + return true; + + if (evsel->core.attr.sample_id_all && perf_missing_features.sample_id_all) + return true; + + return false; } static int 
evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, @@ -2010,10 +2669,16 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int idx, thread, nthreads; int pid = -1, err, old_errno; enum rlimit_action set_rlimit = NO_CHANGE; + struct perf_cpu cpu; + + if (evsel__is_retire_lat(evsel)) { + err = evsel__tpebs_open(evsel); + goto out; + } err = __evsel__prepare_open(evsel, cpus, threads); if (err) - return err; + goto out; if (cpus == NULL) cpus = empty_cpu_map; @@ -2032,7 +2697,27 @@ fallback_missing_features: pr_debug3("Opening: %s\n", evsel__name(evsel)); display_attr(&evsel->core.attr); + if (evsel__is_tool(evsel)) { + err = evsel__tool_pmu_open(evsel, threads, + start_cpu_map_idx, + end_cpu_map_idx); + goto out; + } + if (evsel__is_hwmon(evsel)) { + err = evsel__hwmon_pmu_open(evsel, threads, + start_cpu_map_idx, + end_cpu_map_idx); + goto out; + } + if (evsel__is_drm(evsel)) { + err = evsel__drm_pmu_open(evsel, threads, + start_cpu_map_idx, + end_cpu_map_idx); + goto out; + } + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { + cpu = perf_cpu_map__cpu(cpus, idx); for (thread = 0; thread < nthreads; thread++) { int fd, group_fd; @@ -2051,14 +2736,11 @@ retry_open: goto out_close; } - test_attr__ready(); - /* Debug message used by test scripts */ pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", - pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags); + pid, cpu.cpu, group_fd, evsel->open_flags); - fd = sys_perf_event_open(&evsel->core.attr, pid, - perf_cpu_map__cpu(cpus, idx).cpu, + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, group_fd, evsel->open_flags); FD(evsel, idx, thread) = fd; @@ -2073,9 +2755,8 @@ retry_open: bpf_counter__install_pe(evsel, idx, fd); - if (unlikely(test_attr__enabled)) { - test_attr__open(&evsel->core.attr, pid, - perf_cpu_map__cpu(cpus, idx), + if (unlikely(test_attr__enabled())) { + test_attr__open(&evsel->core.attr, pid, cpu, fd, group_fd, evsel->open_flags); } @@ -2111,12 +2792,10 @@ retry_open: } } - return 0; + err = 0; + goto out; try_fallback: - if (evsel__precise_ip_fallback(evsel)) - goto retry_open; - if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus), idx, threads, thread, err)) { /* We just removed 1 thread, so lower the upper nthreads limit. 
*/ @@ -2133,11 +2812,12 @@ try_fallback: if (err == -EMFILE && rlimit__increase_nofile(&set_rlimit)) goto retry_open; - if (err != -EINVAL || idx > 0 || thread > 0) - goto out_close; - - if (evsel__detect_missing_features(evsel)) + if (err == -EINVAL && evsel__detect_missing_features(evsel, cpu)) goto fallback_missing_features; + + if (evsel__precise_ip_fallback(evsel)) + goto retry_open; + out_close: if (err) threads->err_thread = thread; @@ -2152,6 +2832,9 @@ out_close: thread = nthreads; } while (--idx >= 0); errno = old_errno; +out: + if (err) + evsel->supported = false; return err; } @@ -2163,21 +2846,38 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, void evsel__close(struct evsel *evsel) { + if (evsel__is_retire_lat(evsel)) + evsel__tpebs_close(evsel); perf_evsel__close(&evsel->core); perf_evsel__free_id(&evsel->core); } -int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx) +int evsel__open_per_cpu_and_thread(struct evsel *evsel, + struct perf_cpu_map *cpus, int cpu_map_idx, + struct perf_thread_map *threads) { if (cpu_map_idx == -1) - return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus)); + return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus)); - return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1); + return evsel__open_cpu(evsel, cpus, threads, cpu_map_idx, cpu_map_idx + 1); +} + +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx) +{ + struct perf_thread_map *threads = thread_map__new_by_tid(-1); + int ret = evsel__open_per_cpu_and_thread(evsel, cpus, cpu_map_idx, threads); + + perf_thread_map__put(threads); + return ret; } int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads) { - return evsel__open(evsel, NULL, threads); + struct perf_cpu_map *cpus = perf_cpu_map__new_any_cpu(); + int ret = evsel__open_per_cpu_and_thread(evsel, cpus, -1, threads); + + perf_cpu_map__put(cpus); + return ret; } static int perf_evsel__parse_id_sample(const struct evsel *evsel, @@ -2270,11 +2970,18 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size) return 0; } -void __weak arch_perf_parse_sample_weight(struct perf_sample *data, - const __u64 *array, - u64 type __maybe_unused) +static void perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type) { - data->weight = *array; + union perf_sample_weight weight; + + weight.full = *array; + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { + data->weight = weight.var1_dw; + data->ins_lat = weight.var2_w; + data->weight3 = weight.var3_w; + } else { + data->weight = weight.full; + } } u64 evsel__bitfield_swap_branch_flags(u64 value) @@ -2333,20 +3040,50 @@ u64 evsel__bitfield_swap_branch_flags(u64 value) static inline bool evsel__has_branch_counters(const struct evsel *evsel) { - struct evsel *cur, *leader = evsel__leader(evsel); + struct evsel *leader = evsel__leader(evsel); /* The branch counters feature only supports group */ if (!leader || !evsel->evlist) return false; - evlist__for_each_entry(evsel->evlist, cur) { - if ((leader == evsel__leader(cur)) && - (cur->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) - return true; - } + if (evsel->evlist->nr_br_cntr < 0) + evlist__update_br_cntr(evsel->evlist); + + if (leader->br_cntr_nr > 0) + return true; + return false; } +static int __set_offcpu_sample(struct perf_sample *data) +{ + u64 *array = data->raw_data; + u32 max_size = data->raw_size, *p32; + const void *endp = (void *)array + 
max_size; + + if (array == NULL) + return -EFAULT; + + OVERFLOW_CHECK_u64(array); + p32 = (void *)array++; + data->pid = p32[0]; + data->tid = p32[1]; + + OVERFLOW_CHECK_u64(array); + data->period = *array++; + + OVERFLOW_CHECK_u64(array); + data->callchain = (struct ip_callchain *)array++; + OVERFLOW_CHECK(array, data->callchain->nr * sizeof(u64), max_size); + data->ip = data->callchain->ips[1]; + array += data->callchain->nr; + + OVERFLOW_CHECK_u64(array); + data->cgroup = *array; + + return 0; +} + int evsel__parse_sample(struct evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -2372,6 +3109,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, data->data_src = PERF_MEM_DATA_SRC_NONE; data->vcpu = -1; + if (event->header.type == PERF_RECORD_CALLCHAIN_DEFERRED) { + const u64 max_callchain_nr = UINT64_MAX / sizeof(u64); + + data->callchain = (struct ip_callchain *)&event->callchain_deferred.nr; + if (data->callchain->nr > max_callchain_nr) + return -EFAULT; + + data->deferred_cookie = event->callchain_deferred.cookie; + + if (evsel->core.attr.sample_id_all) + perf_evsel__parse_id_sample(evsel, event, data); + return 0; + } + if (event->header.type != PERF_RECORD_SAMPLE) { if (!evsel->core.attr.sample_id_all) return 0; @@ -2496,12 +3247,25 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_CALLCHAIN) { const u64 max_callchain_nr = UINT64_MAX / sizeof(u64); + u64 callchain_nr; OVERFLOW_CHECK_u64(array); data->callchain = (struct ip_callchain *)array++; - if (data->callchain->nr > max_callchain_nr) + callchain_nr = data->callchain->nr; + if (callchain_nr > max_callchain_nr) return -EFAULT; - sz = data->callchain->nr * sizeof(u64); + sz = callchain_nr * sizeof(u64); + /* + * Save the cookie for the deferred user callchain. The last 2 + * entries in the callchain should be the context marker and the + * cookie. The cookie will be used to match PERF_RECORD_ + * CALLCHAIN_DEFERRED later. 
+ */ + if (evsel->core.attr.defer_callchain && callchain_nr >= 2 && + data->callchain->ips[callchain_nr - 2] == PERF_CONTEXT_USER_DEFERRED) { + data->deferred_cookie = data->callchain->ips[callchain_nr - 1]; + data->deferred_callchain = true; + } OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; } @@ -2581,8 +3345,6 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, array = (void *)array + sz; if (evsel__has_branch_counters(evsel)) { - OVERFLOW_CHECK_u64(array); - data->branch_stack_cntr = (u64 *)array; sz = data->branch_stack->nr * sizeof(u64); @@ -2592,17 +3354,19 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, } if (type & PERF_SAMPLE_REGS_USER) { + struct regs_dump *regs = perf_sample__user_regs(data); + OVERFLOW_CHECK_u64(array); - data->user_regs.abi = *array; + regs->abi = *array; array++; - if (data->user_regs.abi) { + if (regs->abi) { u64 mask = evsel->core.attr.sample_regs_user; sz = hweight64(mask) * sizeof(u64); OVERFLOW_CHECK(array, sz, max_size); - data->user_regs.mask = mask; - data->user_regs.regs = (u64 *)array; + regs->mask = mask; + regs->regs = (u64 *)array; array = (void *)array + sz; } } @@ -2630,7 +3394,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_WEIGHT_TYPE) { OVERFLOW_CHECK_u64(array); - arch_perf_parse_sample_weight(data, array, type); + perf_parse_sample_weight(data, array, type); array++; } @@ -2646,19 +3410,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, array++; } - data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE; if (type & PERF_SAMPLE_REGS_INTR) { + struct regs_dump *regs = perf_sample__intr_regs(data); + OVERFLOW_CHECK_u64(array); - data->intr_regs.abi = *array; + regs->abi = *array; array++; - if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) { + if (regs->abi != PERF_SAMPLE_REGS_ABI_NONE) { u64 mask = evsel->core.attr.sample_regs_intr; sz = hweight64(mask) * sizeof(u64); OVERFLOW_CHECK(array, sz, max_size); - data->intr_regs.mask = mask; - data->intr_regs.regs = (u64 *)array; + regs->mask = mask; + regs->regs = (u64 *)array; array = (void *)array + sz; } } @@ -2700,6 +3465,9 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, array = (void *)array + sz; } + if (evsel__is_offcpu_event(evsel)) + return __set_offcpu_sample(data); + return 0; } @@ -2746,7 +3514,7 @@ int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, return 0; } -u16 evsel__id_hdr_size(struct evsel *evsel) +u16 evsel__id_hdr_size(const struct evsel *evsel) { u64 sample_type = evsel->core.attr.sample_type; u16 size = 0; @@ -2775,12 +3543,16 @@ u16 evsel__id_hdr_size(struct evsel *evsel) #ifdef HAVE_LIBTRACEEVENT struct tep_format_field *evsel__field(struct evsel *evsel, const char *name) { - return tep_find_field(evsel->tp_format, name); + struct tep_event *tp_format = evsel__tp_format(evsel); + + return tp_format ? tep_find_field(tp_format, name) : NULL; } struct tep_format_field *evsel__common_field(struct evsel *evsel, const char *name) { - return tep_find_common_field(evsel->tp_format, name); + struct tep_event *tp_format = evsel__tp_format(evsel); + + return tp_format ? tep_find_common_field(tp_format, name) : NULL; } void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name) @@ -2924,7 +3696,7 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err, /* If event has exclude user then don't exclude kernel. 
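 * Instead, fall back to counting user space only: the code below
 * appends the "u" modifier to the event name and sets exclude_kernel
 * and exclude_hv.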
*/ if (evsel->core.attr.exclude_user) - return false; + goto no_fallback; /* Is there already the separator in the name. */ if (strchr(name, '/') || @@ -2932,7 +3704,7 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err, sep = ""; if (asprintf(&new_name, "%s%su", name, sep) < 0) - return false; + goto no_fallback; free(evsel->name); evsel->name = new_name; @@ -2943,8 +3715,31 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err, evsel->core.attr.exclude_hv = 1; return true; - } + } else if (err == EOPNOTSUPP && !evsel->core.attr.exclude_guest && + !evsel->exclude_GH) { + const char *name = evsel__name(evsel); + char *new_name; + const char *sep = ":"; + /* Is there already the separator in the name. */ + if (strchr(name, '/') || + (strchr(name, ':') && !evsel->is_libpfm_event)) + sep = ""; + + if (asprintf(&new_name, "%s%sH", name, sep) < 0) + goto no_fallback; + + free(evsel->name); + evsel->name = new_name; + /* Apple M1 requires exclude_guest */ + scnprintf(msg, msgsize, "Trying to fall back to excluding guest samples"); + evsel->core.attr.exclude_guest = 1; + + return true; + } +no_fallback: + scnprintf(msg, msgsize, "No fallback found for '%s' for error %d", + evsel__name(evsel), err); return false; } @@ -2984,7 +3779,80 @@ static bool find_process(const char *name) return ret ? false : true; } +static int dump_perf_event_processes(char *msg, size_t size) +{ + DIR *proc_dir; + struct dirent *proc_entry; + int printed = 0; + + proc_dir = opendir(procfs__mountpoint()); + if (!proc_dir) + return 0; + + /* Walk through the /proc directory. */ + while ((proc_entry = readdir(proc_dir)) != NULL) { + char buf[256]; + DIR *fd_dir; + struct dirent *fd_entry; + int fd_dir_fd; + + if (proc_entry->d_type != DT_DIR || + !isdigit(proc_entry->d_name[0]) || + strlen(proc_entry->d_name) > sizeof(buf) - 4) + continue; + + scnprintf(buf, sizeof(buf), "%s/fd", proc_entry->d_name); + fd_dir_fd = openat(dirfd(proc_dir), buf, O_DIRECTORY); + if (fd_dir_fd == -1) + continue; + fd_dir = fdopendir(fd_dir_fd); + if (!fd_dir) { + close(fd_dir_fd); + continue; + } + while ((fd_entry = readdir(fd_dir)) != NULL) { + ssize_t link_size; + + if (fd_entry->d_type != DT_LNK) + continue; + link_size = readlinkat(fd_dir_fd, fd_entry->d_name, buf, sizeof(buf)); + if (link_size < 0) + continue; + /* Take care as readlink doesn't null terminate the string. 
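 * readlinkat() returns the number of bytes placed in buf, so the
 * comparison below is bounded by link_size rather than by a NUL.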
*/ + if (!strncmp(buf, "anon_inode:[perf_event]", link_size)) { + int cmdline_fd; + ssize_t cmdline_size; + + scnprintf(buf, sizeof(buf), "%s/cmdline", proc_entry->d_name); + cmdline_fd = openat(dirfd(proc_dir), buf, O_RDONLY); + if (cmdline_fd == -1) + continue; + cmdline_size = read(cmdline_fd, buf, sizeof(buf) - 1); + close(cmdline_fd); + if (cmdline_size < 0) + continue; + buf[cmdline_size] = '\0'; + for (ssize_t i = 0; i < cmdline_size; i++) { + if (buf[i] == '\0') + buf[i] = ' '; + } + + if (printed == 0) + printed += scnprintf(msg, size, "Possible processes:\n"); + + printed += scnprintf(msg + printed, size - printed, + "%s %s\n", proc_entry->d_name, buf); + break; + } + } + closedir(fd_dir); + } + closedir(proc_dir); + return printed; +} + int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused, + int err __maybe_unused, char *msg __maybe_unused, size_t size __maybe_unused) { @@ -2994,6 +3862,7 @@ int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused, int evsel__open_strerror(struct evsel *evsel, struct target *target, int err, char *msg, size_t size) { + struct perf_pmu *pmu; char sbuf[STRERR_BUFSIZE]; int printed = 0, enforced = 0; int ret; @@ -3017,7 +3886,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, printed += scnprintf(msg, size, "No permission to enable %s event.\n\n", evsel__name(evsel)); - return scnprintf(msg + printed, size - printed, + return printed + scnprintf(msg + printed, size - printed, "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n" "access to performance monitoring and observability operations for processes\n" "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n" @@ -3062,6 +3931,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "%s: PMU Hardware doesn't support 'aux_output' feature", evsel__name(evsel)); + if (evsel->core.attr.aux_action) + return scnprintf(msg, size, + "%s: PMU Hardware doesn't support 'aux_action' feature", + evsel__name(evsel)); if (evsel->core.attr.sample_period != 0) return scnprintf(msg, size, "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'", @@ -3074,12 +3947,21 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "%s", "No hardware sampling interrupt available.\n"); #endif + if (!target__has_cpu(target)) + return scnprintf(msg, size, + "Unsupported event (%s) in per-thread mode, enable system wide with '-a'.", + evsel__name(evsel)); break; case EBUSY: if (find_process("oprofiled")) return scnprintf(msg, size, "The PMU counters are busy/taken by another profiler.\n" "We found oprofile daemon running, please stop it and try again."); + printed += scnprintf( + msg, size, + "The PMU %s counters are busy and in use by another process.\n", + evsel->pmu ? 
evsel->pmu->name : ""); + return printed + dump_perf_event_processes(msg + printed, size - printed); break; case EINVAL: if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size) @@ -3092,9 +3974,12 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "clockid feature not supported."); if (perf_missing_features.clockid_wrong) return scnprintf(msg, size, "wrong clockid (%d).", clockid); + if (perf_missing_features.aux_action) + return scnprintf(msg, size, "The 'aux_action' feature is not supported, update the kernel."); if (perf_missing_features.aux_output) return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel."); - if (!target__has_cpu(target)) + pmu = evsel__find_pmu(evsel); + if (!pmu->is_core && !target__has_cpu(target)) return scnprintf(msg, size, "Invalid event (%s) in per-thread mode, enable system wide with '-a'.", evsel__name(evsel)); @@ -3107,27 +3992,38 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, break; } - ret = arch_evsel__open_strerror(evsel, msg, size); + ret = arch_evsel__open_strerror(evsel, err, msg, size); if (ret) return ret; return scnprintf(msg, size, "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n" - "/bin/dmesg | grep -i perf may provide additional information.\n", + "\"dmesg | grep -i perf\" may provide additional information.\n", err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel)); } +struct perf_session *evsel__session(struct evsel *evsel) +{ + return evsel && evsel->evlist ? evsel->evlist->session : NULL; +} + struct perf_env *evsel__env(struct evsel *evsel) { - if (evsel && evsel->evlist && evsel->evlist->env) - return evsel->evlist->env; - return &perf_env; + struct perf_session *session = evsel__session(evsel); + + return session ? perf_session__env(session) : NULL; } static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) { int cpu_map_idx, thread; + if (evsel__is_retire_lat(evsel)) + return 0; + + if (perf_pmu__kind(evsel->pmu) != PERF_PMU_KIND_PE) + return 0; + for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) { for (thread = 0; thread < xyarray__max_y(evsel->core.fd); thread++) { @@ -3173,14 +4069,16 @@ void evsel__zero_per_pkg(struct evsel *evsel) */ bool evsel__is_hybrid(const struct evsel *evsel) { - if (perf_pmus__num_core_pmus() == 1) + if (!evsel->core.is_pmu_core) return false; - return evsel->core.is_pmu_core; + return perf_pmus__num_core_pmus() > 1; } struct evsel *evsel__leader(const struct evsel *evsel) { + if (evsel->core.leader == NULL) + return NULL; return container_of(evsel->core.leader, struct evsel, core); } @@ -3229,3 +4127,152 @@ void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader) leader->core.nr_members--; } } + +bool evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config) +{ + struct evsel *evsel; + + if (counter->needs_uniquify) { + /* Already set. */ + return true; + } + + if (counter->use_config_name || counter->is_libpfm_event) { + /* Original name will be used. */ + return false; + } + + if (!config->hybrid_merge && evsel__is_hybrid(counter)) { + /* Unique hybrid counters necessary. */ + counter->needs_uniquify = true; + return true; + } + + if (counter->core.attr.type < PERF_TYPE_MAX && counter->core.attr.type != PERF_TYPE_RAW) { + /* Legacy event, don't uniquify. 
*/ + return false; + } + + if (counter->pmu && counter->pmu->is_core && + counter->alternate_hw_config != PERF_COUNT_HW_MAX) { + /* A sysfs or json event replacing a legacy event, don't uniquify. */ + return false; + } + + if (config->aggr_mode == AGGR_NONE) { + /* Always unique with no aggregation. */ + counter->needs_uniquify = true; + return true; + } + + if (counter->first_wildcard_match != NULL) { + /* + * If stats are merged then only the first_wildcard_match is + * displayed, there is no need to uniquify this evsel as the + * name won't be shown. + */ + return false; + } + + /* + * Do other non-merged events in the evlist have the same name? If so + * uniquify is necessary. + */ + evlist__for_each_entry(counter->evlist, evsel) { + if (evsel == counter || evsel->first_wildcard_match || evsel->pmu == counter->pmu) + continue; + + if (evsel__name_is(counter, evsel__name(evsel))) { + counter->needs_uniquify = true; + return true; + } + } + return false; +} + +void evsel__uniquify_counter(struct evsel *counter) +{ + const char *name, *pmu_name, *config; + char *new_name; + int len, ret; + + /* No uniquification necessary. */ + if (!counter->needs_uniquify) + return; + + /* The evsel was already uniquified. */ + if (counter->uniquified_name) + return; + + /* Avoid checking to uniquify twice. */ + counter->uniquified_name = true; + + name = evsel__name(counter); + config = strchr(name, '/'); + pmu_name = counter->pmu->name; + + /* Already prefixed by the PMU name? */ + len = pmu_name_len_no_suffix(pmu_name); + + if (!strncmp(name, pmu_name, len)) { + /* + * If the PMU name is there, then there is no sense in not + * having a slash. Do this for robustness. + */ + if (config == NULL) + config = name - 1; + + ret = asprintf(&new_name, "%s/%s", pmu_name, config + 1); + } else if (config) { + len = config - name; + if (config[1] == '/') { + /* case: event// */ + ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 2); + } else { + /* case: event/.../ */ + ret = asprintf(&new_name, "%s/%.*s,%s", pmu_name, len, name, config + 1); + } + } else { + config = strchr(name, ':'); + if (config) { + /* case: event:.. */ + len = config - name; + + ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 1); + } else { + /* case: event */ + ret = asprintf(&new_name, "%s/%s/", pmu_name, name); + } + } + if (ret > 0) { + free(counter->name); + counter->name = new_name; + } else { + /* ENOMEM from asprintf. */ + counter->uniquified_name = false; + } +} + +void evsel__warn_user_requested_cpus(struct evsel *evsel, struct perf_cpu_map *user_requested_cpus) +{ + struct perf_cpu_map *intersect, *online = NULL; + const struct perf_pmu *pmu = evsel__find_pmu(evsel); + + if (pmu && pmu->is_core) { + intersect = perf_cpu_map__intersect(pmu->cpus, user_requested_cpus); + } else { + online = cpu_map__online(); + intersect = perf_cpu_map__intersect(online, user_requested_cpus); + } + if (!perf_cpu_map__equal(intersect, user_requested_cpus)) { + char buf1[128]; + char buf2[128]; + + cpu_map__snprint(user_requested_cpus, buf1, sizeof(buf1)); + cpu_map__snprint(online ?: pmu->cpus, buf2, sizeof(buf2)); + pr_warning("WARNING: A requested CPU in '%s' is not supported by PMU '%s' (CPUs %s) for event '%s'\n", + buf1, pmu ? 
pmu->name : "cpu", buf2, evsel__name(evsel)); + } + perf_cpu_map__put(intersect); + perf_cpu_map__put(online); +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 375a38e15cd9..a08130ff2e47 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -11,10 +11,12 @@ #include <perf/evsel.h> #include "symbol_conf.h" #include "pmus.h" +#include "pmu.h" struct bpf_object; struct cgroup; struct perf_counts; +struct perf_stat_config; struct perf_stat_evsel; union perf_event; struct bpf_counter_ops; @@ -22,25 +24,9 @@ struct target; struct hashmap; struct bperf_leader_bpf; struct bperf_follower_bpf; -struct perf_pmu; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); -enum perf_tool_event { - PERF_TOOL_NONE = 0, - PERF_TOOL_DURATION_TIME = 1, - PERF_TOOL_USER_TIME = 2, - PERF_TOOL_SYSTEM_TIME = 3, - - PERF_TOOL_MAX, -}; - -const char *perf_tool_event__to_str(enum perf_tool_event ev); -enum perf_tool_event perf_tool_event__from_str(const char *str); - -#define perf_tool_event__for_each_event(ev) \ - for ((ev) = PERF_TOOL_DURATION_TIME; (ev) < PERF_TOOL_MAX; ev++) - /** struct evsel - event selector * * @evlist - evlist this evsel is in, if it is in one. @@ -72,9 +58,10 @@ struct evsel { struct { char *name; char *group_name; - const char *pmu_name; const char *group_pmu_name; #ifdef HAVE_LIBTRACEEVENT + char *tp_sys; + char *tp_name; struct tep_event *tp_format; #endif char *filter; @@ -83,7 +70,11 @@ struct evsel { const char *unit; struct cgroup *cgrp; const char *metric_id; - enum perf_tool_event tool_event; + /* + * This point to the first evsel with the same name, intended to store the + * aggregated counts in aggregation mode. + */ + struct evsel *first_wildcard_match; /* parse modifier helper */ int exclude_GH; int sample_read; @@ -92,22 +83,23 @@ struct evsel { bool percore; bool precise_max; bool is_libpfm_event; - bool auto_merge_stats; bool collect_stat; bool weak_group; bool bpf_counter; bool use_config_name; bool skippable; + bool retire_lat; + bool dont_regroup; int bpf_fd; struct bpf_object *bpf_obj; struct list_head config_terms; + u64 alternate_hw_config; }; /* * metric fields are similar, but needs more care as they can have * references to other metric (evsel). */ - struct evsel **metric_events; struct evsel *metric_leader; void *handler; @@ -127,13 +119,14 @@ struct evsel { bool ignore_missing_thread; bool forced_leader; bool cmdline_group_boundary; - bool merged_stat; bool reset_group; - bool errored; bool needs_auxtrace_mmap; bool default_metricgroup; /* A member of the Default metricgroup */ + bool default_show_events; /* If a default group member, show the event */ + bool needs_uniquify; struct hashmap *per_pkg_mask; int err; + int script_output_type; struct { evsel__sb_cb_t *cb; void *data; @@ -148,6 +141,20 @@ struct evsel { __u64 synth_sample_type; /* + * Store the branch counter related information. + * br_cntr_idx: The idx of the branch counter event in the evlist + * br_cntr_nr: The number of the branch counter event in the group + * (Only available for the leader event) + * abbr_name: The abbreviation name assigned to an event which is + * logged by the branch counter. + * The abbr name is from A to Z9. NA is applied if out + * of the range. + */ + int br_cntr_idx; + int br_cntr_nr; + char abbr_name[3]; + + /* * bpf_counter_ops serves two use cases: * 1. perf-stat -b counting events used byBPF programs * 2. 
perf-stat --use-bpf use BPF programs to aggregate counts @@ -168,8 +175,28 @@ struct evsel { unsigned long open_flags; int precise_ip_original; - /* for missing_features */ + /* The PMU the event is from. Used for missing_features, PMU name, etc. */ struct perf_pmu *pmu; + + /* For tool events */ + /* Beginning time subtracted when the counter is read. */ + union { + /* Defaults for retirement latency events. */ + struct _retirement_latency { + double mean; + double min; + double max; + } retirement_latency; + /* duration_time is a single global time. */ + __u64 start_time; + /* + * user_time and system_time read an initial value potentially + * per-CPU or per-pid. + */ + struct xyarray *start_times; + }; + /* Is the tool's fd for /proc/pid/stat or /proc/stat. */ + bool pid_stat; }; struct perf_missing_features { @@ -192,6 +219,9 @@ struct perf_missing_features { bool weight_struct; bool read_lost; bool branch_counters; + bool aux_action; + bool inherit_sample_read; + bool defer_callchain; }; extern struct perf_missing_features perf_missing_features; @@ -218,6 +248,7 @@ int evsel__object_config(size_t object_size, void (*fini)(struct evsel *evsel)); struct perf_pmu *evsel__find_pmu(const struct evsel *evsel); +const char *evsel__pmu_name(const struct evsel *evsel); bool evsel__is_aux_event(const struct evsel *evsel); struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx); @@ -227,32 +258,31 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr) return evsel__new_idx(attr, 0); } -struct evsel *evsel__clone(struct evsel *orig); +struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig); int copy_config_terms(struct list_head *dst, struct list_head *src); void free_config_terms(struct list_head *config_terms); -#ifdef HAVE_LIBTRACEEVENT -struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format); - /* * Returns pointer with encoded error via <linux/err.h> interface. 
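
Since evsel__newtp_idx() reports failure through an <linux/err.h>-encoded pointer rather than NULL, callers test the result with IS_ERR()/PTR_ERR(). A minimal sketch of that calling convention, assuming the perf-internal build environment (the tracepoint name is only an example):

#include <linux/err.h>	/* tools/include copy: IS_ERR(), PTR_ERR() */
#include "util/evsel.h"

static int open_sched_switch(struct evsel **out)
{
	struct evsel *evsel = evsel__newtp("sched", "sched_switch");

	if (IS_ERR(evsel))
		return PTR_ERR(evsel);	/* a negative errno, e.g. -ENOMEM */

	*out = evsel;
	return 0;
}
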
*/ +struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format); static inline struct evsel *evsel__newtp(const char *sys, const char *name) { return evsel__newtp_idx(sys, name, 0, true); } -#endif #ifdef HAVE_LIBTRACEEVENT -struct tep_event *event_format__new(const char *sys, const char *name); +struct tep_event *evsel__tp_format(struct evsel *evsel); #endif void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx); void evsel__exit(struct evsel *evsel); void evsel__delete(struct evsel *evsel); +void evsel__set_priv_destructor(void (*destructor)(void *priv)); + struct callchain_param; void evsel__config(struct evsel *evsel, struct record_opts *opts, @@ -291,9 +321,9 @@ const char *evsel__name(struct evsel *evsel); bool evsel__name_is(struct evsel *evsel, const char *name); const char *evsel__metric_id(const struct evsel *evsel); -static inline bool evsel__is_tool(const struct evsel *evsel) +static inline bool evsel__is_retire_lat(const struct evsel *evsel) { - return evsel->tool_event != PERF_TOOL_NONE; + return evsel->retire_lat; } const char *evsel__group_name(struct evsel *evsel); @@ -312,7 +342,8 @@ void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier); void arch_evsel__set_sample_weight(struct evsel *evsel); void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr); -int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size); +int arch_evsel__open_strerror(struct evsel *evsel, int err, char *msg, size_t size); +void arch_evsel__apply_ratio_to_prev(struct evsel *evsel, struct perf_event_attr *attr); int evsel__set_filter(struct evsel *evsel, const char *filter); int evsel__append_tp_filter(struct evsel *evsel, const char *filter); @@ -322,6 +353,9 @@ int evsel__enable(struct evsel *evsel); int evsel__disable(struct evsel *evsel); int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx); +int evsel__open_per_cpu_and_thread(struct evsel *evsel, + struct perf_cpu_map *cpus, int cpu_map_idx, + struct perf_thread_map *threads); int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx); int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads); int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, @@ -329,7 +363,6 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, void evsel__close(struct evsel *evsel); int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); -bool evsel__detect_missing_features(struct evsel *evsel); bool evsel__precise_ip_fallback(struct evsel *evsel); @@ -354,26 +387,10 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam struct tep_format_field *evsel__field(struct evsel *evsel, const char *name); struct tep_format_field *evsel__common_field(struct evsel *evsel, const char *name); -static inline bool __evsel__match(const struct evsel *evsel, u32 type, u64 config) -{ - if (evsel->core.attr.type != type) - return false; - - if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) && - perf_pmus__supports_extended_type()) - return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config; - - return evsel->core.attr.config == config; -} +bool __evsel__match(const struct evsel *evsel, u32 type, u64 config); #define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t, PERF_COUNT_##c) -static inline bool evsel__match2(struct evsel *e1, struct evsel *e2) -{ - return (e1->core.attr.type == 
e2->core.attr.type) && - (e1->core.attr.config == e2->core.attr.config); -} - int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread); int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale); @@ -408,7 +425,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, u64 *timestamp); -u16 evsel__id_hdr_size(struct evsel *evsel); +u16 evsel__id_hdr_size(const struct evsel *evsel); static inline struct evsel *evsel__next(struct evsel *evsel) { @@ -527,6 +544,7 @@ static inline bool evsel__is_dummy_event(struct evsel *evsel) (evsel->core.attr.config == PERF_COUNT_SW_DUMMY); } +struct perf_session *evsel__session(struct evsel *evsel); struct perf_env *evsel__env(struct evsel *evsel); int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); @@ -542,6 +560,9 @@ void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader); bool arch_evsel__must_be_in_group(const struct evsel *evsel); +bool evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config); +void evsel__uniquify_counter(struct evsel *counter); + /* * Macro to swap the bit-field postition and size. * Used when, @@ -557,4 +578,8 @@ u64 evsel__bitfield_swap_branch_flags(u64 value); void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel, const char *config_name, u64 val); +bool evsel__is_offcpu_event(struct evsel *evsel); + +void evsel__warn_user_requested_cpus(struct evsel *evsel, struct perf_cpu_map *user_requested_cpus); + #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index aee6f808b512..bcd3a978f0c4 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -25,8 +25,10 @@ enum evsel_term_type { EVSEL__CONFIG_TERM_BRANCH, EVSEL__CONFIG_TERM_PERCORE, EVSEL__CONFIG_TERM_AUX_OUTPUT, + EVSEL__CONFIG_TERM_AUX_ACTION, EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE, EVSEL__CONFIG_TERM_CFG_CHG, + EVSEL__CONFIG_TERM_RATIO_TO_PREV, }; struct evsel_config_term { @@ -47,6 +49,7 @@ struct evsel_config_term { u32 aux_sample_size; u64 cfg_chg; char *str; + int cpu; } val; bool weak; }; diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 8719b3cb5646..10f1a03c2860 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -14,7 +14,7 @@ #include "dso.h" #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) @@ -81,13 +81,15 @@ int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE #ifdef HAVE_LIBTRACEEVENT if (details->trace_fields) { struct tep_format_field *field; + const struct tep_event *tp_format; if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) { printed += comma_fprintf(fp, &first, " (not a tracepoint)"); goto out; } - field = evsel->tp_format->format.fields; + tp_format = evsel__tp_format(evsel); + field = tp_format ? 
tp_format->format.fields : NULL; if (field == NULL) { printed += comma_fprintf(fp, &first, " (no trace field)"); goto out; @@ -107,7 +109,6 @@ out: return ++printed; } -#ifndef PYTHON_PERF int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, unsigned int print_opts, struct callchain_cursor *cursor, struct strlist *bt_stop_list, FILE *fp) @@ -167,7 +168,10 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, node_al.addr = addr; node_al.map = map__get(map); - if (print_symoffset) { + if (sample->deferred_callchain && + sample->deferred_cookie == node->ip) { + printed += fprintf(fp, "(cookie)"); + } else if (print_symoffset) { printed += __symbol__fprintf_symname_offs(sym, &node_al, print_unknown_as_addr, true, fp); @@ -248,4 +252,3 @@ int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al, return printed; } -#endif /* PYTHON_PERF */ diff --git a/tools/perf/util/evswitch.c b/tools/perf/util/evswitch.c index 40cb56a9347d..d4c06a3f825a 100644 --- a/tools/perf/util/evswitch.c +++ b/tools/perf/util/evswitch.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only // Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> +#include <errno.h> #include "evswitch.h" #include "evlist.h" diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index b8875aac8f87..465fe2e9bbbe 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -5,29 +5,22 @@ #include <stdlib.h> #include <string.h> #include "metricgroup.h" -#include "cpumap.h" -#include "cputopo.h" #include "debug.h" #include "evlist.h" #include "expr.h" +#include "smt.h" +#include "tool_pmu.h" #include <util/expr-bison.h> #include <util/expr-flex.h> #include "util/hashmap.h" #include "util/header.h" #include "util/pmu.h" -#include "smt.h" -#include "tsc.h" -#include <api/fs/fs.h> +#include <perf/cpumap.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/zalloc.h> #include <ctype.h> #include <math.h> -#include "pmu.h" - -#ifdef PARSER_DEBUG -extern int expr_debug; -#endif struct expr_id_data { union { @@ -173,8 +166,12 @@ int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id, data_ptr->kind = EXPR_ID_DATA__VALUE; ret = hashmap__set(ctx->ids, id, data_ptr, &old_key, &old_data); - if (ret) + if (ret) { free(data_ptr); + } else if (old_data) { + data_ptr->val.val += old_data->val.val; + data_ptr->val.source_count += old_data->val.source_count; + } free(old_key); free(old_data); return ret; @@ -222,6 +219,8 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) int expr__get_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **data) { + if (!ctx || !id) + return -1; return hashmap__find(ctx->ids, id, data) ? 
0 : -1; } @@ -292,7 +291,7 @@ struct expr_parse_ctx *expr__ctx_new(void) { struct expr_parse_ctx *ctx; - ctx = malloc(sizeof(struct expr_parse_ctx)); + ctx = calloc(1, sizeof(struct expr_parse_ctx)); if (!ctx) return NULL; @@ -301,9 +300,6 @@ struct expr_parse_ctx *expr__ctx_new(void) free(ctx); return NULL; } - ctx->sctx.user_requested_cpu_list = NULL; - ctx->sctx.runtime = 0; - ctx->sctx.system_wide = false; return ctx; } @@ -397,90 +393,24 @@ double expr_id_data__source_count(const struct expr_id_data *data) return data->val.source_count; } -#if !defined(__i386__) && !defined(__x86_64__) -double arch_get_tsc_freq(void) -{ - return 0.0; -} -#endif - -static double has_pmem(void) -{ - static bool has_pmem, cached; - const char *sysfs = sysfs__mountpoint(); - char path[PATH_MAX]; - - if (!cached) { - snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs); - has_pmem = access(path, F_OK) == 0; - cached = true; - } - return has_pmem ? 1.0 : 0.0; -} - double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx) { - const struct cpu_topology *topology; double result = NAN; + enum tool_pmu_event ev = tool_pmu__str_to_event(literal + 1); - if (!strcmp("#num_cpus", literal)) { - result = cpu__max_present_cpu().cpu; - goto out; - } - if (!strcmp("#num_cpus_online", literal)) { - struct perf_cpu_map *online = cpu_map__online(); - - if (online) - result = perf_cpu_map__nr(online); - goto out; - } - - if (!strcasecmp("#system_tsc_freq", literal)) { - result = arch_get_tsc_freq(); - goto out; - } + if (ev != TOOL_PMU__EVENT_NONE) { + u64 count; - /* - * Assume that topology strings are consistent, such as CPUs "0-1" - * wouldn't be listed as "0,1", and so after deduplication the number of - * these strings gives an indication of the number of packages, dies, - * etc. - */ - if (!strcasecmp("#smt_on", literal)) { - result = smt_on() ? 1.0 : 0.0; - goto out; - } - if (!strcmp("#core_wide", literal)) { - result = core_wide(ctx->system_wide, ctx->user_requested_cpu_list) - ? 
1.0 : 0.0; - goto out; - } - if (!strcmp("#num_packages", literal)) { - topology = online_topology(); - result = topology->package_cpus_lists; - goto out; - } - if (!strcmp("#num_dies", literal)) { - topology = online_topology(); - result = topology->die_cpus_lists; - goto out; - } - if (!strcmp("#num_cores", literal)) { - topology = online_topology(); - result = topology->core_cpus_lists; - goto out; - } - if (!strcmp("#slots", literal)) { - result = perf_pmu__cpu_slots_per_cycle(); - goto out; - } - if (!strcmp("#has_pmem", literal)) { - result = has_pmem(); - goto out; + if (tool_pmu__read_event(ev, /*evsel=*/NULL, + ctx->system_wide, ctx->user_requested_cpu_list, + &count)) + result = count; + else + pr_err("Failure to read '%s'", literal); + } else { + pr_err("Unrecognized literal '%s'", literal); } - pr_err("Unrecognized literal '%s'", literal); -out: pr_debug2("literal: %s = %f\n", literal, result); return result; } @@ -527,8 +457,8 @@ double expr__strcmp_cpuid_str(const struct expr_parse_ctx *ctx __maybe_unused, bool compute_ids __maybe_unused, const char *test_id) { double ret; - struct perf_pmu *pmu = perf_pmus__find_core_pmu(); - char *cpuid = perf_pmu__getcpuid(pmu); + struct perf_cpu cpu = {-1}; + char *cpuid = get_cpuid_allow_env_override(cpu); if (!cpuid) return NAN; diff --git a/tools/perf/util/fncache.c b/tools/perf/util/fncache.c index 6225cbc52310..bf9559c55c63 100644 --- a/tools/perf/util/fncache.c +++ b/tools/perf/util/fncache.c @@ -1,53 +1,58 @@ // SPDX-License-Identifier: GPL-2.0-only /* Manage a cache of file names' existence */ +#include <pthread.h> #include <stdlib.h> -#include <unistd.h> #include <string.h> -#include <linux/list.h> +#include <unistd.h> +#include <linux/compiler.h> #include "fncache.h" +#include "hashmap.h" -struct fncache { - struct hlist_node nd; - bool res; - char name[]; -}; +static struct hashmap *fncache; -#define FNHSIZE 61 +static size_t fncache__hash(long key, void *ctx __maybe_unused) +{ + return str_hash((const char *)key); +} -static struct hlist_head fncache_hash[FNHSIZE]; +static bool fncache__equal(long key1, long key2, void *ctx __maybe_unused) +{ + return strcmp((const char *)key1, (const char *)key2) == 0; +} -unsigned shash(const unsigned char *s) +static void fncache__init(void) { - unsigned h = 0; - while (*s) - h = 65599 * h + *s++; - return h ^ (h >> 16); + fncache = hashmap__new(fncache__hash, fncache__equal, /*ctx=*/NULL); +} + +static struct hashmap *fncache__get(void) +{ + static pthread_once_t fncache_once = PTHREAD_ONCE_INIT; + + pthread_once(&fncache_once, fncache__init); + + return fncache; } static bool lookup_fncache(const char *name, bool *res) { - int h = shash((const unsigned char *)name) % FNHSIZE; - struct fncache *n; - - hlist_for_each_entry(n, &fncache_hash[h], nd) { - if (!strcmp(n->name, name)) { - *res = n->res; - return true; - } - } - return false; + long val; + + if (!hashmap__find(fncache__get(), name, &val)) + return false; + + *res = (val != 0); + return true; } static void update_fncache(const char *name, bool res) { - struct fncache *n = malloc(sizeof(struct fncache) + strlen(name) + 1); - int h = shash((const unsigned char *)name) % FNHSIZE; - - if (!n) - return; - strcpy(n->name, name); - n->res = res; - hlist_add_head(&n->nd, &fncache_hash[h]); + char *old_key = NULL, *key = strdup(name); + + if (key) { + hashmap__set(fncache__get(), key, res, &old_key, /*old_value*/NULL); + free(old_key); + } } /* No LRU, only use when bounded in some other way. 
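
The fncache rewrite above replaces the fixed 61-bucket chained table with the shared libbpf-style hashmap, created on first use. The thread-safety of that first use comes from pthread_once(); a self-contained sketch of the same pattern, with the hashmap stubbed out by an int:

#include <pthread.h>
#include <stdio.h>

static int cache_ready;	/* stands in for the hashmap pointer */

static void cache_init(void)
{
	cache_ready = 1;	/* stands in for hashmap__new(...) */
}

static int cache_get(void)
{
	static pthread_once_t once = PTHREAD_ONCE_INIT;

	/* The init function runs exactly once, even with concurrent callers. */
	pthread_once(&once, cache_init);
	return cache_ready;
}

int main(void)
{
	printf("%d\n", cache_get());	/* always prints 1 */
	return 0;
}

The real code keys the map on strdup()ed path names and stores the cached boolean as the value, retiring the hand-rolled shash() and its fixed-size table.
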
*/ diff --git a/tools/perf/util/fncache.h b/tools/perf/util/fncache.h index fe020beaefb1..b6a0f209493e 100644 --- a/tools/perf/util/fncache.h +++ b/tools/perf/util/fncache.h @@ -1,7 +1,6 @@ #ifndef _FCACHE_H #define _FCACHE_H 1 -unsigned shash(const unsigned char *s); bool file_available(const char *name); #endif diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h index 558efcb98d25..950f2efafad2 100644 --- a/tools/perf/util/ftrace.h +++ b/tools/perf/util/ftrace.h @@ -6,6 +6,8 @@ #include "target.h" struct evlist; +struct hashamp; +struct stats; struct perf_ftrace { struct evlist *evlist; @@ -15,16 +17,28 @@ struct perf_ftrace { struct list_head notrace; struct list_head graph_funcs; struct list_head nograph_funcs; + struct list_head event_pair; + struct hashmap *profile_hash; unsigned long percpu_buffer_size; bool inherit; bool use_nsec; + unsigned int bucket_range; + unsigned int min_latency; + unsigned int max_latency; + unsigned int bucket_num; + bool hide_empty; int graph_depth; int func_stack_trace; int func_irq_info; + int graph_args; + int graph_retval; + int graph_retval_hex; + int graph_retaddr; int graph_nosleep_time; int graph_noirqs; int graph_verbose; int graph_thresh; + int graph_tail; }; struct filter_entry { @@ -40,7 +54,7 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace); int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace); int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace); int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace, - int buckets[]); + int buckets[], struct stats *stats); int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace); #else /* !HAVE_BPF_SKEL */ @@ -65,7 +79,8 @@ perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused) static inline int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, - int buckets[] __maybe_unused) + int buckets[] __maybe_unused, + struct stats *stats __maybe_unused) { return -1; } diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index ac17a3cb59dc..a1cd5196f4ec 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -12,15 +12,14 @@ #include <libelf.h> #include <string.h> #include <stdlib.h> -#include <unistd.h> #include <inttypes.h> -#include <fcntl.h> #include <err.h> -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include <dwarf.h> #endif #include "genelf.h" +#include "sha1.h" #include "../util/jitdump.h" #include <linux/compiler.h> @@ -28,25 +27,6 @@ #define NT_GNU_BUILD_ID 3 #endif -#define BUILD_ID_URANDOM /* different uuid for each run */ - -#ifdef HAVE_LIBCRYPTO_SUPPORT - -#define BUILD_ID_MD5 -#undef BUILD_ID_SHA /* does not seem to work well when linked with Java */ -#undef BUILD_ID_URANDOM /* different uuid for each run */ - -#ifdef BUILD_ID_SHA -#include <openssl/sha.h> -#endif - -#ifdef BUILD_ID_MD5 -#include <openssl/evp.h> -#include <openssl/md5.h> -#endif -#endif - - typedef struct { unsigned int namesz; /* Size of entry's owner string */ unsigned int descsz; /* Size of the note descriptor */ @@ -54,11 +34,6 @@ typedef struct { char name[0]; /* Start of the name+desc data */ } Elf_Note; -struct options { - char *output; - int fd; -}; - static char shd_string_table[] = { 0, '.', 't', 'e', 'x', 't', 0, /* 1 */ @@ -76,7 +51,7 @@ static char shd_string_table[] = { static struct buildid_note { Elf_Note desc; /* descsz: size of build-id, must be multiple of 4 */ char name[4]; /* GNU\0 */ - char build_id[20]; + u8 build_id[SHA1_DIGEST_SIZE]; } bnote; static Elf_Sym symtab[]={ 
@@ -97,65 +72,6 @@ static Elf_Sym symtab[]={ } }; -#ifdef BUILD_ID_URANDOM -static void -gen_build_id(struct buildid_note *note, - unsigned long load_addr __maybe_unused, - const void *code __maybe_unused, - size_t csize __maybe_unused) -{ - int fd; - size_t sz = sizeof(note->build_id); - ssize_t sret; - - fd = open("/dev/urandom", O_RDONLY); - if (fd == -1) - err(1, "cannot access /dev/urandom for buildid"); - - sret = read(fd, note->build_id, sz); - - close(fd); - - if (sret != (ssize_t)sz) - memset(note->build_id, 0, sz); -} -#endif - -#ifdef BUILD_ID_SHA -static void -gen_build_id(struct buildid_note *note, - unsigned long load_addr __maybe_unused, - const void *code, - size_t csize) -{ - if (sizeof(note->build_id) < SHA_DIGEST_LENGTH) - errx(1, "build_id too small for SHA1"); - - SHA1(code, csize, (unsigned char *)note->build_id); -} -#endif - -#ifdef BUILD_ID_MD5 -static void -gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize) -{ - EVP_MD_CTX *mdctx; - - if (sizeof(note->build_id) < 16) - errx(1, "build_id too small for MD5"); - - mdctx = EVP_MD_CTX_new(); - if (!mdctx) - errx(2, "failed to create EVP_MD_CTX"); - - EVP_DigestInit_ex(mdctx, EVP_md5(), NULL); - EVP_DigestUpdate(mdctx, &load_addr, sizeof(load_addr)); - EVP_DigestUpdate(mdctx, code, csize); - EVP_DigestFinal_ex(mdctx, (unsigned char *)note->build_id, NULL); - EVP_MD_CTX_free(mdctx); -} -#endif - static int jit_add_eh_frame_info(Elf *e, void* unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size, uint64_t base_offset) @@ -244,7 +160,7 @@ jit_add_eh_frame_info(Elf *e, void* unwinding, uint64_t unwinding_header_size, * csize: the code size in bytes */ int -jit_write_elf(int fd, uint64_t load_addr, const char *sym, +jit_write_elf(int fd, uint64_t load_addr __maybe_unused, const char *sym, const void *code, int csize, void *debug __maybe_unused, int nr_debug_entries __maybe_unused, void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size) @@ -257,6 +173,8 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, Elf_Shdr *shdr; uint64_t eh_frame_base_offset; char *strsym = NULL; + void *build_id_data = NULL, *tmp; + int build_id_data_len; int symlen; int retval = -1; @@ -335,6 +253,14 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; shdr->sh_entsize = 0; + build_id_data = malloc(csize); + if (build_id_data == NULL) { + warnx("cannot allocate build-id data"); + goto error; + } + memcpy(build_id_data, code, csize); + build_id_data_len = csize; + /* * Setup .eh_frame_hdr and .eh_frame */ @@ -418,6 +344,15 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_entsize = sizeof(Elf_Sym); shdr->sh_link = unwinding ? 
6 : 4; /* index of .strtab section */ + tmp = realloc(build_id_data, build_id_data_len + sizeof(symtab)); + if (tmp == NULL) { + warnx("cannot allocate build-id data"); + goto error; + } + memcpy(tmp + build_id_data_len, symtab, sizeof(symtab)); + build_id_data = tmp; + build_id_data_len += sizeof(symtab); + /* * setup symbols string table * 2 = 1 for 0 in 1st entry, 1 for the 0 at end of symbol for 2nd entry @@ -460,6 +395,15 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_flags = 0; shdr->sh_entsize = 0; + tmp = realloc(build_id_data, build_id_data_len + symlen); + if (tmp == NULL) { + warnx("cannot allocate build-id data"); + goto error; + } + memcpy(tmp + build_id_data_len, strsym, symlen); + build_id_data = tmp; + build_id_data_len += symlen; + /* * setup build-id section */ @@ -478,7 +422,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, /* * build-id generation */ - gen_build_id(&bnote, load_addr, code, csize); + sha1(build_id_data, build_id_data_len, bnote.build_id); bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */ bnote.desc.descsz = sizeof(bnote.build_id); bnote.desc.type = NT_GNU_BUILD_ID; @@ -504,7 +448,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_size = sizeof(bnote); shdr->sh_entsize = 0; -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT if (debug && nr_debug_entries) { retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries); if (retval) @@ -523,7 +467,7 @@ error: (void)elf_end(e); free(strsym); - + free(build_id_data); return retval; } diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index 4e2e4f40e134..9f0b875d6548 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -8,7 +8,7 @@ int jit_write_elf(int fd, uint64_t code_addr, const char *sym, const void *code, int csize, void *debug, int nr_debug_entries, void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size); -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT /* genelf_debug.c */ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries); #endif diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index 1b5140e5ce99..6a73c903d690 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -38,7 +38,7 @@ do done echo "#endif /* HAVE_LIBELF_SUPPORT */" -echo "#if defined(HAVE_LIBTRACEEVENT) && (defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT))" +echo "#if defined(HAVE_LIBTRACEEVENT)" sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt | sort | while read cmd @@ -51,7 +51,7 @@ do p }' "Documentation/perf-$cmd.txt" done -echo "#endif /* HAVE_LIBTRACEEVENT && (HAVE_LIBAUDIT_SUPPORT || HAVE_SYSCALL_TABLE_SUPPORT) */" +echo "#endif /* HAVE_LIBTRACEEVENT */" echo "#ifdef HAVE_LIBTRACEEVENT" sed -n -e 's/^perf-\([^ ]*\)[ ].* traceevent.*/\1/p' command-list.txt | diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c deleted file mode 100644 index e68935e9ac8c..000000000000 --- a/tools/perf/util/get_current_dir_name.c +++ /dev/null @@ -1,18 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -// Copyright (C) 2018, 2019 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> -// -#ifndef HAVE_GET_CURRENT_DIR_NAME -#include "get_current_dir_name.h" -#include <limits.h> -#include <string.h> -#include <unistd.h> - -/* Android's 'bionic' library, for one, doesn't have this */ - -char *get_current_dir_name(void) -{ - char 
pwd[PATH_MAX]; - - return getcwd(pwd, sizeof(pwd)) == NULL ? NULL : strdup(pwd); -} -#endif // HAVE_GET_CURRENT_DIR_NAME diff --git a/tools/perf/util/get_current_dir_name.h b/tools/perf/util/get_current_dir_name.h deleted file mode 100644 index 69f7d5537d32..000000000000 --- a/tools/perf/util/get_current_dir_name.h +++ /dev/null @@ -1,8 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -// Copyright (C) 2018, 2019 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> -// -#ifndef __PERF_GET_CURRENT_DIR_NAME_H -#ifndef HAVE_GET_CURRENT_DIR_NAME -char *get_current_dir_name(void); -#endif // HAVE_GET_CURRENT_DIR_NAME -#endif // __PERF_GET_CURRENT_DIR_NAME_H diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index c12f8320e668..0c4f155e8eb7 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -166,8 +166,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry(map, cur, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; cur; cur = cur->next) + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; cur; cur = cur->next) /* * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe @@ -178,8 +178,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; \ + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; \ cur && ({tmp = cur->next; true; }); \ cur = tmp) @@ -190,19 +190,19 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur; \ cur = cur->next) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? 
(map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #endif /* __LIBBPF_HASHMAP_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 55e9553861d0..f5cad377c99e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -44,6 +44,7 @@ #include "build-id.h" #include "data.h" #include <api/fs/fs.h> +#include <api/io_dir.h> #include "asm/bug.h" #include "tool.h" #include "time-utils.h" @@ -58,7 +59,7 @@ #include <internal/lib.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif /* @@ -334,7 +335,6 @@ static int write_build_id(struct feat_fd *ff, pr_debug("failed to write buildid table\n"); return err; } - perf_session__cache_build_ids(session); return 0; } @@ -556,6 +556,7 @@ static int write_event_desc(struct feat_fd *ff, static int write_cmdline(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { + struct perf_env *env = &ff->ph->env; char pbuf[MAXPATHLEN], *buf; int i, ret, n; @@ -563,7 +564,7 @@ static int write_cmdline(struct feat_fd *ff, buf = perf_exe(pbuf, MAXPATHLEN); /* account for binary path */ - n = perf_env.nr_cmdline + 1; + n = env->nr_cmdline + 1; ret = do_write(ff, &n, sizeof(n)); if (ret < 0) @@ -573,8 +574,8 @@ static int write_cmdline(struct feat_fd *ff, if (ret < 0) return ret; - for (i = 0 ; i < perf_env.nr_cmdline; i++) { - ret = do_write_string(ff, perf_env.cmdline_argv[i]); + for (i = 0 ; i < env->nr_cmdline; i++) { + ret = do_write_string(ff, env->cmdline_argv[i]); if (ret < 0) return ret; } @@ -585,6 +586,7 @@ static int write_cmdline(struct feat_fd *ff, static int write_cpu_topology(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct cpu_topology *tp; u32 i; int ret, j; @@ -612,17 +614,17 @@ static int write_cpu_topology(struct feat_fd *ff, break; } - ret = perf_env__read_cpu_topology_map(&perf_env); + ret = perf_env__read_cpu_topology_map(env); if (ret < 0) goto done; - for (j = 0; j < perf_env.nr_cpus_avail; j++) { - ret = do_write(ff, &perf_env.cpu[j].core_id, - sizeof(perf_env.cpu[j].core_id)); + for (j = 0; j < env->nr_cpus_avail; j++) { + ret = do_write(ff, &env->cpu[j].core_id, + sizeof(env->cpu[j].core_id)); if (ret < 0) return ret; - ret = do_write(ff, &perf_env.cpu[j].socket_id, - sizeof(perf_env.cpu[j].socket_id)); + ret = do_write(ff, &env->cpu[j].socket_id, + sizeof(env->cpu[j].socket_id)); if (ret < 0) return ret; } @@ -640,9 +642,9 @@ static int write_cpu_topology(struct feat_fd *ff, goto done; } - for (j = 0; j < perf_env.nr_cpus_avail; j++) { - ret = do_write(ff, &perf_env.cpu[j].die_id, - sizeof(perf_env.cpu[j].die_id)); + for (j = 0; j < env->nr_cpus_avail; j++) { + ret = do_write(ff, &env->cpu[j].die_id, + sizeof(env->cpu[j].die_id)); if (ret < 0) return ret; } @@ -819,11 +821,31 @@ static int write_group_desc(struct feat_fd *ff, * Each architecture should provide a more precise id string that * can be use to match the architecture's "mapfile". 
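
The cpuid used for "mapfile" matching can now be forced from the environment: get_cpuid_allow_env_override(), added just below, prefers $PERF_CPUID over the detected value, which is handy for exercising another mapfile entry. A standalone sketch of the same precedence, where detect_cpuid() is a hypothetical stand-in for get_cpuid_str() and the returned string is only an example:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *detect_cpuid(void)
{
	return strdup("GenuineIntel-6-8E-9");	/* example value only */
}

static char *cpuid_allow_env_override(void)
{
	const char *env = getenv("PERF_CPUID");

	/* The environment wins; both paths return heap-allocated strings. */
	return env ? strdup(env) : detect_cpuid();
}

int main(void)
{
	char *cpuid = cpuid_allow_env_override();

	printf("using CPUID %s\n", cpuid);
	free(cpuid);
	return 0;
}
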
*/ -char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +char * __weak get_cpuid_str(struct perf_cpu cpu __maybe_unused) { return NULL; } +char *get_cpuid_allow_env_override(struct perf_cpu cpu) +{ + char *cpuid; + static bool printed; + + cpuid = getenv("PERF_CPUID"); + if (cpuid) + cpuid = strdup(cpuid); + if (!cpuid) + cpuid = get_cpuid_str(cpu); + if (!cpuid) + return NULL; + + if (!printed) { + pr_debug("Using CPUID %s\n", cpuid); + printed = true; + } + return cpuid; +} + /* Return zero when the cpuid from the mapfile.csv matches the * cpuid string generated on this platform. * Otherwise return non-zero. @@ -856,18 +878,19 @@ int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) * default get_cpuid(): nothing gets recorded * actual implementation must be in arch/$(SRCARCH)/util/header.c */ -int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused) +int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused, + struct perf_cpu cpu __maybe_unused) { return ENOSYS; /* Not implemented */ } -static int write_cpuid(struct feat_fd *ff, - struct evlist *evlist __maybe_unused) +static int write_cpuid(struct feat_fd *ff, struct evlist *evlist) { + struct perf_cpu cpu = perf_cpu_map__min(evlist->core.all_cpus); char buffer[64]; int ret; - ret = get_cpuid(buffer, sizeof(buffer)); + ret = get_cpuid(buffer, sizeof(buffer), cpu); if (ret) return -1; @@ -987,57 +1010,6 @@ static int write_dir_format(struct feat_fd *ff, return do_write(ff, &data->dir.version, sizeof(data->dir.version)); } -/* - * Check whether a CPU is online - * - * Returns: - * 1 -> if CPU is online - * 0 -> if CPU is offline - * -1 -> error case - */ -int is_cpu_online(unsigned int cpu) -{ - char *str; - size_t strlen; - char buf[256]; - int status = -1; - struct stat statbuf; - - snprintf(buf, sizeof(buf), - "/sys/devices/system/cpu/cpu%d", cpu); - if (stat(buf, &statbuf) != 0) - return 0; - - /* - * Check if /sys/devices/system/cpu/cpux/online file - * exists. Some cases cpu0 won't have online file since - * it is not expected to be turned off generally. - * In kernels without CONFIG_HOTPLUG_CPU, this - * file won't exist - */ - snprintf(buf, sizeof(buf), - "/sys/devices/system/cpu/cpu%d/online", cpu); - if (stat(buf, &statbuf) != 0) - return 1; - - /* - * Read online file using sysfs__read_str. - * If read or open fails, return -1. 
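
The private is_cpu_online() helper is dropped from header.c here. The check it performed can be reproduced standalone against the usual sysfs layout; as in the deleted code, a CPU directory without an "online" file (cpu0, or kernels without CONFIG_HOTPLUG_CPU) is treated as online:

#include <stdio.h>
#include <unistd.h>

/* Returns 1 if online, 0 if offline/absent, -1 on read error. */
static int cpu_is_online(unsigned int cpu)
{
	char path[256];
	FILE *f;
	int val = -1;

	snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%u", cpu);
	if (access(path, F_OK) != 0)
		return 0;	/* no such CPU */

	snprintf(path, sizeof(path), "/sys/devices/system/cpu/cpu%u/online", cpu);
	f = fopen(path, "r");
	if (!f)
		return 1;	/* no online file: assume online */

	if (fscanf(f, "%d", &val) != 1)
		val = -1;
	fclose(f);
	return val;
}
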
- * If read succeeds, return value from file - * which gets stored in "str" - */ - snprintf(buf, sizeof(buf), - "devices/system/cpu/cpu%d/online", cpu); - - if (sysfs__read_str(buf, &str, &strlen) < 0) - return status; - - status = atoi(str); - - free(str); - return status; -} - #ifdef HAVE_LIBBPF_SUPPORT static int write_bpf_prog_info(struct feat_fd *ff, struct evlist *evlist __maybe_unused) @@ -1045,13 +1017,13 @@ static int write_bpf_prog_info(struct feat_fd *ff, struct perf_env *env = &ff->ph->env; struct rb_root *root; struct rb_node *next; - int ret; + int ret = 0; down_read(&env->bpf_progs.lock); ret = do_write(ff, &env->bpf_progs.infos_cnt, sizeof(env->bpf_progs.infos_cnt)); - if (ret < 0) + if (ret < 0 || env->bpf_progs.infos_cnt == 0) goto out; root = &env->bpf_progs.infos; @@ -1087,14 +1059,14 @@ static int write_bpf_btf(struct feat_fd *ff, struct perf_env *env = &ff->ph->env; struct rb_root *root; struct rb_node *next; - int ret; + int ret = 0; down_read(&env->bpf_progs.lock); ret = do_write(ff, &env->bpf_progs.btfs_cnt, sizeof(env->bpf_progs.btfs_cnt)); - if (ret < 0) + if (ret < 0 || env->bpf_progs.btfs_cnt == 0) goto out; root = &env->bpf_progs.btfs; @@ -1341,11 +1313,11 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) { unsigned int phys, size = 0; char path[PATH_MAX]; - struct dirent *ent; - DIR *dir; + struct io_dirent64 *ent; + struct io_dir dir; #define for_each_memory(mem, dir) \ - while ((ent = readdir(dir))) \ + while ((ent = io_dir__readdir(&dir)) != NULL) \ if (strcmp(ent->d_name, ".") && \ strcmp(ent->d_name, "..") && \ sscanf(ent->d_name, "memory%u", &mem) == 1) @@ -1354,9 +1326,9 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) "%s/devices/system/node/node%lu", sysfs__mountpoint(), idx); - dir = opendir(path); - if (!dir) { - pr_warning("failed: can't open memory sysfs data\n"); + io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (dir.dirfd < 0) { + pr_warning("failed: can't open memory sysfs data '%s'\n", path); return -1; } @@ -1368,20 +1340,20 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) n->set = bitmap_zalloc(size); if (!n->set) { - closedir(dir); + close(dir.dirfd); return -ENOMEM; } n->node = idx; n->size = size; - rewinddir(dir); + io_dir__rewinddir(&dir); for_each_memory(phys, dir) { __set_bit(phys, n->set); } - closedir(dir); + close(dir.dirfd); return 0; } @@ -1404,8 +1376,8 @@ static int memory_node__sort(const void *a, const void *b) static int build_mem_topology(struct memory_node **nodesp, u64 *cntp) { char path[PATH_MAX]; - struct dirent *ent; - DIR *dir; + struct io_dirent64 *ent; + struct io_dir dir; int ret = 0; size_t cnt = 0, size = 0; struct memory_node *nodes = NULL; @@ -1413,14 +1385,14 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp) scnprintf(path, PATH_MAX, "%s/devices/system/node/", sysfs__mountpoint()); - dir = opendir(path); - if (!dir) { + io_dir__init(&dir, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (dir.dirfd < 0) { pr_debug2("%s: couldn't read %s, does this arch have topology information?\n", __func__, path); return -1; } - while (!ret && (ent = readdir(dir))) { + while (!ret && (ent = io_dir__readdir(&dir))) { unsigned int idx; int r; @@ -1449,7 +1421,7 @@ static int build_mem_topology(struct memory_node **nodesp, u64 *cntp) cnt += 1; } out: - closedir(dir); + close(dir.dirfd); if (!ret) { *cntp = cnt; *nodesp = nodes; @@ -1582,7 +1554,7 @@ static int __write_pmu_caps(struct feat_fd *ff, struct 
perf_pmu *pmu, static int write_cpu_pmu_caps(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { - struct perf_pmu *cpu_pmu = perf_pmus__find("cpu"); + struct perf_pmu *cpu_pmu = perf_pmus__find_core_pmu(); int ret; if (!cpu_pmu) @@ -1843,6 +1815,9 @@ static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp) root = &env->bpf_progs.infos; next = rb_first(root); + if (!next) + printf("# bpf_prog_info empty\n"); + while (next) { struct bpf_prog_info_node *node; @@ -1867,6 +1842,9 @@ static void print_bpf_btf(struct feat_fd *ff, FILE *fp) root = &env->bpf_progs.btfs; next = rb_first(root); + if (!next) + printf("# btf info empty\n"); + while (next) { struct btf_node *node; @@ -2140,17 +2118,18 @@ static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp) static void print_pmu_caps(struct feat_fd *ff, FILE *fp) { + struct perf_env *env = &ff->ph->env; struct pmu_caps *pmu_caps; - for (int i = 0; i < ff->ph->env.nr_pmus_with_caps; i++) { - pmu_caps = &ff->ph->env.pmu_caps[i]; + for (int i = 0; i < env->nr_pmus_with_caps; i++) { + pmu_caps = &env->pmu_caps[i]; __print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps, pmu_caps->pmu_name); } - if (strcmp(perf_env__arch(&ff->ph->env), "x86") == 0 && - perf_env__has_pmu_mapping(&ff->ph->env, "ibs_op")) { - char *max_precise = perf_env__find_pmu_cap(&ff->ph->env, "cpu", "max_precise"); + if (strcmp(perf_env__arch(env), "x86") == 0 && + perf_env__has_pmu_mapping(env, "ibs_op")) { + char *max_precise = perf_env__find_pmu_cap(env, "cpu", "max_precise"); if (max_precise != NULL && atoi(max_precise) == 0) fprintf(fp, "# AMD systems uses ibs_op// PMU for some precise events, e.g.: cycles:p, see the 'perf list' man page for further details.\n"); @@ -2159,18 +2138,19 @@ static void print_pmu_caps(struct feat_fd *ff, FILE *fp) static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) { + struct perf_env *env = &ff->ph->env; const char *delimiter = "# pmu mappings: "; char *str, *tmp; u32 pmu_num; u32 type; - pmu_num = ff->ph->env.nr_pmu_mappings; + pmu_num = env->nr_pmu_mappings; if (!pmu_num) { fprintf(fp, "# pmu mappings: not available\n"); return; } - str = ff->ph->env.pmu_mappings; + str = env->pmu_mappings; while (pmu_num) { type = strtoul(str, &tmp, 0); @@ -2252,17 +2232,18 @@ static void memory_node__fprintf(struct memory_node *n, static void print_mem_topology(struct feat_fd *ff, FILE *fp) { + struct perf_env *env = &ff->ph->env; struct memory_node *nodes; int i, nr; - nodes = ff->ph->env.memory_nodes; - nr = ff->ph->env.nr_memory_nodes; + nodes = env->memory_nodes; + nr = env->nr_memory_nodes; fprintf(fp, "# memory nodes (nr %d, block size 0x%llx):\n", - nr, ff->ph->env.memory_bsize); + nr, env->memory_bsize); for (i = 0; i < nr; i++) { - memory_node__fprintf(&nodes[i], ff->ph->env.memory_bsize, fp); + memory_node__fprintf(&nodes[i], env->memory_bsize, fp); } } @@ -2320,7 +2301,7 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, free(m.name); } - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); pr_debug("build id event received for %s: %s [%zu]\n", dso__long_name(dso), sbuild_id, size); dso__put(dso); @@ -2460,6 +2441,7 @@ static int process_build_id(struct feat_fd *ff, void *data __maybe_unused) static int process_nrcpus(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; int ret; u32 nr_cpus_avail, nr_cpus_online; @@ -2470,20 +2452,21 @@ static int process_nrcpus(struct feat_fd *ff, void *data 
__maybe_unused) ret = do_read_u32(ff, &nr_cpus_online); if (ret) return ret; - ff->ph->env.nr_cpus_avail = (int)nr_cpus_avail; - ff->ph->env.nr_cpus_online = (int)nr_cpus_online; + env->nr_cpus_avail = (int)nr_cpus_avail; + env->nr_cpus_online = (int)nr_cpus_online; return 0; } static int process_total_mem(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; u64 total_mem; int ret; ret = do_read_u64(ff, &total_mem); if (ret) return -1; - ff->ph->env.total_mem = (unsigned long long)total_mem; + env->total_mem = (unsigned long long)total_mem; return 0; } @@ -2544,13 +2527,14 @@ process_event_desc(struct feat_fd *ff, void *data __maybe_unused) static int process_cmdline(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; char *str, *cmdline = NULL, **argv = NULL; u32 nr, i, len = 0; if (do_read_u32(ff, &nr)) return -1; - ff->ph->env.nr_cmdline = nr; + env->nr_cmdline = nr; cmdline = zalloc(ff->size + nr + 1); if (!cmdline) @@ -2570,8 +2554,8 @@ static int process_cmdline(struct feat_fd *ff, void *data __maybe_unused) len += strlen(str) + 1; free(str); } - ff->ph->env.cmdline = cmdline; - ff->ph->env.cmdline_argv = (const char **) argv; + env->cmdline = cmdline; + env->cmdline_argv = (const char **) argv; return 0; error: @@ -2585,19 +2569,18 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) u32 nr, i; char *str = NULL; struct strbuf sb; - int cpu_nr = ff->ph->env.nr_cpus_avail; + struct perf_env *env = &ff->ph->env; + int cpu_nr = env->nr_cpus_avail; u64 size = 0; - struct perf_header *ph = ff->ph; - bool do_core_id_test = true; - ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu)); - if (!ph->env.cpu) + env->cpu = calloc(cpu_nr, sizeof(*env->cpu)); + if (!env->cpu) return -1; if (do_read_u32(ff, &nr)) goto free_cpu; - ph->env.nr_sibling_cores = nr; + env->nr_sibling_cores = nr; size += sizeof(u32); if (strbuf_init(&sb, 128) < 0) goto free_cpu; @@ -2613,12 +2596,12 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) size += string_size(str); zfree(&str); } - ph->env.sibling_cores = strbuf_detach(&sb, NULL); + env->sibling_cores = strbuf_detach(&sb, NULL); if (do_read_u32(ff, &nr)) return -1; - ph->env.nr_sibling_threads = nr; + env->nr_sibling_threads = nr; size += sizeof(u32); for (i = 0; i < nr; i++) { @@ -2632,43 +2615,28 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) size += string_size(str); zfree(&str); } - ph->env.sibling_threads = strbuf_detach(&sb, NULL); + env->sibling_threads = strbuf_detach(&sb, NULL); /* * The header may be from old perf, * which doesn't include core id and socket id information. */ if (ff->size <= size) { - zfree(&ph->env.cpu); + zfree(&env->cpu); return 0; } - /* On s390 the socket_id number is not related to the numbers of cpus. - * The socket_id number might be higher than the numbers of cpus. - * This depends on the configuration. - * AArch64 is the same. - */ - if (ph->env.arch && (!strncmp(ph->env.arch, "s390", 4) - || !strncmp(ph->env.arch, "aarch64", 7))) - do_core_id_test = false; - for (i = 0; i < (u32)cpu_nr; i++) { if (do_read_u32(ff, &nr)) goto free_cpu; - ph->env.cpu[i].core_id = nr; + env->cpu[i].core_id = nr; size += sizeof(u32); if (do_read_u32(ff, &nr)) goto free_cpu; - if (do_core_id_test && nr != (u32)-1 && nr > (u32)cpu_nr) { - pr_debug("socket_id number is too big." 
- "You may need to upgrade the perf tool.\n"); - goto free_cpu; - } - - ph->env.cpu[i].socket_id = nr; + env->cpu[i].socket_id = nr; size += sizeof(u32); } @@ -2682,7 +2650,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) if (do_read_u32(ff, &nr)) return -1; - ph->env.nr_sibling_dies = nr; + env->nr_sibling_dies = nr; size += sizeof(u32); for (i = 0; i < nr; i++) { @@ -2696,13 +2664,13 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) size += string_size(str); zfree(&str); } - ph->env.sibling_dies = strbuf_detach(&sb, NULL); + env->sibling_dies = strbuf_detach(&sb, NULL); for (i = 0; i < (u32)cpu_nr; i++) { if (do_read_u32(ff, &nr)) goto free_cpu; - ph->env.cpu[i].die_id = nr; + env->cpu[i].die_id = nr; } return 0; @@ -2711,12 +2679,13 @@ error: strbuf_release(&sb); zfree(&str); free_cpu: - zfree(&ph->env.cpu); + zfree(&env->cpu); return -1; } static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct numa_node *nodes, *n; u32 nr, i; char *str; @@ -2751,8 +2720,8 @@ static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused) if (!n->map) goto error; } - ff->ph->env.nr_numa_nodes = nr; - ff->ph->env.numa_nodes = nodes; + env->nr_numa_nodes = nr; + env->numa_nodes = nodes; return 0; error: @@ -2762,6 +2731,7 @@ error: static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; char *name; u32 pmu_num; u32 type; @@ -2775,7 +2745,7 @@ static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused) return 0; } - ff->ph->env.nr_pmu_mappings = pmu_num; + env->nr_pmu_mappings = pmu_num; if (strbuf_init(&sb, 128) < 0) return -1; @@ -2794,12 +2764,14 @@ static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused) goto error; if (!strcmp(name, "msr")) - ff->ph->env.msr_pmu_type = type; + env->msr_pmu_type = type; free(name); pmu_num--; } - ff->ph->env.pmu_mappings = strbuf_detach(&sb, NULL); + /* AMD may set it by evlist__has_amd_ibs() from perf_session__new() */ + free(env->pmu_mappings); + env->pmu_mappings = strbuf_detach(&sb, NULL); return 0; error: @@ -2809,6 +2781,7 @@ error: static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; size_t ret = -1; u32 i, nr, nr_groups; struct perf_session *session; @@ -2822,7 +2795,7 @@ static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused) if (do_read_u32(ff, &nr_groups)) return -1; - ff->ph->env.nr_groups = nr_groups; + env->nr_groups = nr_groups; if (!nr_groups) { pr_debug("group desc not available\n"); return 0; @@ -2906,6 +2879,7 @@ static int process_auxtrace(struct feat_fd *ff, void *data __maybe_unused) static int process_cache(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct cpu_cache_level *caches; u32 cnt, i, version; @@ -2946,8 +2920,8 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused) #undef _R } - ff->ph->env.caches = caches; - ff->ph->env.caches_cnt = cnt; + env->caches = caches; + env->caches_cnt = cnt; return 0; out_free_caches: for (i = 0; i < cnt; i++) { @@ -2983,6 +2957,7 @@ static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused) static int process_mem_topology(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct memory_node *nodes; u64 version, i, nr, bsize; int ret = -1; @@ -3021,9 
+2996,9 @@ static int process_mem_topology(struct feat_fd *ff, nodes[i] = n; } - ff->ph->env.memory_bsize = bsize; - ff->ph->env.memory_nodes = nodes; - ff->ph->env.nr_memory_nodes = nr; + env->memory_bsize = bsize; + env->memory_nodes = nodes; + env->nr_memory_nodes = nr; ret = 0; out: @@ -3035,7 +3010,9 @@ out: static int process_clockid(struct feat_fd *ff, void *data __maybe_unused) { - if (do_read_u64(ff, &ff->ph->env.clock.clockid_res_ns)) + struct perf_env *env = &ff->ph->env; + + if (do_read_u64(ff, &env->clock.clockid_res_ns)) return -1; return 0; @@ -3044,6 +3021,7 @@ static int process_clockid(struct feat_fd *ff, static int process_clock_data(struct feat_fd *ff, void *_data __maybe_unused) { + struct perf_env *env = &ff->ph->env; u32 data32; u64 data64; @@ -3058,26 +3036,27 @@ static int process_clock_data(struct feat_fd *ff, if (do_read_u32(ff, &data32)) return -1; - ff->ph->env.clock.clockid = data32; + env->clock.clockid = data32; /* TOD ref time */ if (do_read_u64(ff, &data64)) return -1; - ff->ph->env.clock.tod_ns = data64; + env->clock.tod_ns = data64; /* clockid ref time */ if (do_read_u64(ff, &data64)) return -1; - ff->ph->env.clock.clockid_ns = data64; - ff->ph->env.clock.enabled = true; + env->clock.clockid_ns = data64; + env->clock.enabled = true; return 0; } static int process_hybrid_topology(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct hybrid_node *nodes, *n; u32 nr, i; @@ -3101,8 +3080,8 @@ static int process_hybrid_topology(struct feat_fd *ff, goto error; } - ff->ph->env.nr_hybrid_nodes = nr; - ff->ph->env.hybrid_nodes = nodes; + env->nr_hybrid_nodes = nr; + env->hybrid_nodes = nodes; return 0; error: @@ -3188,7 +3167,11 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) /* after reading from file, translate offset to address */ bpil_offs_to_addr(info_linear); info_node->info_linear = info_linear; - __perf_env__insert_bpf_prog_info(env, info_node); + info_node->metadata = NULL; + if (!__perf_env__insert_bpf_prog_info(env, info_node)) { + free(info_linear); + free(info_node); + } } up_write(&env->bpf_progs.lock); @@ -3235,7 +3218,8 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) if (__do_read(ff, node->data, data_size)) goto out; - __perf_env__insert_btf(env, node); + if (!__perf_env__insert_btf(env, node)) + free(node); node = NULL; } @@ -3250,19 +3234,21 @@ out: static int process_compressed(struct feat_fd *ff, void *data __maybe_unused) { - if (do_read_u32(ff, &(ff->ph->env.comp_ver))) + struct perf_env *env = &ff->ph->env; + + if (do_read_u32(ff, &(env->comp_ver))) return -1; - if (do_read_u32(ff, &(ff->ph->env.comp_type))) + if (do_read_u32(ff, &(env->comp_type))) return -1; - if (do_read_u32(ff, &(ff->ph->env.comp_level))) + if (do_read_u32(ff, &(env->comp_level))) return -1; - if (do_read_u32(ff, &(ff->ph->env.comp_ratio))) + if (do_read_u32(ff, &(env->comp_ratio))) return -1; - if (do_read_u32(ff, &(ff->ph->env.comp_mmap_len))) + if (do_read_u32(ff, &(env->comp_mmap_len))) return -1; return 0; @@ -3334,19 +3320,21 @@ error: static int process_cpu_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) { - int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps, - &ff->ph->env.cpu_pmu_caps, - &ff->ph->env.max_branches, - &ff->ph->env.br_cntr_nr, - &ff->ph->env.br_cntr_width); + struct perf_env *env = &ff->ph->env; + int ret = __process_pmu_caps(ff, &env->nr_cpu_pmu_caps, + &env->cpu_pmu_caps, + &env->max_branches, + &env->br_cntr_nr, + 
&env->br_cntr_width); - if (!ret && !ff->ph->env.cpu_pmu_caps) + if (!ret && !env->cpu_pmu_caps) pr_debug("cpu pmu capabilities not available\n"); return ret; } static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) { + struct perf_env *env = &ff->ph->env; struct pmu_caps *pmu_caps; u32 nr_pmu, i; int ret; @@ -3384,8 +3372,8 @@ static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) } } - ff->ph->env.nr_pmus_with_caps = nr_pmu; - ff->ph->env.pmu_caps = pmu_caps; + env->nr_pmus_with_caps = nr_pmu; + env->pmu_caps = pmu_caps; return 0; err: @@ -3676,32 +3664,51 @@ int perf_header__write_pipe(int fd) static int perf_session__do_write_header(struct perf_session *session, struct evlist *evlist, int fd, bool at_exit, - struct feat_copier *fc) + struct feat_copier *fc, + bool write_attrs_after_data) { struct perf_file_header f_header; - struct perf_file_attr f_attr; struct perf_header *header = &session->header; struct evsel *evsel; struct feat_fd ff = { + .ph = header, .fd = fd, }; - u64 attr_offset; + u64 attr_offset = sizeof(f_header), attr_size = 0; int err; - lseek(fd, sizeof(f_header), SEEK_SET); + if (write_attrs_after_data && at_exit) { + /* + * Write features at the end of the file first so that + * attributes may come after them. + */ + if (!header->data_offset && header->data_size) { + pr_err("File contains data but offset unknown\n"); + err = -1; + goto err_out; + } + header->feat_offset = header->data_offset + header->data_size; + err = perf_header__adds_write(header, evlist, fd, fc); + if (err < 0) + goto err_out; + attr_offset = lseek(fd, 0, SEEK_CUR); + } else { + lseek(fd, attr_offset, SEEK_SET); + } evlist__for_each_entry(session->evlist, evsel) { - evsel->id_offset = lseek(fd, 0, SEEK_CUR); - err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64)); - if (err < 0) { - pr_debug("failed to write perf header\n"); - free(ff.buf); - return err; + evsel->id_offset = attr_offset; + /* Avoid writing at the end of the file until the session is exiting. */ + if (!write_attrs_after_data || at_exit) { + err = do_write(&ff, evsel->core.id, evsel->core.ids * sizeof(u64)); + if (err < 0) { + pr_debug("failed to write perf header\n"); + goto err_out; + } } + attr_offset += evsel->core.ids * sizeof(u64); } - attr_offset = lseek(ff.fd, 0, SEEK_CUR); - evlist__for_each_entry(evlist, evsel) { if (evsel->core.attr.size < sizeof(evsel->core.attr)) { /* @@ -3711,40 +3718,46 @@ static int perf_session__do_write_header(struct perf_session *session, */ evsel->core.attr.size = sizeof(evsel->core.attr); } - f_attr = (struct perf_file_attr){ - .attr = evsel->core.attr, - .ids = { - .offset = evsel->id_offset, - .size = evsel->core.ids * sizeof(u64), + /* Avoid writing at the end of the file until the session is exiting. 
*/ + if (!write_attrs_after_data || at_exit) { + struct perf_file_attr f_attr = { + .attr = evsel->core.attr, + .ids = { + .offset = evsel->id_offset, + .size = evsel->core.ids * sizeof(u64), + } + }; + err = do_write(&ff, &f_attr, sizeof(f_attr)); + if (err < 0) { + pr_debug("failed to write perf header attribute\n"); + goto err_out; } - }; - err = do_write(&ff, &f_attr, sizeof(f_attr)); - if (err < 0) { - pr_debug("failed to write perf header attribute\n"); - free(ff.buf); - return err; } + attr_size += sizeof(struct perf_file_attr); } - if (!header->data_offset) - header->data_offset = lseek(fd, 0, SEEK_CUR); + if (!header->data_offset) { + if (write_attrs_after_data) + header->data_offset = sizeof(f_header); + else + header->data_offset = attr_offset + attr_size; + } header->feat_offset = header->data_offset + header->data_size; - if (at_exit) { + if (!write_attrs_after_data && at_exit) { + /* Write features now feat_offset is known. */ err = perf_header__adds_write(header, evlist, fd, fc); - if (err < 0) { - free(ff.buf); - return err; - } + if (err < 0) + goto err_out; } f_header = (struct perf_file_header){ .magic = PERF_MAGIC, .size = sizeof(f_header), - .attr_size = sizeof(f_attr), + .attr_size = sizeof(struct perf_file_attr), .attrs = { .offset = attr_offset, - .size = evlist->core.nr_entries * sizeof(f_attr), + .size = attr_size, }, .data = { .offset = header->data_offset, @@ -3757,21 +3770,24 @@ static int perf_session__do_write_header(struct perf_session *session, lseek(fd, 0, SEEK_SET); err = do_write(&ff, &f_header, sizeof(f_header)); - free(ff.buf); if (err < 0) { pr_debug("failed to write perf header\n"); - return err; + goto err_out; + } else { + lseek(fd, 0, SEEK_END); + err = 0; } - lseek(fd, header->data_offset + header->data_size, SEEK_SET); - - return 0; +err_out: + free(ff.buf); + return err; } int perf_session__write_header(struct perf_session *session, struct evlist *evlist, int fd, bool at_exit) { - return perf_session__do_write_header(session, evlist, fd, at_exit, NULL); + return perf_session__do_write_header(session, evlist, fd, at_exit, /*fc=*/NULL, + /*write_attrs_after_data=*/false); } size_t perf_session__data_offset(const struct evlist *evlist) @@ -3791,9 +3807,11 @@ size_t perf_session__data_offset(const struct evlist *evlist) int perf_session__inject_header(struct perf_session *session, struct evlist *evlist, int fd, - struct feat_copier *fc) + struct feat_copier *fc, + bool write_attrs_after_data) { - return perf_session__do_write_header(session, evlist, fd, true, fc); + return perf_session__do_write_header(session, evlist, fd, true, fc, + write_attrs_after_data); } static int perf_header__getbuffer64(struct perf_header *header, @@ -3986,6 +4004,24 @@ int perf_file_header__read(struct perf_file_header *header, adds_features)); } + if (header->size > header->attrs.offset) { + pr_err("Perf file header corrupt: header overlaps attrs\n"); + return -1; + } + + if (header->size > header->data.offset) { + pr_err("Perf file header corrupt: header overlaps data\n"); + return -1; + } + + if ((header->attrs.offset <= header->data.offset && + header->attrs.offset + header->attrs.size > header->data.offset) || + (header->attrs.offset > header->data.offset && + header->data.offset + header->data.size > header->attrs.offset)) { + pr_err("Perf file header corrupt: Attributes and data overlap\n"); + return -1; + } + if (header->size != sizeof(*header)) { /* Support the previous format */ if (header->size == offsetof(typeof(*header), adds_features)) @@ -4066,13 +4102,8 
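(Editorial aside, not part of the patch: a minimal sketch of the two on-disk layouts that the new write_attrs_after_data flag selects between, inferred from the hunks above; the section order shown is an assumption for illustration, since readers locate each section via the {offset, size} pairs rather than physical order.)

    /*
     * write_attrs_after_data == false (classic layout, attrs written up front):
     *   [ file header | ids + attrs | data | features ]
     * write_attrs_after_data == true (attrs deferred until the session exits):
     *   [ file header | data | features | ids + attrs ]
     * The overlap checks added to perf_file_header__read() reject files whose
     * header, attrs and data regions collide, whichever order they appear in.
     */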
@@ -4066,13 +4102,8 @@ static int perf_file_section__process(struct perf_file_section *section,
 
 static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
 				       struct perf_header *ph,
-				       struct perf_data* data,
-				       bool repipe, int repipe_fd)
+				       struct perf_data *data)
 {
-	struct feat_fd ff = {
-		.fd = repipe_fd,
-		.ph = ph,
-	};
 	ssize_t ret;
 
 	ret = perf_data__read(data, header, sizeof(*header));
@@ -4087,19 +4118,15 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
 	if (ph->needs_swap)
 		header->size = bswap_64(header->size);
 
-	if (repipe && do_write(&ff, header, sizeof(*header)) < 0)
-		return -1;
-
 	return 0;
 }
 
-static int perf_header__read_pipe(struct perf_session *session, int repipe_fd)
+static int perf_header__read_pipe(struct perf_session *session)
 {
 	struct perf_header *header = &session->header;
 	struct perf_pipe_file_header f_header;
 
-	if (perf_file_header__read_pipe(&f_header, header, session->data,
-					session->repipe, repipe_fd) < 0) {
+	if (perf_file_header__read_pipe(&f_header, header, session->data) < 0) {
 		pr_debug("incompatible file format\n");
 		return -EINVAL;
 	}
@@ -4199,7 +4226,7 @@ static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_h
 }
 #endif
 
-int perf_session__read_header(struct perf_session *session, int repipe_fd)
+int perf_session__read_header(struct perf_session *session)
 {
 	struct perf_data *data = session->data;
 	struct perf_header *header = &session->header;
@@ -4213,14 +4240,14 @@ int perf_session__read_header(struct perf_session *session, int repipe_fd)
 	if (session->evlist == NULL)
 		return -ENOMEM;
 
-	session->evlist->env = &header->env;
+	session->evlist->session = session;
 	session->machines.host.env = &header->env;
 
 	/*
	 * We can read 'pipe' data event from regular file,
	 * check for the pipe header regardless of source.
	 */
-	err = perf_header__read_pipe(session, repipe_fd);
+	err = perf_header__read_pipe(session);
 	if (!err || perf_data__is_pipe(data)) {
 		data->is_pipe = true;
 		return err;
 	}
@@ -4326,12 +4353,12 @@ out_delete_evlist:
 int perf_event__process_feature(struct perf_session *session,
 				union perf_event *event)
 {
-	struct perf_tool *tool = session->tool;
 	struct feat_fd ff = { .fd = 0 };
 	struct perf_record_header_feature *fe = (struct perf_record_header_feature *)event;
 	int type = fe->header.type;
 	u64 feat = fe->feat_id;
 	int ret = 0;
+	bool print = dump_trace;
 
 	if (type < 0 || type >= PERF_RECORD_HEADER_MAX) {
 		pr_warning("invalid record type %d in pipe-mode\n", type);
@@ -4342,28 +4369,35 @@ int perf_event__process_feature(struct perf_session *session,
 		return -1;
 	}
 
-	if (!feat_ops[feat].process)
-		return 0;
-
 	ff.buf  = (void *)fe->data;
 	ff.size = event->header.size - sizeof(*fe);
 	ff.ph = &session->header;
 
-	if (feat_ops[feat].process(&ff, NULL)) {
+	if (feat_ops[feat].process && feat_ops[feat].process(&ff, NULL)) {
 		ret = -1;
 		goto out;
 	}
 
-	if (!feat_ops[feat].print || !tool->show_feat_hdr)
-		goto out;
+	if (session->tool->show_feat_hdr) {
+		if (!feat_ops[feat].full_only ||
+		    session->tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) {
+			print = true;
+		} else {
+			fprintf(stdout, "# %s info available, use -I to display\n",
+				feat_ops[feat].name);
+		}
+	}
 
-	if (!feat_ops[feat].full_only ||
-	    tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) {
-		feat_ops[feat].print(&ff, stdout);
-	} else {
-		fprintf(stdout, "# %s info available, use -I to display\n",
-			feat_ops[feat].name);
+	if (dump_trace)
+		printf(", ");
+
+	if (print) {
+		if (feat_ops[feat].print)
+			feat_ops[feat].print(&ff, stdout);
+		else
+			printf("# %s", feat_ops[feat].name);
 	}
+
 out:
 	free_event_desc(ff.events);
 	return ret;
@@ -4405,7 +4439,12 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
 	return ret;
 }
 
-int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
+size_t perf_event__fprintf_attr(union perf_event *event, FILE *fp)
+{
+	return perf_event_attr__fprintf(fp, &event->attr.attr, __desc_attr__fprintf, NULL);
+}
+
+int perf_event__process_attr(const struct perf_tool *tool __maybe_unused,
 			     union perf_event *event,
 			     struct evlist **pevlist)
 {
@@ -4414,6 +4453,9 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
 	struct evsel *evsel;
 	struct evlist *evlist = *pevlist;
 
+	if (dump_trace)
+		perf_event__fprintf_attr(event, stdout);
+
 	if (evlist == NULL) {
 		*pevlist = evlist = evlist__new();
 		if (evlist == NULL)
@@ -4444,7 +4486,7 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
-int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+int perf_event__process_event_update(const struct perf_tool *tool __maybe_unused,
 				     union perf_event *event,
 				     struct evlist **pevlist)
 {
@@ -4480,8 +4522,8 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
 	case PERF_EVENT_UPDATE__CPUS:
 		map = cpu_map__new_data(&ev->cpus.cpus);
 		if (map) {
-			perf_cpu_map__put(evsel->core.own_cpus);
-			evsel->core.own_cpus = map;
+			perf_cpu_map__put(evsel->core.pmu_cpus);
+			evsel->core.pmu_cpus = map;
 		} else
 			pr_err("failed to get event_update cpus\n");
 	default:
@@ -4492,7 +4534,8 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
 }
 
 #ifdef HAVE_LIBTRACEEVENT
-int perf_event__process_tracing_data(struct perf_session *session,
+int perf_event__process_tracing_data(const struct perf_tool *tool __maybe_unused,
+				     struct perf_session *session,
 				     union perf_event *event)
 {
 	ssize_t size_read, padding, size = event->tracing_data.size;
@@ -4514,15 +4557,14 @@ int perf_event__process_tracing_data(struct perf_session *session,
 			SEEK_SET);
 	}
 
-	size_read = trace_report(fd, &session->tevent,
-				 session->repipe);
+	size_read = trace_report(fd, &session->tevent, session->trace_event_repipe);
 	padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read;
 
 	if (readn(fd, buf, padding) < 0) {
 		pr_err("%s: reading input file", __func__);
 		return -1;
 	}
-	if (session->repipe) {
+	if (session->trace_event_repipe) {
 		int retw = write(STDOUT_FILENO, buf, padding);
 
 		if (retw <= 0 || retw != padding) {
 			pr_err("%s: repiping tracing data padding", __func__);
@@ -4541,7 +4583,8 @@
 }
 #endif
 
-int perf_event__process_build_id(struct perf_session *session,
+int perf_event__process_build_id(const struct perf_tool *tool __maybe_unused,
+				 struct perf_session *session,
 				 union perf_event *event)
 {
 	__event_process_build_id(&event->build_id,
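(Editorial aside, not part of the patch: a minimal standalone reader for the on-disk header that header.h documents just below. The "PERFILE2" magic, the field order and the overlap rules come from the diff; the struct definitions here are simplified local mirrors for illustration — the real struct perf_file_header additionally ends with a feature bitmap, omitted here.)

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct file_section { uint64_t offset, size; };   /* mirrors perf_file_section */

    /* Simplified mirror of struct perf_file_header; adds_features bitmap omitted. */
    struct file_header {
            uint64_t magic;          /* the bytes "PERFILE2" */
            uint64_t size;           /* size of the on-disk header */
            uint64_t attr_size;      /* sizeof(perf_event_attr) + sizeof(file_section) */
            struct file_section attrs, data, event_types;
    };

    static int read_header(FILE *f, struct file_header *h)
    {
            if (fread(h, sizeof(*h), 1, f) != 1)
                    return -1;
            if (memcmp(&h->magic, "PERFILE2", 8) != 0)
                    return -1;      /* wrong magic (or file needs byte-swapping) */
            /* Same sanity rule the diff adds: header must not overlap attrs/data. */
            if (h->size > h->attrs.offset || h->size > h->data.offset)
                    return -1;
            return 0;
    }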
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 7c16a250e738..c058021c3150 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -10,7 +10,13 @@
 #include <linux/bitmap.h>
 #include <linux/types.h>
 #include "env.h"
-#include "pmu.h"
+#include <perf/cpumap.h>
+
+struct evlist;
+union perf_event;
+struct perf_header;
+struct perf_session;
+struct perf_tool;
 
 enum {
 	HEADER_RESERVED		= 0,	/* always cleared */
@@ -61,14 +67,28 @@ struct perf_file_section {
 	u64 size;
 };
 
+/**
+ * struct perf_file_header: Header representation on disk.
+ */
 struct perf_file_header {
+	/** @magic: Holds "PERFILE2". */
 	u64				magic;
+	/** @size: Size of this header - sizeof(struct perf_file_header). */
 	u64				size;
+	/**
+	 * @attr_size: Size of attrs entries - sizeof(struct perf_event_attr) +
+	 * sizeof(struct perf_file_section).
+	 */
 	u64				attr_size;
+	/** @attrs: Offset and size of file section holding attributes. */
 	struct perf_file_section	attrs;
+	/** @data: Offset and size of file section holding regular event data. */
 	struct perf_file_section	data;
-	/* event_types is ignored */
+	/** @event_types: Ignored. */
 	struct perf_file_section	event_types;
+	/**
+	 * @adds_features: Bitmap of features. The features are immediately after the data section.
+	 */
 	DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
 };
 
@@ -77,8 +97,6 @@ struct perf_pipe_file_header {
 	u64 size;
 };
 
-struct perf_header;
-
 int perf_file_header__read(struct perf_file_header *header,
 			   struct perf_header *ph, int fd);
 
@@ -110,14 +128,9 @@ struct perf_header_feature_ops {
 	bool synthesize;
 };
 
-struct evlist;
-struct perf_session;
-struct perf_tool;
-union perf_event;
-
 extern const char perf_version_string[];
 
-int perf_session__read_header(struct perf_session *session, int repipe_fd);
+int perf_session__read_header(struct perf_session *session);
 int perf_session__write_header(struct perf_session *session,
 			       struct evlist *evlist,
 			       int fd, bool at_exit);
@@ -136,7 +149,8 @@ struct feat_copier {
 int perf_session__inject_header(struct perf_session *session,
 				struct evlist *evlist,
 				int fd,
-				struct feat_copier *fc);
+				struct feat_copier *fc,
+				bool write_attrs_after_data);
 
 size_t perf_session__data_offset(const struct evlist *evlist);
 
@@ -156,17 +170,20 @@ int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full);
 
 int perf_event__process_feature(struct perf_session *session,
 				union perf_event *event);
-int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
+int perf_event__process_attr(const struct perf_tool *tool, union perf_event *event,
 			     struct evlist **pevlist);
-int perf_event__process_event_update(struct perf_tool *tool,
+int perf_event__process_event_update(const struct perf_tool *tool,
 				     union perf_event *event,
 				     struct evlist **pevlist);
+size_t perf_event__fprintf_attr(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
 #ifdef HAVE_LIBTRACEEVENT
-int perf_event__process_tracing_data(struct perf_session *session,
+int perf_event__process_tracing_data(const struct perf_tool *tool,
+				     struct perf_session *session,
 				     union perf_event *event);
 #endif
-int perf_event__process_build_id(struct perf_session *session,
+int perf_event__process_build_id(const struct perf_tool *tool,
+				 struct perf_session *session,
 				 union perf_event *event);
 bool is_perf_magic(u64 magic);
 
@@ -181,14 +198,16 @@ int write_padded(struct feat_fd *fd, const void *bf,
 
 #define MAX_CACHE_LVL 4
 
-int is_cpu_online(unsigned int cpu);
 int build_caches_for_cpu(u32 cpu, struct cpu_cache_level caches[], u32 *cntp);
 
 /*
  * arch specific callback
 */
-int get_cpuid(char *buffer, size_t sz);
+int get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu);
+
+char *get_cpuid_str(struct perf_cpu cpu);
+
+char *get_cpuid_allow_env_override(struct perf_cpu cpu);
 
-char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused);
 int strcmp_cpuid_str(const char *s1, const char *s2);
 
 #endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/hisi-ptt-decoder/Build b/tools/perf/util/hisi-ptt-decoder/Build
index db3db8b75033..2ee0eb731656 100644
--- a/tools/perf/util/hisi-ptt-decoder/Build
+++ b/tools/perf/util/hisi-ptt-decoder/Build
@@ -1 +1 @@
-perf-$(CONFIG_AUXTRACE) += hisi-ptt-pkt-decoder.o
+perf-util-y += hisi-ptt-pkt-decoder.o
diff --git a/tools/perf/util/hisi-ptt.c b/tools/perf/util/hisi-ptt.c
index 52d0ce302ca0..e4cc4785f744 100644
--- a/tools/perf/util/hisi-ptt.c
+++ b/tools/perf/util/hisi-ptt.c
@@ -35,11 +35,6 @@ struct hisi_ptt {
 	u32 pmu_type;
 };
 
-struct hisi_ptt_queue {
-	struct hisi_ptt *ptt;
-	struct auxtrace_buffer *buffer;
-};
-
 static enum hisi_ptt_pkt_type hisi_ptt_check_packet_type(unsigned char *buf)
 {
 	uint32_t head = *(uint32_t *)buf;
@@ -84,14 +79,14 @@ static void hisi_ptt_dump_event(struct hisi_ptt *ptt, unsigned char *buf,
 static int hisi_ptt_process_event(struct perf_session *session __maybe_unused,
 				  union perf_event *event __maybe_unused,
 				  struct perf_sample *sample __maybe_unused,
-				  struct perf_tool *tool __maybe_unused)
+				  const struct perf_tool *tool __maybe_unused)
 {
 	return 0;
 }
 
 static int hisi_ptt_process_auxtrace_event(struct perf_session *session,
 					   union perf_event *event,
-					   struct perf_tool *tool __maybe_unused)
+					   const struct perf_tool *tool __maybe_unused)
 {
 	struct hisi_ptt *ptt = container_of(session->auxtrace, struct hisi_ptt,
 					    auxtrace);
@@ -128,7 +123,7 @@ static int hisi_ptt_process_auxtrace_event(struct perf_session *session,
 }
 
 static int hisi_ptt_flush(struct perf_session *session __maybe_unused,
-			  struct perf_tool *tool __maybe_unused)
+			  const struct perf_tool *tool __maybe_unused)
 {
 	return 0;
 }
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 2e9e193179dd..ef4b569f7df4 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -32,6 +32,9 @@
 #include <linux/time64.h>
 #include <linux/zalloc.h>
 
+static int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
+static int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
+
 static bool hists__filter_entry_by_dso(struct hists *hists,
 				       struct hist_entry *he);
 static bool hists__filter_entry_by_thread(struct hists *hists,
@@ -40,6 +43,8 @@ static bool hists__filter_entry_by_symbol(struct hists *hists,
 					  struct hist_entry *he);
 static bool hists__filter_entry_by_socket(struct hists *hists,
 					  struct hist_entry *he);
+static bool hists__filter_entry_by_parallelism(struct hists *hists,
+					       struct hist_entry *he);
 
 u16 hists__col_len(struct hists *hists, enum hist_column col)
 {
@@ -204,6 +209,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 
 	hists__new_col_len(hists, HISTC_CGROUP, 6);
 	hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
+	hists__new_col_len(hists, HISTC_PARALLELISM, 11);
 	hists__new_col_len(hists, HISTC_CPU, 3);
 	hists__new_col_len(hists, HISTC_SOCKET, 6);
 	hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
@@ -218,6 +224,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 	hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13);
 	hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13);
 	hists__new_col_len(hists, HISTC_ADDR, BITS_PER_LONG / 4 + 2);
+	hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_PREDICTED, 9);
+	hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_ABORT, 5);
+	hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_CYCLES, 6);
 
 	if (symbol_conf.nanosecs)
 		hists__new_col_len(hists, HISTC_TIME, 16);
@@ -296,9 +305,10 @@ static long hist_time(unsigned long htime)
 	return htime;
 }
 
-static void he_stat__add_period(struct he_stat *he_stat, u64 period)
+static void he_stat__add_period(struct he_stat *he_stat, u64 period, u64 latency)
 {
 	he_stat->period		+= period;
+	he_stat->latency	+= latency;
 	he_stat->nr_events	+= 1;
 }
 
@@ -313,6 +323,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
 	dest->weight2		+= src->weight2;
 	dest->weight3		+= src->weight3;
 	dest->nr_events		+= src->nr_events;
+	dest->latency		+= src->latency;
 }
 
 static void he_stat__decay(struct he_stat *he_stat)
@@ -322,6 +333,70 @@ static void he_stat__decay(struct he_stat *he_stat)
 	he_stat->weight1 = (he_stat->weight1 * 7) / 8;
 	he_stat->weight2 = (he_stat->weight2 * 7) / 8;
 	he_stat->weight3 = (he_stat->weight3 * 7) / 8;
+	he_stat->latency = (he_stat->latency * 7) / 8;
+}
+
+static int hists__update_mem_stat(struct hists *hists, struct hist_entry *he,
+				  struct mem_info *mi, u64 period)
+{
+	if (hists->nr_mem_stats == 0)
+		return 0;
+
+	if (he->mem_stat == NULL) {
+		he->mem_stat = calloc(hists->nr_mem_stats, sizeof(*he->mem_stat));
+		if (he->mem_stat == NULL)
+			return -1;
+	}
+
+	for (int i = 0; i < hists->nr_mem_stats; i++) {
+		int idx = mem_stat_index(hists->mem_stat_types[i],
+					 mem_info__const_data_src(mi)->val);
+
+		assert(0 <= idx && idx < MEM_STAT_LEN);
+		he->mem_stat[i].entries[idx] += period;
+		hists->mem_stat_total[i].entries[idx] += period;
+	}
+	return 0;
+}
+
+static void hists__add_mem_stat(struct hists *hists, struct hist_entry *dst,
+				struct hist_entry *src)
+{
+	if (hists->nr_mem_stats == 0)
+		return;
+
+	for (int i = 0; i < hists->nr_mem_stats; i++) {
+		for (int k = 0; k < MEM_STAT_LEN; k++)
+			dst->mem_stat[i].entries[k] += src->mem_stat[i].entries[k];
+	}
+}
+
+static int hists__clone_mem_stat(struct hists *hists, struct hist_entry *dst,
+				 struct hist_entry *src)
+{
+	if (hists->nr_mem_stats == 0)
+		return 0;
+
+	dst->mem_stat = calloc(hists->nr_mem_stats, sizeof(*dst->mem_stat));
+	if (dst->mem_stat == NULL)
+		return -1;
+
+	for (int i = 0; i < hists->nr_mem_stats; i++) {
+		for (int k = 0; k < MEM_STAT_LEN; k++)
+			dst->mem_stat[i].entries[k] = src->mem_stat[i].entries[k];
+	}
+	return 0;
+}
+
+static void hists__decay_mem_stat(struct hists *hists, struct hist_entry *he)
+{
+	if (hists->nr_mem_stats == 0)
+		return;
+
+	for (int i = 0; i < hists->nr_mem_stats; i++) {
+		for (int k = 0; k < MEM_STAT_LEN; k++)
+			he->mem_stat[i].entries[k] = (he->mem_stat[i].entries[k] * 7) / 8;
+	}
 }
 
 static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
@@ -329,7 +404,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
 static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
 {
 	u64 prev_period = he->stat.period;
-	u64 diff;
+	u64 prev_latency = he->stat.latency;
 
 	if (prev_period == 0)
 		return true;
@@ -338,13 +413,18 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
 	if (symbol_conf.cumulate_callchain)
 		he_stat__decay(he->stat_acc);
 	decay_callchain(he->callchain);
-
-	diff = prev_period - he->stat.period;
+	hists__decay_mem_stat(hists, he);
 
 	if (!he->depth) {
-		hists->stats.total_period -= diff;
-		if (!he->filtered)
-			hists->stats.total_non_filtered_period -= diff;
+		u64 period_diff = prev_period - he->stat.period;
+		u64 latency_diff = prev_latency - he->stat.latency;
+
+		hists->stats.total_period -= period_diff;
+		hists->stats.total_latency -= latency_diff;
+		if (!he->filtered) {
+			hists->stats.total_non_filtered_period -= period_diff;
+			hists->stats.total_non_filtered_latency -= latency_diff;
+		}
 	}
 
 	if (!he->leaf) {
@@ -359,7 +439,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
 		}
 	}
 
-	return he->stat.period == 0;
+	return he->stat.period == 0 && he->stat.latency == 0;
 }
 
 static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
@@ -472,10 +552,18 @@ static int hist_entry__init(struct hist_entry *he,
 		memcpy(he->branch_info, template->branch_info,
 		       sizeof(*he->branch_info));
 
+		he->branch_info->from.ms.maps = maps__get(he->branch_info->from.ms.maps);
 		he->branch_info->from.ms.map = map__get(he->branch_info->from.ms.map);
+		he->branch_info->to.ms.maps = maps__get(he->branch_info->to.ms.maps);
 		he->branch_info->to.ms.map = map__get(he->branch_info->to.ms.map);
 	}
 
+	if (he->mem_info) {
+		he->mem_info = mem_info__clone(template->mem_info);
+		if (he->mem_info == NULL)
+			goto err_infos;
+	}
+
 	if (hist_entry__has_callchains(he) && symbol_conf.use_callchain)
 		callchain_init(he->callchain);
@@ -520,10 +608,8 @@ err_infos:
 		map_symbol__exit(&he->branch_info->to.ms);
 		zfree(&he->branch_info);
 	}
-	if (he->mem_info) {
-		map_symbol__exit(&mem_info__iaddr(he->mem_info)->ms);
-		map_symbol__exit(&mem_info__daddr(he->mem_info)->ms);
-	}
+	if (he->mem_info)
+		mem_info__zput(he->mem_info);
 err:
 	map_symbol__exit(&he->ms);
 	zfree(&he->stat_acc);
@@ -570,21 +656,24 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
 	return he;
 }
 
-static u8 symbol__parent_filter(const struct symbol *parent)
+static filter_mask_t symbol__parent_filter(const struct symbol *parent)
 {
 	if (symbol_conf.exclude_other && parent == NULL)
 		return 1 << HIST_FILTER__PARENT;
 	return 0;
 }
 
-static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
+static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period, u64 latency)
 {
 	if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain)
 		return;
 
 	he->hists->callchain_period += period;
-	if (!he->filtered)
+	he->hists->callchain_latency += latency;
+	if (!he->filtered) {
 		he->hists->callchain_non_filtered_period += period;
+		he->hists->callchain_non_filtered_latency += latency;
+	}
 }
 
 static struct hist_entry *hists__findnew_entry(struct hists *hists,
@@ -597,6 +686,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
 	struct hist_entry *he;
 	int64_t cmp;
 	u64 period = entry->stat.period;
+	u64 latency = entry->stat.latency;
 	bool leftmost = true;
 
 	p = &hists->entries_in->rb_root.rb_node;
@@ -615,16 +705,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
 		if (!cmp) {
 			if (sample_self) {
 				he_stat__add_stat(&he->stat, &entry->stat);
-				hist_entry__add_callchain_period(he, period);
+				hist_entry__add_callchain_period(he, period, latency);
 			}
 			if (symbol_conf.cumulate_callchain)
-				he_stat__add_period(he->stat_acc, period);
-
-			/*
-			 * This mem info was allocated from sample__resolve_mem
-			 * and will not be used anymore.
-			 */
-			mem_info__zput(entry->mem_info);
+				he_stat__add_period(he->stat_acc, period, latency);
 
 			block_info__delete(entry->block_info);
 
@@ -636,7 +720,12 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
 			 * mis-adjust symbol addresses when computing
 			 * the history counter to increment.
 			 */
-			if (he->ms.map != entry->ms.map) {
+			if (hists__has(hists, sym) && he->ms.map != entry->ms.map) {
+				if (he->ms.sym) {
+					u64 addr = he->ms.sym->start;
+					he->ms.sym = map__find_symbol(entry->ms.map, addr);
+				}
+
 				map__put(he->ms.map);
 				he->ms.map = map__get(entry->ms.map);
 			}
@@ -656,7 +745,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
 		return NULL;
 
 	if (sample_self)
-		hist_entry__add_callchain_period(he, period);
+		hist_entry__add_callchain_period(he, period, latency);
 	hists->nr_entries++;
 
 	rb_link_node(&he->rb_node_in, parent, p);
@@ -666,6 +755,10 @@ out:
 		he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
 	if (symbol_conf.cumulate_callchain)
 		he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
+	if (hists__update_mem_stat(hists, he, entry->mem_info, period) < 0) {
+		hist_entry__delete(he);
+		return NULL;
+	}
 	return he;
 }
 
@@ -728,18 +821,20 @@ __hists__add_entry(struct hists *hists,
 		.ip	 = al->addr,
 		.level	 = al->level,
 		.code_page_size = sample->code_page_size,
+		.parallelism	= al->parallelism,
 		.stat = {
 			.nr_events = 1,
 			.period	= sample->period,
 			.weight1 = sample->weight,
 			.weight2 = sample->ins_lat,
-			.weight3 = sample->p_stage_cyc,
+			.weight3 = sample->weight3,
+			.latency = al->latency,
 		},
 		.parent = sym_parent,
 		.filtered = symbol__parent_filter(sym_parent) | al->filtered,
 		.hists	= hists,
 		.branch_info = bi,
-		.mem_info = mem_info__get(mi),
+		.mem_info = mi,
 		.kvm_info = ki,
 		.block_info = block_info,
 		.transaction = sample->transaction,
@@ -749,7 +844,7 @@ __hists__add_entry(struct hists *hists,
 		.time = hist_time(sample->time),
 		.weight = sample->weight,
 		.ins_lat = sample->ins_lat,
-		.p_stage_cyc = sample->p_stage_cyc,
+		.weight3 = sample->weight3,
 		.simd_flags = sample->simd_flags,
 	}, *he = hists__findnew_entry(hists, &entry, al, sample_self);
 
@@ -962,18 +1057,33 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
 	if (he == NULL)
 		return -ENOMEM;
 
-	hists__inc_nr_samples(hists, he->filtered);
-
 out:
 	iter->he = he;
 	iter->curr++;
 	return err;
 }
 
+static void branch_info__exit(struct branch_info *bi)
+{
+	map_symbol__exit(&bi->from.ms);
+	map_symbol__exit(&bi->to.ms);
+	zfree_srcline(&bi->srcline_from);
+	zfree_srcline(&bi->srcline_to);
+}
+
 static int
 iter_finish_branch_entry(struct hist_entry_iter *iter,
 			 struct addr_location *al __maybe_unused)
 {
+	struct evsel *evsel = iter->evsel;
+	struct hists *hists = evsel__hists(evsel);
+
+	for (int i = 0; i < iter->total; i++)
+		branch_info__exit(&iter->bi[i]);
+
+	if (iter->he)
+		hists__inc_nr_samples(hists, iter->he->filtered);
+
 	zfree(&iter->bi);
 	iter->he = NULL;
 
@@ -1271,19 +1381,35 @@ out:
 	return err;
 }
 
-int64_t
-hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+static int64_t
+hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left,
+		     struct hist_entry *right, unsigned long fn_offset,
+		     bool ignore_dynamic, bool ignore_skipped)
 {
 	struct hists *hists = left->hists;
 	struct perf_hpp_fmt *fmt;
-	int64_t cmp = 0;
+	perf_hpp_fmt_cmp_t *fn;
+	int64_t cmp;
 
-	hists__for_each_sort_list(hists, fmt) {
-		if (perf_hpp__is_dynamic_entry(fmt) &&
+	/*
	 * Never collapse filtered and non-filtered entries.
	 * Note this is not the same as having an extra (invisible) fmt
	 * that corresponds to the filtered status.
	 */
+	cmp = (int64_t)!!left->filtered - (int64_t)!!right->filtered;
+	if (cmp)
+		return cmp;
+
+	perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+		if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) &&
 		    !perf_hpp__defined_dynamic_entry(fmt, hists))
 			continue;
 
-		cmp = fmt->cmp(fmt, left, right);
+		if (ignore_skipped && perf_hpp__should_skip(fmt, hists))
+			continue;
+
+		fn = (void *)fmt + fn_offset;
+		cmp = (*fn)(fmt, left, right);
 		if (cmp)
 			break;
 	}
@@ -1292,37 +1418,54 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 }
 
 int64_t
-hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	struct hists *hists = left->hists;
-	struct perf_hpp_fmt *fmt;
-	int64_t cmp = 0;
+	return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
+				    offsetof(struct perf_hpp_fmt, cmp), true, false);
+}
 
-	hists__for_each_sort_list(hists, fmt) {
-		if (perf_hpp__is_dynamic_entry(fmt) &&
-		    !perf_hpp__defined_dynamic_entry(fmt, hists))
-			continue;
+static int64_t
+hist_entry__sort(struct hist_entry *left, struct hist_entry *right)
+{
+	return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
+				    offsetof(struct perf_hpp_fmt, sort), false, true);
+}
 
-		cmp = fmt->collapse(fmt, left, right);
-		if (cmp)
-			break;
-	}
+int64_t
+hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
+				    offsetof(struct perf_hpp_fmt, collapse), true, false);
+}
 
-	return cmp;
+static int64_t
+hist_entry__collapse_hierarchy(struct perf_hpp_list *hpp_list,
+			       struct hist_entry *left,
+			       struct hist_entry *right)
+{
+	return hist_entry__cmp_impl(hpp_list, left, right,
+				    offsetof(struct perf_hpp_fmt, collapse), false, false);
 }
 
 void hist_entry__delete(struct hist_entry *he)
 {
 	struct hist_entry_ops *ops = he->ops;
 
+	if (symbol_conf.report_hierarchy) {
+		struct rb_root *root = &he->hroot_out.rb_root;
+		struct hist_entry *child, *tmp;
+
+		rbtree_postorder_for_each_entry_safe(child, tmp, root, rb_node)
+			hist_entry__delete(child);
+
+		*root = RB_ROOT;
+	}
+
 	thread__zput(he->thread);
 	map_symbol__exit(&he->ms);
 
 	if (he->branch_info) {
-		map_symbol__exit(&he->branch_info->from.ms);
-		map_symbol__exit(&he->branch_info->to.ms);
-		zfree_srcline(&he->branch_info->srcline_from);
-		zfree_srcline(&he->branch_info->srcline_to);
+		branch_info__exit(he->branch_info);
 		zfree(&he->branch_info);
 	}
@@ -1346,6 +1489,7 @@ void hist_entry__delete(struct hist_entry *he)
 	free_callchain(he->callchain);
 	zfree(&he->trace_output);
 	zfree(&he->raw_data);
+	zfree(&he->mem_stat);
 	ops->free(he);
 }
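(Editorial aside, not part of the patch: hist_entry__cmp_impl above folds three near-identical comparison loops into one by selecting the comparator member at runtime via offsetof. A minimal standalone sketch of that pattern, with hypothetical types and comparators invented for illustration:)

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef int64_t (*cmp_fn)(int a, int b);

    struct fmt_ops {
            cmp_fn cmp;      /* primary comparison */
            cmp_fn collapse; /* comparison used when merging entries */
    };

    static int64_t by_value(int a, int b) { return (int64_t)a - b; }
    static int64_t never_equal(int a, int b) { (void)a; (void)b; return 1; }

    /* Pick a function-pointer member from its byte offset within the struct,
     * exactly as hist_entry__cmp_impl does with fn = (void *)fmt + fn_offset. */
    static int64_t compare_via(struct fmt_ops *ops, size_t fn_offset, int a, int b)
    {
            cmp_fn *fn = (cmp_fn *)((char *)ops + fn_offset);

            return (*fn)(a, b);
    }

    int main(void)
    {
            struct fmt_ops ops = { .cmp = by_value, .collapse = never_equal };

            printf("%lld\n", (long long)compare_via(&ops, offsetof(struct fmt_ops, cmp), 3, 5));
            printf("%lld\n", (long long)compare_via(&ops, offsetof(struct fmt_ops, collapse), 3, 3));
            return 0;
    }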
@@ -1408,6 +1552,10 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he,
 		if (symbol_conf.sym_list == NULL)
 			return;
 		break;
+	case HIST_FILTER__PARALLELISM:
+		if (__bitmap_weight(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1) == 0)
+			return;
+		break;
 	case HIST_FILTER__PARENT:
 	case HIST_FILTER__GUEST:
 	case HIST_FILTER__HOST:
@@ -1466,6 +1614,9 @@ static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
 	hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL,
 					    perf_hpp__is_sym_entry);
 
+	hist_entry__check_and_remove_filter(he, HIST_FILTER__PARALLELISM,
+					    perf_hpp__is_parallelism_entry);
+
 	hists__apply_filters(he->hists, he);
 }
 
@@ -1485,16 +1636,10 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
 	while (*p != NULL) {
 		parent = *p;
 		iter = rb_entry(parent, struct hist_entry, rb_node_in);
-
-		cmp = 0;
-		perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
-			cmp = fmt->collapse(fmt, iter, he);
-			if (cmp)
-				break;
-		}
-
+		cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he);
 		if (!cmp) {
 			he_stat__add_stat(&iter->stat, &he->stat);
+			hists__add_mem_stat(hists, iter, he);
 			return iter;
 		}
 
@@ -1536,6 +1681,11 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
 		new->srcfile = NULL;
 	}
 
+	if (hists__clone_mem_stat(hists, new, he) < 0) {
+		hist_entry__delete(new);
+		return NULL;
+	}
+
 	rb_link_node(&new->rb_node_in, parent, p);
 	rb_insert_color_cached(&new->rb_node_in, root, leftmost);
 	return new;
@@ -1618,6 +1768,7 @@ static int hists__collapse_insert_entry(struct hists *hists,
 			he_stat__add_stat(&iter->stat, &he->stat);
 			if (symbol_conf.cumulate_callchain)
 				he_stat__add_stat(iter->stat_acc, he->stat_acc);
+			hists__add_mem_stat(hists, iter, he);
 
 			if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
 				struct callchain_cursor *cursor = get_tls_callchain_cursor();
@@ -1669,6 +1820,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
 	hists__filter_entry_by_thread(hists, he);
 	hists__filter_entry_by_symbol(hists, he);
 	hists__filter_entry_by_socket(hists, he);
+	hists__filter_entry_by_parallelism(hists, he);
 }
 
 int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
@@ -1712,34 +1864,18 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
 	return 0;
 }
 
-static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
-{
-	struct hists *hists = a->hists;
-	struct perf_hpp_fmt *fmt;
-	int64_t cmp = 0;
-
-	hists__for_each_sort_list(hists, fmt) {
-		if (perf_hpp__should_skip(fmt, a->hists))
-			continue;
-
-		cmp = fmt->sort(fmt, a, b);
-		if (cmp)
-			break;
-	}
-
-	return cmp;
-}
-
 static void hists__reset_filter_stats(struct hists *hists)
 {
 	hists->nr_non_filtered_entries = 0;
 	hists->stats.total_non_filtered_period = 0;
+	hists->stats.total_non_filtered_latency = 0;
 }
 
 void hists__reset_stats(struct hists *hists)
 {
 	hists->nr_entries = 0;
 	hists->stats.total_period = 0;
+	hists->stats.total_latency = 0;
 
 	hists__reset_filter_stats(hists);
 }
@@ -1748,6 +1884,7 @@ static void hists__inc_filter_stats(struct hists *hists, struct hist_entry *h)
 {
 	hists->nr_non_filtered_entries++;
 	hists->stats.total_non_filtered_period += h->stat.period;
+	hists->stats.total_non_filtered_latency += h->stat.latency;
 }
 
 void hists__inc_stats(struct hists *hists, struct hist_entry *h)
@@ -1757,6 +1894,7 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h)
 
 	hists->nr_entries++;
 	hists->stats.total_period += h->stat.period;
+	hists->stats.total_latency += h->stat.latency;
 }
 
 static void hierarchy_recalc_total_periods(struct hists *hists)
@@ -1768,6 +1906,8 @@ static void hierarchy_recalc_total_periods(struct hists *hists)
 
 	hists->stats.total_period = 0;
 	hists->stats.total_non_filtered_period = 0;
+	hists->stats.total_latency = 0;
+	hists->stats.total_non_filtered_latency = 0;
 
 	/*
	 * recalculate total period using top-level entries only
@@ -1779,8 +1919,11 @@ static void hierarchy_recalc_total_periods(struct hists *hists)
 		node = rb_next(node);
 
 		hists->stats.total_period += he->stat.period;
-		if (!he->filtered)
+		hists->stats.total_latency += he->stat.latency;
+		if (!he->filtered) {
 			hists->stats.total_non_filtered_period += he->stat.period;
+			hists->stats.total_non_filtered_latency += he->stat.latency;
+		}
 	}
 }
 
@@ -2173,6 +2316,16 @@ static bool hists__filter_entry_by_socket(struct hists *hists,
 	return false;
 }
 
+static bool hists__filter_entry_by_parallelism(struct hists *hists,
+					       struct hist_entry *he)
+{
+	if (test_bit(he->parallelism, hists->parallelism_filter)) {
+		he->filtered |= (1 << HIST_FILTER__PARALLELISM);
+		return true;
+	}
+	return false;
+}
+
 typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he);
 
 static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter)
@@ -2342,6 +2495,16 @@ void hists__filter_by_socket(struct hists *hists)
 			      hists__filter_entry_by_socket);
 }
 
+void hists__filter_by_parallelism(struct hists *hists)
+{
+	if (symbol_conf.report_hierarchy)
+		hists__filter_hierarchy(hists, HIST_FILTER__PARALLELISM,
+					hists->parallelism_filter);
+	else
+		hists__filter_by_type(hists, HIST_FILTER__PARALLELISM,
+				      hists__filter_entry_by_parallelism);
+}
+
 void events_stats__inc(struct events_stats *stats, u32 type)
 {
 	++stats->nr_events[0];
@@ -2370,6 +2533,11 @@ void hists__inc_nr_lost_samples(struct hists *hists, u32 lost)
 	hists->stats.nr_lost_samples += lost;
 }
 
+void hists__inc_nr_dropped_samples(struct hists *hists, u32 lost)
+{
+	hists->stats.nr_dropped_samples += lost;
+}
+
 static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
 						 struct hist_entry *pair)
 {
@@ -2426,21 +2594,15 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
 	struct rb_node **p;
 	struct rb_node *parent = NULL;
 	struct hist_entry *he;
-	struct perf_hpp_fmt *fmt;
 	bool leftmost = true;
 
 	p = &root->rb_root.rb_node;
 	while (*p != NULL) {
-		int64_t cmp = 0;
+		int64_t cmp;
 
 		parent = *p;
 		he = rb_entry(parent, struct hist_entry, rb_node_in);
-
-		perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
-			cmp = fmt->collapse(fmt, he, pair);
-			if (cmp)
-				break;
-		}
+		cmp = hist_entry__collapse_hierarchy(he->hpp_list, he, pair);
 
 		if (!cmp)
 			goto out;
@@ -2498,16 +2660,10 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *roo
 	while (n) {
 		struct hist_entry *iter;
-		struct perf_hpp_fmt *fmt;
-		int64_t cmp = 0;
+		int64_t cmp;
 
 		iter = rb_entry(n, struct hist_entry, rb_node_in);
-		perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
-			cmp = fmt->collapse(fmt, iter, he);
-			if (cmp)
-				break;
-		}
-
+		cmp = hist_entry__collapse_hierarchy(he->hpp_list, iter, he);
 		if (cmp < 0)
 			n = n->rb_left;
 		else if (cmp > 0)
@@ -2667,7 +2823,7 @@ int hists__unlink(struct hists *hists)
 
 void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
 			  struct perf_sample *sample, bool nonany_branch_mode,
-			  u64 *total_cycles)
+			  u64 *total_cycles, struct evsel *evsel)
 {
 	struct branch_info *bi;
 	struct branch_entry *entries = perf_sample__branch_entries(sample);
@@ -2691,7 +2847,8 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
 	for (int i = bs->nr - 1; i >= 0; i--) {
 		addr_map_symbol__account_cycles(&bi[i].from,
 						nonany_branch_mode ? NULL : prev,
-						bi[i].flags.cycles);
+						bi[i].flags.cycles, evsel,
+						bi[i].branch_stack_cntr);
 		prev = &bi[i].to;
 
 		if (total_cycles)
@@ -2706,25 +2863,31 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
 	}
 }
 
-size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp,
-				 bool skip_empty)
+size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp)
 {
 	struct evsel *pos;
 	size_t ret = 0;
 
 	evlist__for_each_entry(evlist, pos) {
 		struct hists *hists = evsel__hists(pos);
+		u64 total_samples = hists->stats.nr_samples;
 
-		if (skip_empty && !hists->stats.nr_samples && !hists->stats.nr_lost_samples)
+		total_samples += hists->stats.nr_lost_samples;
+		total_samples += hists->stats.nr_dropped_samples;
+
+		if (symbol_conf.skip_empty && total_samples == 0)
 			continue;
 
 		ret += fprintf(fp, "%s stats:\n", evsel__name(pos));
 		if (hists->stats.nr_samples)
-			ret += fprintf(fp, "%16s events: %10d\n",
+			ret += fprintf(fp, "%20s events: %10d\n",
 				       "SAMPLE", hists->stats.nr_samples);
 		if (hists->stats.nr_lost_samples)
-			ret += fprintf(fp, "%16s events: %10d\n",
+			ret += fprintf(fp, "%20s events: %10d\n",
 				       "LOST_SAMPLES", hists->stats.nr_lost_samples);
+		if (hists->stats.nr_dropped_samples)
+			ret += fprintf(fp, "%20s events: %10d\n",
+				       "LOST_SAMPLES (BPF)", hists->stats.nr_dropped_samples);
 	}
 
 	return ret;
@@ -2737,6 +2900,12 @@ u64 hists__total_period(struct hists *hists)
 		hists->stats.total_period;
 }
 
+u64 hists__total_latency(struct hists *hists)
+{
+	return symbol_conf.filter_relative ? hists->stats.total_non_filtered_latency :
+		hists->stats.total_latency;
+}
+
 int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq)
 {
 	char unit;
@@ -2848,6 +3017,7 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
 	hists->entries = RB_ROOT_CACHED;
 	mutex_init(&hists->lock);
 	hists->socket_filter = -1;
+	hists->parallelism_filter = symbol_conf.parallelism_filter;
 	hists->hpp_list = hpp_list;
 	INIT_LIST_HEAD(&hists->hpp_formats);
 	return 0;
@@ -2882,6 +3052,8 @@ static void hists_evsel__exit(struct evsel *evsel)
 	struct perf_hpp_list_node *node, *tmp;
 
 	hists__delete_all_entries(hists);
+	zfree(&hists->mem_stat_types);
+	zfree(&hists->mem_stat_total);
 
 	list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) {
 		perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) {
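(Editorial aside, not part of the patch: the latency fields threaded through hist.c above accumulate, per the he_stat documentation added in hist.h below, the sample period divided by the parallelism observed at sample time, so time spent in highly parallel phases contributes proportionally less to wall-clock latency. A toy calculation with made-up numbers:)

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* Hypothetical samples: {period, parallelism at sample time}. */
            struct { uint64_t period; unsigned parallelism; } samples[] = {
                    { 1000, 1 },    /* single-threaded phase: full latency impact   */
                    { 1000, 4 },    /* 4 CPUs busy: only 1/4 of the wall-clock time */
                    { 1000, 4 },
            };
            uint64_t total_period = 0, total_latency = 0;

            for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                    total_period += samples[i].period;
                    /* Period re-scaled from CPU time to wall-clock time. */
                    total_latency += samples[i].period / samples[i].parallelism;
            }
            printf("period=%llu latency=%llu\n",
                   (unsigned long long)total_period, (unsigned long long)total_latency);
            return 0;
    }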
+118,13 @@ struct hists { u64 nr_non_filtered_entries; u64 callchain_period; u64 callchain_non_filtered_period; + u64 callchain_latency; + u64 callchain_non_filtered_latency; struct thread *thread_filter; const struct dso *dso_filter; const char *uid_filter_str; const char *symbol_filter_str; + unsigned long *parallelism_filter; struct mutex lock; struct hists_stats stats; u64 event_stream; @@ -114,6 +134,9 @@ struct hists { struct perf_hpp_list *hpp_list; struct list_head hpp_formats; int nr_hpp_node; + int nr_mem_stats; + enum mem_stat_type *mem_stat_types; + struct he_mem_stat *mem_stat_total; }; #define hists__has(__h, __f) (__h)->hpp_list->__f @@ -161,6 +184,12 @@ struct res_sample { struct he_stat { u64 period; + /* + * Period re-scaled from CPU time to wall-clock time (divided by the + * parallelism at the time of the sample). This represents effect of + * the event on latency rather than CPU consumption. + */ + u64 latency; u64 period_sys; u64 period_us; u64 period_guest_sys; @@ -215,6 +244,7 @@ struct hist_entry { } pairs; struct he_stat stat; struct he_stat *stat_acc; + struct he_mem_stat *mem_stat; struct map_symbol ms; struct thread *thread; struct comm *comm; @@ -222,15 +252,17 @@ struct hist_entry { u64 cgroup; u64 ip; u64 transaction; - s32 socket; - s32 cpu; u64 code_page_size; u64 weight; u64 ins_lat; - u64 p_stage_cyc; + /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */ + u64 weight3; + s32 socket; + s32 cpu; + int parallelism; + int mem_type_off; u8 cpumode; u8 depth; - int mem_type_off; struct simd_flags simd_flags; /* We are added by hists__add_dummy_entry. */ @@ -238,7 +270,7 @@ struct hist_entry { bool leaf; char level; - u8 filtered; + filter_mask_t filtered; u16 callchain_size; union { @@ -338,8 +370,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, struct perf_hpp; struct perf_hpp_fmt; -int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); -int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__transaction_len(void); int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, struct hists *hists); @@ -366,27 +396,30 @@ void hists__output_recalc_col_len(struct hists *hists, int max_rows); struct hist_entry *hists__get_entry(struct hists *hists, int idx); u64 hists__total_period(struct hists *hists); +u64 hists__total_latency(struct hists *hists); void hists__reset_stats(struct hists *hists); void hists__inc_stats(struct hists *hists, struct hist_entry *h); void hists__inc_nr_events(struct hists *hists); void hists__inc_nr_samples(struct hists *hists, bool filtered); void hists__inc_nr_lost_samples(struct hists *hists, u32 lost); +void hists__inc_nr_dropped_samples(struct hists *hists, u32 lost); size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, int max_cols, float min_pcnt, FILE *fp, bool ignore_callchains); -size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp, - bool skip_empty); +size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp); void hists__filter_by_dso(struct hists *hists); void hists__filter_by_thread(struct hists *hists); void hists__filter_by_symbol(struct hists *hists); void hists__filter_by_socket(struct hists *hists); +void hists__filter_by_parallelism(struct hists *hists); static inline bool hists__has_filter(struct hists *hists) { return hists->thread_filter || hists->dso_filter || - hists->symbol_filter_str || (hists->socket_filter > -1); + 
hists->symbol_filter_str || (hists->socket_filter > -1) || + hists->parallelism_filter; } u16 hists__col_len(struct hists *hists, enum hist_column col); @@ -448,6 +481,9 @@ struct perf_hpp { bool skip; }; +typedef int64_t (*perf_hpp_fmt_cmp_t)( + struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *); + struct perf_hpp_fmt { const char *name; int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, @@ -459,12 +495,9 @@ struct perf_hpp_fmt { struct hist_entry *he); int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he); - int64_t (*cmp)(struct perf_hpp_fmt *fmt, - struct hist_entry *a, struct hist_entry *b); - int64_t (*collapse)(struct perf_hpp_fmt *fmt, - struct hist_entry *a, struct hist_entry *b); - int64_t (*sort)(struct perf_hpp_fmt *fmt, - struct hist_entry *a, struct hist_entry *b); + perf_hpp_fmt_cmp_t cmp; + perf_hpp_fmt_cmp_t collapse; + perf_hpp_fmt_cmp_t sort; bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); void (*free)(struct perf_hpp_fmt *fmt); @@ -545,27 +578,37 @@ extern struct perf_hpp_fmt perf_hpp__format[]; enum { /* Matches perf_hpp__format array. */ PERF_HPP__OVERHEAD, + PERF_HPP__LATENCY, PERF_HPP__OVERHEAD_SYS, PERF_HPP__OVERHEAD_US, PERF_HPP__OVERHEAD_GUEST_SYS, PERF_HPP__OVERHEAD_GUEST_US, PERF_HPP__OVERHEAD_ACC, + PERF_HPP__LATENCY_ACC, PERF_HPP__SAMPLES, PERF_HPP__PERIOD, PERF_HPP__WEIGHT1, PERF_HPP__WEIGHT2, PERF_HPP__WEIGHT3, + PERF_HPP__MEM_STAT_OP, + PERF_HPP__MEM_STAT_CACHE, + PERF_HPP__MEM_STAT_MEMORY, + PERF_HPP__MEM_STAT_SNOOP, + PERF_HPP__MEM_STAT_DTLB, PERF_HPP__MAX_INDEX }; void perf_hpp__init(void); -void perf_hpp__cancel_cumulate(void); +void perf_hpp__cancel_cumulate(struct evlist *evlist); +void perf_hpp__cancel_latency(struct evlist *evlist); void perf_hpp__setup_output_field(struct perf_hpp_list *list); void perf_hpp__reset_output_field(struct perf_hpp_list *list); void perf_hpp__append_sort_keys(struct perf_hpp_list *list); int perf_hpp__setup_hists_formats(struct perf_hpp_list *list, struct evlist *evlist); +int perf_hpp__alloc_mem_stats(struct perf_hpp_list *list, + struct evlist *evlist); bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); @@ -578,6 +621,7 @@ bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt); bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_parallelism_entry(struct perf_hpp_fmt *fmt); struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt); @@ -604,6 +648,7 @@ void hists__reset_column_width(struct hists *hists); enum perf_hpp_fmt_type { PERF_HPP_FMT_TYPE__RAW, PERF_HPP_FMT_TYPE__PERCENT, + PERF_HPP_FMT_TYPE__LATENCY, PERF_HPP_FMT_TYPE__AVERAGE, }; @@ -619,6 +664,9 @@ int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he, hpp_field_fn get_field, const char *fmtstr, hpp_snprint_fn print_fn, enum perf_hpp_fmt_type fmtype); +int hpp__fmt_mem_stat(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he, enum mem_stat_type mst, + const char *fmtstr, hpp_snprint_fn print_fn); static inline void advance_hpp(struct perf_hpp *hpp, int inc) { @@ -661,15 +709,18 @@ struct block_hist { struct hist_entry he; }; +#define NO_ADDR 0 + #ifdef HAVE_SLANG_SUPPORT #include "../ui/keysyms.h" void attr_to_script(char *buf, struct perf_event_attr *attr); -int map_symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt); +int 
__hist_entry__tui_annotate(struct hist_entry *he, struct map_symbol *ms, + struct evsel *evsel, + struct hist_browser_timer *hbt, u64 al_addr); int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, - struct hist_browser_timer *hbt); + struct hist_browser_timer *hbt, u64 al_addr); int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, bool warn_lost_event); @@ -694,16 +745,19 @@ int evlist__tui_browse_hists(struct evlist *evlist __maybe_unused, { return 0; } -static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused, - struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused) +static inline int __hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, + struct map_symbol *ms __maybe_unused, + struct evsel *evsel __maybe_unused, + struct hist_browser_timer *hbt __maybe_unused, + u64 al_addr __maybe_unused) { return 0; } static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused, + u64 al_addr __maybe_unused) { return 0; } @@ -743,7 +797,7 @@ unsigned int hists__overhead_width(struct hists *hists); void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, struct perf_sample *sample, bool nonany_branch_mode, - u64 *total_cycles); + u64 *total_cycles, struct evsel *evsel); struct option; int parse_filter_percentage(const struct option *opt, const char *arg, int unset); diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c new file mode 100644 index 000000000000..279d6b1a47f0 --- /dev/null +++ b/tools/perf/util/hwmon_pmu.c @@ -0,0 +1,835 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "counts.h" +#include "debug.h" +#include "evsel.h" +#include "hashmap.h" +#include "hwmon_pmu.h" +#include "pmu.h" +#include <internal/xyarray.h> +#include <internal/threadmap.h> +#include <perf/threadmap.h> +#include <sys/types.h> +#include <assert.h> +#include <ctype.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <api/fs/fs.h> +#include <api/io.h> +#include <api/io_dir.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/zalloc.h> + +/** Strings that correspond to enum hwmon_type. */ +static const char * const hwmon_type_strs[HWMON_TYPE_MAX] = { + NULL, + "cpu", + "curr", + "energy", + "fan", + "humidity", + "in", + "intrusion", + "power", + "pwm", + "temp", +}; +#define LONGEST_HWMON_TYPE_STR "intrusion" + +/** Strings that correspond to enum hwmon_item. 
*/ +static const char * const hwmon_item_strs[HWMON_ITEM__MAX] = { + NULL, + "accuracy", + "alarm", + "auto_channels_temp", + "average", + "average_highest", + "average_interval", + "average_interval_max", + "average_interval_min", + "average_lowest", + "average_max", + "average_min", + "beep", + "cap", + "cap_hyst", + "cap_max", + "cap_min", + "crit", + "crit_hyst", + "div", + "emergency", + "emergency_hist", + "enable", + "fault", + "freq", + "highest", + "input", + "label", + "lcrit", + "lcrit_hyst", + "lowest", + "max", + "max_hyst", + "min", + "min_hyst", + "mod", + "offset", + "pulses", + "rated_max", + "rated_min", + "reset_history", + "target", + "type", + "vid", +}; +#define LONGEST_HWMON_ITEM_STR "average_interval_max" + +static const char *const hwmon_units[HWMON_TYPE_MAX] = { + NULL, + "V", /* cpu */ + "A", /* curr */ + "J", /* energy */ + "rpm", /* fan */ + "%", /* humidity */ + "V", /* in */ + "", /* intrusion */ + "W", /* power */ + "Hz", /* pwm */ + "'C", /* temp */ +}; + +struct hwmon_pmu { + struct perf_pmu pmu; + struct hashmap events; + char *hwmon_dir; +}; + +/** + * struct hwmon_pmu_event_value: Value in hwmon_pmu->events. + * + * Hwmon files are of the form <type><number>_<item> and may have a suffix + * _alarm. + */ +struct hwmon_pmu_event_value { + /** @items: which item files are present. */ + DECLARE_BITMAP(items, HWMON_ITEM__MAX); + /** @alarm_items: which item files are present. */ + DECLARE_BITMAP(alarm_items, HWMON_ITEM__MAX); + /** @label: contents of <type><number>_label if present. */ + char *label; + /** @name: name computed from label of the form <type>_<label>. */ + char *name; +}; + +bool perf_pmu__is_hwmon(const struct perf_pmu *pmu) +{ + return pmu && pmu->type >= PERF_PMU_TYPE_HWMON_START && + pmu->type <= PERF_PMU_TYPE_HWMON_END; +} + +bool evsel__is_hwmon(const struct evsel *evsel) +{ + return perf_pmu__is_hwmon(evsel->pmu); +} + +static size_t hwmon_pmu__event_hashmap_hash(long key, void *ctx __maybe_unused) +{ + return ((union hwmon_pmu_event_key)key).type_and_num; +} + +static bool hwmon_pmu__event_hashmap_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return ((union hwmon_pmu_event_key)key1).type_and_num == + ((union hwmon_pmu_event_key)key2).type_and_num; +} + +static int hwmon_strcmp(const void *a, const void *b) +{ + const char *sa = a; + const char * const *sb = b; + + return strcmp(sa, *sb); +} + +bool parse_hwmon_filename(const char *filename, + enum hwmon_type *type, + int *number, + enum hwmon_item *item, + bool *alarm) +{ + char fn_type[24]; + const char **elem; + const char *fn_item = NULL; + size_t fn_item_len; + + assert(strlen(LONGEST_HWMON_TYPE_STR) < sizeof(fn_type)); + strlcpy(fn_type, filename, sizeof(fn_type)); + for (size_t i = 0; fn_type[i] != '\0'; i++) { + if (fn_type[i] >= '0' && fn_type[i] <= '9') { + fn_type[i] = '\0'; + *number = strtoul(&filename[i], (char **)&fn_item, 10); + if (*fn_item == '_') + fn_item++; + break; + } + if (fn_type[i] == '_') { + fn_type[i] = '\0'; + *number = -1; + fn_item = &filename[i + 1]; + break; + } + } + if (fn_item == NULL || fn_type[0] == '\0' || (item != NULL && fn_item[0] == '\0')) { + pr_debug3("hwmon_pmu: not a hwmon file '%s'\n", filename); + return false; + } + elem = bsearch(&fn_type, hwmon_type_strs + 1, ARRAY_SIZE(hwmon_type_strs) - 1, + sizeof(hwmon_type_strs[0]), hwmon_strcmp); + if (!elem) { + pr_debug3("hwmon_pmu: not a hwmon type '%s' in file name '%s'\n", + fn_type, filename); + return false; + } + + *type = elem - &hwmon_type_strs[0]; + if (!item) + return 
true; + + *alarm = false; + fn_item_len = strlen(fn_item); + if (fn_item_len > 6 && !strcmp(&fn_item[fn_item_len - 6], "_alarm")) { + assert(strlen(LONGEST_HWMON_ITEM_STR) < sizeof(fn_type)); + strlcpy(fn_type, fn_item, fn_item_len - 5); + fn_item = fn_type; + *alarm = true; + } + elem = bsearch(fn_item, hwmon_item_strs + 1, ARRAY_SIZE(hwmon_item_strs) - 1, + sizeof(hwmon_item_strs[0]), hwmon_strcmp); + if (!elem) { + pr_debug3("hwmon_pmu: not a hwmon item '%s' in file name '%s'\n", + fn_item, filename); + return false; + } + *item = elem - &hwmon_item_strs[0]; + return true; +} + +static void fix_name(char *p) +{ + char *s = strchr(p, '\n'); + + if (s) + *s = '\0'; + + while (*p != '\0') { + if (strchr(" :,/\n\t", *p)) + *p = '_'; + else + *p = tolower(*p); + p++; + } +} + +static int hwmon_pmu__read_events(struct hwmon_pmu *pmu) +{ + int err = 0; + struct hashmap_entry *cur, *tmp; + size_t bkt; + struct io_dirent64 *ent; + struct io_dir dir; + + if (pmu->pmu.sysfs_aliases_loaded) + return 0; + + /* Use openat so that the directory contents are refreshed. */ + io_dir__init(&dir, open(pmu->hwmon_dir, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + + if (dir.dirfd < 0) + return -ENOENT; + + while ((ent = io_dir__readdir(&dir)) != NULL) { + enum hwmon_type type; + int number; + enum hwmon_item item; + bool alarm; + union hwmon_pmu_event_key key = { .type_and_num = 0 }; + struct hwmon_pmu_event_value *value; + + if (ent->d_type != DT_REG) + continue; + + if (!parse_hwmon_filename(ent->d_name, &type, &number, &item, &alarm)) { + pr_debug3("Not a hwmon file '%s'\n", ent->d_name); + continue; + } + key.num = number; + key.type = type; + if (!hashmap__find(&pmu->events, key.type_and_num, &value)) { + value = zalloc(sizeof(*value)); + if (!value) { + err = -ENOMEM; + goto err_out; + } + err = hashmap__add(&pmu->events, key.type_and_num, value); + if (err) { + free(value); + err = -ENOMEM; + goto err_out; + } + } + __set_bit(item, alarm ? 
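parse_hwmon_filename() above is deliberately exposed for testing (see its declaration in hwmon_pmu.h later in this patch). A minimal sketch of how a caller exercises it — the expected enum values follow from the string tables above, and the small harness here is hypothetical, not the one in tools/perf/tests:

	#include <assert.h>
	#include <stdbool.h>
	#include "hwmon_pmu.h"

	static void parse_examples(void)
	{
		enum hwmon_type type;
		enum hwmon_item item;
		bool alarm;
		int num;

		/* "temp1_input": type "temp", instance 1, item "input". */
		assert(parse_hwmon_filename("temp1_input", &type, &num, &item, &alarm));
		assert(type == HWMON_TYPE_TEMP && num == 1);
		assert(item == HWMON_ITEM_INPUT && !alarm);

		/* The "_alarm" suffix is stripped and reported separately. */
		assert(parse_hwmon_filename("fan2_min_alarm", &type, &num, &item, &alarm));
		assert(type == HWMON_TYPE_FAN && num == 2);
		assert(item == HWMON_ITEM_MIN && alarm);
	}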
value->alarm_items : value->items); + if (item == HWMON_ITEM_LABEL) { + char buf[128]; + int fd = openat(dir.dirfd, ent->d_name, O_RDONLY); + ssize_t read_len; + + if (fd < 0) + continue; + + read_len = read(fd, buf, sizeof(buf)); + + while (read_len > 0 && buf[read_len - 1] == '\n') + read_len--; + + if (read_len > 0) + buf[read_len] = '\0'; + + if (buf[0] == '\0') { + pr_debug("hwmon_pmu: empty label file %s %s\n", + pmu->pmu.name, ent->d_name); + close(fd); + continue; + } + value->label = strdup(buf); + if (!value->label) { + pr_debug("hwmon_pmu: memory allocation failure\n"); + close(fd); + continue; + } + snprintf(buf, sizeof(buf), "%s_%s", hwmon_type_strs[type], value->label); + fix_name(buf); + value->name = strdup(buf); + if (!value->name) + pr_debug("hwmon_pmu: memory allocation failure\n"); + close(fd); + } + } + if (hashmap__size(&pmu->events) == 0) + pr_debug2("hwmon_pmu: %s has no events\n", pmu->pmu.name); + + hashmap__for_each_entry_safe((&pmu->events), cur, tmp, bkt) { + union hwmon_pmu_event_key key = { + .type_and_num = cur->key, + }; + struct hwmon_pmu_event_value *value = cur->pvalue; + + if (!test_bit(HWMON_ITEM_INPUT, value->items)) { + pr_debug("hwmon_pmu: %s removing event '%s%d' that has no input file\n", + pmu->pmu.name, hwmon_type_strs[key.type], key.num); + hashmap__delete(&pmu->events, key.type_and_num, &key, &value); + zfree(&value->label); + zfree(&value->name); + free(value); + } + } + pmu->pmu.sysfs_aliases_loaded = true; + +err_out: + close(dir.dirfd); + return err; +} + +struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, const char *hwmon_dir, + const char *sysfs_name, const char *name) +{ + char buf[64]; + struct hwmon_pmu *hwm; + __u32 type = PERF_PMU_TYPE_HWMON_START + strtoul(sysfs_name + 5, NULL, 10); + + if (type > PERF_PMU_TYPE_HWMON_END) { + pr_err("Unable to encode hwmon type from %s in valid PMU type\n", sysfs_name); + return NULL; + } + + snprintf(buf, sizeof(buf), "hwmon_%s", name); + fix_name(buf + 6); + + hwm = zalloc(sizeof(*hwm)); + if (!hwm) + return NULL; + + if (perf_pmu__init(&hwm->pmu, type, buf) != 0) { + perf_pmu__delete(&hwm->pmu); + return NULL; + } + + hwm->hwmon_dir = strdup(hwmon_dir); + if (!hwm->hwmon_dir) { + perf_pmu__delete(&hwm->pmu); + return NULL; + } + hwm->pmu.alias_name = strdup(sysfs_name); + if (!hwm->pmu.alias_name) { + perf_pmu__delete(&hwm->pmu); + return NULL; + } + hwm->pmu.cpus = perf_cpu_map__new_int(0); + if (!hwm->pmu.cpus) { + perf_pmu__delete(&hwm->pmu); + return NULL; + } + INIT_LIST_HEAD(&hwm->pmu.format); + INIT_LIST_HEAD(&hwm->pmu.caps); + hashmap__init(&hwm->events, hwmon_pmu__event_hashmap_hash, + hwmon_pmu__event_hashmap_equal, /*ctx=*/NULL); + + list_add_tail(&hwm->pmu.list, pmus); + return &hwm->pmu; +} + +void hwmon_pmu__exit(struct perf_pmu *pmu) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + struct hashmap_entry *cur, *tmp; + size_t bkt; + + hashmap__for_each_entry_safe((&hwm->events), cur, tmp, bkt) { + struct hwmon_pmu_event_value *value = cur->pvalue; + + zfree(&value->label); + zfree(&value->name); + free(value); + } + hashmap__clear(&hwm->events); + zfree(&hwm->hwmon_dir); +} + +static size_t hwmon_pmu__describe_items(struct hwmon_pmu *hwm, char *out_buf, size_t out_buf_len, + union hwmon_pmu_event_key key, + const unsigned long *items, bool is_alarm) +{ + size_t bit; + char buf[64]; + size_t len = 0; + int dir = open(hwm->hwmon_dir, O_CLOEXEC | O_DIRECTORY | O_RDONLY); + + if (dir < 0) + return 0; + + for_each_set_bit(bit, items, HWMON_ITEM__MAX) { + int 
fd; + + if (bit == HWMON_ITEM_LABEL || bit == HWMON_ITEM_INPUT) + continue; + + snprintf(buf, sizeof(buf), "%s%d_%s%s", + hwmon_type_strs[key.type], + key.num, + hwmon_item_strs[bit], + is_alarm ? "_alarm" : ""); + fd = openat(dir, buf, O_RDONLY); + if (fd > 0) { + ssize_t read_len = read(fd, buf, sizeof(buf)); + + while (read_len > 0 && buf[read_len - 1] == '\n') + read_len--; + + if (read_len > 0) { + long long val; + + buf[read_len] = '\0'; + val = strtoll(buf, /*endptr=*/NULL, 10); + len += snprintf(out_buf + len, out_buf_len - len, "%s%s%s=%g%s", + len == 0 ? " " : ", ", + hwmon_item_strs[bit], + is_alarm ? "_alarm" : "", + (double)val / 1000.0, + hwmon_units[key.type]); + } + close(fd); + } + } + close(dir); + return len; +} + +int hwmon_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + struct hashmap_entry *cur; + size_t bkt; + + if (hwmon_pmu__read_events(hwm)) + return false; + + hashmap__for_each_entry((&hwm->events), cur, bkt) { + static const char *const hwmon_scale_units[HWMON_TYPE_MAX] = { + NULL, + "0.001V", /* cpu */ + "0.001A", /* curr */ + "0.001J", /* energy */ + "1rpm", /* fan */ + "0.001%", /* humidity */ + "0.001V", /* in */ + NULL, /* intrusion */ + "0.001W", /* power */ + "1Hz", /* pwm */ + "0.001'C", /* temp */ + }; + static const char *const hwmon_desc[HWMON_TYPE_MAX] = { + NULL, + "CPU core reference voltage", /* cpu */ + "Current", /* curr */ + "Cumulative energy use", /* energy */ + "Fan", /* fan */ + "Humidity", /* humidity */ + "Voltage", /* in */ + "Chassis intrusion detection", /* intrusion */ + "Power use", /* power */ + "Pulse width modulation fan control", /* pwm */ + "Temperature", /* temp */ + }; + char alias_buf[64]; + char desc_buf[256]; + char encoding_buf[128]; + union hwmon_pmu_event_key key = { + .type_and_num = cur->key, + }; + struct hwmon_pmu_event_value *value = cur->pvalue; + struct pmu_event_info info = { + .pmu = pmu, + .name = value->name, + .alias = alias_buf, + .scale_unit = hwmon_scale_units[key.type], + .desc = desc_buf, + .long_desc = NULL, + .encoding_desc = encoding_buf, + .topic = "hwmon", + .pmu_name = pmu->name, + .event_type_desc = "Hwmon event", + }; + int ret; + size_t len; + + len = snprintf(alias_buf, sizeof(alias_buf), "%s%d", + hwmon_type_strs[key.type], key.num); + if (!info.name) { + info.name = info.alias; + info.alias = NULL; + } + + len = snprintf(desc_buf, sizeof(desc_buf), "%s in unit %s named %s.", + hwmon_desc[key.type], + pmu->name + 6, + value->label ?: info.name); + + len += hwmon_pmu__describe_items(hwm, desc_buf + len, sizeof(desc_buf) - len, + key, value->items, /*is_alarm=*/false); + + len += hwmon_pmu__describe_items(hwm, desc_buf + len, sizeof(desc_buf) - len, + key, value->alarm_items, /*is_alarm=*/true); + + snprintf(encoding_buf, sizeof(encoding_buf), "%s/config=0x%lx/", + pmu->name, cur->key); + + ret = cb(state, &info); + if (ret) + return ret; + } + return 0; +} + +size_t hwmon_pmu__num_events(struct perf_pmu *pmu) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + + hwmon_pmu__read_events(hwm); + return hashmap__size(&hwm->events); +} + +bool hwmon_pmu__have_event(struct perf_pmu *pmu, const char *name) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + enum hwmon_type type; + int number; + union hwmon_pmu_event_key key = { .type_and_num = 0 }; + struct hashmap_entry *cur; + size_t bkt; + + if (!parse_hwmon_filename(name, &type, &number, /*item=*/NULL, 
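The config encoding printed above ("%s/config=0x%lx/") is simply the hashmap key: number and type packed into one word by union hwmon_pmu_event_key (defined in hwmon_pmu.h below). A worked example of the packing on a little-endian host — bitfield layout is implementation-defined, which is exactly why the union is exposed to the tests:

	#include <assert.h>
	#include "hwmon_pmu.h"

	static void key_example(void)
	{
		union hwmon_pmu_event_key key = { .type_and_num = 0 };

		key.num = 1;			/* temp1... */
		key.type = HWMON_TYPE_TEMP;	/* ...enum value 10 */

		/* num occupies bits 0-15 and type the next 8 bits, so temp1
		 * encodes as config = (10 << 16) | 1 = 0xa0001. */
		assert(key.type_and_num == 0xa0001);
	}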
/*is_alarm=*/NULL)) + return false; + + if (hwmon_pmu__read_events(hwm)) + return false; + + key.type = type; + key.num = number; + if (hashmap__find(&hwm->events, key.type_and_num, /*value=*/NULL)) + return true; + if (key.num != -1) + return false; + /* Item is of form <type>_ which means we should match <type>_<label>. */ + hashmap__for_each_entry((&hwm->events), cur, bkt) { + struct hwmon_pmu_event_value *value = cur->pvalue; + + key.type_and_num = cur->key; + if (key.type == type && value->name && !strcasecmp(name, value->name)) + return true; + } + return false; +} + +static int hwmon_pmu__config_term(const struct hwmon_pmu *hwm, + struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) +{ + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) { + enum hwmon_type type; + int number; + + if (parse_hwmon_filename(term->config, &type, &number, + /*item=*/NULL, /*is_alarm=*/NULL)) { + if (number == -1) { + /* + * Item is of form <type>_ which means we should + * match <type>_<label>. + */ + struct hashmap_entry *cur; + size_t bkt; + + attr->config = 0; + hashmap__for_each_entry((&hwm->events), cur, bkt) { + union hwmon_pmu_event_key key = { + .type_and_num = cur->key, + }; + struct hwmon_pmu_event_value *value = cur->pvalue; + + if (key.type == type && value->name && + !strcasecmp(term->config, value->name)) { + attr->config = key.type_and_num; + break; + } + } + if (attr->config == 0) + return -EINVAL; + } else { + union hwmon_pmu_event_key key = { + .type_and_num = 0, + }; + + key.type = type; + key.num = number; + attr->config = key.type_and_num; + } + return 0; + } + } + if (err) { + char *err_str; + + parse_events_error__handle(err, term->err_val, + asprintf(&err_str, + "unexpected hwmon event term (%s) %s", + parse_events__term_type_str(term->type_term), + term->config) < 0 + ? strdup("unexpected hwmon event term") + : err_str, + NULL); + } + return -EINVAL; +} + +int hwmon_pmu__config_terms(const struct perf_pmu *pmu, + struct perf_event_attr *attr, + struct parse_events_terms *terms, + struct parse_events_error *err) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + struct parse_events_term *term; + int ret; + + ret = hwmon_pmu__read_events(hwm); + if (ret) + return ret; + + list_for_each_entry(term, &terms->terms, list) { + if (hwmon_pmu__config_term(hwm, attr, term, err)) + return -EINVAL; + } + + return 0; + +} + +int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_info *info, + struct parse_events_error *err) +{ + struct parse_events_term *term = + list_first_entry(&terms->terms, struct parse_events_term, list); + + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) { + enum hwmon_type type; + int number; + + if (parse_hwmon_filename(term->config, &type, &number, + /*item=*/NULL, /*is_alarm=*/NULL)) { + info->unit = hwmon_units[type]; + if (type == HWMON_TYPE_FAN || type == HWMON_TYPE_PWM || + type == HWMON_TYPE_INTRUSION) + info->scale = 1; + else + info->scale = 0.001; + } + return 0; + } + if (err) { + char *err_str; + + parse_events_error__handle(err, term->err_val, + asprintf(&err_str, + "unexpected hwmon event term (%s) %s", + parse_events__term_type_str(term->type_term), + term->config) < 0 + ?
strdup("unexpected hwmon event term") + : err_str, + NULL); + } + return -EINVAL; +} + +int perf_pmus__read_hwmon_pmus(struct list_head *pmus) +{ + char *line = NULL; + struct io_dirent64 *class_hwmon_ent; + struct io_dir class_hwmon_dir; + char buf[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return 0; + + scnprintf(buf, sizeof(buf), "%s/class/hwmon/", sysfs); + io_dir__init(&class_hwmon_dir, open(buf, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + + if (class_hwmon_dir.dirfd < 0) + return 0; + + while ((class_hwmon_ent = io_dir__readdir(&class_hwmon_dir)) != NULL) { + size_t line_len; + int hwmon_dir, name_fd; + struct io io; + char buf2[128]; + + if (class_hwmon_ent->d_type != DT_LNK) + continue; + + scnprintf(buf, sizeof(buf), "%s/class/hwmon/%s", sysfs, class_hwmon_ent->d_name); + hwmon_dir = open(buf, O_DIRECTORY); + if (hwmon_dir == -1) { + pr_debug("hwmon_pmu: not a directory: '%s/class/hwmon/%s'\n", + sysfs, class_hwmon_ent->d_name); + continue; + } + name_fd = openat(hwmon_dir, "name", O_RDONLY); + if (name_fd == -1) { + pr_debug("hwmon_pmu: failure to open '%s/class/hwmon/%s/name'\n", + sysfs, class_hwmon_ent->d_name); + close(hwmon_dir); + continue; + } + io__init(&io, name_fd, buf2, sizeof(buf2)); + if (io__getline(&io, &line, &line_len) > 0 && line[line_len - 1] == '\n') + line[line_len - 1] = '\0'; + hwmon_pmu__new(pmus, buf, class_hwmon_ent->d_name, line); + close(name_fd); + close(hwmon_dir); + } + free(line); + close(class_hwmon_dir.dirfd); + return 0; +} + +#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) + +int evsel__hwmon_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx) +{ + struct hwmon_pmu *hwm = container_of(evsel->pmu, struct hwmon_pmu, pmu); + union hwmon_pmu_event_key key = { + .type_and_num = evsel->core.attr.config, + }; + int idx = 0, thread = 0, nthreads, err = 0; + int dir = open(hwm->hwmon_dir, O_CLOEXEC | O_DIRECTORY | O_RDONLY); + + if (dir < 0) + return -errno; + + nthreads = perf_thread_map__nr(threads); + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { + for (thread = 0; thread < nthreads; thread++) { + char buf[64]; + int fd; + + snprintf(buf, sizeof(buf), "%s%d_input", + hwmon_type_strs[key.type], key.num); + + fd = openat(dir, buf, O_RDONLY); + FD(evsel, idx, thread) = fd; + if (fd < 0) { + err = -errno; + goto out_close; + } + } + } + close(dir); + return 0; +out_close: + if (err) + threads->err_thread = thread; + + do { + while (--thread >= 0) { + if (FD(evsel, idx, thread) >= 0) + close(FD(evsel, idx, thread)); + FD(evsel, idx, thread) = -1; + } + thread = nthreads; + } while (--idx >= 0); + close(dir); + return err; +} + +int evsel__hwmon_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) +{ + char buf[32]; + int fd; + ssize_t len; + struct perf_counts_values *count, *old_count = NULL; + + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + + count = perf_counts(evsel->counts, cpu_map_idx, thread); + fd = FD(evsel, cpu_map_idx, thread); + len = pread(fd, buf, sizeof(buf), 0); + if (len <= 0) { + count->lost++; + return -EINVAL; + } + buf[len] = '\0'; + if (old_count) { + count->val = old_count->val + strtoll(buf, NULL, 10); + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + } else { + count->val = strtoll(buf, NULL, 10); + count->run++; + count->ena++; + } + return 0; +} diff --git a/tools/perf/util/hwmon_pmu.h b/tools/perf/util/hwmon_pmu.h new file mode 
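evsel__hwmon_pmu_read() treats each hwmon input file as a free-running counter: every read is a fresh pread() of the whole value, and with prev_raw_counts set (interval mode) the reading is accumulated so that the per-interval delta printed by perf stat equals the instantaneous sensor value. A stripped-down sketch of the parsing step, with error handling trimmed:

	#include <errno.h>
	#include <stdlib.h>
	#include <unistd.h>

	/* Parse one hwmon *_input file into a value; the real code above
	 * also maintains the run/ena/lost bookkeeping. */
	static int read_hwmon_value(int fd, long long *val)
	{
		char buf[32];
		ssize_t len = pread(fd, buf, sizeof(buf) - 1, 0);

		if (len <= 0)
			return -EINVAL;
		buf[len] = '\0';
		*val = strtoll(buf, NULL, 10);
		return 0;
	}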
100644 index 000000000000..d1e403c8b70b --- /dev/null +++ b/tools/perf/util/hwmon_pmu.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __HWMON_PMU_H +#define __HWMON_PMU_H + +#include "pmu.h" +#include <stdbool.h> + +struct list_head; +struct perf_thread_map; + +/** + * enum hwmon_type: + * + * As described in Documentation/hwmon/sysfs-interface.rst hwmon events are + * defined over multiple files of the form <type><num>_<item>. This enum + * captures potential <type> values. + * + * This enum is exposed for testing. + */ +enum hwmon_type { + HWMON_TYPE_NONE, + + HWMON_TYPE_CPU, + HWMON_TYPE_CURR, + HWMON_TYPE_ENERGY, + HWMON_TYPE_FAN, + HWMON_TYPE_HUMIDITY, + HWMON_TYPE_IN, + HWMON_TYPE_INTRUSION, + HWMON_TYPE_POWER, + HWMON_TYPE_PWM, + HWMON_TYPE_TEMP, + + HWMON_TYPE_MAX +}; + +/** + * enum hwmon_item: + * + * Similar to enum hwmon_type but describes the item part of a sysfs filename. + * + * This enum is exposed for testing. + */ +enum hwmon_item { + HWMON_ITEM_NONE, + + HWMON_ITEM_ACCURACY, + HWMON_ITEM_ALARM, + HWMON_ITEM_AUTO_CHANNELS_TEMP, + HWMON_ITEM_AVERAGE, + HWMON_ITEM_AVERAGE_HIGHEST, + HWMON_ITEM_AVERAGE_INTERVAL, + HWMON_ITEM_AVERAGE_INTERVAL_MAX, + HWMON_ITEM_AVERAGE_INTERVAL_MIN, + HWMON_ITEM_AVERAGE_LOWEST, + HWMON_ITEM_AVERAGE_MAX, + HWMON_ITEM_AVERAGE_MIN, + HWMON_ITEM_BEEP, + HWMON_ITEM_CAP, + HWMON_ITEM_CAP_HYST, + HWMON_ITEM_CAP_MAX, + HWMON_ITEM_CAP_MIN, + HWMON_ITEM_CRIT, + HWMON_ITEM_CRIT_HYST, + HWMON_ITEM_DIV, + HWMON_ITEM_EMERGENCY, + HWMON_ITEM_EMERGENCY_HIST, + HWMON_ITEM_ENABLE, + HWMON_ITEM_FAULT, + HWMON_ITEM_FREQ, + HWMON_ITEM_HIGHEST, + HWMON_ITEM_INPUT, + HWMON_ITEM_LABEL, + HWMON_ITEM_LCRIT, + HWMON_ITEM_LCRIT_HYST, + HWMON_ITEM_LOWEST, + HWMON_ITEM_MAX, + HWMON_ITEM_MAX_HYST, + HWMON_ITEM_MIN, + HWMON_ITEM_MIN_HYST, + HWMON_ITEM_MOD, + HWMON_ITEM_OFFSET, + HWMON_ITEM_PULSES, + HWMON_ITEM_RATED_MAX, + HWMON_ITEM_RATED_MIN, + HWMON_ITEM_RESET_HISTORY, + HWMON_ITEM_TARGET, + HWMON_ITEM_TYPE, + HWMON_ITEM_VID, + + HWMON_ITEM__MAX, +}; + +/** + * union hwmon_pmu_event_key: Key for hwmon_pmu->events as such each key + * represents an event. + * union is exposed for testing to ensure problems are avoided on big + * endian machines. + * + * Related hwmon files start <type><number> that this key represents. + */ +union hwmon_pmu_event_key { + long type_and_num; + struct { + int num :16; + enum hwmon_type type :8; + }; +}; + +bool perf_pmu__is_hwmon(const struct perf_pmu *pmu); +bool evsel__is_hwmon(const struct evsel *evsel); + +/** + * parse_hwmon_filename() - Parse filename into constituent parts. + * + * @filename: To be parsed, of the form <type><number>_<item>. + * @type: The type defined from the parsed file name. + * @number: The number of the type, for example there may be more than 1 fan. + * @item: A hwmon <type><number> may have multiple associated items. + * @alarm: Is the filename for an alarm value? + * + * An example of a hwmon filename is "temp1_input". The type is temp for a + * temperature value. The number is 1. The item within the file is an input + * value - the temperature itself. This file doesn't contain an alarm value. + * + * Exposed for testing. + */ +bool parse_hwmon_filename(const char *filename, + enum hwmon_type *type, + int *number, + enum hwmon_item *item, + bool *alarm); + +/** + * hwmon_pmu__new() - Allocate and construct a hwmon PMU. + * + * @pmus: The list of PMUs to be added to. + * @hwmon_dir: The path to a hwmon directory. 
+ * @sysfs_name: Name of the hwmon sysfs directory like hwmon0. + * @name: The contents of the "name" file in the hwmon directory. + * + * Exposed for testing. Regular construction should happen via + * perf_pmus__read_hwmon_pmus. + */ +struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, const char *hwmon_dir, + const char *sysfs_name, const char *name); +void hwmon_pmu__exit(struct perf_pmu *pmu); + +int hwmon_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb); +size_t hwmon_pmu__num_events(struct perf_pmu *pmu); +bool hwmon_pmu__have_event(struct perf_pmu *pmu, const char *name); +int hwmon_pmu__config_terms(const struct perf_pmu *pmu, + struct perf_event_attr *attr, + struct parse_events_terms *terms, + struct parse_events_error *err); +int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_info *info, + struct parse_events_error *err); + +int perf_pmus__read_hwmon_pmus(struct list_head *pmus); + + +int evsel__hwmon_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx); +int evsel__hwmon_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread); + +#endif /* __HWMON_PMU_H */ diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h index 01fb25a1150a..6f1b9f6b2466 100644 --- a/tools/perf/util/include/dwarf-regs.h +++ b/tools/perf/util/include/dwarf-regs.h @@ -1,42 +1,133 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PERF_DWARF_REGS_H_ #define _PERF_DWARF_REGS_H_ +#include "annotate.h" +#include <elf.h> + +#ifndef EM_AARCH64 +#define EM_AARCH64 183 /* ARM 64 bit */ +#endif + +#ifndef EM_CSKY +#define EM_CSKY 252 /* C-SKY */ +#endif +#ifndef EF_CSKY_ABIV1 +#define EF_CSKY_ABIV1 0X10000000 +#endif +#ifndef EF_CSKY_ABIV2 +#define EF_CSKY_ABIV2 0X20000000 +#endif + +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 /* LoongArch */ +#endif + +/* EM_HOST gives the ELF machine for host, EF_HOST gives additional flags. */ +#if defined(__x86_64__) + #define EM_HOST EM_X86_64 +#elif defined(__i386__) + #define EM_HOST EM_386 +#elif defined(__aarch64__) + #define EM_HOST EM_AARCH64 +#elif defined(__arm__) + #define EM_HOST EM_ARM +#elif defined(__alpha__) + #define EM_HOST EM_ALPHA +#elif defined(__arc__) + #define EM_HOST EM_ARC +#elif defined(__AVR__) + #define EM_HOST EM_AVR +#elif defined(__AVR32__) + #define EM_HOST EM_AVR32 +#elif defined(__bfin__) + #define EM_HOST EM_BLACKFIN +#elif defined(__csky__) + #define EM_HOST EM_CSKY + #if defined(__CSKYABIV2__) + #define EF_HOST EF_CSKY_ABIV2 + #else + #define EF_HOST EF_CSKY_ABIV1 + #endif +#elif defined(__cris__) + #define EM_HOST EM_CRIS +#elif defined(__hppa__) // HP PA-RISC + #define EM_HOST EM_PARISC +#elif defined(__loongarch__) + #define EM_HOST EM_LOONGARCH +#elif defined(__mips__) + #define EM_HOST EM_MIPS +#elif defined(__m32r__) + #define EM_HOST EM_M32R +#elif defined(__microblaze__) + #define EM_HOST EM_MICROBLAZE +#elif defined(__MSP430__) + #define EM_HOST EM_MSP430 +#elif defined(__powerpc64__) + #define EM_HOST EM_PPC64 +#elif defined(__powerpc__) + #define EM_HOST EM_PPC +#elif defined(__riscv) + #define EM_HOST EM_RISCV +#elif defined(__s390x__) + #define EM_HOST EM_S390 +#elif defined(__sh__) + #define EM_HOST EM_SH +#elif defined(__sparc64__) || defined(__sparc__) + #define EM_HOST EM_SPARC +#elif defined(__xtensa__) + #define EM_HOST EM_XTENSA +#else + /* Unknown host ELF machine type. 
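The EM_HOST/EF_HOST ladder lets callers ask about the running host without duplicating the #ifdef chain at every call site; the csky branch additionally encodes the ABI in EF_HOST because register names differ between ABIv1 and ABIv2. A usage sketch, assuming the new three-argument get_dwarf_regnum() shown below:

	#include "dwarf-regs.h"

	/* Resolve a register name for the host; -1 if unknown. */
	static int host_dwarf_regnum(const char *name)
	{
		return get_dwarf_regnum(name, EM_HOST, EF_HOST);
	}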
*/ + #define EM_HOST EM_NONE +#endif + +#if !defined(EF_HOST) + #define EF_HOST 0 +#endif #define DWARF_REG_PC 0xd3af9c /* random number */ #define DWARF_REG_FB 0xd3affb /* random number */ -#ifdef HAVE_DWARF_SUPPORT -const char *get_arch_regstr(unsigned int n); -/* - * get_dwarf_regstr - Returns ftrace register string from DWARF regnum - * n: DWARF register number - * machine: ELF machine signature (EM_*) +#ifdef HAVE_LIBDW_SUPPORT +const char *get_csky_regstr(unsigned int n, unsigned int flags); + +/** + * get_dwarf_regstr() - Returns ftrace register string from DWARF regnum. + * @n: DWARF register number. + * @machine: ELF machine signature (EM_*). + * @flags: ELF flags for things like ABI differences. */ -const char *get_dwarf_regstr(unsigned int n, unsigned int machine); +const char *get_dwarf_regstr(unsigned int n, unsigned int machine, unsigned int flags); + +int get_x86_regnum(const char *name); +#if !defined(__x86_64__) && !defined(__i386__) int get_arch_regnum(const char *name); +#endif + /* * get_dwarf_regnum - Returns DWARF regnum from register name * name: architecture register name * machine: ELF machine signature (EM_*) */ -int get_dwarf_regnum(const char *name, unsigned int machine); +int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags); + +void get_powerpc_regs(u32 raw_insn, int is_source, struct annotated_op_loc *op_loc); -#else /* HAVE_DWARF_SUPPORT */ +#else /* HAVE_LIBDW_SUPPORT */ static inline int get_dwarf_regnum(const char *name __maybe_unused, - unsigned int machine __maybe_unused) + unsigned int machine __maybe_unused, + unsigned int flags __maybe_unused) { return -1; } -#endif -#ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET -/* - * Arch should support fetching the offset of a register in pt_regs - * by its name. See kernel's regs_query_register_offset in - * arch/xxx/kernel/ptrace.c. - */ -int regs_query_register_offset(const char *name); +static inline void get_powerpc_regs(u32 raw_insn __maybe_unused, int is_source __maybe_unused, + struct annotated_op_loc *op_loc __maybe_unused) +{ + return; +} #endif + #endif diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index 178b00205fe6..34e2fdfe7300 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -120,7 +120,7 @@ #endif // In the kernel sources (include/linux/cfi_types.h), this has a different -// definition when CONFIG_CFI_CLANG is used, for tools/ just use the !clang +// definition when CONFIG_CFI is used, for tools/ just use the !cfi // definition: #ifndef SYM_TYPED_START #define SYM_TYPED_START(name, linkage, align...) 
\ @@ -132,4 +132,8 @@ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif +#ifndef SYM_PIC_ALIAS +#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_T_FUNC, SYM_L_GLOBAL) +#endif + #endif /* PERF_LINUX_LINKAGE_H_ */ diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index ec1b3bd9f530..382255393fb3 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -100,7 +100,7 @@ static void intel_bts_dump(struct intel_bts *bts __maybe_unused, else sz = len; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < sz; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < br_sz; i++) @@ -275,12 +275,13 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, int ret; struct intel_bts *bts = btsq->bts; union perf_event event; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; if (bts->synth_opts.initial_skip && bts->num_events++ <= bts->synth_opts.initial_skip) return 0; + perf_sample__init(&sample, /*all=*/true); sample.ip = le64_to_cpu(branch->from); sample.cpumode = intel_bts_cpumode(bts, sample.ip); sample.pid = btsq->pid; @@ -312,6 +313,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, pr_err("Intel BTS: failed to deliver branch event, error %d\n", ret); + perf_sample__exit(&sample); return ret; } @@ -591,7 +593,7 @@ static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp) static int intel_bts_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, - struct perf_tool *tool) + const struct perf_tool *tool) { struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, auxtrace); @@ -634,7 +636,7 @@ static int intel_bts_process_event(struct perf_session *session, static int intel_bts_process_auxtrace_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, auxtrace); @@ -675,7 +677,7 @@ static int intel_bts_process_auxtrace_event(struct perf_session *session, } static int intel_bts_flush(struct perf_session *session, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, auxtrace); @@ -737,35 +739,6 @@ static bool intel_bts_evsel_is_auxtrace(struct perf_session *session, return evsel->core.attr.type == bts->pmu_type; } -struct intel_bts_synth { - struct perf_tool dummy_tool; - struct perf_session *session; -}; - -static int intel_bts_event_synth(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample __maybe_unused, - struct machine *machine __maybe_unused) -{ - struct intel_bts_synth *intel_bts_synth = - container_of(tool, struct intel_bts_synth, dummy_tool); - - return perf_session__deliver_synth_event(intel_bts_synth->session, - event, NULL); -} - -static int intel_bts_synth_event(struct perf_session *session, - struct perf_event_attr *attr, u64 id) -{ - struct intel_bts_synth intel_bts_synth; - - memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth)); - intel_bts_synth.session = session; - - return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1, - &id, intel_bts_event_synth); -} - static int intel_bts_synth_events(struct intel_bts *bts, struct perf_session *session) { @@ -804,9 +777,7 @@ static int 
intel_bts_synth_events(struct intel_bts *bts, attr.sample_id_all = evsel->core.attr.sample_id_all; attr.read_format = evsel->core.attr.read_format; - id = evsel->core.id[0] + 1000000000; - if (!id) - id = 1; + id = auxtrace_synth_id_range_start(evsel); if (bts->synth_opts.branches) { attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; @@ -814,7 +785,7 @@ static int intel_bts_synth_events(struct intel_bts *bts, attr.sample_type |= PERF_SAMPLE_ADDR; pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", id, (u64)attr.sample_type); - err = intel_bts_synth_event(session, &attr, id); + err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) { pr_err("%s: failed to synthesize 'branches' event type\n", __func__); @@ -837,7 +808,7 @@ static int intel_bts_synth_events(struct intel_bts *bts, static const char * const intel_bts_info_fmts[] = { [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n", - [INTEL_BTS_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_BTS_TIME_MULT] = " Time Multiplier %"PRIu64"\n", [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n", [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index b41c2e9c6f88..8fd7e4330044 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -1,4 +1,4 @@ -perf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o +perf-util-y += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o inat_tables_script = $(srctree)/tools/arch/x86/tools/gen-insn-attr-x86.awk inat_tables_maps = $(srctree)/tools/arch/x86/lib/x86-opcode-map.txt @@ -7,16 +7,20 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table $(call rule_mkdir) @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ -# Busybox's diff doesn't have -I, avoid warning in the case +perf-util-y += inat.o insn.o -$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c +$(OUTPUT)util/intel-pt-decoder/inat.o: $(srctree)/tools/arch/x86/lib/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(call rule_mkdir) $(call if_changed_dep,cc_o_c) -CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder +CFLAGS_inat.o += -I$(OUTPUT)util/intel-pt-decoder + +$(OUTPUT)util/intel-pt-decoder/insn.o: $(srctree)/tools/arch/x86/lib/insn.c + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) ifeq ($(CC_NO_CLANG), 1) - CFLAGS_intel-pt-insn-decoder.o += -Wno-override-init + CFLAGS_insn.o += -Wno-override-init endif -CFLAGS_intel-pt-insn-decoder.o += -Wno-packed +CFLAGS_insn.o += -Wno-packed diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index c5d57027ec23..72c7a4e15d61 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -11,9 +11,6 @@ #include <byteswap.h> #include "../../../arch/x86/include/asm/insn.h" -#include "../../../arch/x86/lib/inat.c" -#include "../../../arch/x86/lib/insn.c" - #include "event.h" #include "intel-pt-insn-decoder.h" @@ -35,7 +32,7 @@ static void intel_pt_insn_decoder(struct insn *insn, intel_pt_insn->rel = 0; 
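The id assignment that intel-bts (and, later in this series, intel-pt) used to open-code is what auxtrace_synth_id_range_start() now centralizes. For reference, the removed logic was equivalent to the sketch below; the helper's exact internals beyond this are an assumption:

	/* Old open-coded allocation: offset the first real sample id by a
	 * large constant and avoid the reserved id 0 on wraparound. */
	static u64 synth_id_start(struct evsel *evsel)
	{
		u64 id = evsel->core.id[0] + 1000000000;

		return id ?: 1;
	}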
intel_pt_insn->emulated_ptwrite = false; - if (insn_is_avx(insn)) { + if (insn_is_avx_or_xop(insn)) { intel_pt_insn->op = INTEL_PT_OP_OTHER; intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH; intel_pt_insn->length = insn->length; @@ -92,6 +89,15 @@ static void intel_pt_insn_decoder(struct insn *insn, op = INTEL_PT_OP_JCC; branch = INTEL_PT_BR_CONDITIONAL; break; + case 0xa1: + if (insn_is_rex2(insn)) { /* jmpabs */ + intel_pt_insn->op = INTEL_PT_OP_JMP; + /* jmpabs causes a TIP packet like an indirect branch */ + intel_pt_insn->branch = INTEL_PT_BR_INDIRECT; + intel_pt_insn->length = insn->length; + return; + } + break; case 0xc2: /* near ret */ case 0xc3: /* near ret */ case 0xca: /* far ret */ @@ -200,12 +206,13 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, return 0; } -int arch_is_branch(const unsigned char *buf, size_t len, int x86_64) +int arch_is_uncond_branch(const unsigned char *buf, size_t len, int x86_64) { struct intel_pt_insn in; if (intel_pt_get_insn(buf, len, x86_64, &in) < 0) return -1; - return in.branch != INTEL_PT_BR_NO_BRANCH; + return in.branch == INTEL_PT_BR_UNCONDITIONAL || + in.branch == INTEL_PT_BR_INDIRECT; } const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused, diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index bccb988a7a44..94fb16cf9e0c 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -10,7 +10,7 @@ #include <byteswap.h> #include <linux/kernel.h> #include <linux/compiler.h> -#include <asm-generic/unaligned.h> +#include <linux/unaligned.h> #include "intel-pt-pkt-decoder.h" diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index d6d7b7512505..fc9eec8b54b8 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -127,6 +127,7 @@ struct intel_pt { bool single_pebs; bool sample_pebs; + int pebs_data_src_fmt; struct evsel *pebs_evsel; u64 evt_sample_type; @@ -175,6 +176,7 @@ enum switch_state { struct intel_pt_pebs_event { struct evsel *evsel; u64 id; + int data_src_fmt; }; struct intel_pt_queue { @@ -249,7 +251,7 @@ static void intel_pt_dump(struct intel_pt *pt __maybe_unused, else pkt_len = 1; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < pkt_len; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < 16; i++) @@ -1764,12 +1766,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct dummy_branch_stack { u64 nr; u64 hw_idx; struct branch_entry entries; } dummy_bs; + int ret; if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) return 0; @@ -1777,6 +1780,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_b_sample(pt, ptq, event, &sample); sample.id = ptq->pt->branches_id; @@ -1806,8 +1810,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; } - return intel_pt_deliver_synth_event(pt, event, &sample, + perf_sample__exit(&sample); + ret = intel_pt_deliver_synth_event(pt, event, &sample, pt->branches_sample_type); + return ret; } static void intel_pt_prep_sample(struct intel_pt *pt, @@ 
-1835,11 +1841,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_sample(pt, ptq, event, &sample); sample.id = ptq->pt->instructions_id; @@ -1859,16 +1867,19 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) ptq->last_insn_cnt = ptq->state->tot_insn_cnt; - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->instructions_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->instructions_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; u64 period = 0; + int ret; if (ptq->sample_ipc) period = ptq->ipc_cyc_cnt - ptq->last_cy_cyc_cnt; @@ -1876,6 +1887,7 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq) if (!period || intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_sample(pt, ptq, event, &sample); sample.id = ptq->pt->cycles_id; @@ -1887,25 +1899,31 @@ static int intel_pt_synth_cycle_sample(struct intel_pt_queue *ptq) ptq->last_cy_insn_cnt = ptq->ipc_insn_cnt; ptq->last_cy_cyc_cnt = ptq->ipc_cyc_cnt; - return intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, pt->cycles_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_sample(pt, ptq, event, &sample); sample.id = ptq->pt->transactions_id; sample.stream_id = ptq->pt->transactions_id; - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->transactions_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->transactions_sample_type); + perf_sample__exit(&sample); + return ret; } static void intel_pt_prep_p_sample(struct intel_pt *pt, @@ -1953,15 +1971,17 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_cbr raw; u32 flags; + int ret; if (intel_pt_skip_cbr_event(pt)) return 0; ptq->cbr_seen = ptq->state->cbr; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->cbr_id; @@ -1975,20 +1995,24 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, 
}; + struct perf_sample sample; struct perf_synth_intel_psb raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->psb_id; @@ -2001,20 +2025,24 @@ static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_mwait raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->mwait_id; @@ -2026,20 +2054,24 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_pwre raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->pwre_id; @@ -2051,20 +2083,24 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_exstop raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->exstop_id; @@ -2076,20 +2112,24 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_pwrx raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->pwrx_id; @@ 
-2101,8 +2141,10 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->pwr_events_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->pwr_events_sample_type); + perf_sample__exit(&sample); + return ret; } /* @@ -2232,19 +2274,160 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack, } } -static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id) +#define P(a, b) PERF_MEM_S(a, b) +#define OP_LH (P(OP, LOAD) | P(LVL, HIT)) +#define LEVEL(x) P(LVLNUM, x) +#define REM P(REMOTE, REMOTE) +#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) + +#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) + +/* Based on kernel __intel_pmu_pebs_data_source_grt() and pebs_data_source */ +static const u64 pebs_data_source_grt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* RAM hit|SNP Hit */ + OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* Remote L3 hit|SNP Hit */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* RAM hit|SNP None or Miss */ + OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* Remote RAM hit|SNP None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel __intel_pmu_pebs_data_source_cmt() and pebs_data_source */ +static const u64 pebs_data_source_cmt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* RAM hit|SNP Hit */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE), /* Remote L3 hit|SNP Hit */ + OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD), /* RAM hit|SNP None or Miss */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM), /* Remote RAM hit|SNP 
None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel pebs_set_tlb_lock() */ +static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) +{ + /* + * TLB access + * 0 = did not miss 2nd level TLB + * 1 = missed 2nd level TLB + */ + if (tlb) + *val |= P(TLB, MISS) | P(TLB, L2); + else + *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + /* locked prefix */ + if (lock) + *val |= P(LOCK, LOCKED); +} + +/* Based on kernel __grt_latency_data() */ +static u64 intel_pt_grt_latency_data(u8 dse, bool tlb, bool lock, bool blk, + const u64 *pebs_data_source) +{ + u64 val; + + dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK; + val = pebs_data_source[dse]; + + pebs_set_tlb_lock(&val, tlb, lock); + + if (blk) + val |= P(BLK, DATA); + else + val |= P(BLK, NA); + + return val; +} + +/* Default value for data source */ +#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ + PERF_MEM_S(LVL, NA) |\ + PERF_MEM_S(SNOOP, NA) |\ + PERF_MEM_S(LOCK, NA) |\ + PERF_MEM_S(TLB, NA) |\ + PERF_MEM_S(LVLNUM, NA)) + +enum DATA_SRC_FORMAT { + DATA_SRC_FORMAT_ERR = -1, + DATA_SRC_FORMAT_NA = 0, + DATA_SRC_FORMAT_GRT = 1, + DATA_SRC_FORMAT_CMT = 2, +}; + +/* Based on kernel grt_latency_data() and cmt_latency_data */ +static u64 intel_pt_get_data_src(u64 mem_aux_info, int data_src_fmt) +{ + switch (data_src_fmt) { + case DATA_SRC_FORMAT_GRT: { + union { + u64 val; + struct { + unsigned int dse:4; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:25; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_grt); + } + case DATA_SRC_FORMAT_CMT: { + union { + u64 val; + struct { + unsigned int dse:5; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:24; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_cmt); + } + default: + return PERF_MEM_NA; + } +} + +static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, + u64 id, int data_src_fmt) { const struct intel_pt_blk_items *items = &ptq->state->items; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; union perf_event *event = ptq->event_buf; struct intel_pt *pt = ptq->pt; u64 sample_type = evsel->core.attr.sample_type; u8 cpumode; - u64 regs[8 * sizeof(sample.intr_regs.mask)]; + u64 regs[8 * sizeof(sample.intr_regs->mask)]; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_a_sample(ptq, event, &sample); sample.id = id; @@ -2291,15 +2474,16 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse items->mask[INTEL_PT_XMM_POS])) { u64 regs_mask = evsel->core.attr.sample_regs_intr; u64 *pos; + struct regs_dump *intr_regs = perf_sample__intr_regs(&sample); - sample.intr_regs.abi = items->is_32_bit ? + intr_regs->abi = items->is_32_bit ? 
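intel_pt_get_data_src() above splits the PEBS mem_aux_info word with a per-format bitfield union and then indexes the matching data-source table. A worked example for the Gracemont layout — the input value is made up for illustration:

	static u64 grt_example(void)
	{
		/* mem_aux_info = 0x21: dse = 1 (L1 hit), stlb_miss set, no
		 * lock, no forward-block. The result is the L1-hit entry
		 * from pebs_data_source_grt with P(TLB, MISS) | P(TLB, L2)
		 * OR'd in by pebs_set_tlb_lock(), plus P(BLK, NA). */
		return intel_pt_get_data_src(0x21, DATA_SRC_FORMAT_GRT);
	}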
PERF_SAMPLE_REGS_ABI_32 : PERF_SAMPLE_REGS_ABI_64; - sample.intr_regs.regs = regs; + intr_regs->regs = regs; - pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask); + pos = intel_pt_add_gp_regs(intr_regs, regs, items, regs_mask); - intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask); + intel_pt_add_xmm(intr_regs, pos, items, regs_mask); } if (sample_type & PERF_SAMPLE_BRANCH_STACK) { @@ -2350,6 +2534,18 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse } } + if (sample_type & PERF_SAMPLE_DATA_SRC) { + if (items->has_mem_aux_info && data_src_fmt) { + if (data_src_fmt < 0) { + pr_err("Intel PT missing data_src info\n"); + return -1; + } + sample.data_src = intel_pt_get_data_src(items->mem_aux_info, data_src_fmt); + } else { + sample.data_src = PERF_MEM_NA; + } + } + if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) { u64 ax = items->has_rax ? items->rax : 0; /* Refer kernel's intel_hsw_transaction() */ @@ -2361,16 +2557,19 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse sample.transaction = txn; } - return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; struct evsel *evsel = pt->pebs_evsel; + int data_src_fmt = pt->pebs_data_src_fmt; u64 id = evsel->core.id[0]; - return intel_pt_do_synth_pebs_sample(ptq, evsel, id); + return intel_pt_do_synth_pebs_sample(ptq, evsel, id, data_src_fmt); } static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) @@ -2395,7 +2594,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) hw_id); return intel_pt_synth_single_pebs_sample(ptq); } - err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id); + err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id, pe->data_src_fmt); if (err) return err; } @@ -2407,16 +2606,17 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct { struct perf_synth_intel_evt cfe; struct perf_synth_intel_evd evd[INTEL_PT_MAX_EVDS]; } raw; - int i; + int i, ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->evt_id; @@ -2438,20 +2638,24 @@ static int intel_pt_synth_events_sample(struct intel_pt_queue *ptq) ptq->state->evd_cnt * sizeof(struct perf_synth_intel_evd); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->evt_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->evt_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; union perf_event *event = ptq->event_buf; - struct perf_sample sample = { .ip = 0, }; + struct perf_sample sample; struct perf_synth_intel_iflag_chg raw; + int ret; if (intel_pt_skip_event(pt)) return 0; + perf_sample__init(&sample, /*all=*/true); intel_pt_prep_p_sample(pt, ptq, event, &sample); sample.id = ptq->pt->iflag_chg_id; @@ -2471,8 +2675,10 @@ static int intel_pt_synth_iflag_chg_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); 
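Every synthesizing function in this file now follows the same lifecycle: the on-stack perf_sample is set up by perf_sample__init() instead of a designated initializer, and must be torn down with perf_sample__exit() before returning, since a sample can now own out-of-line state such as the intr_regs seen above. The recurring shape, reduced to a sketch:

	static int synth_sample_skeleton(struct intel_pt_queue *ptq)
	{
		struct perf_sample sample;
		int ret;

		perf_sample__init(&sample, /*all=*/true);
		/* ...fill in sample.id, sample.raw_data, and so on... */
		ret = intel_pt_deliver_synth_event(ptq->pt, ptq->event_buf,
						   &sample,
						   ptq->pt->evt_sample_type);
		perf_sample__exit(&sample);
		return ret;
	}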
sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, event, &sample, - pt->iflag_chg_sample_type); + ret = intel_pt_deliver_synth_event(pt, event, &sample, + pt->iflag_chg_sample_type); + perf_sample__exit(&sample); + return ret; } static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, @@ -3355,6 +3561,49 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +/* + * Events with data_src are identified by L1_Hit_Indication + * refer https://github.com/intel/perfmon + */ +static int intel_pt_data_src_fmt(struct intel_pt *pt, struct evsel *evsel) +{ + struct perf_env *env = pt->machine->env; + int fmt = DATA_SRC_FORMAT_NA; + + if (!env->cpuid) + return DATA_SRC_FORMAT_ERR; + + /* + * PEBS-via-PT is only supported on E-core non-hybrid. Of those only + * Gracemont and Crestmont have data_src. Check for: + * Alderlake N (Gracemont) + * Sierra Forest (Crestmont) + * Grand Ridge (Crestmont) + */ + + if (!strncmp(env->cpuid, "GenuineIntel,6,190,", 19)) + fmt = DATA_SRC_FORMAT_GRT; + + if (!strncmp(env->cpuid, "GenuineIntel,6,175,", 19) || + !strncmp(env->cpuid, "GenuineIntel,6,182,", 19)) + fmt = DATA_SRC_FORMAT_CMT; + + if (fmt == DATA_SRC_FORMAT_NA) + return fmt; + + /* + * Only data_src events are: + * mem-loads event=0xd0,umask=0x5 + * mem-stores event=0xd0,umask=0x6 + */ + if (evsel->core.attr.type == PERF_TYPE_RAW && + ((evsel->core.attr.config & 0xffff) == 0x5d0 || + (evsel->core.attr.config & 0xffff) == 0x6d0)) + return fmt; + + return DATA_SRC_FORMAT_NA; +} + static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) @@ -3375,6 +3624,7 @@ static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, ptq->pebs[hw_id].evsel = evsel; ptq->pebs[hw_id].id = sample->id; + ptq->pebs[hw_id].data_src_fmt = intel_pt_data_src_fmt(pt, evsel); return 0; } @@ -3449,7 +3699,7 @@ out: static int intel_pt_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, - struct perf_tool *tool) + const struct perf_tool *tool) { struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, auxtrace); @@ -3533,7 +3783,7 @@ static int intel_pt_process_event(struct perf_session *session, return err; } -static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool) +static int intel_pt_flush(struct perf_session *session, const struct perf_tool *tool) { struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, auxtrace); @@ -3600,7 +3850,7 @@ static bool intel_pt_evsel_is_auxtrace(struct perf_session *session, static int intel_pt_process_auxtrace_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, auxtrace); @@ -3659,37 +3909,15 @@ static int intel_pt_queue_data(struct perf_session *session, data_offset, timestamp); } -struct intel_pt_synth { - struct perf_tool dummy_tool; - struct perf_session *session; -}; - -static int intel_pt_event_synth(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample __maybe_unused, - struct machine *machine __maybe_unused) -{ - struct intel_pt_synth *intel_pt_synth = - container_of(tool, struct intel_pt_synth, dummy_tool); - - return perf_session__deliver_synth_event(intel_pt_synth->session, event, - NULL); -} - static int intel_pt_synth_event(struct perf_session 
*session, const char *name, struct perf_event_attr *attr, u64 id) { - struct intel_pt_synth intel_pt_synth; int err; pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n", name, id, (u64)attr->sample_type); - memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth)); - intel_pt_synth.session = session; - - err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, - &id, intel_pt_event_synth); + err = perf_session__deliver_synth_attr_event(session, attr, id); if (err) pr_err("%s: failed to synthesize '%s' event type\n", __func__, name); @@ -3759,9 +3987,7 @@ static int intel_pt_synth_events(struct intel_pt *pt, attr.sample_id_all = evsel->core.attr.sample_id_all; attr.read_format = evsel->core.attr.read_format; - id = evsel->core.id[0] + 1000000000; - if (!id) - id = 1; + id = auxtrace_synth_id_range_start(evsel); if (pt->synth_opts.branches) { attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; @@ -3946,6 +4172,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) } pt->single_pebs = true; pt->sample_pebs = true; + pt->pebs_data_src_fmt = intel_pt_data_src_fmt(pt, evsel); pt->pebs_evsel = evsel; } } @@ -4132,7 +4359,7 @@ static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt) static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", - [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_PT_TIME_MULT] = " Time Multiplier %"PRIu64"\n", [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c new file mode 100644 index 000000000000..3c958d738ca6 --- /dev/null +++ b/tools/perf/util/intel-tpebs.c @@ -0,0 +1,663 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * intel_tpebs.c: Intel TPEBS support + */ + +#include <api/fs/fs.h> +#include <sys/param.h> +#include <subcmd/run-command.h> +#include <thread.h> +#include "intel-tpebs.h" +#include <linux/list.h> +#include <linux/zalloc.h> +#include <linux/err.h> +#include "sample.h" +#include "counts.h" +#include "debug.h" +#include "evlist.h" +#include "evsel.h" +#include "mutex.h" +#include "session.h" +#include "stat.h" +#include "tool.h" +#include "cpumap.h" +#include "metricgroup.h" +#include "stat.h" +#include <sys/stat.h> +#include <sys/file.h> +#include <errno.h> +#include <poll.h> +#include <math.h> + +#define PERF_DATA "-" + +bool tpebs_recording; +enum tpebs_mode tpebs_mode; +static LIST_HEAD(tpebs_results); +static pthread_t tpebs_reader_thread; +static struct child_process tpebs_cmd; +static int control_fd[2], ack_fd[2]; +static struct mutex tpebs_mtx; + +struct tpebs_retire_lat { + struct list_head nd; + /** @evsel: The evsel that opened the retire_lat event. */ + struct evsel *evsel; + /** @event: Event passed to perf record. */ + char *event; + /** @stats: Recorded retirement latency stats. */ + struct stats stats; + /** @last: Last retirement latency read. */ + uint64_t last; + /* Has the event been sent to perf record? 
*/ + bool started; +}; + +static void tpebs_mtx_init(void) +{ + mutex_init(&tpebs_mtx); +} + +static struct mutex *tpebs_mtx_get(void) +{ + static pthread_once_t tpebs_mtx_once = PTHREAD_ONCE_INIT; + + pthread_once(&tpebs_mtx_once, tpebs_mtx_init); + return &tpebs_mtx; +} + +static struct tpebs_retire_lat *tpebs_retire_lat__find(struct evsel *evsel) + EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get()); + +static int evsel__tpebs_start_perf_record(struct evsel *evsel) +{ + const char **record_argv; + int tpebs_event_size = 0, i = 0, ret; + char control_fd_buf[32]; + char cpumap_buf[50]; + struct tpebs_retire_lat *t; + + list_for_each_entry(t, &tpebs_results, nd) + tpebs_event_size++; + + record_argv = malloc((10 + 2 * tpebs_event_size) * sizeof(*record_argv)); + if (!record_argv) + return -ENOMEM; + + record_argv[i++] = "perf"; + record_argv[i++] = "record"; + record_argv[i++] = "-W"; + record_argv[i++] = "--synth=no"; + + scnprintf(control_fd_buf, sizeof(control_fd_buf), "--control=fd:%d,%d", + control_fd[0], ack_fd[1]); + record_argv[i++] = control_fd_buf; + + record_argv[i++] = "-o"; + record_argv[i++] = PERF_DATA; + + if (!perf_cpu_map__is_any_cpu_or_is_empty(evsel->evlist->core.user_requested_cpus)) { + cpu_map__snprint(evsel->evlist->core.user_requested_cpus, cpumap_buf, + sizeof(cpumap_buf)); + record_argv[i++] = "-C"; + record_argv[i++] = cpumap_buf; + } + + list_for_each_entry(t, &tpebs_results, nd) { + record_argv[i++] = "-e"; + record_argv[i++] = t->event; + } + record_argv[i++] = NULL; + assert(i == 10 + 2 * tpebs_event_size || i == 8 + 2 * tpebs_event_size); + /* Note, no workload given so system wide is implied. */ + + assert(tpebs_cmd.pid == 0); + tpebs_cmd.argv = record_argv; + tpebs_cmd.out = -1; + ret = start_command(&tpebs_cmd); + zfree(&tpebs_cmd.argv); + list_for_each_entry(t, &tpebs_results, nd) + t->started = true; + + return ret; +} + +static bool is_child_pid(pid_t parent, pid_t child) +{ + if (parent < 0 || child < 0) + return false; + + while (true) { + char path[PATH_MAX]; + char line[256]; + FILE *fp; + +new_child: + if (parent == child) + return true; + + if (child <= 0) + return false; + + scnprintf(path, sizeof(path), "%s/%d/status", procfs__mountpoint(), child); + fp = fopen(path, "r"); + if (!fp) { + /* Presumably the process went away. Assume not a child. */ + return false; + } + while (fgets(line, sizeof(line), fp) != NULL) { + if (strncmp(line, "PPid:", 5) == 0) { + fclose(fp); + if (sscanf(line + 5, "%d", &child) != 1) { + /* Unexpected error parsing. */ + return false; + } + goto new_child; + } + } + /* Unexpected EOF. */ + fclose(fp); + return false; + } +} + +static bool should_ignore_sample(const struct perf_sample *sample, const struct tpebs_retire_lat *t) +{ + pid_t workload_pid, sample_pid = sample->pid; + + /* + * During evlist__purge the evlist will be removed prior to the + * evsel__exit calling evsel__tpebs_close and taking the + * tpebs_mtx. Avoid a segfault by ignoring samples in this case. 
+ */ + if (t->evsel->evlist == NULL) + return true; + + workload_pid = t->evsel->evlist->workload.pid; + if (workload_pid < 0 || workload_pid == sample_pid) + return false; + + if (!t->evsel->core.attr.inherit) + return true; + + return !is_child_pid(workload_pid, sample_pid); +} + +static int process_sample_event(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct evsel *evsel, + struct machine *machine __maybe_unused) +{ + struct tpebs_retire_lat *t; + + mutex_lock(tpebs_mtx_get()); + if (tpebs_cmd.pid == 0) { + /* Record has terminated. */ + mutex_unlock(tpebs_mtx_get()); + return 0; + } + t = tpebs_retire_lat__find(evsel); + if (!t) { + mutex_unlock(tpebs_mtx_get()); + return -EINVAL; + } + if (should_ignore_sample(sample, t)) { + mutex_unlock(tpebs_mtx_get()); + return 0; + } + /* + * Per-core results may need handling later; for now assume the average + * retire latency value will be used. Save the number of samples and the + * sum of retire latency values for each event. + */ + t->last = sample->weight3; + update_stats(&t->stats, sample->weight3); + mutex_unlock(tpebs_mtx_get()); + return 0; +} + +static int process_feature_event(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + if (event->feat.feat_id < HEADER_LAST_FEATURE) + return perf_event__process_feature(session, event); + return 0; +} + +static void *__sample_reader(void *arg __maybe_unused) +{ + struct perf_session *session; + struct perf_data data = { + .mode = PERF_DATA_MODE_READ, + .path = PERF_DATA, + .file.fd = tpebs_cmd.out, + }; + struct perf_tool tool; + + perf_tool__init(&tool, /*ordered_events=*/false); + tool.sample = process_sample_event; + tool.feature = process_feature_event; + tool.attr = perf_event__process_attr; + + session = perf_session__new(&data, &tool); + if (IS_ERR(session)) + return NULL; + perf_session__process_events(session); + perf_session__delete(session); + + return NULL; +} + +static int tpebs_send_record_cmd(const char *msg) EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get()) +{ + struct pollfd pollfd = { .events = POLLIN, }; + int ret, len, retries = 0; + char ack_buf[8]; + + /* Check if the command exited before the send, done with the lock held. */ + if (tpebs_cmd.pid == 0) + return 0; + + /* + * Let go of the lock while sending/receiving as blocking can starve the + * sample reading thread. + */ + mutex_unlock(tpebs_mtx_get()); + + /* Send the perf record command. */ + len = strlen(msg); + ret = write(control_fd[1], msg, len); + if (ret != len) { + pr_err("failed to write control message '%s' to perf record\n", msg); + ret = -EPIPE; + goto out; + } + + if (!strcmp(msg, EVLIST_CTL_CMD_STOP_TAG)) { + ret = 0; + goto out; + } + + /* Wait for an ack. */ + pollfd.fd = ack_fd[0]; + + /* + * We need this poll to ensure the ack_fd pipe will not hang + * when perf record fails for any reason. The timeout value + * 3000ms is an empirical selection.
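The surrounding function drives `perf record --control=fd:...`: a command tag is written to the control pipe, then the ack pipe is polled. A stripped-down sketch of that exchange; the literal "ack\n" string is an assumption about perf's control interface (the code here uses the EVLIST_CTL_CMD_* macros from evlist.h):

#include <poll.h>
#include <string.h>
#include <unistd.h>

static int send_ctl_cmd(int control_fd, int ack_fd, const char *cmd)
{
	struct pollfd pfd = { .fd = ack_fd, .events = POLLIN };
	char ack[8] = { 0 };
	ssize_t len = (ssize_t)strlen(cmd);

	if (write(control_fd, cmd, len) != len)
		return -1;	/* record side likely exited */
	if (poll(&pfd, 1, 500) <= 0 || !(pfd.revents & POLLIN))
		return -1;	/* timed out; the real code retries before giving up */
	if (read(ack_fd, ack, sizeof(ack) - 1) <= 0)
		return -1;
	return strncmp(ack, "ack\n", 4) == 0 ? 0 : -1;
}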
+ */ +again: + if (!poll(&pollfd, 1, 500)) { + if (check_if_command_finished(&tpebs_cmd)) { + ret = 0; + goto out; + } + + if (retries++ < 6) + goto again; + pr_err("tpebs failed: perf record ack timeout for '%s'\n", msg); + ret = -ETIMEDOUT; + goto out; + } + + if (!(pollfd.revents & POLLIN)) { + if (check_if_command_finished(&tpebs_cmd)) { + ret = 0; + goto out; + } + + pr_err("tpebs failed: did not receive an ack for '%s'\n", msg); + ret = -EPIPE; + goto out; + } + + ret = read(ack_fd[0], ack_buf, sizeof(ack_buf)); + if (ret > 0) + ret = strcmp(ack_buf, EVLIST_CTL_CMD_ACK_TAG); + else + pr_err("tpebs: perf record control ack failed\n"); +out: + /* Re-take lock as expected by caller. */ + mutex_lock(tpebs_mtx_get()); + return ret; +} + +/* + * tpebs_stop - stop the sample data read thread and the perf record process. + */ +static int tpebs_stop(void) EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get()) +{ + int ret = 0; + + /* Like evsel__tpebs_open(), tpebs_stop() should only run once. */ + if (tpebs_cmd.pid != 0) { + tpebs_send_record_cmd(EVLIST_CTL_CMD_STOP_TAG); + tpebs_cmd.pid = 0; + mutex_unlock(tpebs_mtx_get()); + pthread_join(tpebs_reader_thread, NULL); + mutex_lock(tpebs_mtx_get()); + close(control_fd[0]); + close(control_fd[1]); + close(ack_fd[0]); + close(ack_fd[1]); + close(tpebs_cmd.out); + ret = finish_command(&tpebs_cmd); + tpebs_cmd.pid = 0; + if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL) + ret = 0; + } + return ret; +} + +/** + * evsel__tpebs_event() - Create string event encoding to pass to `perf record`. + */ +static int evsel__tpebs_event(struct evsel *evsel, char **event) +{ + char *name, *modifier; + int ret; + + name = strdup(evsel->name); + if (!name) + return -ENOMEM; + + modifier = strrchr(name, 'R'); + if (!modifier) { + ret = -EINVAL; + goto out; + } + *modifier = 'p'; + modifier = strchr(name, ':'); + if (!modifier) + modifier = strrchr(name, '/'); + if (!modifier) { + ret = -EINVAL; + goto out; + } + *modifier = '\0'; + if (asprintf(event, "%s/name=tpebs_event_%p/%s", name, evsel, modifier + 1) > 0) + ret = 0; + else + ret = -ENOMEM; +out: + if (ret) + pr_err("Tpebs event modifier broken '%s'\n", evsel->name); + free(name); + return ret; +} + +static struct tpebs_retire_lat *tpebs_retire_lat__new(struct evsel *evsel) +{ + struct tpebs_retire_lat *result = zalloc(sizeof(*result)); + int ret; + + if (!result) + return NULL; + + ret = evsel__tpebs_event(evsel, &result->event); + if (ret) { + free(result); + return NULL; + } + result->evsel = evsel; + return result; +} + +static void tpebs_retire_lat__delete(struct tpebs_retire_lat *r) +{ + zfree(&r->event); + free(r); +} + +static struct tpebs_retire_lat *tpebs_retire_lat__find(struct evsel *evsel) +{ + struct tpebs_retire_lat *t; + unsigned long num; + const char *evsel_name; + + /* + * Retirement latency evsels on the evlist are matched directly by + * pointer. Events read back from `perf record` instead carry a name + * with the "tpebs_event_" prefix. + */ + if (evsel__is_retire_lat(evsel)) { + list_for_each_entry(t, &tpebs_results, nd) { + if (t->evsel == evsel) + return t; + } + return NULL; + } + evsel_name = strstr(evsel->name, "tpebs_event_"); + if (!evsel_name) { + /* The perf record side isn't expected to have other events.
*/ + return NULL; + } + errno = 0; + num = strtoull(evsel_name + 12, NULL, 16); + if (errno) { + pr_err("Bad evsel for tpebs find '%s'\n", evsel->name); + return NULL; + } + list_for_each_entry(t, &tpebs_results, nd) { + if ((unsigned long)t->evsel == num) + return t; + } + return NULL; +} + +/** + * evsel__tpebs_prepare - create tpebs data structures ready for opening. + * @evsel: retire_latency evsel, all evsels on its list will be prepared. + */ +static int evsel__tpebs_prepare(struct evsel *evsel) +{ + struct evsel *pos; + struct tpebs_retire_lat *tpebs_event; + + mutex_lock(tpebs_mtx_get()); + tpebs_event = tpebs_retire_lat__find(evsel); + if (tpebs_event) { + /* evsel, or an identically named one, was already prepared. */ + mutex_unlock(tpebs_mtx_get()); + return 0; + } + tpebs_event = tpebs_retire_lat__new(evsel); + if (!tpebs_event) { + mutex_unlock(tpebs_mtx_get()); + return -ENOMEM; + } + list_add_tail(&tpebs_event->nd, &tpebs_results); + mutex_unlock(tpebs_mtx_get()); + + /* + * Eagerly prepare all other evsels on the list to try to ensure that by + * open they are all known. + */ + evlist__for_each_entry(evsel->evlist, pos) { + int ret; + + if (pos == evsel || !pos->retire_lat) + continue; + + ret = evsel__tpebs_prepare(pos); + if (ret) + return ret; + } + return 0; +} + +/** + * evsel__tpebs_open - starts tpebs execution. + * @evsel: retire_latency evsel, all evsels on its list will be selected. Each + * evsel is sampled to get the average retire_latency value. + */ +int evsel__tpebs_open(struct evsel *evsel) +{ + int ret; + bool tpebs_empty; + + /* Only start recording when tpebs_recording is enabled. */ + if (!tpebs_recording) + return 0; + /* Only start the events once. */ + if (tpebs_cmd.pid != 0) { + struct tpebs_retire_lat *t; + bool valid; + + mutex_lock(tpebs_mtx_get()); + t = tpebs_retire_lat__find(evsel); + valid = t && t->started; + mutex_unlock(tpebs_mtx_get()); + /* May fail as the event wasn't started. */ + return valid ? 0 : -EBUSY; + } + + ret = evsel__tpebs_prepare(evsel); + if (ret) + return ret; + + mutex_lock(tpebs_mtx_get()); + tpebs_empty = list_empty(&tpebs_results); + if (!tpebs_empty) { + /* Create control and ack fds for --control. */ + if (pipe(control_fd) < 0) { + pr_err("tpebs: Failed to create control fifo\n"); + ret = -1; + goto out; + } + if (pipe(ack_fd) < 0) { + pr_err("tpebs: Failed to create ack fifo\n"); + ret = -1; + goto out; + } + + ret = evsel__tpebs_start_perf_record(evsel); + if (ret) + goto out; + + if (pthread_create(&tpebs_reader_thread, /*attr=*/NULL, __sample_reader, + /*arg=*/NULL)) { + kill(tpebs_cmd.pid, SIGTERM); + close(tpebs_cmd.out); + pr_err("Could not create thread to process sample data.\n"); + ret = -1; + goto out; + } + ret = tpebs_send_record_cmd(EVLIST_CTL_CMD_ENABLE_TAG); + } +out: + if (ret) { + struct tpebs_retire_lat *t = tpebs_retire_lat__find(evsel); + + list_del_init(&t->nd); + tpebs_retire_lat__delete(t); + } + mutex_unlock(tpebs_mtx_get()); + return ret; +} + +int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread) +{ + struct perf_counts_values *count, *old_count = NULL; + struct tpebs_retire_lat *t; + uint64_t val; + int ret; + + /* Only set the retire_latency value for the first CPU and thread.
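The find above works because evsel__tpebs_event() embedded the evsel pointer into the event name with %p, so the hex string parsed back by strtoull(..., 16) can be compared against the pointer itself. A toy round-trip (illustrative only):

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	int obj;
	void *evsel = &obj;
	char name[64];
	unsigned long long num;

	/* Encode: %p prints the pointer into the synthetic event name. */
	snprintf(name, sizeof(name), "tpebs_event_%p", evsel);

	/* Decode: parse the hex back, as tpebs_retire_lat__find() does. */
	num = strtoull(name + strlen("tpebs_event_"), NULL, 16);
	printf("%s -> match=%d\n", name, (void *)(uintptr_t)num == evsel);
	return 0;
}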
*/ + if (cpu_map_idx != 0 || thread != 0) + return 0; + + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + + count = perf_counts(evsel->counts, cpu_map_idx, thread); + + mutex_lock(tpebs_mtx_get()); + t = tpebs_retire_lat__find(evsel); + /* + * If reading the first tpebs result, send a ping to the record + * process. Allow the sample reader a chance to read by releasing and + * reacquiring the lock. + */ + if (t && &t->nd == tpebs_results.next) { + ret = tpebs_send_record_cmd(EVLIST_CTL_CMD_PING_TAG); + mutex_unlock(tpebs_mtx_get()); + if (ret) + return ret; + mutex_lock(tpebs_mtx_get()); + } + if (t == NULL || t->stats.n == 0) { + /* No sample data, use default. */ + if (tpebs_recording) { + pr_warning_once( + "Using precomputed retirement latency data as no samples\n"); + } + val = 0; + switch (tpebs_mode) { + case TPEBS_MODE__MIN: + val = rint(evsel->retirement_latency.min); + break; + case TPEBS_MODE__MAX: + val = rint(evsel->retirement_latency.max); + break; + default: + case TPEBS_MODE__LAST: + case TPEBS_MODE__MEAN: + val = rint(evsel->retirement_latency.mean); + break; + } + } else { + switch (tpebs_mode) { + case TPEBS_MODE__MIN: + val = t->stats.min; + break; + case TPEBS_MODE__MAX: + val = t->stats.max; + break; + case TPEBS_MODE__LAST: + val = t->last; + break; + default: + case TPEBS_MODE__MEAN: + val = rint(t->stats.mean); + break; + } + } + mutex_unlock(tpebs_mtx_get()); + + if (old_count) { + count->val = old_count->val + val; + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + } else { + count->val = val; + count->run++; + count->ena++; + } + return 0; +} + +/** + * evsel__tpebs_close() - delete tpebs related data. If the last event, stop the + * created thread and process by calling tpebs_stop(). + * + * This function is called in evsel__close() to be symmetric with + * evsel__tpebs_open() being called in evsel__open(). 
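The mode handling above reduces to a small pure function; a sketch with stand-in types for perf's struct stats and tpebs_mode (field and enum names here are illustrative):

#include <math.h>
#include <stdint.h>

struct latency_stats { double mean; uint64_t min, max, last, n; };
enum mode { MODE_MEAN, MODE_MIN, MODE_MAX, MODE_LAST };

static uint64_t pick_latency(const struct latency_stats *s, enum mode m)
{
	if (!s->n)
		return 0;	/* no samples: fall back to precomputed values */
	switch (m) {
	case MODE_MIN:	return s->min;
	case MODE_MAX:	return s->max;
	case MODE_LAST:	return s->last;
	case MODE_MEAN:
	default:	return (uint64_t)rint(s->mean);
	}
}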
+ */ +void evsel__tpebs_close(struct evsel *evsel) +{ + struct tpebs_retire_lat *t; + + mutex_lock(tpebs_mtx_get()); + t = tpebs_retire_lat__find(evsel); + if (t) { + list_del_init(&t->nd); + tpebs_retire_lat__delete(t); + + if (list_empty(&tpebs_results)) + tpebs_stop(); + } + mutex_unlock(tpebs_mtx_get()); +} diff --git a/tools/perf/util/intel-tpebs.h b/tools/perf/util/intel-tpebs.h new file mode 100644 index 000000000000..9475e2e6ea74 --- /dev/null +++ b/tools/perf/util/intel-tpebs.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * intel_tpebs.h: Intel TPEBS support + */ +#ifndef __INTEL_TPEBS_H +#define __INTEL_TPEBS_H + +struct evlist; +struct evsel; + +enum tpebs_mode { + TPEBS_MODE__MEAN, + TPEBS_MODE__MIN, + TPEBS_MODE__MAX, + TPEBS_MODE__LAST, +}; + +extern bool tpebs_recording; +extern enum tpebs_mode tpebs_mode; + +int evsel__tpebs_open(struct evsel *evsel); +void evsel__tpebs_close(struct evsel *evsel); +int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread); + +#endif /* __INTEL_TPEBS_H */ diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h index fb810e1b2de7..f4037203e9ec 100644 --- a/tools/perf/util/jit.h +++ b/tools/perf/util/jit.h @@ -5,7 +5,8 @@ #include <data.h> int jit_process(struct perf_session *session, struct perf_data *output, - struct machine *machine, char *filename, pid_t pid, pid_t tid, u64 *nbytes); + struct machine *machine, const char *filename, + pid_t pid, pid_t tid, u64 *nbytes); int jit_inject_record(const char *filename); diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 1f657ef8975f..f00814e37de9 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -14,9 +14,9 @@ #include <sys/mman.h> #include <linux/stringify.h> -#include "build-id.h" #include "event.h" #include "debug.h" +#include "dso.h" #include "evlist.h" #include "namespaces.h" #include "symbol.h" @@ -233,7 +233,8 @@ jit_open(struct jit_buf_desc *jd, const char *name) /* * keep dirname for generating files and mmap records */ - strcpy(jd->dir, name); + strncpy(jd->dir, name, PATH_MAX); + jd->dir[PATH_MAX - 1] = '\0'; dirname(jd->dir); free(buf); @@ -424,7 +425,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) { struct perf_sample sample; union perf_event *event; - struct perf_tool *tool = jd->session->tool; + const struct perf_tool *tool = jd->session->tool; uint64_t code, addr; uintptr_t uaddr; char *filename; @@ -516,7 +517,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) * create pseudo sample to induce dso hit increment * use first address as sample address */ - memset(&sample, 0, sizeof(sample)); + perf_sample__init(&sample, /*all=*/true); sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = pid; sample.tid = tid; @@ -531,10 +532,26 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) /* * mark dso as use to generate buildid in the header */ - if (!ret) - build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); - + if (!ret) { + struct dso_id dso_id = { + { + .maj = event->mmap2.maj, + .min = event->mmap2.min, + .ino = event->mmap2.ino, + .ino_generation = event->mmap2.ino_generation, + }, + .mmap2_valid = true, + .mmap2_ino_generation_valid = true, + }; + struct dso *dso = machine__findnew_dso_id(jd->machine, filename, &dso_id); + + if (dso) + dso__set_hit(dso); + + dso__put(dso); + } out: + perf_sample__exit(&sample); free(event); return ret; } @@ -543,7 +560,7 @@ static int jit_repipe_code_move(struct
jit_buf_desc *jd, union jr_entry *jr) { struct perf_sample sample; union perf_event *event; - struct perf_tool *tool = jd->session->tool; + const struct perf_tool *tool = jd->session->tool; char *filename; size_t size; struct stat st; @@ -611,7 +628,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) * create pseudo sample to induce dso hit increment * use first address as sample address */ - memset(&sample, 0, sizeof(sample)); + perf_sample__init(&sample, /*all=*/true); sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = pid; sample.tid = tid; @@ -620,12 +637,13 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) ret = perf_event__process_mmap2(tool, event, &sample, jd->machine); if (ret) - return ret; + goto out; ret = jit_inject_event(jd, event); if (!ret) build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); - +out: + perf_sample__exit(&sample); return ret; } @@ -710,7 +728,7 @@ jit_process_dump(struct jit_buf_desc *jd) } static int -jit_inject(struct jit_buf_desc *jd, char *path) +jit_inject(struct jit_buf_desc *jd, const char *path) { int ret; @@ -737,7 +755,7 @@ jit_inject(struct jit_buf_desc *jd, char *path) * as captured in the RECORD_MMAP record */ static int -jit_detect(char *mmap_name, pid_t pid, struct nsinfo *nsi) +jit_detect(const char *mmap_name, pid_t pid, struct nsinfo *nsi, bool *in_pidns) { char *p; char *end = NULL; @@ -773,11 +791,16 @@ jit_detect(char *mmap_name, pid_t pid, struct nsinfo *nsi) if (!end) return -1; + *in_pidns = pid == nsinfo__nstgid(nsi); /* * pid does not match mmap pid * pid==0 in system-wide mode (synthesized) + * + * If the pid in the file name is equal to the nstgid, then + * the agent ran inside a container and perf outside the + * container, so record it for further use in jit_inject(). 
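jit_detect() above accepts only mmaps of the jitdump marker file, whose basename follows the jit-<pid>.dump convention; a compact illustration of that filename check (not the exact parser, which also validates the pid against the mmap pid or nstgid):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

static bool is_jitdump_marker(const char *filename, pid_t *file_pid)
{
	const char *base = strrchr(filename, '/');
	int pid = -1, end = 0;

	base = base ? base + 1 : filename;
	/* %n records how much matched; require the name to end right there. */
	if (sscanf(base, "jit-%d.dump%n", &pid, &end) != 1 || base[end] != '\0')
		return false;
	*file_pid = pid;
	return true;
}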
*/ - if (pid && pid2 != nsinfo__nstgid(nsi)) + if (pid && !(pid2 == pid || *in_pidns)) return -1; /* * validate suffix @@ -821,7 +844,7 @@ int jit_process(struct perf_session *session, struct perf_data *output, struct machine *machine, - char *filename, + const char *filename, pid_t pid, pid_t tid, u64 *nbytes) @@ -830,6 +853,7 @@ jit_process(struct perf_session *session, struct nsinfo *nsi; struct evsel *first; struct jit_buf_desc jd; + bool in_pidns = false; int ret; thread = machine__findnew_thread(machine, pid, tid); @@ -844,7 +868,7 @@ jit_process(struct perf_session *session, /* * first, detect marker mmap (i.e., the jitdump mmap) */ - if (jit_detect(filename, pid, nsi)) { + if (jit_detect(filename, pid, nsi, &in_pidns)) { nsinfo__put(nsi); /* @@ -866,6 +890,9 @@ jit_process(struct perf_session *session, jd.machine = machine; jd.nsi = nsi; + if (in_pidns) + nsinfo__set_in_pidns(nsi); + /* * track sample_type to compute id_all layout * perf sets the same sample type to all events as of now diff --git a/tools/perf/util/kvm-stat.c b/tools/perf/util/kvm-stat.c new file mode 100644 index 000000000000..38ace736db5c --- /dev/null +++ b/tools/perf/util/kvm-stat.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "debug.h" +#include "evsel.h" +#include "kvm-stat.h" + +#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT) + +bool kvm_exit_event(struct evsel *evsel) +{ + return evsel__name_is(evsel, kvm_exit_trace); +} + +void exit_event_get_key(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->info = 0; + key->key = evsel__intval(evsel, sample, kvm_exit_reason); +} + + +bool exit_event_begin(struct evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + if (kvm_exit_event(evsel)) { + exit_event_get_key(evsel, sample, key); + return true; + } + + return false; +} + +bool kvm_entry_event(struct evsel *evsel) +{ + return evsel__name_is(evsel, kvm_entry_trace); +} + +bool exit_event_end(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return kvm_entry_event(evsel); +} + +static const char *get_exit_reason(struct perf_kvm_stat *kvm, + struct exit_reasons_table *tbl, + u64 exit_code) +{ + while (tbl->reason != NULL) { + if (tbl->exit_code == exit_code) + return tbl->reason; + tbl++; + } + + pr_err("unknown kvm exit code:%lld on %s\n", + (unsigned long long)exit_code, kvm->exit_reasons_isa); + return "UNKNOWN"; +} + +void exit_event_decode_key(struct perf_kvm_stat *kvm, + struct event_key *key, + char *decode) +{ + const char *exit_reason = get_exit_reason(kvm, key->exit_reasons, + key->key); + + scnprintf(decode, KVM_EVENT_NAME_LEN, "%s", exit_reason); +} + +#endif diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index 3e9ac754c3d1..a356b839c2ee 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -10,6 +10,7 @@ #include "symbol.h" #include "record.h" +#include <errno.h> #include <stdlib.h> #include <linux/zalloc.h> @@ -115,6 +116,8 @@ struct kvm_reg_events_ops { struct kvm_events_ops *ops; }; +#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT) + void exit_event_get_key(struct evsel *evsel, struct perf_sample *sample, struct event_key *key); @@ -127,6 +130,7 @@ bool exit_event_end(struct evsel *evsel, void exit_event_decode_key(struct perf_kvm_stat *kvm, struct event_key *key, char *decode); +#endif bool kvm_exit_event(struct evsel *evsel); bool kvm_entry_event(struct evsel *evsel); @@ -187,5 +191,15 @@ 
static inline struct kvm_info *kvm_info__new(void) #define kvm_info__zput(ki) do { } while (0) #endif /* HAVE_KVM_STAT_SUPPORT */ +#define STRDUP_FAIL_EXIT(s) \ + ({ char *_p; \ + _p = strdup(s); \ + if (!_p) { \ + ret = -ENOMEM; \ + goto EXIT; \ + } \ + _p; \ + }) + extern int kvm_add_default_arch_event(int *argc, const char **argv); #endif /* __PERF_KVM_STAT_H */ diff --git a/tools/perf/util/kwork.h b/tools/perf/util/kwork.h index 76fe2a821bcf..db00269b73f2 100644 --- a/tools/perf/util/kwork.h +++ b/tools/perf/util/kwork.h @@ -1,6 +1,7 @@ #ifndef PERF_UTIL_KWORK_H #define PERF_UTIL_KWORK_H +#include "perf.h" #include "util/tool.h" #include "util/time-utils.h" @@ -251,12 +252,14 @@ struct perf_kwork { * perf kwork top data */ struct kwork_top_stat top_stat; -}; -struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork, + /* Add work callback. */ + struct kwork_work *(*add_work)(struct perf_kwork *kwork, struct kwork_class *class, struct kwork_work *key); +}; + #ifdef HAVE_BPF_SKEL int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork); diff --git a/tools/perf/util/libbfd.c b/tools/perf/util/libbfd.c new file mode 100644 index 000000000000..cc0c474cbfaa --- /dev/null +++ b/tools/perf/util/libbfd.c @@ -0,0 +1,643 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "libbfd.h" +#include "annotate.h" +#include "bpf-event.h" +#include "bpf-utils.h" +#include "debug.h" +#include "dso.h" +#include "env.h" +#include "map.h" +#include "srcline.h" +#include "symbol.h" +#include "symbol_conf.h" +#include "util.h" +#include <tools/dis-asm-compat.h> +#ifdef HAVE_LIBBPF_SUPPORT +#include <bpf/bpf.h> +#include <bpf/btf.h> +#endif +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#define PACKAGE "perf" +#include <bfd.h> + +/* + * Implement addr2line using libbfd. 
+ */ +struct a2l_data { + const char *input; + u64 addr; + + bool found; + const char *filename; + const char *funcname; + unsigned int line; + + bfd *abfd; + asymbol **syms; +}; + +static bool perf_bfd_lock(void *bfd_mutex) +{ + mutex_lock(bfd_mutex); + return true; +} + +static bool perf_bfd_unlock(void *bfd_mutex) +{ + mutex_unlock(bfd_mutex); + return true; +} + +static void perf_bfd_init(void) +{ + static struct mutex bfd_mutex; + + mutex_init_recursive(&bfd_mutex); + + if (bfd_init() != BFD_INIT_MAGIC) { + pr_err("Error initializing libbfd\n"); + return; + } + if (!bfd_thread_init(perf_bfd_lock, perf_bfd_unlock, &bfd_mutex)) + pr_err("Error initializing libbfd threading\n"); +} + +static void ensure_bfd_init(void) +{ + static pthread_once_t bfd_init_once = PTHREAD_ONCE_INIT; + + pthread_once(&bfd_init_once, perf_bfd_init); +} + +static int bfd_error(const char *string) +{ + const char *errmsg; + + errmsg = bfd_errmsg(bfd_get_error()); + fflush(stdout); + + if (string) + pr_debug("%s: %s\n", string, errmsg); + else + pr_debug("%s\n", errmsg); + + return -1; +} + +static int slurp_symtab(bfd *abfd, struct a2l_data *a2l) +{ + long storage; + long symcount; + asymbol **syms; + bfd_boolean dynamic = FALSE; + + if ((bfd_get_file_flags(abfd) & HAS_SYMS) == 0) + return bfd_error(bfd_get_filename(abfd)); + + storage = bfd_get_symtab_upper_bound(abfd); + if (storage == 0L) { + storage = bfd_get_dynamic_symtab_upper_bound(abfd); + dynamic = TRUE; + } + if (storage < 0L) + return bfd_error(bfd_get_filename(abfd)); + + syms = malloc(storage); + if (dynamic) + symcount = bfd_canonicalize_dynamic_symtab(abfd, syms); + else + symcount = bfd_canonicalize_symtab(abfd, syms); + + if (symcount < 0) { + free(syms); + return bfd_error(bfd_get_filename(abfd)); + } + + a2l->syms = syms; + return 0; +} + +static void find_address_in_section(bfd *abfd, asection *section, void *data) +{ + bfd_vma pc, vma; + bfd_size_type size; + struct a2l_data *a2l = data; + flagword flags; + + if (a2l->found) + return; + +#ifdef bfd_get_section_flags + flags = bfd_get_section_flags(abfd, section); +#else + flags = bfd_section_flags(section); +#endif + if ((flags & SEC_ALLOC) == 0) + return; + + pc = a2l->addr; +#ifdef bfd_get_section_vma + vma = bfd_get_section_vma(abfd, section); +#else + vma = bfd_section_vma(section); +#endif +#ifdef bfd_get_section_size + size = bfd_get_section_size(section); +#else + size = bfd_section_size(section); +#endif + + if (pc < vma || pc >= vma + size) + return; + + a2l->found = bfd_find_nearest_line(abfd, section, a2l->syms, pc - vma, + &a2l->filename, &a2l->funcname, + &a2l->line); + + if (a2l->filename && !strlen(a2l->filename)) + a2l->filename = NULL; +} + +static struct a2l_data *addr2line_init(const char *path) +{ + bfd *abfd; + struct a2l_data *a2l = NULL; + + ensure_bfd_init(); + abfd = bfd_openr(path, NULL); + if (abfd == NULL) + return NULL; + + if (!bfd_check_format(abfd, bfd_object)) + goto out; + + a2l = zalloc(sizeof(*a2l)); + if (a2l == NULL) + goto out; + + a2l->abfd = abfd; + a2l->input = strdup(path); + if (a2l->input == NULL) + goto out; + + if (slurp_symtab(abfd, a2l)) + goto out; + + return a2l; + +out: + if (a2l) { + zfree((char **)&a2l->input); + free(a2l); + } + bfd_close(abfd); + return NULL; +} + +static void addr2line_cleanup(struct a2l_data *a2l) +{ + if (a2l->abfd) + bfd_close(a2l->abfd); + zfree((char **)&a2l->input); + zfree(&a2l->syms); + free(a2l); +} + +static int inline_list__append_dso_a2l(struct dso *dso, + struct inline_node *node, + struct symbol *sym) +{ 
+ struct a2l_data *a2l = dso__a2l(dso); + struct symbol *inline_sym = new_inline_sym(dso, sym, a2l->funcname); + char *srcline = NULL; + + if (a2l->filename) + srcline = srcline_from_fileline(a2l->filename, a2l->line); + + return inline_list__append(inline_sym, srcline, node); +} + +int libbfd__addr2line(const char *dso_name, u64 addr, + char **file, unsigned int *line, struct dso *dso, + bool unwind_inlines, struct inline_node *node, + struct symbol *sym) +{ + int ret = 0; + struct a2l_data *a2l = dso__a2l(dso); + + if (!a2l) { + a2l = addr2line_init(dso_name); + dso__set_a2l(dso, a2l); + } + + if (a2l == NULL) { + if (!symbol_conf.disable_add2line_warn) + pr_warning("addr2line_init failed for %s\n", dso_name); + return 0; + } + + a2l->addr = addr; + a2l->found = false; + + bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l); + + if (!a2l->found) + return 0; + + if (unwind_inlines) { + int cnt = 0; + + if (node && inline_list__append_dso_a2l(dso, node, sym)) + return 0; + + while (bfd_find_inliner_info(a2l->abfd, &a2l->filename, + &a2l->funcname, &a2l->line) && + cnt++ < MAX_INLINE_NEST) { + + if (a2l->filename && !strlen(a2l->filename)) + a2l->filename = NULL; + + if (node != NULL) { + if (inline_list__append_dso_a2l(dso, node, sym)) + return 0; + // found at least one inline frame + ret = 1; + } + } + } + + if (file) { + *file = a2l->filename ? strdup(a2l->filename) : NULL; + ret = *file ? 1 : 0; + } + + if (line) + *line = a2l->line; + + return ret; +} + +void dso__free_a2l_libbfd(struct dso *dso) +{ + struct a2l_data *a2l = dso__a2l(dso); + + if (!a2l) + return; + + addr2line_cleanup(a2l); + + dso__set_a2l(dso, NULL); +} + +static int bfd_symbols__cmpvalue(const void *a, const void *b) +{ + const asymbol *as = *(const asymbol **)a, *bs = *(const asymbol **)b; + + if (bfd_asymbol_value(as) != bfd_asymbol_value(bs)) + return bfd_asymbol_value(as) - bfd_asymbol_value(bs); + + return bfd_asymbol_name(as)[0] - bfd_asymbol_name(bs)[0]; +} + +static int bfd2elf_binding(asymbol *symbol) +{ + if (symbol->flags & BSF_WEAK) + return STB_WEAK; + if (symbol->flags & BSF_GLOBAL) + return STB_GLOBAL; + if (symbol->flags & BSF_LOCAL) + return STB_LOCAL; + return -1; +} + +int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) +{ + int err = -1; + long symbols_size, symbols_count, i; + asection *section; + asymbol **symbols, *sym; + struct symbol *symbol; + bfd *abfd; + u64 start, len; + + ensure_bfd_init(); + abfd = bfd_openr(debugfile, NULL); + if (!abfd) + return -1; + + if (!bfd_check_format(abfd, bfd_object)) { + pr_debug2("%s: cannot read %s bfd file.\n", __func__, + dso__long_name(dso)); + goto out_close; + } + + if (bfd_get_flavour(abfd) == bfd_target_elf_flavour) + goto out_close; + + symbols_size = bfd_get_symtab_upper_bound(abfd); + if (symbols_size == 0) { + bfd_close(abfd); + return 0; + } + + if (symbols_size < 0) + goto out_close; + + symbols = malloc(symbols_size); + if (!symbols) + goto out_close; + + symbols_count = bfd_canonicalize_symtab(abfd, symbols); + if (symbols_count < 0) + goto out_free; + + section = bfd_get_section_by_name(abfd, ".text"); + if (section) { + for (i = 0; i < symbols_count; ++i) { + if (!strcmp(bfd_asymbol_name(symbols[i]), "__ImageBase") || + !strcmp(bfd_asymbol_name(symbols[i]), "__image_base__")) + break; + } + if (i < symbols_count) { + /* PE symbols can only have 4 bytes, so use .text high bits */ + u64 text_offset = (section->vma - (u32)section->vma) + + (u32)bfd_asymbol_value(symbols[i]); + dso__set_text_offset(dso, text_offset); 
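To see what the __ImageBase branch above computes: PE symbol values carry only 32 bits, so the section VMA's high bits are recombined with the symbol's low 32 bits to recover the image base. With concrete (made-up) numbers:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t vma = 0x140001000ULL;	/* stand-in for section->vma */
	uint32_t sym_lo = 0x40000000;	/* (u32)bfd_asymbol_value(__ImageBase) */

	/* High 32 bits of the VMA plus the symbol's low 32 bits. */
	uint64_t text_offset = (vma - (uint32_t)vma) + sym_lo;

	printf("text_offset = %#" PRIx64 "\n", text_offset);	/* 0x140000000 */
	return 0;
}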
+ dso__set_text_end(dso, (section->vma - text_offset) + section->size); + } else { + dso__set_text_offset(dso, section->vma - section->filepos); + dso__set_text_end(dso, section->filepos + section->size); + } + } + + qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue); + +#ifdef bfd_get_section +#define bfd_asymbol_section bfd_get_section +#endif + for (i = 0; i < symbols_count; ++i) { + sym = symbols[i]; + section = bfd_asymbol_section(sym); + if (bfd2elf_binding(sym) < 0) + continue; + + while (i + 1 < symbols_count && + bfd_asymbol_section(symbols[i + 1]) == section && + bfd2elf_binding(symbols[i + 1]) < 0) + i++; + + if (i + 1 < symbols_count && + bfd_asymbol_section(symbols[i + 1]) == section) + len = symbols[i + 1]->value - sym->value; + else + len = section->size - sym->value; + + start = bfd_asymbol_value(sym) - dso__text_offset(dso); + symbol = symbol__new(start, len, bfd2elf_binding(sym), STT_FUNC, + bfd_asymbol_name(sym)); + if (!symbol) + goto out_free; + + symbols__insert(dso__symbols(dso), symbol); + } +#ifdef bfd_get_section +#undef bfd_asymbol_section +#endif + + symbols__fixup_end(dso__symbols(dso), false); + symbols__fixup_duplicate(dso__symbols(dso)); + dso__set_adjust_symbols(dso, true); + + err = 0; +out_free: + free(symbols); +out_close: + bfd_close(abfd); + return err; +} + +int libbfd__read_build_id(const char *filename, struct build_id *bid) +{ + size_t size = sizeof(bid->data); + int err = -1, fd; + bfd *abfd; + + if (!filename) + return -EFAULT; + if (!is_regular_file(filename)) + return -EWOULDBLOCK; + + fd = open(filename, O_RDONLY); + if (fd < 0) + return -1; + + ensure_bfd_init(); + abfd = bfd_fdopenr(filename, /*target=*/NULL, fd); + if (!abfd) + return -1; + + if (!bfd_check_format(abfd, bfd_object)) { + pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename); + goto out_close; + } + + if (!abfd->build_id || abfd->build_id->size > size) + goto out_close; + + memcpy(bid->data, abfd->build_id->data, abfd->build_id->size); + memset(bid->data + abfd->build_id->size, 0, size - abfd->build_id->size); + err = bid->size = abfd->build_id->size; + +out_close: + bfd_close(abfd); + return err; +} + +int libbfd_filename__read_debuglink(const char *filename, char *debuglink, + size_t size) +{ + int err = -1; + asection *section; + bfd *abfd; + + ensure_bfd_init(); + abfd = bfd_openr(filename, NULL); + if (!abfd) + return -1; + + if (!bfd_check_format(abfd, bfd_object)) { + pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename); + goto out_close; + } + + section = bfd_get_section_by_name(abfd, ".gnu_debuglink"); + if (!section) + goto out_close; + + if (section->size > size) + goto out_close; + + if (!bfd_get_section_contents(abfd, section, debuglink, 0, + section->size)) + goto out_close; + + err = 0; + +out_close: + bfd_close(abfd); + return err; +} + +int symbol__disassemble_bpf_libbfd(struct symbol *sym __maybe_unused, + struct annotate_args *args __maybe_unused) +{ +#ifdef HAVE_LIBBPF_SUPPORT + struct annotation *notes = symbol__annotation(sym); + struct bpf_prog_linfo *prog_linfo = NULL; + struct bpf_prog_info_node *info_node; + int len = sym->end - sym->start; + disassembler_ftype disassemble; + struct map *map = args->ms.map; + struct perf_bpil *info_linear; + struct disassemble_info info; + struct dso *dso = map__dso(map); + int pc = 0, count, sub_id; + struct btf *btf = NULL; + char tpath[PATH_MAX]; + size_t buf_size; + int nr_skip = 0; + char *buf; + bfd *bfdf; + int ret; + FILE *s; + + if (dso__binary_type(dso) != 
DSO_BINARY_TYPE__BPF_PROG_INFO) + return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE; + + pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__, + sym->name, sym->start, sym->end - sym->start); + + memset(tpath, 0, sizeof(tpath)); + perf_exe(tpath, sizeof(tpath)); + + ensure_bfd_init(); + bfdf = bfd_openr(tpath, NULL); + if (bfdf == NULL) + abort(); + + if (!bfd_check_format(bfdf, bfd_object)) + abort(); + + s = open_memstream(&buf, &buf_size); + if (!s) { + ret = errno; + goto out; + } + init_disassemble_info_compat(&info, s, + (fprintf_ftype) fprintf, + fprintf_styled); + info.arch = bfd_get_arch(bfdf); + info.mach = bfd_get_mach(bfdf); + + info_node = perf_env__find_bpf_prog_info(dso__bpf_prog(dso)->env, + dso__bpf_prog(dso)->id); + if (!info_node) { + ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF; + goto out; + } + info_linear = info_node->info_linear; + sub_id = dso__bpf_prog(dso)->sub_id; + + info.buffer = (void *)(uintptr_t)(info_linear->info.jited_prog_insns); + info.buffer_length = info_linear->info.jited_prog_len; + + if (info_linear->info.nr_line_info) + prog_linfo = bpf_prog_linfo__new(&info_linear->info); + + if (info_linear->info.btf_id) { + struct btf_node *node; + + node = perf_env__find_btf(dso__bpf_prog(dso)->env, + info_linear->info.btf_id); + if (node) + btf = btf__new((__u8 *)(node->data), + node->data_size); + } + + disassemble_init_for_target(&info); + +#ifdef DISASM_FOUR_ARGS_SIGNATURE + disassemble = disassembler(info.arch, + bfd_big_endian(bfdf), + info.mach, + bfdf); +#else + disassemble = disassembler(bfdf); +#endif + if (disassemble == NULL) + abort(); + + fflush(s); + do { + const struct bpf_line_info *linfo = NULL; + struct disasm_line *dl; + size_t prev_buf_size; + const char *srcline; + u64 addr; + + addr = pc + ((u64 *)(uintptr_t)(info_linear->info.jited_ksyms))[sub_id]; + count = disassemble(pc, &info); + + if (prog_linfo) + linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo, + addr, sub_id, + nr_skip); + + if (linfo && btf) { + srcline = btf__name_by_offset(btf, linfo->line_off); + nr_skip++; + } else + srcline = NULL; + + fprintf(s, "\n"); + prev_buf_size = buf_size; + fflush(s); + + if (!annotate_opts.hide_src_code && srcline) { + args->offset = -1; + args->line = strdup(srcline); + args->line_nr = 0; + args->fileloc = NULL; + args->ms.sym = sym; + dl = disasm_line__new(args); + if (dl) { + annotation_line__add(&dl->al, + ¬es->src->source); + } + } + + args->offset = pc; + args->line = buf + prev_buf_size; + args->line_nr = 0; + args->fileloc = NULL; + args->ms.sym = sym; + dl = disasm_line__new(args); + if (dl) + annotation_line__add(&dl->al, ¬es->src->source); + + pc += count; + } while (count > 0 && pc < len); + + ret = 0; +out: + free(prog_linfo); + btf__free(btf); + fclose(s); + bfd_close(bfdf); + return ret; +#else + return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF; +#endif +} diff --git a/tools/perf/util/libbfd.h b/tools/perf/util/libbfd.h new file mode 100644 index 000000000000..953886f3d62f --- /dev/null +++ b/tools/perf/util/libbfd.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_LIBBFD_H +#define __PERF_LIBBFD_H + +#include <linux/compiler.h> +#include <linux/types.h> +#include <stdbool.h> +#include <stddef.h> + +struct annotate_args; +struct build_id; +struct dso; +struct inline_node; +struct symbol; + +#ifdef HAVE_LIBBFD_SUPPORT +int libbfd__addr2line(const char *dso_name, u64 addr, + char **file, unsigned int *line, struct dso *dso, + bool unwind_inlines, struct inline_node *node, + struct 
symbol *sym); + + +void dso__free_a2l_libbfd(struct dso *dso); + +int symbol__disassemble_libbfd(const char *filename, struct symbol *sym, + struct annotate_args *args); + +int libbfd__read_build_id(const char *filename, struct build_id *bid); + +int libbfd_filename__read_debuglink(const char *filename, char *debuglink, size_t size); + +int symbol__disassemble_bpf_libbfd(struct symbol *sym, struct annotate_args *args); + +#else // !defined(HAVE_LIBBFD_SUPPORT) +#include "annotate.h" + +static inline int libbfd__addr2line(const char *dso_name __always_unused, + u64 addr __always_unused, + char **file __always_unused, + unsigned int *line __always_unused, + struct dso *dso __always_unused, + bool unwind_inlines __always_unused, + struct inline_node *node __always_unused, + struct symbol *sym __always_unused) +{ + return -1; +} + + +static inline void dso__free_a2l_libbfd(struct dso *dso __always_unused) +{ +} + +static inline int symbol__disassemble_libbfd(const char *filename __always_unused, + struct symbol *sym __always_unused, + struct annotate_args *args __always_unused) +{ + return -1; +} + +static inline int libbfd__read_build_id(const char *filename __always_unused, + struct build_id *bid __always_unused) +{ + return -1; +} + +static inline int libbfd_filename__read_debuglink(const char *filename __always_unused, + char *debuglink __always_unused, + size_t size __always_unused) +{ + return -1; +} + +static inline int symbol__disassemble_bpf_libbfd(struct symbol *sym __always_unused, + struct annotate_args *args __always_unused) +{ + return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF; +} + +#endif // defined(HAVE_LIBBFD_SUPPORT) + +#endif /* __PERF_LIBBFD_H */ diff --git a/tools/perf/util/llvm-c-helpers.cpp b/tools/perf/util/llvm-c-helpers.cpp new file mode 100644 index 000000000000..004081bd12c9 --- /dev/null +++ b/tools/perf/util/llvm-c-helpers.cpp @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Must come before the linux/compiler.h include, which defines several + * macros (e.g. noinline) that conflict with compiler builtins used + * by LLVM. + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" /* Needed for LLVM <= 15 */ +#include <llvm/DebugInfo/Symbolize/Symbolize.h> +#include <llvm/Support/TargetSelect.h> +#pragma GCC diagnostic pop + +#include <inttypes.h> +#include <stdio.h> +#include <sys/types.h> +#include <linux/compiler.h> +extern "C" { +#include <linux/zalloc.h> +} +#include "llvm-c-helpers.h" + +extern "C" +char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name); + +using namespace llvm; +using llvm::symbolize::LLVMSymbolizer; + +/* + * Allocate a static LLVMSymbolizer, which will live to the end of the program. + * Unlike the bfd paths, LLVMSymbolizer has its own cache, so we do not need + * to store anything in the dso struct. + */ +static LLVMSymbolizer *get_symbolizer() +{ + static LLVMSymbolizer *instance = nullptr; + if (instance == nullptr) { + LLVMSymbolizer::Options opts; + /* + * LLVM sometimes demangles slightly different from the rest + * of the code, and this mismatch can cause new_inline_sym() + * to get confused and mark non-inline symbol as inlined + * (since the name does not properly match up with base_sym). + * Thus, disable the demangling and let the rest of the code + * handle it. + */ + opts.Demangle = false; + instance = new LLVMSymbolizer(opts); + } + return instance; +} + +/* Returns 0 on error, 1 on success. 
*/ +static int extract_file_and_line(const DILineInfo &line_info, char **file, + unsigned int *line) +{ + if (file) { + if (line_info.FileName == "<invalid>") { + /* Match the convention of libbfd. */ + *file = nullptr; + } else { + /* The caller expects to get something it can free(). */ + *file = strdup(line_info.FileName.c_str()); + if (*file == nullptr) + return 0; + } + } + if (line) + *line = line_info.Line; + return 1; +} + +extern "C" +int llvm_addr2line(const char *dso_name, u64 addr, + char **file, unsigned int *line, + bool unwind_inlines, + llvm_a2l_frame **inline_frames) +{ + LLVMSymbolizer *symbolizer = get_symbolizer(); + object::SectionedAddress sectioned_addr = { + addr, + object::SectionedAddress::UndefSection + }; + + if (unwind_inlines) { + Expected<DIInliningInfo> res_or_err = + symbolizer->symbolizeInlinedCode(dso_name, + sectioned_addr); + if (!res_or_err) + return 0; + unsigned num_frames = res_or_err->getNumberOfFrames(); + if (num_frames == 0) + return 0; + + if (extract_file_and_line(res_or_err->getFrame(0), + file, line) == 0) + return 0; + + *inline_frames = (llvm_a2l_frame *)calloc( + num_frames, sizeof(**inline_frames)); + if (*inline_frames == nullptr) + return 0; + + for (unsigned i = 0; i < num_frames; ++i) { + const DILineInfo &src = res_or_err->getFrame(i); + + llvm_a2l_frame &dst = (*inline_frames)[i]; + if (src.FileName == "<invalid>") + /* Match the convention of libbfd. */ + dst.filename = nullptr; + else + dst.filename = strdup(src.FileName.c_str()); + dst.funcname = strdup(src.FunctionName.c_str()); + dst.line = src.Line; + + if (dst.filename == nullptr || + dst.funcname == nullptr) { + for (unsigned j = 0; j <= i; ++j) { + zfree(&(*inline_frames)[j].filename); + zfree(&(*inline_frames)[j].funcname); + } + zfree(inline_frames); + return 0; + } + } + + return num_frames; + } else { + if (inline_frames) + *inline_frames = nullptr; + + Expected<DILineInfo> res_or_err = + symbolizer->symbolizeCode(dso_name, sectioned_addr); + if (!res_or_err) + return 0; + return extract_file_and_line(*res_or_err, file, line); + } +} + +static char * +make_symbol_relative_string(struct dso *dso, const char *sym_name, + u64 addr, u64 base_addr) +{ + if (!strcmp(sym_name, "<invalid>")) + return NULL; + + char *demangled = dso__demangle_sym(dso, 0, sym_name); + if (base_addr && base_addr != addr) { + char buf[256]; + snprintf(buf, sizeof(buf), "%s+0x%" PRIx64, + demangled ? demangled : sym_name, addr - base_addr); + free(demangled); + return strdup(buf); + } else { + if (demangled) + return demangled; + else + return strdup(sym_name); + } +} + +extern "C" +char *llvm_name_for_code(struct dso *dso, const char *dso_name, u64 addr) +{ + LLVMSymbolizer *symbolizer = get_symbolizer(); + object::SectionedAddress sectioned_addr = { + addr, + object::SectionedAddress::UndefSection + }; + Expected<DILineInfo> res_or_err = + symbolizer->symbolizeCode(dso_name, sectioned_addr); + if (!res_or_err) { + return NULL; + } + return make_symbol_relative_string( + dso, res_or_err->FunctionName.c_str(), + addr, res_or_err->StartAddress ? 
*res_or_err->StartAddress : 0); +} + +extern "C" +char *llvm_name_for_data(struct dso *dso, const char *dso_name, u64 addr) +{ + LLVMSymbolizer *symbolizer = get_symbolizer(); + object::SectionedAddress sectioned_addr = { + addr, + object::SectionedAddress::UndefSection + }; + Expected<DIGlobal> res_or_err = + symbolizer->symbolizeData(dso_name, sectioned_addr); + if (!res_or_err) { + return NULL; + } + return make_symbol_relative_string( + dso, res_or_err->Name.c_str(), + addr, res_or_err->Start); +} diff --git a/tools/perf/util/llvm-c-helpers.h b/tools/perf/util/llvm-c-helpers.h new file mode 100644 index 000000000000..d2b99637a28a --- /dev/null +++ b/tools/perf/util/llvm-c-helpers.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_LLVM_C_HELPERS +#define __PERF_LLVM_C_HELPERS 1 + +/* + * Helpers to call into LLVM C++ code from C, for the parts that do not have + * C APIs. + */ + +#include <linux/compiler.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct dso; + +struct llvm_a2l_frame { + char* filename; + char* funcname; + unsigned int line; +}; + +/* + * Implement addr2line() using libLLVM. LLVM is a C++ API, and + * many of the linux/ headers cannot be included in a C++ compile unit, + * so we need to make a little bridge code here. llvm_addr2line() will + * convert the inline frame information from LLVM's internal structures + * and put them into a flat array given in inline_frames. The caller + * is then responsible for taking that array and convert it into perf's + * regular inline frame structures (which depend on e.g. struct list_head). + * + * If the address could not be resolved, or an error occurred (e.g. OOM), + * returns 0. Otherwise, returns the number of inline frames (which means 1 + * if the address was not part of an inlined function). If unwind_inlines + * is set and the return code is nonzero, inline_frames will be set to + * a newly allocated array with that length. The caller is then responsible + * for freeing both the strings and the array itself. + */ +int llvm_addr2line(const char* dso_name, + u64 addr, + char** file, + unsigned int* line, + bool unwind_inlines, + struct llvm_a2l_frame** inline_frames); + +/* + * Simple symbolizers for addresses; will convert something like + * 0x12345 to "func+0x123". Will return NULL if no symbol was found. + * + * The returned value must be freed by the caller, with free(). 
+ */ +char *llvm_name_for_code(struct dso *dso, const char *dso_name, u64 addr); +char *llvm_name_for_data(struct dso *dso, const char *dso_name, u64 addr); + +#ifdef __cplusplus +} +#endif + +#endif /* __PERF_LLVM_C_HELPERS */ diff --git a/tools/perf/util/llvm.c b/tools/perf/util/llvm.c new file mode 100644 index 000000000000..2ebf1f5f65bf --- /dev/null +++ b/tools/perf/util/llvm.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "llvm.h" +#include "annotate.h" +#include "debug.h" +#include "dso.h" +#include "map.h" +#include "namespaces.h" +#include "srcline.h" +#include "symbol.h" +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> +#include <linux/zalloc.h> + +#ifdef HAVE_LIBLLVM_SUPPORT +#include "llvm-c-helpers.h" +#include <llvm-c/Disassembler.h> +#include <llvm-c/Target.h> +#endif + +#ifdef HAVE_LIBLLVM_SUPPORT +static void free_llvm_inline_frames(struct llvm_a2l_frame *inline_frames, + int num_frames) +{ + if (inline_frames != NULL) { + for (int i = 0; i < num_frames; ++i) { + zfree(&inline_frames[i].filename); + zfree(&inline_frames[i].funcname); + } + zfree(&inline_frames); + } +} +#endif + +int llvm__addr2line(const char *dso_name __maybe_unused, u64 addr __maybe_unused, + char **file __maybe_unused, unsigned int *line __maybe_unused, + struct dso *dso __maybe_unused, bool unwind_inlines __maybe_unused, + struct inline_node *node __maybe_unused, struct symbol *sym __maybe_unused) +{ +#ifdef HAVE_LIBLLVM_SUPPORT + struct llvm_a2l_frame *inline_frames = NULL; + int num_frames = llvm_addr2line(dso_name, addr, file, line, + node && unwind_inlines, &inline_frames); + + if (num_frames == 0 || !inline_frames) { + /* Error, or we didn't want inlines. */ + return num_frames; + } + + for (int i = 0; i < num_frames; ++i) { + struct symbol *inline_sym = + new_inline_sym(dso, sym, inline_frames[i].funcname); + char *srcline = NULL; + + if (inline_frames[i].filename) { + srcline = + srcline_from_fileline(inline_frames[i].filename, + inline_frames[i].line); + } + if (inline_list__append(inline_sym, srcline, node) != 0) { + free_llvm_inline_frames(inline_frames, num_frames); + return 0; + } + } + free_llvm_inline_frames(inline_frames, num_frames); + + return num_frames; +#else + return -1; +#endif +} + +#ifdef HAVE_LIBLLVM_SUPPORT +static void init_llvm(void) +{ + static bool init; + + if (!init) { + LLVMInitializeAllTargetInfos(); + LLVMInitializeAllTargetMCs(); + LLVMInitializeAllDisassemblers(); + init = true; + } +} + +/* + * Whenever LLVM wants to resolve an address into a symbol, it calls this + * callback. We don't ever actually _return_ anything (in particular, because + * it puts quotation marks around what we return), but we use this as a hint + * that there is a branch or PC-relative address in the expression that we + * should add some textual annotation for after the instruction. The caller + * will use this information to add the actual annotation. 
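A sketch of a C-side caller honoring the contract documented above: on a positive return with unwind_inlines set, every frame's strings and the array itself must be freed (u64 is assumed to come from <linux/types.h>, as elsewhere in perf, and the include path is illustrative):

#include <linux/types.h>
#include <stdio.h>
#include <stdlib.h>
#include "llvm-c-helpers.h"

static void print_inline_stack(const char *dso_name, u64 addr)
{
	struct llvm_a2l_frame *frames = NULL;
	char *file = NULL;
	unsigned int line = 0;
	int n = llvm_addr2line(dso_name, addr, &file, &line, true, &frames);

	for (int i = 0; i < n && frames; i++)
		printf("%s:%u (%s)\n",
		       frames[i].filename ? frames[i].filename : "?",
		       frames[i].line,
		       frames[i].funcname ? frames[i].funcname : "?");
	for (int i = 0; i < n && frames; i++) {
		free(frames[i].filename);
		free(frames[i].funcname);
	}
	free(frames);
	free(file);	/* *file was strdup()ed on success */
}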
+ */ +struct symbol_lookup_storage { + u64 branch_addr; + u64 pcrel_load_addr; +}; + +static const char * +symbol_lookup_callback(void *disinfo, uint64_t value, + uint64_t *ref_type, + uint64_t address __maybe_unused, + const char **ref __maybe_unused) +{ + struct symbol_lookup_storage *storage = disinfo; + + if (*ref_type == LLVMDisassembler_ReferenceType_In_Branch) + storage->branch_addr = value; + else if (*ref_type == LLVMDisassembler_ReferenceType_In_PCrel_Load) + storage->pcrel_load_addr = value; + *ref_type = LLVMDisassembler_ReferenceType_InOut_None; + return NULL; +} +#endif + +int symbol__disassemble_llvm(const char *filename, struct symbol *sym, + struct annotate_args *args __maybe_unused) +{ +#ifdef HAVE_LIBLLVM_SUPPORT + struct annotation *notes = symbol__annotation(sym); + struct map *map = args->ms.map; + struct dso *dso = map__dso(map); + u64 start = map__rip_2objdump(map, sym->start); + /* Malloc-ed buffer containing instructions read from disk. */ + u8 *code_buf = NULL; + /* Pointer to code to be disassembled. */ + const u8 *buf; + u64 buf_len; + u64 pc; + bool is_64bit; + char disasm_buf[2048]; + size_t disasm_len; + struct disasm_line *dl; + LLVMDisasmContextRef disasm = NULL; + struct symbol_lookup_storage storage; + char *line_storage = NULL; + size_t line_storage_len = 0; + int ret = -1; + + if (args->options->objdump_path) + return -1; + + buf = dso__read_symbol(dso, filename, map, sym, + &code_buf, &buf_len, &is_64bit); + if (buf == NULL) + return errno; + + init_llvm(); + if (arch__is(args->arch, "x86")) { + const char *triplet = is_64bit ? "x86_64-pc-linux" : "i686-pc-linux"; + + disasm = LLVMCreateDisasm(triplet, &storage, /*tag_type=*/0, + /*get_op_info=*/NULL, symbol_lookup_callback); + } else { + char triplet[64]; + + scnprintf(triplet, sizeof(triplet), "%s-linux-gnu", + args->arch->name); + disasm = LLVMCreateDisasm(triplet, &storage, /*tag_type=*/0, + /*get_op_info=*/NULL, symbol_lookup_callback); + } + + if (disasm == NULL) + goto err; + + if (args->options->disassembler_style && + !strcmp(args->options->disassembler_style, "intel")) + LLVMSetDisasmOptions(disasm, + LLVMDisassembler_Option_AsmPrinterVariant); + + /* + * This needs to be set after AsmPrinterVariant, due to a bug in LLVM; + * setting AsmPrinterVariant makes a new instruction printer, making it + * forget about the PrintImmHex flag (which is applied before if both + * are given to the same call). + */ + LLVMSetDisasmOptions(disasm, LLVMDisassembler_Option_PrintImmHex); + + /* add the function address and name */ + scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:", + start, sym->name); + + args->offset = -1; + args->line = disasm_buf; + args->line_nr = 0; + args->fileloc = NULL; + args->ms.sym = sym; + + dl = disasm_line__new(args); + if (dl == NULL) + goto err; + + annotation_line__add(&dl->al, ¬es->src->source); + + pc = start; + for (u64 offset = 0; offset < buf_len; ) { + unsigned int ins_len; + + storage.branch_addr = 0; + storage.pcrel_load_addr = 0; + + /* + * LLVM's API has the code be disassembled as non-const, cast + * here as we may be disassembling from mapped read-only memory. 
+ */ + ins_len = LLVMDisasmInstruction(disasm, (u8 *)(buf + offset), + buf_len - offset, pc, + disasm_buf, sizeof(disasm_buf)); + if (ins_len == 0) + goto err; + disasm_len = strlen(disasm_buf); + + if (storage.branch_addr != 0) { + char *name = llvm_name_for_code(dso, filename, + storage.branch_addr); + if (name != NULL) { + disasm_len += scnprintf(disasm_buf + disasm_len, + sizeof(disasm_buf) - + disasm_len, + " <%s>", name); + free(name); + } + } + if (storage.pcrel_load_addr != 0) { + char *name = llvm_name_for_data(dso, filename, + storage.pcrel_load_addr); + disasm_len += scnprintf(disasm_buf + disasm_len, + sizeof(disasm_buf) - disasm_len, + " # %#"PRIx64, + storage.pcrel_load_addr); + if (name) { + disasm_len += scnprintf(disasm_buf + disasm_len, + sizeof(disasm_buf) - + disasm_len, + " <%s>", name); + free(name); + } + } + + args->offset = offset; + args->line = expand_tabs(disasm_buf, &line_storage, + &line_storage_len); + args->line_nr = 0; + args->fileloc = NULL; + args->ms.sym = sym; + + llvm_addr2line(filename, pc, &args->fileloc, + (unsigned int *)&args->line_nr, false, NULL); + + dl = disasm_line__new(args); + if (dl == NULL) + goto err; + + annotation_line__add(&dl->al, ¬es->src->source); + + free(args->fileloc); + pc += ins_len; + offset += ins_len; + } + + ret = 0; + +err: + LLVMDisasmDispose(disasm); + free(code_buf); + free(line_storage); + return ret; +#else // HAVE_LIBLLVM_SUPPORT + pr_debug("The LLVM disassembler isn't linked in for %s in %s\n", + sym->name, filename); + return -1; +#endif +} diff --git a/tools/perf/util/llvm.h b/tools/perf/util/llvm.h new file mode 100644 index 000000000000..57f6bafb24bb --- /dev/null +++ b/tools/perf/util/llvm.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_LLVM_H +#define __PERF_LLVM_H + +#include <stdbool.h> +#include <linux/types.h> + +struct annotate_args; +struct dso; +struct inline_node; +struct symbol; + +int llvm__addr2line(const char *dso_name, u64 addr, + char **file, unsigned int *line, struct dso *dso, + bool unwind_inlines, struct inline_node *node, + struct symbol *sym); + +int symbol__disassemble_llvm(const char *filename, struct symbol *sym, + struct annotate_args *args); + +#endif /* __PERF_LLVM_H */ diff --git a/tools/perf/util/lock-contention.c b/tools/perf/util/lock-contention.c new file mode 100644 index 000000000000..92e7b7b572a2 --- /dev/null +++ b/tools/perf/util/lock-contention.c @@ -0,0 +1,143 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "debug.h" +#include "env.h" +#include "lock-contention.h" +#include "machine.h" +#include "symbol.h" + +#include <limits.h> +#include <string.h> + +#include <linux/hash.h> +#include <linux/zalloc.h> + +#define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS) +#define lockhashentry(key) (lockhash_table + __lockhashfn((key))) + +struct callstack_filter { + struct list_head list; + char name[]; +}; + +static LIST_HEAD(callstack_filters); +struct hlist_head *lockhash_table; + +int parse_call_stack(const struct option *opt __maybe_unused, const char *str, + int unset __maybe_unused) +{ + char *s, *tmp, *tok; + int ret = 0; + + s = strdup(str); + if (s == NULL) + return -1; + + for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { + struct callstack_filter *entry; + + entry = malloc(sizeof(*entry) + strlen(tok) + 1); + if (entry == NULL) { + pr_err("Memory allocation failure\n"); + free(s); + return -1; + } + + strcpy(entry->name, tok); + list_add_tail(&entry->list, &callstack_filters); + } + + free(s); + 
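As an aside before the lock-contention code continues: the LLVM-C disassembly pattern that symbol__disassemble_llvm above is built around can be exercised standalone. The following is a minimal sketch, assuming an x86_64 target and LLVM's C API headers are available; the code bytes and load address are made up, and this is illustrative rather than a drop-in for the perf implementation:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <llvm-c/Disassembler.h>
#include <llvm-c/Target.h>

int main(void)
{
	/* One-time target setup, mirroring init_llvm() above. */
	LLVMInitializeAllTargetInfos();
	LLVMInitializeAllTargetMCs();
	LLVMInitializeAllDisassemblers();

	/* Made-up buffer: push %rbp; mov %rsp,%rbp; ret. */
	uint8_t code[] = { 0x55, 0x48, 0x89, 0xe5, 0xc3 };
	uint64_t pc = 0x1000; /* hypothetical load address */
	LLVMDisasmContextRef ctx = LLVMCreateDisasm("x86_64-pc-linux",
			/*DisInfo=*/NULL, /*TagType=*/0,
			/*GetOpInfo=*/NULL, /*SymbolLookUp=*/NULL);

	if (!ctx)
		return 1;
	LLVMSetDisasmOptions(ctx, LLVMDisassembler_Option_PrintImmHex);
	for (size_t off = 0; off < sizeof(code); ) {
		char line[128];
		size_t len = LLVMDisasmInstruction(ctx, code + off,
						   sizeof(code) - off, pc,
						   line, sizeof(line));

		if (len == 0) /* undecodable bytes: bail out, as perf does */
			break;
		printf("%#" PRIx64 ":%s\n", pc, line);
		pc += len;
		off += len;
	}
	LLVMDisasmDispose(ctx);
	return 0;
}

Building it is distro dependent; something like `clang disasm.c $(llvm-config --cflags --ldflags --libs --system-libs)` usually works.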
return ret;
+}
+
+bool needs_callstack(void)
+{
+	return !list_empty(&callstack_filters);
+}
+
+struct lock_stat *lock_stat_find(u64 addr)
+{
+	struct hlist_head *entry = lockhashentry(addr);
+	struct lock_stat *ret;
+
+	hlist_for_each_entry(ret, entry, hash_entry) {
+		if (ret->addr == addr)
+			return ret;
+	}
+	return NULL;
+}
+
+struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
+{
+	struct hlist_head *entry = lockhashentry(addr);
+	struct lock_stat *ret, *new;
+
+	hlist_for_each_entry(ret, entry, hash_entry) {
+		if (ret->addr == addr)
+			return ret;
+	}
+
+	new = zalloc(sizeof(struct lock_stat));
+	if (!new)
+		goto alloc_failed;
+
+	new->addr = addr;
+	new->name = strdup(name);
+	if (!new->name) {
+		free(new);
+		goto alloc_failed;
+	}
+
+	new->flags = flags;
+	new->wait_time_min = ULLONG_MAX;
+
+	hlist_add_head(&new->hash_entry, entry);
+	return new;
+
+alloc_failed:
+	pr_err("memory allocation failed\n");
+	return NULL;
+}
+
+bool match_callstack_filter(struct machine *machine, u64 *callstack, int max_stack_depth)
+{
+	struct map *kmap;
+	struct symbol *sym;
+	u64 ip;
+	const char *arch = perf_env__arch(machine->env);
+
+	if (list_empty(&callstack_filters))
+		return true;
+
+	for (int i = 0; i < max_stack_depth; i++) {
+		struct callstack_filter *filter;
+
+		/*
+		 * On powerpc, the callchain saved by the kernel always includes
+		 * the first three entries as the NIP (next instruction pointer),
+		 * LR (link register), and the contents of the LR save area in
+		 * the second stack frame. In certain scenarios it's possible to
+		 * have invalid kernel instruction addresses in either LR or the
+		 * second stack frame's LR. In that case, the kernel stores that
+		 * address as zero.
+		 *
+		 * The check below keeps walking the callstack in case the first
+		 * or second callstack entry holds a zero address on powerpc.
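+		 *
+		 * An illustrative layout (addresses made up):
+		 *   callstack[0] = NIP, callstack[1] = LR,
+		 *   callstack[2] = LR save area, callstack[3..] = frames.
+		 * A zero in entry 1 or 2 must not end the walk on powerpc,
+		 * while a zero anywhere else (or on any other architecture)
+		 * does.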
+ */ + if (!callstack || (!callstack[i] && (strcmp(arch, "powerpc") || + (i != 1 && i != 2)))) + break; + + ip = callstack[i]; + sym = machine__find_kernel_symbol(machine, ip, &kmap); + if (sym == NULL) + continue; + + list_for_each_entry(filter, &callstack_filters, list) { + if (strstr(sym->name, filter->name)) + return true; + } + } + return false; +} diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index 1a7248ff3889..59c94190b092 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -10,10 +10,18 @@ struct lock_filter { int nr_addrs; int nr_syms; int nr_cgrps; + int nr_slabs; unsigned int *types; unsigned long *addrs; char **syms; u64 *cgrps; + char **slabs; +}; + +struct lock_delay { + char *sym; + unsigned long addr; + unsigned long time; }; struct lock_stat { @@ -67,10 +75,11 @@ struct lock_stat { */ #define MAX_LOCK_DEPTH 48 -struct lock_stat *lock_stat_find(u64 addr); -struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags); +/* based on kernel/lockdep.c */ +#define LOCKHASH_BITS 12 +#define LOCKHASH_SIZE (1UL << LOCKHASH_BITS) -bool match_callstack_filter(struct machine *machine, u64 *callstack); +extern struct hlist_head *lockhash_table; /* * struct lock_seq_stat: @@ -137,25 +146,39 @@ struct lock_contention { struct machine *machine; struct hlist_head *result; struct lock_filter *filters; + struct lock_delay *delays; struct lock_contention_fails fails; struct rb_root cgroups; + void *btf; unsigned long map_nr_entries; int max_stack; int stack_skip; int aggr_mode; int owner; int nr_filtered; + int nr_delays; bool save_callstack; }; -#ifdef HAVE_BPF_SKEL +struct option; +int parse_call_stack(const struct option *opt, const char *str, int unset); +bool needs_callstack(void); + +struct lock_stat *lock_stat_find(u64 addr); +struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags); +bool match_callstack_filter(struct machine *machine, u64 *callstack, int max_stack_depth); + + +#ifdef HAVE_BPF_SKEL int lock_contention_prepare(struct lock_contention *con); int lock_contention_start(void); int lock_contention_stop(void); int lock_contention_read(struct lock_contention *con); int lock_contention_finish(struct lock_contention *con); +struct lock_stat *pop_owner_stack_trace(struct lock_contention *con); + #else /* !HAVE_BPF_SKEL */ static inline int lock_contention_prepare(struct lock_contention *con __maybe_unused) @@ -175,6 +198,11 @@ static inline int lock_contention_read(struct lock_contention *con __maybe_unuse return 0; } +static inline struct lock_stat *pop_owner_stack_trace(struct lock_contention *con __maybe_unused) +{ + return NULL; +} + #endif /* HAVE_BPF_SKEL */ #endif /* PERF_LOCK_CONTENTION_H */ diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c index af9a97612f9d..c355757ed391 100644 --- a/tools/perf/util/lzma.c +++ b/tools/perf/util/lzma.c @@ -32,7 +32,7 @@ static const char *lzma_strerror(lzma_ret ret) } } -int lzma_decompress_to_file(const char *input, int output_fd) +int lzma_decompress_stream_to_file(FILE *infile, int output_fd) { lzma_action action = LZMA_RUN; lzma_stream strm = LZMA_STREAM_INIT; @@ -41,18 +41,11 @@ int lzma_decompress_to_file(const char *input, int output_fd) u8 buf_in[BUFSIZE]; u8 buf_out[BUFSIZE]; - FILE *infile; - - infile = fopen(input, "rb"); - if (!infile) { - pr_debug("lzma: fopen failed on %s: '%s'\n", input, strerror(errno)); - return -1; - } ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED); if (ret != LZMA_OK) { 
pr_debug("lzma: lzma_stream_decoder failed %s (%d)\n", lzma_strerror(ret), ret); - goto err_fclose; + return err; } strm.next_in = NULL; @@ -100,11 +93,25 @@ int lzma_decompress_to_file(const char *input, int output_fd) err = 0; err_lzma_end: lzma_end(&strm); -err_fclose: - fclose(infile); return err; } +int lzma_decompress_to_file(const char *input, int output_fd) +{ + FILE *infile; + int ret; + + infile = fopen(input, "rb"); + if (!infile) { + pr_debug("lzma: fopen failed on %s: '%s'\n", input, strerror(errno)); + return -1; + } + + ret = lzma_decompress_stream_to_file(infile, output_fd); + fclose(infile); + return ret; +} + bool lzma_is_compressed(const char *input) { int fd = open(input, O_RDONLY); @@ -113,7 +120,7 @@ bool lzma_is_compressed(const char *input) ssize_t rc; if (fd < 0) - return -1; + return false; rc = read(fd, buf, sizeof(buf)); close(fd); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 8477edefc299..841b711d970e 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -20,6 +20,7 @@ #include "path.h" #include "srcline.h" #include "symbol.h" +#include "synthetic-events.h" #include "sort.h" #include "strlist.h" #include "target.h" @@ -37,6 +38,7 @@ #include <internal/lib.h> // page_size #include "cgroup.h" #include "arm64-frame-pointer-unwind-support.h" +#include <api/io_dir.h> #include <linux/ctype.h> #include <symbol/kallsyms.h> @@ -94,6 +96,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->comm_exec = false; machine->kernel_start = 0; machine->vmlinux_map = NULL; + /* There is no initial context switch in, so we start at 1. */ + machine->parallelism = 1; machine->root_dir = strdup(root_dir); if (machine->root_dir == NULL) @@ -125,26 +129,62 @@ out: return 0; } -struct machine *machine__new_host(void) +static struct machine *__machine__new_host(struct perf_env *host_env, bool kernel_maps) { struct machine *machine = malloc(sizeof(*machine)); - if (machine != NULL) { - machine__init(machine, "", HOST_KERNEL_ID); + if (!machine) + return NULL; - if (machine__create_kernel_maps(machine) < 0) - goto out_delete; + machine__init(machine, "", HOST_KERNEL_ID); + + if (kernel_maps && machine__create_kernel_maps(machine) < 0) { + free(machine); + return NULL; } + machine->env = host_env; + return machine; +} + +struct machine *machine__new_host(struct perf_env *host_env) +{ + return __machine__new_host(host_env, /*kernel_maps=*/true); +} + +static int mmap_handler(const struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + return machine__process_mmap2_event(machine, event, sample); +} + +static int machine__init_live(struct machine *machine, pid_t pid) +{ + union perf_event event; + memset(&event, 0, sizeof(event)); + return perf_event__synthesize_mmap_events(NULL, &event, pid, pid, + mmap_handler, machine, true); +} + +struct machine *machine__new_live(struct perf_env *host_env, bool kernel_maps, pid_t pid) +{ + struct machine *machine = __machine__new_host(host_env, kernel_maps); + + if (!machine) + return NULL; + + if (machine__init_live(machine, pid)) { + machine__delete(machine); + return NULL; + } return machine; -out_delete: - free(machine); - return NULL; } -struct machine *machine__new_kallsyms(void) +struct machine *machine__new_kallsyms(struct perf_env *host_env) { - struct machine *machine = machine__new_host(); + struct machine *machine = machine__new_host(host_env); /* * FIXME: * 1) We should switch to 
machine__load_kallsyms(), i.e. not explicitly @@ -642,8 +682,9 @@ int machine__process_lost_event(struct machine *machine __maybe_unused, int machine__process_lost_samples_event(struct machine *machine __maybe_unused, union perf_event *event, struct perf_sample *sample) { - dump_printf(": id:%" PRIu64 ": lost samples :%" PRI_lu64 "\n", - sample->id, event->lost_samples.lost); + dump_printf(": id:%" PRIu64 ": lost samples :%" PRI_lu64 "%s\n", + sample->id, event->lost_samples.lost, + event->header.misc & PERF_RECORD_MISC_LOST_SAMPLES_BPF ? " (BPF)" : ""); return 0; } @@ -674,8 +715,11 @@ int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unus int machine__process_switch_event(struct machine *machine __maybe_unused, union perf_event *event) { + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; + if (dump_trace) perf_event__fprintf_switch(event, stdout); + machine->parallelism += out ? -1 : 1; return 0; } @@ -709,7 +753,7 @@ static int machine__process_ksymbol_register(struct machine *machine, map__set_start(map, event->ksymbol.addr); map__set_end(map, map__start(map) + event->ksymbol.len); - err = maps__insert(machine__kernel_maps(machine), map); + err = maps__fixup_overlap_and_insert(machine__kernel_maps(machine), map); if (err) { err = -ENOMEM; goto out; @@ -770,6 +814,10 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused, if (dump_trace) perf_event__fprintf_ksymbol(event, stdout); + /* no need to process non-JIT BPF as it cannot get samples */ + if (event->ksymbol.len == 0) + return 0; + if (event->ksymbol.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER) return machine__process_ksymbol_unregister(machine, event, sample); @@ -883,26 +931,6 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, return ret; } -size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) -{ - int i; - size_t printed = 0; - struct dso *kdso = machine__kernel_dso(machine); - - if (dso__has_build_id(kdso)) { - char filename[PATH_MAX]; - - if (dso__build_id_filename(kdso, filename, sizeof(filename), false)) - printed += fprintf(fp, "[0] %s\n", filename); - } - - for (i = 0; i < vmlinux_path__nr_entries; ++i) { - printed += fprintf(fp, "[%d] %s\n", i + dso__has_build_id(kdso), - vmlinux_path[i]); - } - return printed; -} - struct machine_fprintf_cb_args { FILE *fp; size_t printed; @@ -1000,7 +1028,7 @@ static int machine__get_running_kernel_start(struct machine *machine, err = kallsyms__get_symbol_start(filename, "_edata", &addr); if (err) - err = kallsyms__get_function_start(filename, "_etext", &addr); + err = kallsyms__get_symbol_start(filename, "_etext", &addr); if (!err) *end = addr; @@ -1342,34 +1370,31 @@ static int maps__set_module_path(struct maps *maps, const char *path, struct kmo * we need to update the symtab_type if needed. 
*/ if (m->comp && is_kmod_dso(dso)) { - dso__set_symtab_type(dso, dso__symtab_type(dso)); + dso__set_symtab_type(dso, dso__symtab_type(dso)+1); dso__set_comp(dso, m->comp); } map__put(map); return 0; } -static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, int depth) +static int maps__set_modules_path_dir(struct maps *maps, char *path, size_t path_size, int depth) { - struct dirent *dent; - DIR *dir = opendir(dir_name); + struct io_dirent64 *dent; + struct io_dir iod; + size_t root_len = strlen(path); int ret = 0; - if (!dir) { - pr_debug("%s: cannot open %s dir\n", __func__, dir_name); + io_dir__init(&iod, open(path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (iod.dirfd < 0) { + pr_debug("%s: cannot open %s dir\n", __func__, path); return -1; } - - while ((dent = readdir(dir)) != NULL) { - char path[PATH_MAX]; - struct stat st; - - /*sshfs might return bad dent->d_type, so we have to stat*/ - path__join(path, sizeof(path), dir_name, dent->d_name); - if (stat(path, &st)) - continue; - - if (S_ISDIR(st.st_mode)) { + /* Bounds check, should never happen. */ + if (root_len >= path_size) + return -1; + path[root_len++] = '/'; + while ((dent = io_dir__readdir(&iod)) != NULL) { + if (io_dir__is_dir(&iod, dent)) { if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) continue; @@ -1381,7 +1406,12 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i continue; } - ret = maps__set_modules_path_dir(maps, path, depth + 1); + /* Bounds check, should never happen. */ + if (root_len + strlen(dent->d_name) >= path_size) + continue; + + strcpy(path + root_len, dent->d_name); + ret = maps__set_modules_path_dir(maps, path, path_size, depth + 1); if (ret < 0) goto out; } else { @@ -1391,9 +1421,14 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i if (ret) goto out; - if (m.kmod) - ret = maps__set_module_path(maps, path, &m); + if (m.kmod) { + /* Bounds check, should never happen. 
*/ + if (root_len + strlen(dent->d_name) < path_size) { + strcpy(path + root_len, dent->d_name); + ret = maps__set_module_path(maps, path, &m); + } + } zfree(&m.name); if (ret) @@ -1402,7 +1437,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i } out: - closedir(dir); + close(iod.dirfd); return ret; } @@ -1419,7 +1454,8 @@ static int machine__set_modules_path(struct machine *machine) machine->root_dir, version); free(version); - return maps__set_modules_path_dir(machine__kernel_maps(machine), modules_path, 0); + return maps__set_modules_path_dir(machine__kernel_maps(machine), + modules_path, sizeof(modules_path), 0); } int __weak arch__fix_module_text_start(u64 *start __maybe_unused, u64 *size __maybe_unused, @@ -1558,6 +1594,8 @@ int machine__create_kernel_maps(struct machine *machine) } } + maps__fixup_end(machine__kernel_maps(machine)); + out_put: dso__put(kernel); return ret; @@ -1693,21 +1731,21 @@ int machine__process_mmap2_event(struct machine *machine, { struct thread *thread; struct map *map; - struct dso_id dso_id = { - .maj = event->mmap2.maj, - .min = event->mmap2.min, - .ino = event->mmap2.ino, - .ino_generation = event->mmap2.ino_generation, - }; - struct build_id __bid, *bid = NULL; + struct dso_id dso_id = dso_id_empty; int ret = 0; if (dump_trace) perf_event__fprintf_mmap2(event, stdout); if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { - bid = &__bid; - build_id__init(bid, event->mmap2.build_id, event->mmap2.build_id_size); + build_id__init(&dso_id.build_id, event->mmap2.build_id, event->mmap2.build_id_size); + } else { + dso_id.maj = event->mmap2.maj; + dso_id.min = event->mmap2.min; + dso_id.ino = event->mmap2.ino; + dso_id.ino_generation = event->mmap2.ino_generation; + dso_id.mmap2_valid = true; + dso_id.mmap2_ino_generation_valid = true; } if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL || @@ -1719,7 +1757,7 @@ int machine__process_mmap2_event(struct machine *machine, }; strlcpy(xm.name, event->mmap2.filename, KMAP_NAME_LEN); - ret = machine__process_kernel_mmap_event(machine, &xm, bid); + ret = machine__process_kernel_mmap_event(machine, &xm, &dso_id.build_id); if (ret < 0) goto out_problem; return 0; @@ -1733,7 +1771,7 @@ int machine__process_mmap2_event(struct machine *machine, map = map__new(machine, event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, &dso_id, event->mmap2.prot, - event->mmap2.flags, bid, + event->mmap2.flags, event->mmap2.filename, thread); if (map == NULL) @@ -1791,8 +1829,8 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event prot = PROT_EXEC; map = map__new(machine, event->mmap.start, - event->mmap.len, event->mmap.pgoff, - NULL, prot, 0, NULL, event->mmap.filename, thread); + event->mmap.len, event->mmap.pgoff, + &dso_id_empty, prot, /*flags=*/0, event->mmap.filename, thread); if (map == NULL) goto out_problem_map; @@ -1895,6 +1933,8 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event if (dump_trace) perf_event__fprintf_task(event, stdout); + /* There is no context switch out before exit, so we decrement here. 
*/ + machine->parallelism--; if (thread != NULL) { if (symbol_conf.keep_exited_threads) thread__set_exited(thread, /*exited=*/true); @@ -1971,7 +2011,7 @@ static void ip__resolve_ams(struct thread *thread, * Thus, we have to try consecutively until we find a match * or else, the symbol is unknown */ - thread__find_cpumode_addr_location(thread, ip, &al); + thread__find_cpumode_addr_location(thread, ip, /*symbols=*/true, &al); ams->addr = ip; ams->al_addr = al.addr; @@ -2059,7 +2099,8 @@ static int add_callchain_ip(struct thread *thread, bool branch, struct branch_flags *flags, struct iterations *iter, - u64 branch_from) + u64 branch_from, + bool symbols) { struct map_symbol ms = {}; struct addr_location al; @@ -2072,7 +2113,7 @@ static int add_callchain_ip(struct thread *thread, al.sym = NULL; al.srcline = NULL; if (!cpumode) { - thread__find_cpumode_addr_location(thread, ip, &al); + thread__find_cpumode_addr_location(thread, ip, symbols, &al); } else { if (ip >= PERF_CONTEXT_MAX) { switch (ip) { @@ -2083,6 +2124,7 @@ static int add_callchain_ip(struct thread *thread, *cpumode = PERF_RECORD_MISC_KERNEL; break; case PERF_CONTEXT_USER: + case PERF_CONTEXT_USER_DEFERRED: *cpumode = PERF_RECORD_MISC_USER; break; default: @@ -2098,7 +2140,10 @@ static int add_callchain_ip(struct thread *thread, } goto out; } - thread__find_symbol(thread, *cpumode, ip, &al); + if (symbols) + thread__find_symbol(thread, *cpumode, ip, &al); + else + thread__find_map(thread, *cpumode, ip, &al); } if (al.sym != NULL) { @@ -2141,6 +2186,7 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, unsigned int i; const struct branch_stack *bs = sample->branch_stack; struct branch_entry *entries = perf_sample__branch_entries(sample); + u64 *branch_stack_cntr = sample->branch_stack_cntr; struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info)); if (!bi) @@ -2150,6 +2196,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, ip__resolve_ams(al->thread, &bi[i].to, entries[i].to); ip__resolve_ams(al->thread, &bi[i].from, entries[i].from); bi[i].flags = entries[i].flags; + if (branch_stack_cntr) + bi[i].branch_stack_cntr = branch_stack_cntr[i]; } return bi; } @@ -2224,7 +2272,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread, struct symbol **parent, struct addr_location *root_al, u64 branch_from, - bool callee, int end) + bool callee, int end, + bool symbols) { struct ip_callchain *chain = sample->callchain; u8 cpumode = PERF_RECORD_MISC_USER; @@ -2234,7 +2283,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread, for (i = 0; i < end + 1; i++) { err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, chain->ips[i], - false, NULL, NULL, branch_from); + false, NULL, NULL, branch_from, + symbols); if (err) return err; } @@ -2244,7 +2294,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread, for (i = end; i >= 0; i--) { err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, chain->ips[i], - false, NULL, NULL, branch_from); + false, NULL, NULL, branch_from, + symbols); if (err) return err; } @@ -2270,8 +2321,12 @@ static void save_lbr_cursor_node(struct thread *thread, cursor->curr = cursor->first; else cursor->curr = cursor->curr->next; + + map_symbol__exit(&lbr_stitch->prev_lbr_cursor[idx].ms); memcpy(&lbr_stitch->prev_lbr_cursor[idx], cursor->curr, sizeof(struct callchain_cursor_node)); + lbr_stitch->prev_lbr_cursor[idx].ms.maps = maps__get(cursor->curr->ms.maps); + lbr_stitch->prev_lbr_cursor[idx].ms.map = 
map__get(cursor->curr->ms.map); lbr_stitch->prev_lbr_cursor[idx].valid = true; cursor->pos++; @@ -2283,7 +2338,8 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread, struct symbol **parent, struct addr_location *root_al, u64 *branch_from, - bool callee) + bool callee, + bool symbols) { struct branch_stack *lbr_stack = sample->branch_stack; struct branch_entry *entries = perf_sample__branch_entries(sample); @@ -2316,7 +2372,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread, err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, true, flags, NULL, - *branch_from); + *branch_from, symbols); if (err) return err; @@ -2341,7 +2397,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread, err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, true, flags, NULL, - *branch_from); + *branch_from, symbols); if (err) return err; save_lbr_cursor_node(thread, cursor, i); @@ -2356,7 +2412,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread, err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, true, flags, NULL, - *branch_from); + *branch_from, symbols); if (err) return err; save_lbr_cursor_node(thread, cursor, i); @@ -2370,7 +2426,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread, err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, true, flags, NULL, - *branch_from); + *branch_from, symbols); if (err) return err; } @@ -2482,6 +2538,9 @@ static bool has_stitched_lbr(struct thread *thread, memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i], sizeof(struct callchain_cursor_node)); + stitch_node->cursor.ms.maps = maps__get(lbr_stitch->prev_lbr_cursor[i].ms.maps); + stitch_node->cursor.ms.map = map__get(lbr_stitch->prev_lbr_cursor[i].ms.map); + if (callee) list_add(&stitch_node->node, &lbr_stitch->lists); else @@ -2505,6 +2564,8 @@ static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr) if (!thread__lbr_stitch(thread)->prev_lbr_cursor) goto free_lbr_stitch; + thread__lbr_stitch(thread)->prev_lbr_cursor_size = max_lbr + 1; + INIT_LIST_HEAD(&thread__lbr_stitch(thread)->lists); INIT_LIST_HEAD(&thread__lbr_stitch(thread)->free_lists); @@ -2532,7 +2593,8 @@ static int resolve_lbr_callchain_sample(struct thread *thread, struct symbol **parent, struct addr_location *root_al, int max_stack, - unsigned int max_lbr) + unsigned int max_lbr, + bool symbols) { bool callee = (callchain_param.order == ORDER_CALLEE); struct ip_callchain *chain = sample->callchain; @@ -2560,8 +2622,12 @@ static int resolve_lbr_callchain_sample(struct thread *thread, max_lbr, callee); if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) { - list_replace_init(&lbr_stitch->lists, - &lbr_stitch->free_lists); + struct stitch_list *stitch_node; + + list_for_each_entry(stitch_node, &lbr_stitch->lists, node) + map_symbol__exit(&stitch_node->cursor.ms); + + list_splice_init(&lbr_stitch->lists, &lbr_stitch->free_lists); } memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample)); } @@ -2570,12 +2636,12 @@ static int resolve_lbr_callchain_sample(struct thread *thread, /* Add kernel ip */ err = lbr_callchain_add_kernel_ip(thread, cursor, sample, parent, root_al, branch_from, - true, i); + true, i, symbols); if (err) goto error; err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent, - root_al, &branch_from, true); + root_al, &branch_from, true, symbols); if (err) goto error; @@ -2592,14 +2658,14 @@ static int resolve_lbr_callchain_sample(struct thread *thread, goto error; } err = 
lbr_callchain_add_lbr_ip(thread, cursor, sample, parent, - root_al, &branch_from, false); + root_al, &branch_from, false, symbols); if (err) goto error; /* Add kernel ip */ err = lbr_callchain_add_kernel_ip(thread, cursor, sample, parent, root_al, branch_from, - false, i); + false, i, symbols); if (err) goto error; } @@ -2613,7 +2679,7 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, struct callchain_cursor *cursor, struct symbol **parent, struct addr_location *root_al, - u8 *cpumode, int ent) + u8 *cpumode, int ent, bool symbols) { int err = 0; @@ -2623,7 +2689,7 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, if (ip >= PERF_CONTEXT_MAX) { err = add_callchain_ip(thread, cursor, parent, root_al, cpumode, ip, - false, NULL, NULL, 0); + false, NULL, NULL, 0, symbols); break; } } @@ -2645,7 +2711,8 @@ static int thread__resolve_callchain_sample(struct thread *thread, struct perf_sample *sample, struct symbol **parent, struct addr_location *root_al, - int max_stack) + int max_stack, + bool symbols) { struct branch_stack *branch = sample->branch_stack; struct branch_entry *entries = perf_sample__branch_entries(sample); @@ -2665,7 +2732,8 @@ static int thread__resolve_callchain_sample(struct thread *thread, err = resolve_lbr_callchain_sample(thread, cursor, sample, parent, root_al, max_stack, - !env ? 0 : env->max_branches); + !env ? 0 : env->max_branches, + symbols); if (err) return (err < 0) ? err : 0; } @@ -2730,13 +2798,14 @@ static int thread__resolve_callchain_sample(struct thread *thread, root_al, NULL, be[i].to, true, &be[i].flags, - NULL, be[i].from); + NULL, be[i].from, symbols); - if (!err) + if (!err) { err = add_callchain_ip(thread, cursor, parent, root_al, NULL, be[i].from, true, &be[i].flags, - &iter[i], 0); + &iter[i], 0, symbols); + } if (err == -EINVAL) break; if (err) @@ -2752,7 +2821,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, check_calls: if (chain && callchain_param.order != ORDER_CALLEE) { err = find_prev_cpumode(chain, thread, cursor, parent, root_al, - &cpumode, chain->nr - first_call); + &cpumode, chain->nr - first_call, symbols); if (err) return (err < 0) ? err : 0; } @@ -2774,7 +2843,7 @@ check_calls: ++nr_entries; else if (callchain_param.order != ORDER_CALLEE) { err = find_prev_cpumode(chain, thread, cursor, parent, - root_al, &cpumode, j); + root_al, &cpumode, j, symbols); if (err) return (err < 0) ? err : 0; continue; @@ -2801,8 +2870,8 @@ check_calls: if (leaf_frame_caller && leaf_frame_caller != ip) { err = add_callchain_ip(thread, cursor, parent, - root_al, &cpumode, leaf_frame_caller, - false, NULL, NULL, 0); + root_al, &cpumode, leaf_frame_caller, + false, NULL, NULL, 0, symbols); if (err) return (err < 0) ? err : 0; } @@ -2810,7 +2879,7 @@ check_calls: err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, - false, NULL, NULL, 0); + false, NULL, NULL, 0, symbols); if (err) return (err < 0) ? err : 0; @@ -2890,7 +2959,7 @@ static int thread__resolve_callchain_unwind(struct thread *thread, struct callchain_cursor *cursor, struct evsel *evsel, struct perf_sample *sample, - int max_stack) + int max_stack, bool symbols) { /* Can we do dwarf post unwind? */ if (!((evsel->core.attr.sample_type & PERF_SAMPLE_REGS_USER) && @@ -2898,21 +2967,25 @@ static int thread__resolve_callchain_unwind(struct thread *thread, return 0; /* Bail out if nothing was captured. 
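	 * (For reference: DWARF post-unwind needs samples recorded with both
	 * PERF_SAMPLE_REGS_USER and PERF_SAMPLE_STACK_USER, e.g. via
	 * `perf record --call-graph dwarf`; without them the sample carries
	 * no register set or stack copy to unwind from.)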
*/ - if ((!sample->user_regs.regs) || - (!sample->user_stack.size)) + if (!sample->user_regs || !sample->user_regs->regs || + !sample->user_stack.size) return 0; + if (!symbols) + pr_debug("Not resolving symbols with an unwinder isn't currently supported\n"); + return unwind__get_entries(unwind_entry, cursor, thread, sample, max_stack, false); } -int thread__resolve_callchain(struct thread *thread, - struct callchain_cursor *cursor, - struct evsel *evsel, - struct perf_sample *sample, - struct symbol **parent, - struct addr_location *root_al, - int max_stack) +int __thread__resolve_callchain(struct thread *thread, + struct callchain_cursor *cursor, + struct evsel *evsel, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack, + bool symbols) { int ret = 0; @@ -2925,22 +2998,22 @@ int thread__resolve_callchain(struct thread *thread, ret = thread__resolve_callchain_sample(thread, cursor, evsel, sample, parent, root_al, - max_stack); + max_stack, symbols); if (ret) return ret; ret = thread__resolve_callchain_unwind(thread, cursor, evsel, sample, - max_stack); + max_stack, symbols); } else { ret = thread__resolve_callchain_unwind(thread, cursor, evsel, sample, - max_stack); + max_stack, symbols); if (ret) return ret; ret = thread__resolve_callchain_sample(thread, cursor, evsel, sample, parent, root_al, - max_stack); + max_stack, symbols); } return ret; @@ -3112,14 +3185,15 @@ out: return addr_cpumode; } -struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id) +struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, + const struct dso_id *id) { return dsos__findnew_id(&machine->dsos, filename, id); } struct dso *machine__findnew_dso(struct machine *machine, const char *filename) { - return machine__findnew_dso_id(machine, filename, NULL); + return machine__findnew_dso_id(machine, filename, &dso_id_empty); } char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 82a47bac8023..22a42c5825fa 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -50,6 +50,12 @@ struct machine { u64 text_start; u64 text_end; } sched, lock, traceiter, trace; + /* + * The current parallelism level (number of threads that run on CPUs). + * This value can be less than 1, or larger than the total number + * of CPUs, if events are poorly ordered. 
+ */ + int parallelism; pid_t *current_tid; size_t current_tid_sz; union { /* Tool specific area */ @@ -163,8 +169,9 @@ struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid); void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size); void machines__set_comm_exec(struct machines *machines, bool comm_exec); -struct machine *machine__new_host(void); -struct machine *machine__new_kallsyms(void); +struct machine *machine__new_host(struct perf_env *host_env); +struct machine *machine__new_kallsyms(struct perf_env *host_env); +struct machine *machine__new_live(struct perf_env *host_env, bool kernel_maps, pid_t pid); int machine__init(struct machine *machine, const char *root_dir, pid_t pid); void machine__exit(struct machine *machine); void machine__delete_threads(struct machine *machine); @@ -178,13 +185,32 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, struct callchain_cursor; -int thread__resolve_callchain(struct thread *thread, - struct callchain_cursor *cursor, - struct evsel *evsel, - struct perf_sample *sample, - struct symbol **parent, - struct addr_location *root_al, - int max_stack); +int __thread__resolve_callchain(struct thread *thread, + struct callchain_cursor *cursor, + struct evsel *evsel, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack, + bool symbols); + +static inline int thread__resolve_callchain(struct thread *thread, + struct callchain_cursor *cursor, + struct evsel *evsel, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack) +{ + return __thread__resolve_callchain(thread, + cursor, + evsel, + sample, + parent, + root_al, + max_stack, + /*symbols=*/true); +} /* * Default guest kernel is defined by parameter --guestkallsyms @@ -207,7 +233,8 @@ int machine__nr_cpus_avail(struct machine *machine); struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); -struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id); +struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, + const struct dso_id *id); struct dso *machine__findnew_dso(struct machine *machine, const char *filename); size_t machine__fprintf(struct machine *machine, FILE *fp); @@ -246,8 +273,6 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid); int machines__create_guest_kernel_maps(struct machines *machines); void machines__destroy_kernel_maps(struct machines *machines); -size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); - typedef int (*machine__dso_t)(struct dso *dso, struct machine *machine, void *priv); int machine__for_each_dso(struct machine *machine, machine__dso_t fn, diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index e1d14936a60d..41cdddc987ee 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -102,21 +102,26 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) return false; } -void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso) +static void map__init(struct map *map, u64 start, u64 end, u64 pgoff, + struct dso *dso, u32 prot, u32 flags) { map__set_start(map, start); map__set_end(map, end); map__set_pgoff(map, pgoff); - map__set_reloc(map, 0); + assert(map__reloc(map) == 0); map__set_dso(map, dso__get(dso)); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - map__set_erange_warned(map, false); 
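Stepping out of the map.c hunk for a moment, the machine__new_*() signature change above shifts perf_env ownership to the caller. A minimal usage sketch, assuming perf's existing perf_env__init()/perf_env__exit() helpers; the call sequence is illustrative, not lifted from a real perf tool:

#include "env.h"	/* i.e. tools/perf/util/env.h */
#include "machine.h"

/* Illustrative: build a host machine for symbol resolution, then tear down. */
static int with_host_machine(void)
{
	struct perf_env host_env;
	struct machine *machine;

	perf_env__init(&host_env);
	machine = machine__new_host(&host_env); /* previously took no argument */
	if (machine == NULL) {
		perf_env__exit(&host_env);
		return -1;
	}
	/* ... machine__findnew_thread(), thread__resolve_callchain(), ... */
	machine__delete(machine);
	perf_env__exit(&host_env);
	return 0;
}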
refcount_set(map__refcnt(map), 1); + RC_CHK_ACCESS(map)->prot = prot; + RC_CHK_ACCESS(map)->flags = flags; + map__set_mapping_type(map, MAPPING_TYPE__DSO); + assert(map__erange_warned(map) == false); + assert(map__priv(map) == false); + assert(map__hit(map) == false); } struct map *map__new(struct machine *machine, u64 start, u64 len, - u64 pgoff, struct dso_id *id, - u32 prot, u32 flags, struct build_id *bid, + u64 pgoff, const struct dso_id *id, + u32 prot, u32 flags, char *filename, struct thread *thread) { struct map *result; @@ -124,18 +129,16 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, struct nsinfo *nsi = NULL; struct nsinfo *nnsi; - map = malloc(sizeof(*map)); + map = zalloc(sizeof(*map)); if (ADD_RC_CHK(result, map)) { char newfilename[PATH_MAX]; - struct dso *dso, *header_bid_dso; + struct dso *dso; int anon, no_dso, vdso, android; android = is_android_lib(filename); anon = is_anon_memory(filename) || flags & MAP_HUGETLB; vdso = is_vdso_map(filename); no_dso = is_no_dso_memory(filename); - map->prot = prot; - map->flags = flags; nsi = nsinfo__get(thread__nsinfo(thread)); if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) { @@ -169,7 +172,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, goto out_delete; assert(!dso__kernel(dso)); - map__init(result, start, start + len, pgoff, dso); + map__init(result, start, start + len, pgoff, dso, prot, flags); if (anon || no_dso) { map->mapping_type = MAPPING_TYPE__IDENTITY; @@ -186,16 +189,15 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, dso__set_nsinfo(dso, nsi); mutex_unlock(dso__lock(dso)); - if (build_id__is_defined(bid)) { - dso__set_build_id(dso, bid); - } else { + if (!build_id__is_defined(&id->build_id)) { /* * If the mmap event had no build ID, search for an existing dso from the * build ID header by name. Otherwise only the dso loaded at the time of * reading the header will have the build ID set and all future mmaps will * have it missing. */ - header_bid_dso = dsos__find(&machine->dsos, filename, false); + struct dso *header_bid_dso = dsos__find(&machine->dsos, filename, false); + if (header_bid_dso && dso__header_build_id(header_bid_dso)) { dso__set_build_id(dso, dso__bid(header_bid_dso)); dso__set_header_build_id(dso, 1); @@ -223,10 +225,8 @@ struct map *map__new2(u64 start, struct dso *dso) map = calloc(1, sizeof(*map) + (dso__kernel(dso) ? sizeof(struct kmap) : 0)); if (ADD_RC_CHK(result, map)) { - /* - * ->end will be filled after we load all the symbols - */ - map__init(result, start, 0, 0, dso); + /* ->end will be filled after we load all the symbols. */ + map__init(result, start, /*end=*/0, /*pgoff=*/0, dso, /*prot=*/0, /*flags=*/0); } return result; @@ -353,7 +353,7 @@ int map__load(struct map *map) if (dso__has_build_id(dso)) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); pr_debug("%s with build id %s not found", name, sbuild_id); } else pr_debug("Failed to open %s", name); @@ -513,6 +513,8 @@ void srccode_state_free(struct srccode_state *state) state->line = 0; } +static const struct kmap *__map__const_kmap(const struct map *map); + /** * map__rip_2objdump - convert symbol start address to objdump address. 
* @map: memory map @@ -524,9 +526,9 @@ void srccode_state_free(struct srccode_state *state) * * Return: Address suitable for passing to "objdump --start-address=" */ -u64 map__rip_2objdump(struct map *map, u64 rip) +u64 map__rip_2objdump(const struct map *map, u64 rip) { - struct kmap *kmap = __map__kmap(map); + const struct kmap *kmap = __map__const_kmap(map); const struct dso *dso = map__dso(map); /* @@ -569,7 +571,7 @@ u64 map__rip_2objdump(struct map *map, u64 rip) * * Return: Memory address. */ -u64 map__objdump_2mem(struct map *map, u64 ip) +u64 map__objdump_2mem(const struct map *map, u64 ip) { const struct dso *dso = map__dso(map); @@ -586,7 +588,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip) } /* convert objdump address to relative address. (To be removed) */ -u64 map__objdump_2rip(struct map *map, u64 ip) +u64 map__objdump_2rip(const struct map *map, u64 ip) { const struct dso *dso = map__dso(map); @@ -618,6 +620,15 @@ struct kmap *__map__kmap(struct map *map) return (struct kmap *)(&RC_CHK_ACCESS(map)[1]); } +static const struct kmap *__map__const_kmap(const struct map *map) +{ + const struct dso *dso = map__dso(map); + + if (!dso || !dso__kernel(dso)) + return NULL; + return (struct kmap *)(&RC_CHK_ACCESS(map)[1]); +} + struct kmap *map__kmap(struct map *map) { struct kmap *kmap = __map__kmap(map); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 65e2609fa1b1..979b3e11b9bc 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -35,6 +35,7 @@ DECLARE_RC_STRUCT(map) { enum mapping_type mapping_type:8; bool erange_warned; bool priv; + bool hit; }; struct kmap; @@ -83,6 +84,11 @@ static inline bool map__priv(const struct map *map) return RC_CHK_ACCESS(map)->priv; } +static inline bool map__hit(const struct map *map) +{ + return RC_CHK_ACCESS(map)->hit; +} + static inline refcount_t *map__refcnt(struct map *map) { return &RC_CHK_ACCESS(map)->refcnt; @@ -127,13 +133,13 @@ static inline u64 map__unmap_ip(const struct map *map, u64 ip_or_rip) } /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */ -u64 map__rip_2objdump(struct map *map, u64 rip); +u64 map__rip_2objdump(const struct map *map, u64 rip); /* objdump address -> memory address */ -u64 map__objdump_2mem(struct map *map, u64 ip); +u64 map__objdump_2mem(const struct map *map, u64 ip); /* objdump address -> rip */ -u64 map__objdump_2rip(struct map *map, u64 ip); +u64 map__objdump_2rip(const struct map *map, u64 ip); struct symbol; struct thread; @@ -166,15 +172,11 @@ struct thread; #define map__for_each_symbol_by_name(map, sym_name, pos, idx) \ __map__for_each_symbol_by_name(map, sym_name, (pos), idx) -void map__init(struct map *map, - u64 start, u64 end, u64 pgoff, struct dso *dso); - struct dso_id; -struct build_id; struct map *map__new(struct machine *machine, u64 start, u64 len, - u64 pgoff, struct dso_id *id, u32 prot, u32 flags, - struct build_id *bid, char *filename, struct thread *thread); + u64 pgoff, const struct dso_id *id, u32 prot, u32 flags, + char *filename, struct thread *thread); struct map *map__new2(u64 start, struct dso *dso); void map__delete(struct map *map); struct map *map__clone(struct map *map); @@ -285,14 +287,19 @@ static inline void map__set_reloc(struct map *map, u64 reloc) RC_CHK_ACCESS(map)->reloc = reloc; } -static inline void map__set_priv(struct map *map, int priv) +static inline void map__set_priv(struct map *map) +{ + RC_CHK_ACCESS(map)->priv = true; +} + +static inline void map__set_hit(struct map *map) { - RC_CHK_ACCESS(map)->priv = priv; + 
RC_CHK_ACCESS(map)->hit = true; } -static inline void map__set_erange_warned(struct map *map, bool erange_warned) +static inline void map__set_erange_warned(struct map *map) { - RC_CHK_ACCESS(map)->erange_warned = erange_warned; + RC_CHK_ACCESS(map)->erange_warned = true; } static inline void map__set_dso(struct map *map, struct dso *dso) diff --git a/tools/perf/util/map_symbol.c b/tools/perf/util/map_symbol.c index bef5079f2403..6ad2960bc289 100644 --- a/tools/perf/util/map_symbol.c +++ b/tools/perf/util/map_symbol.c @@ -13,3 +13,21 @@ void addr_map_symbol__exit(struct addr_map_symbol *ams) { map_symbol__exit(&ams->ms); } + +void map_symbol__copy(struct map_symbol *dst, struct map_symbol *src) +{ + dst->maps = maps__get(src->maps); + dst->map = map__get(src->map); + dst->sym = src->sym; +} + +void addr_map_symbol__copy(struct addr_map_symbol *dst, struct addr_map_symbol *src) +{ + map_symbol__copy(&dst->ms, &src->ms); + + dst->addr = src->addr; + dst->al_addr = src->al_addr; + dst->al_level = src->al_level; + dst->phys_addr = src->phys_addr; + dst->data_page_size = src->data_page_size; +} diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h index 72d5ed938ed6..e370bb32ed47 100644 --- a/tools/perf/util/map_symbol.h +++ b/tools/perf/util/map_symbol.h @@ -26,4 +26,7 @@ struct addr_map_symbol { void map_symbol__exit(struct map_symbol *ms); void addr_map_symbol__exit(struct addr_map_symbol *ams); +void map_symbol__copy(struct map_symbol *dst, struct map_symbol *src); +void addr_map_symbol__copy(struct addr_map_symbol *dst, struct addr_map_symbol *src); + #endif // __PERF_MAP_SYMBOL diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 16b39db594f4..c321d4f4d846 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -428,11 +428,29 @@ static unsigned int maps__by_name_index(const struct maps *maps, const struct ma return -1; } +static void map__set_kmap_maps(struct map *map, struct maps *maps) +{ + struct dso *dso; + + if (map == NULL) + return; + + dso = map__dso(map); + + if (dso && dso__kernel(dso)) { + struct kmap *kmap = map__kmap(map); + + if (kmap) + kmap->kmaps = maps; + else + pr_err("Internal error: kernel dso with non kernel map\n"); + } +} + static int __maps__insert(struct maps *maps, struct map *new) { struct map **maps_by_address = maps__maps_by_address(maps); struct map **maps_by_name = maps__maps_by_name(maps); - const struct dso *dso = map__dso(new); unsigned int nr_maps = maps__nr_maps(maps); unsigned int nr_allocate = RC_CHK_ACCESS(maps)->nr_maps_allocated; @@ -459,6 +477,7 @@ static int __maps__insert(struct maps *maps, struct map *new) } /* Insert the value at the end. 
*/
maps_by_address[nr_maps] = map__get(new);
+	map__set_kmap_maps(new, maps);
if (maps_by_name)
maps_by_name[nr_maps] = map__get(new);
@@ -483,14 +502,7 @@ static int __maps__insert(struct maps *maps, struct map *new)
}
if (map__end(new) < map__start(new))
RC_CHK_ACCESS(maps)->ends_broken = true;
-	if (dso && dso__kernel(dso)) {
-		struct kmap *kmap = map__kmap(new);
-		if (kmap)
-			kmap->kmaps = maps;
-		else
-			pr_err("Internal error: kernel dso with non kernel map\n");
-	}
return 0;
}
@@ -735,26 +747,83 @@ static unsigned int first_ending_after(struct maps *maps, const struct map *map)
return first;
}
+static int __maps__insert_sorted(struct maps *maps, unsigned int first_after_index,
+				 struct map *new1, struct map *new2)
+{
+	struct map **maps_by_address = maps__maps_by_address(maps);
+	struct map **maps_by_name = maps__maps_by_name(maps);
+	unsigned int nr_maps = maps__nr_maps(maps);
+	unsigned int nr_allocate = RC_CHK_ACCESS(maps)->nr_maps_allocated;
+	unsigned int to_add = new2 ? 2 : 1;
+
+	assert(maps__maps_by_address_sorted(maps));
+	assert(first_after_index == nr_maps ||
+	       map__end(new1) <= map__start(maps_by_address[first_after_index]));
+	assert(!new2 || map__end(new1) <= map__start(new2));
+	assert(first_after_index == nr_maps || !new2 ||
+	       map__end(new2) <= map__start(maps_by_address[first_after_index]));
+
+	if (nr_maps + to_add > nr_allocate) {
+		nr_allocate = !nr_allocate ? 32 : nr_allocate * 2;
+
+		maps_by_address = realloc(maps_by_address, nr_allocate * sizeof(new1));
+		if (!maps_by_address)
+			return -ENOMEM;
+
+		maps__set_maps_by_address(maps, maps_by_address);
+		if (maps_by_name) {
+			maps_by_name = realloc(maps_by_name, nr_allocate * sizeof(new1));
+			if (!maps_by_name) {
+				/*
+				 * If by name fails, just disable by name and it will
+				 * recompute next time it is required.
+				 */
+				__maps__free_maps_by_name(maps);
+			}
+			maps__set_maps_by_name(maps, maps_by_name);
+		}
+		RC_CHK_ACCESS(maps)->nr_maps_allocated = nr_allocate;
+	}
+	memmove(&maps_by_address[first_after_index+to_add],
+		&maps_by_address[first_after_index],
+		(nr_maps - first_after_index) * sizeof(new1));
+	maps_by_address[first_after_index] = map__get(new1);
+	if (maps_by_name)
+		maps_by_name[nr_maps] = map__get(new1);
+	if (new2) {
+		maps_by_address[first_after_index + 1] = map__get(new2);
+		if (maps_by_name)
+			maps_by_name[nr_maps + 1] = map__get(new2);
+	}
+	RC_CHK_ACCESS(maps)->nr_maps = nr_maps + to_add;
+	maps__set_maps_by_name_sorted(maps, false);
+	map__set_kmap_maps(new1, maps);
+	map__set_kmap_maps(new2, maps);
+
+	check_invariants(maps);
+	return 0;
+}
+
/*
 * Adds 'new' to 'maps'. If 'new' overlaps existing entries, the existing maps
 * are adjusted or removed so that 'new' fits without overlapping any entries.
 */
static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new)
{
-	struct map **maps_by_address;
int err = 0;
FILE *fp = debug_file();
+	unsigned int i, ni = INT_MAX; // Some GCC versions warn 'ni' may be used uninitialized, but its use depends on maps_by_name.

-sort_again:
if (!maps__maps_by_address_sorted(maps))
__maps__sort_by_address(maps);
-	maps_by_address = maps__maps_by_address(maps);
/*
 * Iterate through entries where the end of the existing entry is
 * greater than the new map's start.
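	 *
	 * A sketch of the overlap cases handled below for an existing
	 * entry 'pos' and the incoming 'new' (addresses grow rightwards):
	 *   new: |--------|  pos:   |--|      new covers pos  -> drop 'pos'
	 *   new:     |----|  pos: |----|      head survives   -> keep 'before'
	 *   new: |----|      pos:   |----|    tail survives   -> keep 'after'
	 *   new:   |--|      pos: |------|    'pos' is split  -> 'before' + 'after'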
*/
-	for (unsigned int i = first_ending_after(maps, new); i < maps__nr_maps(maps); ) {
+	for (i = first_ending_after(maps, new); i < maps__nr_maps(maps); ) {
+		struct map **maps_by_address = maps__maps_by_address(maps);
+		struct map **maps_by_name = maps__maps_by_name(maps);
		struct map *pos = maps_by_address[i];
		struct map *before = NULL, *after = NULL;
@@ -774,6 +843,9 @@ sort_again:
			map__fprintf(pos, fp);
		}
+		if (maps_by_name)
+			ni = maps__by_name_index(maps, pos);
+
		/*
		 * Now check if we need to create new maps for areas not
		 * overlapped by the new map:
@@ -818,40 +890,97 @@ sort_again:
		if (before) {
			map__put(maps_by_address[i]);
			maps_by_address[i] = before;
+			map__set_kmap_maps(before, maps);
+
+			if (maps_by_name) {
+				map__put(maps_by_name[ni]);
+				maps_by_name[ni] = map__get(before);
+			}
+
			/* Maps are still ordered, go to next one. */
			i++;
			if (after) {
-				__maps__insert(maps, after);
-				map__put(after);
-				if (!maps__maps_by_address_sorted(maps)) {
-					/*
-					 * Sorting broken so invariants don't
-					 * hold, sort and go again.
-					 */
-					goto sort_again;
-				}
				/*
-				 * Maps are still ordered, skip after and go to
-				 * next one (terminate loop).
+				 * 'before' and 'after' mean 'new' split the
+				 * 'pos' mapping and therefore there are no
+				 * later mappings.
				 */
-				i++;
+				err = __maps__insert_sorted(maps, i, new, after);
+				map__put(after);
+				check_invariants(maps);
+				return err;
			}
+			check_invariants(maps);
		} else if (after) {
+			/*
+			 * 'after' means 'new' split 'pos' and there are no
+			 * later mappings.
+			 */
			map__put(maps_by_address[i]);
-			maps_by_address[i] = after;
-			/* Maps are ordered, go to next one. */
-			i++;
+			maps_by_address[i] = map__get(new);
+			map__set_kmap_maps(new, maps);
+
+			if (maps_by_name) {
+				map__put(maps_by_name[ni]);
+				maps_by_name[ni] = map__get(new);
+			}
+
+			err = __maps__insert_sorted(maps, i + 1, after, NULL);
+			map__put(after);
+			check_invariants(maps);
+			return err;
		} else {
+			struct map *next = NULL;
+			unsigned int nr_maps = maps__nr_maps(maps);
+
+			if (i + 1 < nr_maps)
+				next = maps_by_address[i + 1];
+
+			if (!next || map__start(next) >= map__end(new)) {
+				/*
+				 * Replace the existing mapping and finish,
+				 * knowing there are no later overlapping
+				 * mappings (or no later mappings at all).
+				 */
+				map__put(maps_by_address[i]);
+				maps_by_address[i] = map__get(new);
+				map__set_kmap_maps(new, maps);
+
+				if (maps_by_name) {
+					map__put(maps_by_name[ni]);
+					maps_by_name[ni] = map__get(new);
+				}
+
+				check_invariants(maps);
+				return err;
+			}
+			/*
+			 * 'new' fully covers 'pos', the previous mapping, so
+			 * remove it. The following is an inlined version of
+			 * maps__remove that reuses the already computed
+			 * indices.
+			 */
+			map__put(maps_by_address[i]);
+			memmove(&maps_by_address[i],
+				&maps_by_address[i + 1],
+				(nr_maps - i - 1) * sizeof(*maps_by_address));
+
+			if (maps_by_name) {
+				map__put(maps_by_name[ni]);
+				memmove(&maps_by_name[ni],
					&maps_by_name[ni + 1],
+					(nr_maps - ni - 1) * sizeof(*maps_by_name));
+			}
+			--RC_CHK_ACCESS(maps)->nr_maps;
+			check_invariants(maps);
			/*
			 * Maps are ordered but no need to increase `i` as the
			 * later maps were moved down.
			 */
		}
-		check_invariants(maps);
	}
	/* Add the map.
*/ - __maps__insert(maps, new); + err = __maps__insert_sorted(maps, i, new, NULL); out_err: return err; } @@ -908,6 +1037,7 @@ int maps__copy_from(struct maps *dest, struct maps *parent) err = unwind__prepare_access(dest, new, NULL); if (!err) { dest_maps_by_address[i] = new; + map__set_kmap_maps(new, dest); if (dest_maps_by_name) dest_maps_by_name[i] = map__get(new); RC_CHK_ACCESS(dest)->nr_maps = i + 1; @@ -971,10 +1101,13 @@ struct map *maps__find(struct maps *maps, u64 ip) while (!done) { down_read(maps__lock(maps)); if (maps__maps_by_address_sorted(maps)) { - struct map **mapp = - bsearch(&ip, maps__maps_by_address(maps), maps__nr_maps(maps), - sizeof(*mapp), map__addr_cmp); + struct map **mapp = NULL; + struct map **maps_by_address = maps__maps_by_address(maps); + unsigned int nr_maps = maps__nr_maps(maps); + if (maps_by_address && nr_maps) + mapp = bsearch(&ip, maps_by_address, nr_maps, sizeof(*mapp), + map__addr_cmp); if (mapp) result = map__get(*mapp); done = true; @@ -1065,8 +1198,13 @@ struct map *maps__find_next_entry(struct maps *maps, struct map *map) struct map *result = NULL; down_read(maps__lock(maps)); + while (!maps__maps_by_address_sorted(maps)) { + up_read(maps__lock(maps)); + maps__sort_by_address(maps); + down_read(maps__lock(maps)); + } i = maps__by_address_index(maps, map); - if (i < maps__nr_maps(maps)) + if (++i < maps__nr_maps(maps)) result = map__get(maps__maps_by_address(maps)[i]); up_read(maps__lock(maps)); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 6dda47bb774f..0b49fce251fc 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -8,6 +8,7 @@ #include <unistd.h> #include <api/fs/fs.h> #include <linux/kernel.h> +#include "cpumap.h" #include "map_symbol.h" #include "mem-events.h" #include "mem-info.h" @@ -28,8 +29,7 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { }; #undef E -static char mem_loads_name[100]; -static char mem_stores_name[100]; +bool perf_mem_record[PERF_MEM_EVENTS__MAX] = { 0 }; struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i) { @@ -78,7 +78,8 @@ int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu) return num; } -static const char *perf_pmu__mem_events_name(int i, struct perf_pmu *pmu) +static const char *perf_pmu__mem_events_name(struct perf_pmu *pmu, int i, + char *buf, size_t buf_size) { struct perf_mem_event *e; @@ -86,38 +87,38 @@ static const char *perf_pmu__mem_events_name(int i, struct perf_pmu *pmu) return NULL; e = &pmu->mem_events[i]; - if (!e) + if (!e || !e->name) return NULL; if (i == PERF_MEM_EVENTS__LOAD || i == PERF_MEM_EVENTS__LOAD_STORE) { if (e->ldlat) { if (!e->aux_event) { /* ARM and Most of Intel */ - scnprintf(mem_loads_name, sizeof(mem_loads_name), + scnprintf(buf, buf_size, e->name, pmu->name, perf_mem_events__loads_ldlat); } else { /* Intel with mem-loads-aux event */ - scnprintf(mem_loads_name, sizeof(mem_loads_name), + scnprintf(buf, buf_size, e->name, pmu->name, pmu->name, perf_mem_events__loads_ldlat); } } else { if (!e->aux_event) { /* AMD and POWER */ - scnprintf(mem_loads_name, sizeof(mem_loads_name), + scnprintf(buf, buf_size, e->name, pmu->name); - } else + } else { return NULL; + } } - - return mem_loads_name; + return buf; } if (i == PERF_MEM_EVENTS__STORE) { - scnprintf(mem_stores_name, sizeof(mem_stores_name), + scnprintf(buf, buf_size, e->name, pmu->name); - return mem_stores_name; + return buf; } return NULL; @@ -162,7 +163,7 @@ int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char 
*str) continue; if (strstr(e->tag, tok)) - e->record = found = true; + perf_mem_record[j] = found = true; } tok = strtok_r(NULL, ",", &saveptr); @@ -186,12 +187,12 @@ static bool perf_pmu__mem_events_supported(const char *mnt, struct perf_pmu *pmu if (!e->event_name) return true; - scnprintf(path, PATH_MAX, "%s/devices/%s/events/%s", mnt, pmu->name, e->event_name); + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/events/%s", mnt, pmu->name, e->event_name); return !stat(path, &st); } -int perf_pmu__mem_events_init(struct perf_pmu *pmu) +static int __perf_pmu__mem_events_init(struct perf_pmu *pmu) { const char *mnt = sysfs__mount(); bool found = false; @@ -218,58 +219,104 @@ int perf_pmu__mem_events_init(struct perf_pmu *pmu) return found ? 0 : -ENOENT; } +int perf_pmu__mem_events_init(void) +{ + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) { + if (__perf_pmu__mem_events_init(pmu)) + return -ENOENT; + } + + return 0; +} + void perf_pmu__mem_events_list(struct perf_pmu *pmu) { int j; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + char buf[128]; struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j); fprintf(stderr, "%-*s%-*s%s", e->tag ? 13 : 0, e->tag ? : "", e->tag && verbose > 0 ? 25 : 0, - e->tag && verbose > 0 ? perf_pmu__mem_events_name(j, pmu) : "", + e->tag && verbose > 0 + ? perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf)) + : "", e->supported ? ": available\n" : ""); } } -int perf_mem_events__record_args(const char **rec_argv, int *argv_nr) +int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **event_name_storage_out) { const char *mnt = sysfs__mount(); struct perf_pmu *pmu = NULL; - struct perf_mem_event *e; int i = *argv_nr; - const char *s; - char *copy; + struct perf_cpu_map *cpu_map = NULL; + size_t event_name_storage_size = + perf_pmu__mem_events_num_mem_pmus(NULL) * PERF_MEM_EVENTS__MAX * 128; + size_t event_name_storage_remaining = event_name_storage_size; + char *event_name_storage = malloc(event_name_storage_size); + char *event_name_storage_ptr = event_name_storage; + + if (!event_name_storage) + return -ENOMEM; + *event_name_storage_out = NULL; while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) { for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - e = perf_pmu__mem_events_ptr(pmu, j); + const char *s; + struct perf_mem_event *e = perf_pmu__mem_events_ptr(pmu, j); + int ret; - if (!e->record) + if (!perf_mem_record[j]) continue; if (!e->supported) { + char buf[128]; + pr_err("failed: event '%s' not supported\n", - perf_pmu__mem_events_name(j, pmu)); + perf_pmu__mem_events_name(pmu, j, buf, sizeof(buf))); + free(event_name_storage); return -1; } - s = perf_pmu__mem_events_name(j, pmu); + s = perf_pmu__mem_events_name(pmu, j, event_name_storage_ptr, + event_name_storage_remaining); if (!s || !perf_pmu__mem_events_supported(mnt, pmu, e)) continue; - copy = strdup(s); - if (!copy) - return -1; - rec_argv[i++] = "-e"; - rec_argv[i++] = copy; + rec_argv[i++] = event_name_storage_ptr; + event_name_storage_remaining -= strlen(event_name_storage_ptr) + 1; + event_name_storage_ptr += strlen(event_name_storage_ptr) + 1; + + ret = perf_cpu_map__merge(&cpu_map, pmu->cpus); + if (ret < 0) { + free(event_name_storage); + return ret; + } + } + } + + if (cpu_map) { + struct perf_cpu_map *online = cpu_map__online(); + + if (!perf_cpu_map__equal(cpu_map, online)) { + char buf[200]; + + cpu_map__snprint(cpu_map, buf, sizeof(buf)); + pr_warning("Memory events are enabled on a subset of CPUs: %s\n", buf); } + 
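One note on the reworked perf_mem_events__record_args() above before the function closes out: the "-e" strings placed in rec_argv now alias a single backing allocation returned through event_name_storage_out, which the caller owns. A hedged caller fragment (the array size and control flow are illustrative; compare the real callers such as builtin-mem.c and builtin-c2c.c):

	const char *rec_argv[64];	/* hypothetical capacity */
	int argv_nr = 0;
	char *event_name_storage = NULL;

	if (perf_mem_events__record_args(rec_argv, &argv_nr, &event_name_storage) < 0)
		return -1;

	/* ... run "perf record" with rec_argv[0..argv_nr - 1] ... */

	/* One free() releases every event name at once. */
	free(event_name_storage);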
perf_cpu_map__put(online); + perf_cpu_map__put(cpu_map); } *argv_nr = i; + *event_name_storage_out = event_name_storage; return 0; } @@ -338,6 +385,12 @@ static const char * const mem_lvl[] = { }; static const char * const mem_lvlnum[] = { + [PERF_MEM_LVLNUM_L1] = "L1", + [PERF_MEM_LVLNUM_L2] = "L2", + [PERF_MEM_LVLNUM_L3] = "L3", + [PERF_MEM_LVLNUM_L4] = "L4", + [PERF_MEM_LVLNUM_L2_MHB] = "L2 MHB", + [PERF_MEM_LVLNUM_MSC] = "Memory-side Cache", [PERF_MEM_LVLNUM_UNC] = "Uncached", [PERF_MEM_LVLNUM_CXL] = "CXL", [PERF_MEM_LVLNUM_IO] = "I/O", @@ -420,7 +473,7 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_inf if (mem_lvlnum[lvl]) l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]); else - l += scnprintf(out + l, sz - l, "L%d", lvl); + l += scnprintf(out + l, sz - l, "Unknown level %d", lvl); l += scnprintf(out + l, sz - l, " %s", hit_miss); return l; @@ -627,7 +680,10 @@ do { \ if (lvl & P(LVL, LFB)) stats->ld_fbhit++; if (lvl & P(LVL, L1 )) stats->ld_l1hit++; if (lvl & P(LVL, L2)) { - stats->ld_l2hit++; + if (snoop & P(SNOOP, HITM)) + HITM_INC(lcl_hitm); + else + stats->ld_l2hit++; if (snoopx & P(SNOOPX, PEER)) PEER_INC(lcl_peer); @@ -746,3 +802,181 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add) stats->nomap += add->nomap; stats->noparse += add->noparse; } + +/* + * It returns an index in hist_entry->mem_stat array for the given val which + * represents a data-src based on the mem_stat_type. + */ +int mem_stat_index(const enum mem_stat_type mst, const u64 val) +{ + union perf_mem_data_src src = { + .val = val, + }; + + switch (mst) { + case PERF_MEM_STAT_OP: + switch (src.mem_op) { + case PERF_MEM_OP_LOAD: + return MEM_STAT_OP_LOAD; + case PERF_MEM_OP_STORE: + return MEM_STAT_OP_STORE; + case PERF_MEM_OP_LOAD | PERF_MEM_OP_STORE: + return MEM_STAT_OP_LDST; + default: + if (src.mem_op & PERF_MEM_OP_PFETCH) + return MEM_STAT_OP_PFETCH; + if (src.mem_op & PERF_MEM_OP_EXEC) + return MEM_STAT_OP_EXEC; + return MEM_STAT_OP_OTHER; + } + case PERF_MEM_STAT_CACHE: + switch (src.mem_lvl_num) { + case PERF_MEM_LVLNUM_L1: + return MEM_STAT_CACHE_L1; + case PERF_MEM_LVLNUM_L2: + return MEM_STAT_CACHE_L2; + case PERF_MEM_LVLNUM_L3: + return MEM_STAT_CACHE_L3; + case PERF_MEM_LVLNUM_L4: + return MEM_STAT_CACHE_L4; + case PERF_MEM_LVLNUM_LFB: + return MEM_STAT_CACHE_L1_BUF; + case PERF_MEM_LVLNUM_L2_MHB: + return MEM_STAT_CACHE_L2_BUF; + default: + return MEM_STAT_CACHE_OTHER; + } + case PERF_MEM_STAT_MEMORY: + switch (src.mem_lvl_num) { + case PERF_MEM_LVLNUM_MSC: + return MEM_STAT_MEMORY_MSC; + case PERF_MEM_LVLNUM_RAM: + return MEM_STAT_MEMORY_RAM; + case PERF_MEM_LVLNUM_UNC: + return MEM_STAT_MEMORY_UNC; + case PERF_MEM_LVLNUM_CXL: + return MEM_STAT_MEMORY_CXL; + case PERF_MEM_LVLNUM_IO: + return MEM_STAT_MEMORY_IO; + case PERF_MEM_LVLNUM_PMEM: + return MEM_STAT_MEMORY_PMEM; + default: + return MEM_STAT_MEMORY_OTHER; + } + case PERF_MEM_STAT_SNOOP: + switch (src.mem_snoop) { + case PERF_MEM_SNOOP_HIT: + return MEM_STAT_SNOOP_HIT; + case PERF_MEM_SNOOP_HITM: + return MEM_STAT_SNOOP_HITM; + case PERF_MEM_SNOOP_MISS: + return MEM_STAT_SNOOP_MISS; + default: + return MEM_STAT_SNOOP_OTHER; + } + case PERF_MEM_STAT_DTLB: + switch (src.mem_dtlb) { + case PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT: + return MEM_STAT_DTLB_L1_HIT; + case PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT: + return MEM_STAT_DTLB_L2_HIT; + case PERF_MEM_TLB_L1 | PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT: + return MEM_STAT_DTLB_ANY_HIT; + default: + if (src.mem_dtlb & PERF_MEM_TLB_MISS) + return 
MEM_STAT_DTLB_MISS; + return MEM_STAT_DTLB_OTHER; + } + default: + break; + } + return -1; +} + +/* To align output, returned string should be shorter than MEM_STAT_PRINT_LEN */ +const char *mem_stat_name(const enum mem_stat_type mst, const int idx) +{ + switch (mst) { + case PERF_MEM_STAT_OP: + switch (idx) { + case MEM_STAT_OP_LOAD: + return "Load"; + case MEM_STAT_OP_STORE: + return "Store"; + case MEM_STAT_OP_LDST: + return "Ld+St"; + case MEM_STAT_OP_PFETCH: + return "Pfetch"; + case MEM_STAT_OP_EXEC: + return "Exec"; + case MEM_STAT_OP_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_CACHE: + switch (idx) { + case MEM_STAT_CACHE_L1: + return "L1"; + case MEM_STAT_CACHE_L2: + return "L2"; + case MEM_STAT_CACHE_L3: + return "L3"; + case MEM_STAT_CACHE_L4: + return "L4"; + case MEM_STAT_CACHE_L1_BUF: + return "L1-buf"; + case MEM_STAT_CACHE_L2_BUF: + return "L2-buf"; + case MEM_STAT_CACHE_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_MEMORY: + switch (idx) { + case MEM_STAT_MEMORY_RAM: + return "RAM"; + case MEM_STAT_MEMORY_MSC: + return "MSC"; + case MEM_STAT_MEMORY_UNC: + return "Uncach"; + case MEM_STAT_MEMORY_CXL: + return "CXL"; + case MEM_STAT_MEMORY_IO: + return "IO"; + case MEM_STAT_MEMORY_PMEM: + return "PMEM"; + case MEM_STAT_MEMORY_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_SNOOP: + switch (idx) { + case MEM_STAT_SNOOP_HIT: + return "Hit"; + case MEM_STAT_SNOOP_HITM: + return "HitM"; + case MEM_STAT_SNOOP_MISS: + return "Miss"; + case MEM_STAT_SNOOP_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_DTLB: + switch (idx) { + case MEM_STAT_DTLB_L1_HIT: + return "L1-Hit"; + case MEM_STAT_DTLB_L2_HIT: + return "L2-Hit"; + case MEM_STAT_DTLB_ANY_HIT: + return "L?-Hit"; + case MEM_STAT_DTLB_MISS: + return "Miss"; + case MEM_STAT_DTLB_OTHER: + default: + return "Other"; + } + default: + break; + } + return "N/A"; +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index ca31014d7934..5b98076904b0 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -6,7 +6,6 @@ #include <linux/types.h> struct perf_mem_event { - bool record; bool supported; bool ldlat; u32 aux_event; @@ -28,9 +27,10 @@ struct perf_pmu; extern unsigned int perf_mem_events__loads_ldlat; extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX]; +extern bool perf_mem_record[PERF_MEM_EVENTS__MAX]; int perf_pmu__mem_events_parse(struct perf_pmu *pmu, const char *str); -int perf_pmu__mem_events_init(struct perf_pmu *pmu); +int perf_pmu__mem_events_init(void); struct perf_mem_event *perf_pmu__mem_events_ptr(struct perf_pmu *pmu, int i); struct perf_pmu *perf_mem_events_find_pmu(void); @@ -38,7 +38,8 @@ int perf_pmu__mem_events_num_mem_pmus(struct perf_pmu *pmu); bool is_mem_loads_aux_event(struct evsel *leader); void perf_pmu__mem_events_list(struct perf_pmu *pmu); -int perf_mem_events__record_args(const char **rec_argv, int *argv_nr); +int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, + char **event_name_storage_out); int perf_mem__tlb_scnprintf(char *out, size_t sz, const struct mem_info *mem_info); int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_info); @@ -88,4 +89,61 @@ struct hist_entry; int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi); void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add); +enum mem_stat_type { + PERF_MEM_STAT_OP, + PERF_MEM_STAT_CACHE, + PERF_MEM_STAT_MEMORY, + PERF_MEM_STAT_SNOOP, + PERF_MEM_STAT_DTLB, +}; + 
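+/*
+ * A minimal usage sketch, assuming a hypothetical caller with a parsed
+ * sample in hand (none of this is part of the patch): mem_stat_index()
+ * turns a sample's data_src value into a bucket of the per-category enums
+ * declared below:
+ *
+ *	u64 counts[MEM_STAT_CACHE_OTHER + 1] = { 0 };
+ *	int idx = mem_stat_index(PERF_MEM_STAT_CACHE, sample->data_src);
+ *
+ *	if (idx >= 0)
+ *		counts[idx]++;
+ *
+ * Encodings the switch does not recognize land in the *_OTHER bucket of
+ * their category, and an unknown mem_stat_type yields -1.
+ */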
+#define MEM_STAT_PRINT_LEN 7 /* 1 space + 5 digits + 1 percent sign */ + +enum mem_stat_op { + MEM_STAT_OP_LOAD, + MEM_STAT_OP_STORE, + MEM_STAT_OP_LDST, + MEM_STAT_OP_PFETCH, + MEM_STAT_OP_EXEC, + MEM_STAT_OP_OTHER, +}; + +enum mem_stat_cache { + MEM_STAT_CACHE_L1, + MEM_STAT_CACHE_L2, + MEM_STAT_CACHE_L3, + MEM_STAT_CACHE_L4, + MEM_STAT_CACHE_L1_BUF, + MEM_STAT_CACHE_L2_BUF, + MEM_STAT_CACHE_OTHER, +}; + +enum mem_stat_memory { + MEM_STAT_MEMORY_RAM, + MEM_STAT_MEMORY_MSC, + MEM_STAT_MEMORY_UNC, + MEM_STAT_MEMORY_CXL, + MEM_STAT_MEMORY_IO, + MEM_STAT_MEMORY_PMEM, + MEM_STAT_MEMORY_OTHER, +}; + +enum mem_stat_snoop { + MEM_STAT_SNOOP_HIT, + MEM_STAT_SNOOP_HITM, + MEM_STAT_SNOOP_MISS, + MEM_STAT_SNOOP_OTHER, +}; + +enum mem_stat_dtlb { + MEM_STAT_DTLB_L1_HIT, + MEM_STAT_DTLB_L2_HIT, + MEM_STAT_DTLB_ANY_HIT, + MEM_STAT_DTLB_MISS, + MEM_STAT_DTLB_OTHER, +}; + +int mem_stat_index(const enum mem_stat_type mst, const u64 data_src); +const char *mem_stat_name(const enum mem_stat_type mst, const int idx); + #endif /* __PERF_MEM_EVENTS_H */ diff --git a/tools/perf/util/mem-info.c b/tools/perf/util/mem-info.c index 27d67721a695..d3efa9c139f2 100644 --- a/tools/perf/util/mem-info.c +++ b/tools/perf/util/mem-info.c @@ -33,3 +33,16 @@ struct mem_info *mem_info__new(void) return result; } + +struct mem_info *mem_info__clone(struct mem_info *mi) +{ + struct mem_info *result = mem_info__new(); + + if (result) { + addr_map_symbol__copy(mem_info__iaddr(result), mem_info__iaddr(mi)); + addr_map_symbol__copy(mem_info__daddr(result), mem_info__daddr(mi)); + mem_info__data_src(result)->val = mem_info__data_src(mi)->val; + } + + return result; +} diff --git a/tools/perf/util/mem-info.h b/tools/perf/util/mem-info.h index 0f68e29f311b..df75e94ed3d0 100644 --- a/tools/perf/util/mem-info.h +++ b/tools/perf/util/mem-info.h @@ -15,6 +15,7 @@ DECLARE_RC_STRUCT(mem_info) { }; struct mem_info *mem_info__new(void); +struct mem_info *mem_info__clone(struct mem_info *mi); struct mem_info *mem_info__get(struct mem_info *mi); void mem_info__put(struct mem_info *mi); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 69f6a46402c3..25c75fdbfc52 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -14,6 +14,7 @@ #include "pmus.h" #include "print-events.h" #include "smt.h" +#include "tool_pmu.h" #include "expr.h" #include "rblist.h" #include <string.h> @@ -102,7 +103,7 @@ static void metric_event_delete(struct rblist *rblist __maybe_unused, free(me); } -static void metricgroup__rblist_init(struct rblist *metric_events) +void metricgroup__rblist_init(struct rblist *metric_events) { rblist__init(metric_events); metric_events->node_cmp = metric_event_cmp; @@ -151,6 +152,8 @@ struct metric { * Should events of the metric be grouped? */ bool group_events; + /** Show events even if in the Default metric group. */ + bool default_show_events; /** * Parsed events for the metric. 
Optional as events may be taken from a * different metric whose group contains all the IDs necessary for this @@ -178,7 +181,7 @@ static void metric__watchdog_constraint_hint(const char *name, bool foot) " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); } -static bool metric__group_events(const struct pmu_metric *pm) +static bool metric__group_events(const struct pmu_metric *pm, bool metric_no_threshold) { switch (pm->event_grouping) { case MetricNoGroupEvents: @@ -190,6 +193,13 @@ static bool metric__group_events(const struct pmu_metric *pm) return false; case MetricNoGroupEventsSmt: return !smt_on(); + case MetricNoGroupEventsThresholdAndNmi: + if (metric_no_threshold) + return true; + if (!sysctl__nmi_watchdog_enabled()) + return true; + metric__watchdog_constraint_hint(pm->metric_name, /*foot=*/false); + return false; case MetricGroupEvents: default: return true; @@ -211,6 +221,7 @@ static void metric__free(struct metric *m) static struct metric *metric__new(const struct pmu_metric *pm, const char *modifier, bool metric_no_group, + bool metric_no_threshold, int runtime, const char *user_requested_cpu_list, bool system_wide) @@ -245,7 +256,8 @@ static struct metric *metric__new(const struct pmu_metric *pm, } m->pctx->sctx.runtime = runtime; m->pctx->sctx.system_wide = system_wide; - m->group_events = !metric_no_group && metric__group_events(pm); + m->group_events = !metric_no_group && metric__group_events(pm, metric_no_threshold); + m->default_show_events = pm->default_show_events; m->metric_refs = NULL; m->evlist = NULL; @@ -297,8 +309,8 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids, struct expr_id_data *val_ptr; /* Don't match events for the wrong hybrid PMU. */ - if (!all_pmus && ev->pmu_name && evsel__is_hybrid(ev) && - strcmp(ev->pmu_name, pmu)) + if (!all_pmus && ev->pmu && evsel__is_hybrid(ev) && + strcmp(ev->pmu->name, pmu)) continue; /* * Check for duplicate events with the same name. For @@ -352,7 +364,7 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids, return 0; } -static bool match_metric(const char *metric_or_groups, const char *sought) +static bool match_metric_or_groups(const char *metric_or_groups, const char *sought) { int len; char *m; @@ -368,117 +380,19 @@ static bool match_metric(const char *metric_or_groups, const char *sought) (metric_or_groups[len] == 0 || metric_or_groups[len] == ';')) return true; m = strchr(metric_or_groups, ';'); - return m && match_metric(m + 1, sought); + return m && match_metric_or_groups(m + 1, sought); } -static bool match_pm_metric(const struct pmu_metric *pm, const char *pmu, const char *metric) +static bool match_pm_metric_or_groups(const struct pmu_metric *pm, const char *pmu, + const char *metric_or_groups) { const char *pm_pmu = pm->pmu ?: "cpu"; if (strcmp(pmu, "all") && strcmp(pm_pmu, pmu)) return false; - return match_metric(pm->metric_group, metric) || - match_metric(pm->metric_name, metric); -} - -/** struct mep - RB-tree node for building printing information. */ -struct mep { - /** nd - RB-tree element. */ - struct rb_node nd; - /** @metric_group: Owned metric group name, separated others with ';'. 
*/ - char *metric_group; - const char *metric_name; - const char *metric_desc; - const char *metric_long_desc; - const char *metric_expr; - const char *metric_threshold; - const char *metric_unit; -}; - -static int mep_cmp(struct rb_node *rb_node, const void *entry) -{ - struct mep *a = container_of(rb_node, struct mep, nd); - struct mep *b = (struct mep *)entry; - int ret; - - ret = strcmp(a->metric_group, b->metric_group); - if (ret) - return ret; - - return strcmp(a->metric_name, b->metric_name); -} - -static struct rb_node *mep_new(struct rblist *rl __maybe_unused, const void *entry) -{ - struct mep *me = malloc(sizeof(struct mep)); - - if (!me) - return NULL; - - memcpy(me, entry, sizeof(struct mep)); - return &me->nd; -} - -static void mep_delete(struct rblist *rl __maybe_unused, - struct rb_node *nd) -{ - struct mep *me = container_of(nd, struct mep, nd); - - zfree(&me->metric_group); - free(me); -} - -static struct mep *mep_lookup(struct rblist *groups, const char *metric_group, - const char *metric_name) -{ - struct rb_node *nd; - struct mep me = { - .metric_group = strdup(metric_group), - .metric_name = metric_name, - }; - nd = rblist__find(groups, &me); - if (nd) { - free(me.metric_group); - return container_of(nd, struct mep, nd); - } - rblist__add_node(groups, &me); - nd = rblist__find(groups, &me); - if (nd) - return container_of(nd, struct mep, nd); - return NULL; -} - -static int metricgroup__add_to_mep_groups(const struct pmu_metric *pm, - struct rblist *groups) -{ - const char *g; - char *omg, *mg; - - mg = strdup(pm->metric_group ?: pm->metric_name); - if (!mg) - return -ENOMEM; - omg = mg; - while ((g = strsep(&mg, ";")) != NULL) { - struct mep *me; - - g = skip_spaces(g); - if (strlen(g)) - me = mep_lookup(groups, g, pm->metric_name); - else - me = mep_lookup(groups, pm->metric_name, pm->metric_name); - - if (me) { - me->metric_desc = pm->desc; - me->metric_long_desc = pm->long_desc; - me->metric_expr = pm->metric_expr; - me->metric_threshold = pm->metric_threshold; - me->metric_unit = pm->unit; - } - } - free(omg); - - return 0; + return match_metric_or_groups(pm->metric_group, metric_or_groups) || + match_metric_or_groups(pm->metric_name, metric_or_groups); } struct metricgroup_iter_data { @@ -506,53 +420,30 @@ static int metricgroup__sys_event_iter(const struct pmu_metric *pm, return 0; } -static int metricgroup__add_to_mep_groups_callback(const struct pmu_metric *pm, - const struct pmu_metrics_table *table __maybe_unused, - void *vdata) +int metricgroup__for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn, + void *data) { - struct rblist *groups = vdata; + struct metricgroup_iter_data sys_data = { + .fn = fn, + .data = data, + }; + const struct pmu_metrics_table *tables[2] = { + table, + pmu_metrics_table__default(), + }; - return metricgroup__add_to_mep_groups(pm, groups); -} + for (size_t i = 0; i < ARRAY_SIZE(tables); i++) { + int ret; -void metricgroup__print(const struct print_callbacks *print_cb, void *print_state) -{ - struct rblist groups; - const struct pmu_metrics_table *table; - struct rb_node *node, *next; - - rblist__init(&groups); - groups.node_new = mep_new; - groups.node_cmp = mep_cmp; - groups.node_delete = mep_delete; - table = pmu_metrics_table__find(); - if (table) { - pmu_metrics_table__for_each_metric(table, - metricgroup__add_to_mep_groups_callback, - &groups); - } - { - struct metricgroup_iter_data data = { - .fn = metricgroup__add_to_mep_groups_callback, - .data = &groups, - }; - 
pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data); - } + if (!tables[i]) + continue; - for (node = rb_first_cached(&groups.entries); node; node = next) { - struct mep *me = container_of(node, struct mep, nd); - - print_cb->print_metric(print_state, - me->metric_group, - me->metric_name, - me->metric_desc, - me->metric_long_desc, - me->metric_expr, - me->metric_threshold, - me->metric_unit); - next = rb_next(node); - rblist__remove_node(&groups, node); + ret = pmu_metrics_table__for_each_metric(tables[i], fn, data); + if (ret) + return ret; } + + return pmu_for_each_sys_metric(metricgroup__sys_event_iter, &sys_data); } static const char *code_characters = ",-=@"; @@ -673,20 +564,20 @@ static int metricgroup__build_event_string(struct strbuf *events, struct hashmap_entry *cur; size_t bkt; bool no_group = true, has_tool_events = false; - bool tool_events[PERF_TOOL_MAX] = {false}; + bool tool_events[TOOL_PMU__EVENT_MAX] = {false}; int ret = 0; #define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0) hashmap__for_each_entry(ctx->ids, cur, bkt) { const char *sep, *rsep, *id = cur->pkey; - enum perf_tool_event ev; + enum tool_pmu_event ev; pr_debug("found event %s\n", id); /* Always move tool events outside of the group. */ - ev = perf_tool_event__from_str(id); - if (ev != PERF_TOOL_NONE) { + ev = tool_pmu__str_to_event(id); + if (ev != TOOL_PMU__EVENT_NONE) { has_tool_events = true; tool_events[ev] = true; continue; @@ -754,14 +645,14 @@ static int metricgroup__build_event_string(struct strbuf *events, if (has_tool_events) { int i; - perf_tool_event__for_each_event(i) { + tool_pmu__for_each_event(i) { if (tool_events[i]) { if (!no_group) { ret = strbuf_addch(events, ','); RETURN_IF_NON_ZERO(ret); } no_group = false; - ret = strbuf_addstr(events, perf_tool_event__to_str(i)); + ret = strbuf_addstr(events, tool_pmu__event_to_str(i)); RETURN_IF_NON_ZERO(ret); } } @@ -801,11 +692,6 @@ struct metricgroup_add_iter_data { const struct pmu_metrics_table *table; }; -static bool metricgroup__find_metric(const char *pmu, - const char *metric, - const struct pmu_metrics_table *table, - struct pmu_metric *pm); - static int add_metric(struct list_head *metric_list, const struct pmu_metric *pm, const char *modifier, @@ -817,6 +703,16 @@ static int add_metric(struct list_head *metric_list, const struct visited_metric *visited, const struct pmu_metrics_table *table); +static int metricgroup__find_metric_callback(const struct pmu_metric *pm, + const struct pmu_metrics_table *table __maybe_unused, + void *vdata) +{ + struct pmu_metric *copied_pm = vdata; + + memcpy(copied_pm, pm, sizeof(*pm)); + return 0; +} + /** * resolve_metric - Locate metrics within the root metric and recursively add * references to them. @@ -837,7 +733,7 @@ static int add_metric(struct list_head *metric_list, * architecture perf is running upon. 
*/ static int resolve_metric(struct list_head *metric_list, - const char *pmu, + struct perf_pmu *pmu, const char *modifier, bool metric_no_group, bool metric_no_threshold, @@ -867,7 +763,9 @@ static int resolve_metric(struct list_head *metric_list, hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) { struct pmu_metric pm; - if (metricgroup__find_metric(pmu, cur->pkey, table, &pm)) { + if (pmu_metrics_table__find_metric(table, pmu, cur->pkey, + metricgroup__find_metric_callback, + &pm) != PMU_METRICS__NOT_FOUND) { pending = realloc(pending, (pending_cnt + 1) * sizeof(struct to_resolve)); if (!pending) @@ -952,8 +850,8 @@ static int __add_metric(struct list_head *metric_list, * This metric is the root of a tree and may reference other * metrics that are added recursively. */ - root_metric = metric__new(pm, modifier, metric_no_group, runtime, - user_requested_cpu_list, system_wide); + root_metric = metric__new(pm, modifier, metric_no_group, metric_no_threshold, + runtime, user_requested_cpu_list, system_wide); if (!root_metric) return -ENOMEM; @@ -1018,7 +916,12 @@ static int __add_metric(struct list_head *metric_list, } if (!ret) { /* Resolve referenced metrics. */ - const char *pmu = pm->pmu ?: "cpu"; + struct perf_pmu *pmu; + + if (pm->pmu && pm->pmu[0] != '\0') + pmu = perf_pmus__find(pm->pmu); + else + pmu = perf_pmus__scan_core(/*pmu=*/ NULL); ret = resolve_metric(metric_list, pmu, modifier, metric_no_group, metric_no_threshold, user_requested_cpu_list, @@ -1035,44 +938,6 @@ static int __add_metric(struct list_head *metric_list, return ret; } -struct metricgroup__find_metric_data { - const char *pmu; - const char *metric; - struct pmu_metric *pm; -}; - -static int metricgroup__find_metric_callback(const struct pmu_metric *pm, - const struct pmu_metrics_table *table __maybe_unused, - void *vdata) -{ - struct metricgroup__find_metric_data *data = vdata; - const char *pm_pmu = pm->pmu ?: "cpu"; - - if (strcmp(data->pmu, "all") && strcmp(pm_pmu, data->pmu)) - return 0; - - if (!match_metric(pm->metric_name, data->metric)) - return 0; - - memcpy(data->pm, pm, sizeof(*pm)); - return 1; -} - -static bool metricgroup__find_metric(const char *pmu, - const char *metric, - const struct pmu_metrics_table *table, - struct pmu_metric *pm) -{ - struct metricgroup__find_metric_data data = { - .pmu = pmu, - .metric = metric, - .pm = pm, - }; - - return pmu_metrics_table__for_each_metric(table, metricgroup__find_metric_callback, &data) - ? true : false; -} - static int add_metric(struct list_head *metric_list, const struct pmu_metric *pm, const char *modifier, @@ -1111,29 +976,6 @@ static int add_metric(struct list_head *metric_list, return ret; } -static int metricgroup__add_metric_sys_event_iter(const struct pmu_metric *pm, - const struct pmu_metrics_table *table __maybe_unused, - void *data) -{ - struct metricgroup_add_iter_data *d = data; - int ret; - - if (!match_pm_metric(pm, d->pmu, d->metric_name)) - return 0; - - ret = add_metric(d->metric_list, pm, d->modifier, d->metric_no_group, - d->metric_no_threshold, d->user_requested_cpu_list, - d->system_wide, d->root_metric, d->visited, d->table); - if (ret) - goto out; - - *(d->has_match) = true; - -out: - *(d->ret) = ret; - return ret; -} - /** * metric_list_cmp - list_sort comparator that sorts metrics with more events to * the front. tool events are excluded from the count. 
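A minimal sketch of the comparator shape the comment above describes, assuming a hypothetical helper metric_event_count() that stands in for the hashmap__size() minus tool-event accounting shown in the next hunk:

	static int metric_list_cmp_sketch(void *priv __maybe_unused,
					  const struct list_head *l,
					  const struct list_head *r)
	{
		const struct metric *left = container_of(l, struct metric, nd);
		const struct metric *right = container_of(r, struct metric, nd);

		/* More (non-tool) events sort first so larger groups form early. */
		return metric_event_count(right) - metric_event_count(left);
	}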
@@ -1147,14 +989,14 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, int i, left_count, right_count; left_count = hashmap__size(left->pctx->ids); - perf_tool_event__for_each_event(i) { - if (!expr__get_id(left->pctx, perf_tool_event__to_str(i), &data)) + tool_pmu__for_each_event(i) { + if (!expr__get_id(left->pctx, tool_pmu__event_to_str(i), &data)) left_count--; } right_count = hashmap__size(right->pctx->ids); - perf_tool_event__for_each_event(i) { - if (!expr__get_id(right->pctx, perf_tool_event__to_str(i), &data)) + tool_pmu__for_each_event(i) { + if (!expr__get_id(right->pctx, tool_pmu__event_to_str(i), &data)) right_count--; } @@ -1199,9 +1041,9 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm, struct metricgroup__add_metric_data *data = vdata; int ret = 0; - if (pm->metric_expr && match_pm_metric(pm, data->pmu, data->metric_name)) { + if (pm->metric_expr && match_pm_metric_or_groups(pm, data->pmu, data->metric_name)) { bool metric_no_group = data->metric_no_group || - match_metric(pm->metricgroup_no_group, data->metric_name); + match_metric_or_groups(pm->metricgroup_no_group, data->metric_name); data->has_match = true; ret = add_metric(data->list, pm, data->modifier, metric_no_group, @@ -1237,55 +1079,26 @@ static int metricgroup__add_metric(const char *pmu, const char *metric_name, con { LIST_HEAD(list); int ret; - bool has_match = false; - - { - struct metricgroup__add_metric_data data = { - .list = &list, - .pmu = pmu, - .metric_name = metric_name, - .modifier = modifier, - .metric_no_group = metric_no_group, - .metric_no_threshold = metric_no_threshold, - .user_requested_cpu_list = user_requested_cpu_list, - .system_wide = system_wide, - .has_match = false, - }; - /* - * Iterate over all metrics seeing if metric matches either the - * name or group. When it does add the metric to the list. - */ - ret = pmu_metrics_table__for_each_metric(table, metricgroup__add_metric_callback, - &data); - if (ret) - goto out; + struct metricgroup__add_metric_data data = { + .list = &list, + .pmu = pmu, + .metric_name = metric_name, + .modifier = modifier, + .metric_no_group = metric_no_group, + .metric_no_threshold = metric_no_threshold, + .user_requested_cpu_list = user_requested_cpu_list, + .system_wide = system_wide, + .has_match = false, + }; - has_match = data.has_match; - } - { - struct metricgroup_iter_data data = { - .fn = metricgroup__add_metric_sys_event_iter, - .data = (void *) &(struct metricgroup_add_iter_data) { - .metric_list = &list, - .pmu = pmu, - .metric_name = metric_name, - .modifier = modifier, - .metric_no_group = metric_no_group, - .user_requested_cpu_list = user_requested_cpu_list, - .system_wide = system_wide, - .has_match = &has_match, - .ret = &ret, - .table = table, - }, - }; - - pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data); - } - /* End of pmu events. */ - if (!has_match) + /* + * Iterate over all metrics seeing if metric matches either the + * name or group. When it does add the metric to the list. + */ + ret = metricgroup__for_each_metric(table, metricgroup__add_metric_callback, &data); + if (!ret && !data.has_match) ret = -EINVAL; -out: /* * add to metric_list so that they can be released * even if it's failed @@ -1374,18 +1187,18 @@ static void metricgroup__free_metrics(struct list_head *metric_list) * to true if tool event is found. 
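 * (For example, a metric whose expression references duration_time marks
 * that event's slot; as noted earlier, such tool events are later placed
 * outside of any event group.)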
*/ static void find_tool_events(const struct list_head *metric_list, - bool tool_events[PERF_TOOL_MAX]) + bool tool_events[TOOL_PMU__EVENT_MAX]) { struct metric *m; list_for_each_entry(m, metric_list, nd) { int i; - perf_tool_event__for_each_event(i) { + tool_pmu__for_each_event(i) { struct expr_id_data *data; if (!tool_events[i] && - !expr__get_id(m->pctx, perf_tool_event__to_str(i), &data)) + !expr__get_id(m->pctx, tool_pmu__event_to_str(i), &data)) tool_events[i] = true; } } @@ -1436,7 +1249,7 @@ err_out: * parse_ids - Build the event string for the ids and parse them creating an * evlist. The encoded metric_ids are decoded. * @metric_no_merge: is metric sharing explicitly disabled. - * @fake_pmu: used when testing metrics not supported by the current CPU. + * @fake_pmu: use a fake PMU when testing metrics not supported by the current CPU. * @ids: the event identifiers parsed from a metric. * @modifier: any modifiers added to the events. * @group_events: should events be placed in a weak group. @@ -1444,9 +1257,9 @@ err_out: * the overall list of metrics. * @out_evlist: the created list of events. */ -static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, +static int parse_ids(bool metric_no_merge, bool fake_pmu, struct expr_parse_ctx *ids, const char *modifier, - bool group_events, const bool tool_events[PERF_TOOL_MAX], + bool group_events, const bool tool_events[TOOL_PMU__EVENT_MAX], struct evlist **out_evlist) { struct parse_events_error parse_error; @@ -1471,9 +1284,9 @@ static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, * event1 if #smt_on else 0 * Add a tool event to avoid a parse error on an empty string. */ - perf_tool_event__for_each_event(i) { + tool_pmu__for_each_event(i) { if (tool_events[i]) { - char *tmp = strdup(perf_tool_event__to_str(i)); + char *tmp = strdup(tool_pmu__event_to_str(i)); if (!tmp) return -ENOMEM; @@ -1521,6 +1334,51 @@ err_out: return ret; } +/* How many times will a given evsel be used in a set of metrics? */ +static int count_uses(struct list_head *metric_list, struct evsel *evsel) +{ + const char *metric_id = evsel__metric_id(evsel); + struct metric *m; + int uses = 0; + + list_for_each_entry(m, metric_list, nd) { + if (hashmap__find(m->pctx->ids, metric_id, NULL)) + uses++; + } + return uses; +} + +/* + * Select the evsel that stat-display will use to trigger shadow/metric + * printing. Pick the least shared non-tool evsel, encouraging metrics to be + * with a hardware counter that is specific to them. 
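+ * For example (illustrative): if {instructions,cycles} back one metric and
+ * {cycles} alone backs another, instructions (used by only one metric) is
+ * picked for the first, leaving cycles to trigger the second metric's
+ * printout.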
+ */ +static struct evsel *pick_display_evsel(struct list_head *metric_list, + struct evsel **metric_events) +{ + struct evsel *selected = metric_events[0]; + size_t selected_uses; + bool selected_is_tool; + + if (!selected) + return NULL; + + selected_uses = count_uses(metric_list, selected); + selected_is_tool = evsel__is_tool(selected); + for (int i = 1; metric_events[i]; i++) { + struct evsel *candidate = metric_events[i]; + size_t candidate_uses = count_uses(metric_list, candidate); + + if ((selected_is_tool && !evsel__is_tool(candidate)) || + (candidate_uses < selected_uses)) { + selected = candidate; + selected_uses = candidate_uses; + selected_is_tool = evsel__is_tool(selected); + } + } + return selected; +} + static int parse_groups(struct evlist *perf_evlist, const char *pmu, const char *str, bool metric_no_group, @@ -1528,19 +1386,16 @@ static int parse_groups(struct evlist *perf_evlist, bool metric_no_threshold, const char *user_requested_cpu_list, bool system_wide, - struct perf_pmu *fake_pmu, - struct rblist *metric_events_list, + bool fake_pmu, const struct pmu_metrics_table *table) { struct evlist *combined_evlist = NULL; LIST_HEAD(metric_list); struct metric *m; - bool tool_events[PERF_TOOL_MAX] = {false}; + bool tool_events[TOOL_PMU__EVENT_MAX] = {false}; bool is_default = !strcmp(str, "Default"); int ret; - if (metric_events_list->nr_entries == 0) - metricgroup__rblist_init(metric_events_list); ret = metricgroup__add_metric_list(pmu, str, metric_no_group, metric_no_threshold, user_requested_cpu_list, system_wide, &metric_list, table); @@ -1631,7 +1486,9 @@ static int parse_groups(struct evlist *perf_evlist, goto out; } - me = metricgroup__lookup(metric_events_list, metric_events[0], true); + me = metricgroup__lookup(&perf_evlist->metric_events, + pick_display_evsel(&metric_list, metric_events), + /*create=*/true); expr = malloc(sizeof(struct metric_expr)); if (!expr) { @@ -1655,9 +1512,20 @@ static int parse_groups(struct evlist *perf_evlist, if (!expr->metric_name) { ret = -ENOMEM; + free(expr); free(metric_events); goto out; } + if (m->default_show_events) { + struct evsel *pos; + + for (int i = 0; metric_events[i]; i++) + metric_events[i]->default_show_events = true; + evlist__for_each_entry(metric_evlist, pos) { + if (pos->metric_leader && pos->metric_leader->default_show_events) + pos->default_show_events = true; + } + } expr->metric_threshold = m->metric_threshold; expr->metric_unit = m->metric_unit; expr->metric_events = metric_events; @@ -1691,8 +1559,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, bool metric_no_threshold, const char *user_requested_cpu_list, bool system_wide, - bool hardware_aware_grouping, - struct rblist *metric_events) + bool hardware_aware_grouping) { const struct pmu_metrics_table *table = pmu_metrics_table__find(); @@ -1703,13 +1570,12 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge, metric_no_threshold, user_requested_cpu_list, system_wide, - /*fake_pmu=*/NULL, metric_events, table); + /*fake_pmu=*/false, table); } int metricgroup__parse_groups_test(struct evlist *evlist, const struct pmu_metrics_table *table, - const char *str, - struct rblist *metric_events) + const char *str) { return parse_groups(evlist, "all", str, /*metric_no_group=*/false, @@ -1717,35 +1583,41 @@ int metricgroup__parse_groups_test(struct evlist *evlist, /*metric_no_threshold=*/false, /*user_requested_cpu_list=*/NULL, /*system_wide=*/false, - &perf_pmu__fake, 
metric_events, table); + /*fake_pmu=*/true, table); } struct metricgroup__has_metric_data { const char *pmu; - const char *metric; + const char *metric_or_groups; }; -static int metricgroup__has_metric_callback(const struct pmu_metric *pm, - const struct pmu_metrics_table *table __maybe_unused, - void *vdata) +static int metricgroup__has_metric_or_groups_callback(const struct pmu_metric *pm, + const struct pmu_metrics_table *table + __maybe_unused, + void *vdata) { struct metricgroup__has_metric_data *data = vdata; - return match_pm_metric(pm, data->pmu, data->metric) ? 1 : 0; + return match_pm_metric_or_groups(pm, data->pmu, data->metric_or_groups) ? 1 : 0; } -bool metricgroup__has_metric(const char *pmu, const char *metric) +bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_groups) { - const struct pmu_metrics_table *table = pmu_metrics_table__find(); + const struct pmu_metrics_table *tables[2] = { + pmu_metrics_table__find(), + pmu_metrics_table__default(), + }; struct metricgroup__has_metric_data data = { .pmu = pmu, - .metric = metric, + .metric_or_groups = metric_or_groups, }; - if (!table) - return false; - - return pmu_metrics_table__for_each_metric(table, metricgroup__has_metric_callback, &data) - ? true : false; + for (size_t i = 0; i < ARRAY_SIZE(tables); i++) { + if (pmu_metrics_table__for_each_metric(tables[i], + metricgroup__has_metric_or_groups_callback, + &data)) + return true; + } + return false; } static int metricgroup__topdown_max_level_callback(const struct pmu_metric *pm, @@ -1799,13 +1671,14 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, evsel = evlist__find_evsel(evlist, old_me->evsel->core.idx); if (!evsel) return -EINVAL; - new_me = metricgroup__lookup(new_metric_events, evsel, true); + new_me = metricgroup__lookup(new_metric_events, evsel, /*create=*/true); if (!new_me) return -ENOMEM; pr_debug("copying metric event for cgroup '%s': %s (idx=%d)\n", cgrp ? cgrp->name : "root", evsel->name, evsel->core.idx); + new_me->is_default = old_me->is_default; list_for_each_entry(old_expr, &old_me->head, nd) { new_expr = malloc(sizeof(*new_expr)); if (!new_expr) @@ -1819,6 +1692,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, new_expr->metric_unit = old_expr->metric_unit; new_expr->runtime = old_expr->runtime; + new_expr->default_metricgroup_name = old_expr->default_metricgroup_name; if (old_expr->metric_refs) { /* calculate number of metric_events */ diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 779f6ede1b51..4be6bfc13c46 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -16,7 +16,7 @@ struct cgroup; /** * A node in a rblist keyed by the evsel. The global rblist of metric events - * generally exists in perf_stat_config. The evsel is looked up in the rblist + * generally exists in evlist. The evsel is looked up in the rblist * yielding a list of metric_expr. 
*/ struct metric_event { @@ -77,17 +77,17 @@ int metricgroup__parse_groups(struct evlist *perf_evlist, bool metric_no_threshold, const char *user_requested_cpu_list, bool system_wide, - bool hardware_aware_grouping, - struct rblist *metric_events); + bool hardware_aware_grouping); int metricgroup__parse_groups_test(struct evlist *evlist, const struct pmu_metrics_table *table, - const char *str, - struct rblist *metric_events); + const char *str); -void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); -bool metricgroup__has_metric(const char *pmu, const char *metric); +int metricgroup__for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn, + void *data); +bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_groups); unsigned int metricgroups__topdown_max_level(void); int arch_get_runtimeparam(const struct pmu_metric *pm); +void metricgroup__rblist_init(struct rblist *metric_events); void metricgroup__rblist_exit(struct rblist *metric_events); int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 122ee198a86e..b69f926d314b 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -7,6 +7,7 @@ */ #include <sys/mman.h> +#include <errno.h> #include <inttypes.h> #include <asm/bug.h> #include <linux/zalloc.h> @@ -230,9 +231,7 @@ void mmap__munmap(struct mmap *map) { bitmap_free(map->affinity_mask.bits); -#ifndef PYTHON_PERF zstd_fini(&map->zstd_data); -#endif perf_mmap__aio_munmap(map); if (map->data != NULL) { @@ -246,9 +245,8 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask) { int idx, nr_cpus; struct perf_cpu cpu; - const struct perf_cpu_map *cpu_map = NULL; + struct perf_cpu_map *cpu_map = cpu_map__online(); - cpu_map = cpu_map__online(); if (!cpu_map) return; @@ -258,6 +256,7 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask) if (cpu__get_node(cpu) == node) __set_bit(cpu.cpu, mask->bits); } + perf_cpu_map__put(cpu_map); } static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp) @@ -295,12 +294,10 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu map->core.flush = mp->flush; -#ifndef PYTHON_PERF if (zstd_init(&map->zstd_data, mp->comp_level)) { pr_debug2("failed to init mmap compressor, error %d\n", errno); return -1; } -#endif if (mp->comp_level && !perf_mmap__aio_enabled(map)) { map->data = mmap(NULL, mmap__mmap_len(map), PROT_READ|PROT_WRITE, @@ -360,14 +357,3 @@ int perf_mmap__push(struct mmap *md, void *to, out: return rc; } - -int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, struct mmap_cpu_mask *clone) -{ - clone->nbits = original->nbits; - clone->bits = bitmap_zalloc(original->nbits); - if (!clone->bits) - return -ENOMEM; - - memcpy(clone->bits, original->bits, MMAP_CPU_MASK_BYTES(original)); - return 0; -} diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 0df6e1621c7e..4d72c5fa5084 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -61,7 +61,4 @@ size_t mmap__mmap_len(struct mmap *map); void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag); -int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, - struct mmap_cpu_mask *clone); - #endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/mutex.c b/tools/perf/util/mutex.c index bca7f0717f35..7aa1f3f55a7d 100644 --- a/tools/perf/util/mutex.c +++ b/tools/perf/util/mutex.c @@ -17,7 +17,7 @@ static 
void check_err(const char *fn, int err) #define CHECK_ERR(err) check_err(__func__, err) -static void __mutex_init(struct mutex *mtx, bool pshared) +static void __mutex_init(struct mutex *mtx, bool pshared, bool recursive) { pthread_mutexattr_t attr; @@ -27,21 +27,27 @@ static void __mutex_init(struct mutex *mtx, bool pshared) /* In normal builds enable error checking, such as recursive usage. */ CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK)); #endif + if (recursive) + CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)); if (pshared) CHECK_ERR(pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)); - CHECK_ERR(pthread_mutex_init(&mtx->lock, &attr)); CHECK_ERR(pthread_mutexattr_destroy(&attr)); } void mutex_init(struct mutex *mtx) { - __mutex_init(mtx, /*pshared=*/false); + __mutex_init(mtx, /*pshared=*/false, /*recursive=*/false); } void mutex_init_pshared(struct mutex *mtx) { - __mutex_init(mtx, /*pshared=*/true); + __mutex_init(mtx, /*pshared=*/true, /*recursive=*/false); +} + +void mutex_init_recursive(struct mutex *mtx) +{ + __mutex_init(mtx, /*pshared=*/false, /*recursive=*/true); } void mutex_destroy(struct mutex *mtx) diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h index 40661120cacc..70232d8d094f 100644 --- a/tools/perf/util/mutex.h +++ b/tools/perf/util/mutex.h @@ -33,10 +33,22 @@ /* Documents if a type is a lockable type. */ #define LOCKABLE __attribute__((lockable)) +/* Documents a function that expects a lock not to be held prior to entry. */ +#define LOCKS_EXCLUDED(...) __attribute__((locks_excluded(__VA_ARGS__))) + +/* Documents a function that returns a lock. */ +#define LOCK_RETURNED(x) __attribute__((lock_returned(x))) + /* Documents functions that acquire a lock in the body of a function, and do not release it. */ #define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__((exclusive_lock_function(__VA_ARGS__))) /* + * Documents functions that acquire a shared (reader) lock in the body of a + * function, and do not release it. + */ +#define SHARED_LOCK_FUNCTION(...) __attribute__((shared_lock_function(__VA_ARGS__))) + +/* * Documents functions that expect a lock to be held on entry to the function, * and release it in the body of the function. */ @@ -49,6 +61,9 @@ /* Documents a function that expects a mutex to be held prior to entry. */ #define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__((exclusive_locks_required(__VA_ARGS__))) +/* Documents a function that expects a shared (reader) lock to be held prior to entry. */ +#define SHARED_LOCKS_REQUIRED(...) __attribute__((shared_locks_required(__VA_ARGS__))) + /* Turns off thread safety checking within the body of a particular function. */ #define NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) @@ -57,10 +72,14 @@ #define GUARDED_BY(x) #define PT_GUARDED_BY(x) #define LOCKABLE +#define LOCKS_EXCLUDED(...) +#define LOCK_RETURNED(x) #define EXCLUSIVE_LOCK_FUNCTION(...) +#define SHARED_LOCK_FUNCTION(...) #define UNLOCK_FUNCTION(...) #define EXCLUSIVE_TRYLOCK_FUNCTION(...) #define EXCLUSIVE_LOCKS_REQUIRED(...) +#define SHARED_LOCKS_REQUIRED(...) #define NO_THREAD_SAFETY_ANALYSIS #endif @@ -85,6 +104,8 @@ void mutex_init(struct mutex *mtx); * process-private attribute. */ void mutex_init_pshared(struct mutex *mtx); +/* Initializes a mutex that may be recursively held on the same thread. 
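+ * Each mutex_lock() on such a mutex must still be paired with its own
+ * mutex_unlock(); the lock is released only when the outermost hold is
+ * dropped.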
*/ +void mutex_init_recursive(struct mutex *mtx); void mutex_destroy(struct mutex *mtx); void mutex_lock(struct mutex *mtx) EXCLUSIVE_LOCK_FUNCTION(*mtx); diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index cb185c5659d6..01502570b32d 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -6,7 +6,6 @@ #include "namespaces.h" #include "event.h" -#include "get_current_dir_name.h" #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> @@ -266,11 +265,16 @@ pid_t nsinfo__pid(const struct nsinfo *nsi) return RC_CHK_ACCESS(nsi)->pid; } -pid_t nsinfo__in_pidns(const struct nsinfo *nsi) +bool nsinfo__in_pidns(const struct nsinfo *nsi) { return RC_CHK_ACCESS(nsi)->in_pidns; } +void nsinfo__set_in_pidns(struct nsinfo *nsi) +{ + RC_CHK_ACCESS(nsi)->in_pidns = true; +} + void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc) { @@ -288,14 +292,14 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, if (!nsi || !nsinfo__need_setns(nsi)) return; - if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX) + if (!getcwd(curpath, sizeof(curpath))) return; - oldcwd = get_current_dir_name(); + oldcwd = strdup(curpath); if (!oldcwd) return; - oldns = open(curpath, O_RDONLY); + oldns = open("/proc/self/ns/mnt", O_RDONLY); if (oldns < 0) goto errout; diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index 8c0731c6cbb7..e95c79b80e27 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -58,7 +58,8 @@ void nsinfo__clear_need_setns(struct nsinfo *nsi); pid_t nsinfo__tgid(const struct nsinfo *nsi); pid_t nsinfo__nstgid(const struct nsinfo *nsi); pid_t nsinfo__pid(const struct nsinfo *nsi); -pid_t nsinfo__in_pidns(const struct nsinfo *nsi); +bool nsinfo__in_pidns(const struct nsinfo *nsi); +void nsinfo__set_in_pidns(struct nsinfo *nsi); void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc); void nsinfo__mountns_exit(struct nscookie *nc); diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h index 2dd67c60f211..64bf763ddf50 100644 --- a/tools/perf/util/off_cpu.h +++ b/tools/perf/util/off_cpu.h @@ -13,9 +13,10 @@ struct record_opts; #define OFFCPU_SAMPLE_TYPES (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | \ PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | \ - PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_PERIOD | PERF_SAMPLE_RAW | \ PERF_SAMPLE_CGROUP) +#define OFFCPU_THRESH 500000000ULL #ifdef HAVE_BPF_SKEL int off_cpu_prepare(struct evlist *evlist, struct target *target, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6ed0f9c5581d..17c1c36a7bf9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -7,7 +7,9 @@ #include <errno.h> #include <sys/ioctl.h> #include <sys/param.h> +#include "cpumap.h" #include "term.h" +#include "env.h" #include "evlist.h" #include "evsel.h" #include <subcmd/parse-options.h> @@ -15,144 +17,47 @@ #include "string2.h" #include "strbuf.h" #include "debug.h" -#include <api/fs/tracing_path.h> #include <perf/cpumap.h> #include <util/parse-events-bison.h> #include <util/parse-events-flex.h> #include "pmu.h" #include "pmus.h" +#include "tp_pmu.h" #include "asm/bug.h" +#include "ui/ui.h" #include "util/parse-branch-options.h" #include "util/evsel_config.h" #include "util/event.h" #include "util/bpf-filter.h" +#include "util/stat.h" +#include "util/tool_pmu.h" #include "util/util.h" #include "tracepoint.h" +#include <api/fs/tracing_path.h> #define 
MAX_NAME_LEN 100 -#ifdef PARSER_DEBUG -extern int parse_events_debug; -#endif static int get_config_terms(const struct parse_events_terms *head_config, struct list_head *head_terms); static int parse_events_terms__copy(const struct parse_events_terms *src, struct parse_events_terms *dest); - -const struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = { - .symbol = "cpu-cycles", - .alias = "cycles", - }, - [PERF_COUNT_HW_INSTRUCTIONS] = { - .symbol = "instructions", - .alias = "", - }, - [PERF_COUNT_HW_CACHE_REFERENCES] = { - .symbol = "cache-references", - .alias = "", - }, - [PERF_COUNT_HW_CACHE_MISSES] = { - .symbol = "cache-misses", - .alias = "", - }, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { - .symbol = "branch-instructions", - .alias = "branches", - }, - [PERF_COUNT_HW_BRANCH_MISSES] = { - .symbol = "branch-misses", - .alias = "", - }, - [PERF_COUNT_HW_BUS_CYCLES] = { - .symbol = "bus-cycles", - .alias = "", - }, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = { - .symbol = "stalled-cycles-frontend", - .alias = "idle-cycles-frontend", - }, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = { - .symbol = "stalled-cycles-backend", - .alias = "idle-cycles-backend", - }, - [PERF_COUNT_HW_REF_CPU_CYCLES] = { - .symbol = "ref-cycles", - .alias = "", - }, +static int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct strbuf *sb); + +static const char *const event_types[] = { + [PERF_TYPE_HARDWARE] = "hardware", + [PERF_TYPE_SOFTWARE] = "software", + [PERF_TYPE_TRACEPOINT] = "tracepoint", + [PERF_TYPE_HW_CACHE] = "hardware-cache", + [PERF_TYPE_RAW] = "raw", + [PERF_TYPE_BREAKPOINT] = "breakpoint", }; -const struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { - [PERF_COUNT_SW_CPU_CLOCK] = { - .symbol = "cpu-clock", - .alias = "", - }, - [PERF_COUNT_SW_TASK_CLOCK] = { - .symbol = "task-clock", - .alias = "", - }, - [PERF_COUNT_SW_PAGE_FAULTS] = { - .symbol = "page-faults", - .alias = "faults", - }, - [PERF_COUNT_SW_CONTEXT_SWITCHES] = { - .symbol = "context-switches", - .alias = "cs", - }, - [PERF_COUNT_SW_CPU_MIGRATIONS] = { - .symbol = "cpu-migrations", - .alias = "migrations", - }, - [PERF_COUNT_SW_PAGE_FAULTS_MIN] = { - .symbol = "minor-faults", - .alias = "", - }, - [PERF_COUNT_SW_PAGE_FAULTS_MAJ] = { - .symbol = "major-faults", - .alias = "", - }, - [PERF_COUNT_SW_ALIGNMENT_FAULTS] = { - .symbol = "alignment-faults", - .alias = "", - }, - [PERF_COUNT_SW_EMULATION_FAULTS] = { - .symbol = "emulation-faults", - .alias = "", - }, - [PERF_COUNT_SW_DUMMY] = { - .symbol = "dummy", - .alias = "", - }, - [PERF_COUNT_SW_BPF_OUTPUT] = { - .symbol = "bpf-output", - .alias = "", - }, - [PERF_COUNT_SW_CGROUP_SWITCHES] = { - .symbol = "cgroup-switches", - .alias = "", - }, -}; - -const char *event_type(int type) +const char *event_type(size_t type) { - switch (type) { - case PERF_TYPE_HARDWARE: - return "hardware"; - - case PERF_TYPE_SOFTWARE: - return "software"; - - case PERF_TYPE_TRACEPOINT: - return "tracepoint"; - - case PERF_TYPE_HW_CACHE: - return "hardware-cache"; + if (type >= PERF_TYPE_MAX) + return "unknown"; - default: - break; - } - - return "unknown"; + return event_types[type]; } static char *get_config_str(const struct parse_events_terms *head_terms, @@ -180,6 +85,48 @@ static char *get_config_name(const struct parse_events_terms *head_terms) return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME); } +static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head_terms, + bool fake_pmu) +{ + struct 
parse_events_term *term; + struct perf_cpu_map *cpus = NULL; + + if (!head_terms) + return NULL; + + list_for_each_entry(term, &head_terms->terms, list) { + struct perf_cpu_map *term_cpus; + + if (term->type_term != PARSE_EVENTS__TERM_TYPE_CPU) + continue; + + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { + term_cpus = perf_cpu_map__new_int(term->val.num); + } else { + struct perf_pmu *pmu = perf_pmus__find(term->val.str); + + if (pmu) { + term_cpus = pmu->is_core && perf_cpu_map__is_empty(pmu->cpus) + ? cpu_map__online() + : perf_cpu_map__get(pmu->cpus); + } else { + term_cpus = perf_cpu_map__new(term->val.str); + if (!term_cpus && fake_pmu) { + /* + * Assume the PMU string makes sense on a different + * machine and fake a value with all online CPUs. + */ + term_cpus = cpu_map__online(); + } + } + } + perf_cpu_map__merge(&cpus, term_cpus); + perf_cpu_map__put(term_cpus); + } + + return cpus; +} + /** * fix_raw - For each raw term see if there is an event (aka alias) in pmu that * matches the raw's string value. If the string value matches an @@ -229,49 +176,104 @@ __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, bool init_attr, const char *name, const char *metric_id, struct perf_pmu *pmu, - struct list_head *config_terms, bool auto_merge_stats, - const char *cpu_list) + struct list_head *config_terms, struct evsel *first_wildcard_match, + struct perf_cpu_map *user_cpus, u64 alternate_hw_config) { struct evsel *evsel; - struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) : - cpu_list ? perf_cpu_map__new(cpu_list) : NULL; + bool is_pmu_core; + struct perf_cpu_map *cpus, *pmu_cpus; + bool has_user_cpus = !perf_cpu_map__is_empty(user_cpus); - if (pmu) - perf_pmu__warn_invalid_formats(pmu); + /* + * Ensure the first_wildcard_match's PMU matches that of the new event + * being added. Otherwise try to match with another event further down + * the evlist. 
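+ * (Roughly: evsels later opened from the same wildcard event string
+ * aggregate their stats into first_wildcard_match, so the two must come
+ * from compatible PMUs.)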
+ */ + if (first_wildcard_match) { + struct evsel *pos = list_prev_entry(first_wildcard_match, core.node); + + first_wildcard_match = NULL; + list_for_each_entry_continue(pos, list, core.node) { + if (perf_pmu__name_no_suffix_match(pos->pmu, pmu->name)) { + first_wildcard_match = pos; + break; + } + if (pos->pmu->is_core && (!pmu || pmu->is_core)) { + first_wildcard_match = pos; + break; + } + } + } - if (pmu && (attr->type == PERF_TYPE_RAW || attr->type >= PERF_TYPE_MAX)) { - perf_pmu__warn_invalid_config(pmu, attr->config, name, - PERF_PMU_FORMAT_VALUE_CONFIG, "config"); - perf_pmu__warn_invalid_config(pmu, attr->config1, name, - PERF_PMU_FORMAT_VALUE_CONFIG1, "config1"); - perf_pmu__warn_invalid_config(pmu, attr->config2, name, - PERF_PMU_FORMAT_VALUE_CONFIG2, "config2"); - perf_pmu__warn_invalid_config(pmu, attr->config3, name, - PERF_PMU_FORMAT_VALUE_CONFIG3, "config3"); + if (pmu) { + perf_pmu__warn_invalid_formats(pmu); + if (attr->type == PERF_TYPE_RAW || attr->type >= PERF_TYPE_MAX) { + perf_pmu__warn_invalid_config(pmu, attr->config, name, + PERF_PMU_FORMAT_VALUE_CONFIG, "config"); + perf_pmu__warn_invalid_config(pmu, attr->config1, name, + PERF_PMU_FORMAT_VALUE_CONFIG1, "config1"); + perf_pmu__warn_invalid_config(pmu, attr->config2, name, + PERF_PMU_FORMAT_VALUE_CONFIG2, "config2"); + perf_pmu__warn_invalid_config(pmu, attr->config3, name, + PERF_PMU_FORMAT_VALUE_CONFIG3, "config3"); + perf_pmu__warn_invalid_config(pmu, attr->config4, name, + PERF_PMU_FORMAT_VALUE_CONFIG4, "config4"); + } + } + /* + * If a PMU wasn't given, such as for legacy events, find now that + * warnings won't be generated. + */ + if (!pmu) + pmu = perf_pmus__find_by_attr(attr); + + if (pmu) { + is_pmu_core = pmu->is_core; + pmu_cpus = perf_cpu_map__get(pmu->cpus); + if (perf_cpu_map__is_empty(pmu_cpus)) { + if (perf_pmu__is_tool(pmu)) + pmu_cpus = tool_pmu__cpus(attr); + else + pmu_cpus = cpu_map__online(); + } + } else { + is_pmu_core = (attr->type == PERF_TYPE_HARDWARE || + attr->type == PERF_TYPE_HW_CACHE); + pmu_cpus = is_pmu_core ? cpu_map__online() : NULL; } + + if (has_user_cpus) + cpus = perf_cpu_map__get(user_cpus); + else + cpus = perf_cpu_map__get(pmu_cpus); + if (init_attr) event_attr_init(attr); evsel = evsel__new_idx(attr, *idx); - if (!evsel) { - perf_cpu_map__put(cpus); - return NULL; + if (!evsel) + goto out_err; + + if (name) { + evsel->name = strdup(name); + if (!evsel->name) + goto out_err; + } + + if (metric_id) { + evsel->metric_id = strdup(metric_id); + if (!evsel->metric_id) + goto out_err; } (*idx)++; evsel->core.cpus = cpus; - evsel->core.own_cpus = perf_cpu_map__get(cpus); + evsel->core.pmu_cpus = pmu_cpus; evsel->core.requires_cpu = pmu ? pmu->is_uncore : false; - evsel->core.is_pmu_core = pmu ? pmu->is_core : false; - evsel->auto_merge_stats = auto_merge_stats; + evsel->core.is_pmu_core = is_pmu_core; evsel->pmu = pmu; - evsel->pmu_name = pmu ? 
strdup(pmu->name) : NULL; - - if (name) - evsel->name = strdup(name); - - if (metric_id) - evsel->metric_id = strdup(metric_id); + evsel->alternate_hw_config = alternate_hw_config; + evsel->first_wildcard_match = first_wildcard_match; if (config_terms) list_splice_init(config_terms, &evsel->config_terms); @@ -279,7 +281,17 @@ __add_event(struct list_head *list, int *idx, if (list) list_add_tail(&evsel->core.node, list); + if (has_user_cpus) + evsel__warn_user_requested_cpus(evsel, user_cpus); + return evsel; +out_err: + perf_cpu_map__put(cpus); + perf_cpu_map__put(pmu_cpus); + zfree(&evsel->name); + zfree(&evsel->metric_id); + free(evsel); + return NULL; } struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, @@ -288,41 +300,19 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, { return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name, metric_id, pmu, /*config_terms=*/NULL, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL); + /*first_wildcard_match=*/NULL, /*cpu_list=*/NULL, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX); } static int add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, const char *name, - const char *metric_id, struct list_head *config_terms) + const char *metric_id, struct list_head *config_terms, + u64 alternate_hw_config) { return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id, /*pmu=*/NULL, config_terms, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL) ? 0 : -ENOMEM; -} - -static int add_event_tool(struct list_head *list, int *idx, - enum perf_tool_event tool_event) -{ - struct evsel *evsel; - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_DUMMY, - }; - - evsel = __add_event(list, idx, &attr, /*init_attr=*/true, /*name=*/NULL, - /*metric_id=*/NULL, /*pmu=*/NULL, - /*config_terms=*/NULL, /*auto_merge_stats=*/false, - /*cpu_list=*/"0"); - if (!evsel) - return -ENOMEM; - evsel->tool_event = tool_event; - if (tool_event == PERF_TOOL_DURATION_TIME - || tool_event == PERF_TOOL_USER_TIME - || tool_event == PERF_TOOL_SYSTEM_TIME) { - free((char *)evsel->unit); - evsel->unit = strdup("ns"); - } - return 0; + /*first_wildcard_match=*/NULL, /*cpu_list=*/NULL, + alternate_hw_config) ? 
0 : -ENOMEM; } /** @@ -354,13 +344,13 @@ static int parse_aliases(const char *str, const char *const names[][EVSEL__MAX_A typedef int config_term_func_t(struct perf_event_attr *attr, struct parse_events_term *term, - struct parse_events_error *err); + struct parse_events_state *parse_state); static int config_term_common(struct perf_event_attr *attr, struct parse_events_term *term, - struct parse_events_error *err); + struct parse_events_state *parse_state); static int config_attr(struct perf_event_attr *attr, const struct parse_events_terms *head, - struct parse_events_error *err, + struct parse_events_state *parse_state, config_term_func_t config_term); /** @@ -446,72 +436,8 @@ bool parse_events__filter_pmu(const struct parse_events_state *parse_state, static int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, const struct parse_events_terms *const_parsed_terms, - bool auto_merge_stats); - -int parse_events_add_cache(struct list_head *list, int *idx, const char *name, - struct parse_events_state *parse_state, - struct parse_events_terms *parsed_terms) -{ - struct perf_pmu *pmu = NULL; - bool found_supported = false; - const char *config_name = get_config_name(parsed_terms); - const char *metric_id = get_config_metric_id(parsed_terms); - - while ((pmu = perf_pmus__scan(pmu)) != NULL) { - LIST_HEAD(config_terms); - struct perf_event_attr attr; - int ret; - - if (parse_events__filter_pmu(parse_state, pmu)) - continue; - - if (perf_pmu__have_event(pmu, name)) { - /* - * The PMU has the event so add as not a legacy cache - * event. - */ - ret = parse_events_add_pmu(parse_state, list, pmu, - parsed_terms, - perf_pmu__auto_merge_stats(pmu)); - if (ret) - return ret; - continue; - } - - if (!pmu->is_core) { - /* Legacy cache events are only supported by core PMUs. */ - continue; - } + struct evsel *first_wildcard_match); - memset(&attr, 0, sizeof(attr)); - attr.type = PERF_TYPE_HW_CACHE; - - ret = parse_events__decode_legacy_cache(name, pmu->type, &attr.config); - if (ret) - return ret; - - found_supported = true; - - if (parsed_terms) { - if (config_attr(&attr, parsed_terms, parse_state->error, - config_term_common)) - return -EINVAL; - - if (get_config_terms(parsed_terms, &config_terms)) - return -ENOMEM; - } - - if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name, - metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL) == NULL) - return -ENOMEM; - - free_config_terms(&config_terms); - } - return found_supported ? 
0 : -EINVAL; -} - -#ifdef HAVE_LIBTRACEEVENT static void tracepoint_error(struct parse_events_error *e, int err, const char *sys, const char *name, int column) { @@ -570,103 +496,104 @@ static int add_tracepoint(struct parse_events_state *parse_state, return 0; } -static int add_tracepoint_multi_event(struct parse_events_state *parse_state, - struct list_head *list, - const char *sys_name, const char *evt_name, - struct parse_events_error *err, - struct parse_events_terms *head_config, YYLTYPE *loc) -{ - char *evt_path; - struct dirent *evt_ent; - DIR *evt_dir; - int ret = 0, found = 0; - - evt_path = get_events_file(sys_name); - if (!evt_path) { - tracepoint_error(err, errno, sys_name, evt_name, loc->first_column); - return -1; - } - evt_dir = opendir(evt_path); - if (!evt_dir) { - put_events_file(evt_path); - tracepoint_error(err, errno, sys_name, evt_name, loc->first_column); - return -1; - } +struct add_tracepoint_multi_args { + struct parse_events_state *parse_state; + struct list_head *list; + const char *sys_glob; + const char *evt_glob; + struct parse_events_error *err; + struct parse_events_terms *head_config; + YYLTYPE *loc; + int found; +}; - while (!ret && (evt_ent = readdir(evt_dir))) { - if (!strcmp(evt_ent->d_name, ".") - || !strcmp(evt_ent->d_name, "..") - || !strcmp(evt_ent->d_name, "enable") - || !strcmp(evt_ent->d_name, "filter")) - continue; +static int add_tracepoint_multi_event_cb(void *state, const char *sys_name, const char *evt_name) +{ + struct add_tracepoint_multi_args *args = state; + int ret; - if (!strglobmatch(evt_ent->d_name, evt_name)) - continue; + if (!strglobmatch(evt_name, args->evt_glob)) + return 0; - found++; + args->found++; + ret = add_tracepoint(args->parse_state, args->list, sys_name, evt_name, + args->err, args->head_config, args->loc); - ret = add_tracepoint(parse_state, list, sys_name, evt_ent->d_name, - err, head_config, loc); - } + return ret; +} - if (!found) { - tracepoint_error(err, ENOENT, sys_name, evt_name, loc->first_column); - ret = -1; +static int add_tracepoint_multi_event(struct add_tracepoint_multi_args *args, const char *sys_name) +{ + if (strpbrk(args->evt_glob, "*?") == NULL) { + /* Not a glob. */ + args->found++; + return add_tracepoint(args->parse_state, args->list, sys_name, args->evt_glob, + args->err, args->head_config, args->loc); } - put_events_file(evt_path); - closedir(evt_dir); - return ret; + return tp_pmu__for_each_tp_event(sys_name, args, add_tracepoint_multi_event_cb); } -static int add_tracepoint_event(struct parse_events_state *parse_state, - struct list_head *list, - const char *sys_name, const char *evt_name, - struct parse_events_error *err, - struct parse_events_terms *head_config, YYLTYPE *loc) +static int add_tracepoint_multi_sys_cb(void *state, const char *sys_name) { - return strpbrk(evt_name, "*?") ? 
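/*
 * The callback rework above funnels literal names and globs through one
 * path: strpbrk(s, "*?") decides whether the pattern is a glob at all,
 * and strglobmatch() (from util/string.c) tests each candidate name.
 * A minimal sketch of that split, with a hypothetical helper name:
 *
 *    static bool tp_name_matches(const char *name, const char *pattern)
 *    {
 *        if (strpbrk(pattern, "*?") == NULL)
 *            return strcmp(name, pattern) == 0; // literal
 *        return strglobmatch(name, pattern);    // glob
 *    }
 *
 * so "sched:sched_*" visits every event under the sched system while
 * "sched:sched_switch" takes the direct, single-event path.
 */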
- add_tracepoint_multi_event(parse_state, list, sys_name, evt_name, - err, head_config, loc) : - add_tracepoint(parse_state, list, sys_name, evt_name, - err, head_config, loc); + struct add_tracepoint_multi_args *args = state; + + if (!strglobmatch(sys_name, args->sys_glob)) + return 0; + + return add_tracepoint_multi_event(args, sys_name); } static int add_tracepoint_multi_sys(struct parse_events_state *parse_state, struct list_head *list, - const char *sys_name, const char *evt_name, + const char *sys_glob, const char *evt_glob, struct parse_events_error *err, struct parse_events_terms *head_config, YYLTYPE *loc) { - struct dirent *events_ent; - DIR *events_dir; - int ret = 0; + struct add_tracepoint_multi_args args = { + .parse_state = parse_state, + .list = list, + .sys_glob = sys_glob, + .evt_glob = evt_glob, + .err = err, + .head_config = head_config, + .loc = loc, + .found = 0, + }; + int ret; - events_dir = tracing_events__opendir(); - if (!events_dir) { - tracepoint_error(err, errno, sys_name, evt_name, loc->first_column); - return -1; + if (strpbrk(sys_glob, "*?") == NULL) { + /* Not a glob. */ + ret = add_tracepoint_multi_event(&args, sys_glob); + } else { + ret = tp_pmu__for_each_tp_sys(&args, add_tracepoint_multi_sys_cb); + } + if (args.found == 0) { + tracepoint_error(err, ENOENT, sys_glob, evt_glob, loc->first_column); + return -ENOENT; } + return ret; +} - while (!ret && (events_ent = readdir(events_dir))) { - if (!strcmp(events_ent->d_name, ".") - || !strcmp(events_ent->d_name, "..") - || !strcmp(events_ent->d_name, "enable") - || !strcmp(events_ent->d_name, "header_event") - || !strcmp(events_ent->d_name, "header_page")) - continue; +size_t default_breakpoint_len(void) +{ +#if defined(__i386__) + static int len; - if (!strglobmatch(events_ent->d_name, sys_name)) - continue; + if (len == 0) { + struct perf_env env = {}; - ret = add_tracepoint_event(parse_state, list, events_ent->d_name, - evt_name, err, head_config, loc); + perf_env__init(&env); + len = perf_env__kernel_is_64_bit(&env) ? 
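/*
 * Why default_breakpoint_len() probes at runtime on i386: a 32-bit perf
 * binary may run on a 64-bit kernel, and an execution breakpoint there
 * needs the kernel's word size (8), not the compile-time sizeof(long)
 * (4). perf_env__kernel_is_64_bit() answers that question once and the
 * static caches the result. Hedged usage sketch:
 *
 *    struct perf_event_attr attr = { .bp_type = HW_BREAKPOINT_X };
 *    attr.bp_len = default_breakpoint_len();
 *    // aarch64 always yields 4: instructions are fixed-width there.
 */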
sizeof(u64) : sizeof(long); + perf_env__exit(&env); } - - closedir(events_dir); - return ret; + return len; +#elif defined(__aarch64__) + return 4; +#else + return sizeof(long); +#endif } -#endif /* HAVE_LIBTRACEEVENT */ static int parse_breakpoint_type(const char *type, struct perf_event_attr *attr) @@ -726,7 +653,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state, /* Provide some defaults if len is not specified */ if (!len) { if (attr.bp_type == HW_BREAKPOINT_X) - len = sizeof(long); + len = default_breakpoint_len(); else len = HW_BREAKPOINT_LEN_4; } @@ -737,8 +664,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state, attr.sample_period = 1; if (head_config) { - if (config_attr(&attr, head_config, parse_state->error, - config_term_common)) + if (config_attr(&attr, head_config, parse_state, config_term_common)) return -EINVAL; if (get_config_terms(head_config, &config_terms)) @@ -748,7 +674,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state, name = get_config_name(head_config); return add_event(list, &parse_state->idx, &attr, name, /*mertic_id=*/NULL, - &config_terms); + &config_terms, /*alternate_hw_config=*/PERF_COUNT_HW_MAX); } static int check_type_val(struct parse_events_term *term, @@ -770,7 +696,7 @@ static int check_type_val(struct parse_events_term *term, static bool config_term_shrinked; -static const char *config_term_name(enum parse_events__term_type term_type) +const char *parse_events__term_type_str(enum parse_events__term_type term_type) { /* * Update according to parse-events.l @@ -781,6 +707,7 @@ static const char *config_term_name(enum parse_events__term_type term_type) [PARSE_EVENTS__TERM_TYPE_CONFIG1] = "config1", [PARSE_EVENTS__TERM_TYPE_CONFIG2] = "config2", [PARSE_EVENTS__TERM_TYPE_CONFIG3] = "config3", + [PARSE_EVENTS__TERM_TYPE_CONFIG4] = "config4", [PARSE_EVENTS__TERM_TYPE_NAME] = "name", [PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD] = "period", [PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ] = "freq", @@ -797,11 +724,14 @@ static const char *config_term_name(enum parse_events__term_type term_type) [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config", [PARSE_EVENTS__TERM_TYPE_PERCORE] = "percore", [PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT] = "aux-output", + [PARSE_EVENTS__TERM_TYPE_AUX_ACTION] = "aux-action", [PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size", [PARSE_EVENTS__TERM_TYPE_METRIC_ID] = "metric-id", [PARSE_EVENTS__TERM_TYPE_RAW] = "raw", - [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache", - [PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware", + [PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG] = "legacy-hardware-config", + [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG] = "legacy-cache-config", + [PARSE_EVENTS__TERM_TYPE_CPU] = "cpu", + [PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV] = "ratio-to-prev", }; if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR) return "unknown term"; @@ -827,10 +757,12 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er case PARSE_EVENTS__TERM_TYPE_CONFIG1: case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_CONFIG3: + case PARSE_EVENTS__TERM_TYPE_CONFIG4: case PARSE_EVENTS__TERM_TYPE_NAME: case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: case PARSE_EVENTS__TERM_TYPE_PERCORE: + case PARSE_EVENTS__TERM_TYPE_CPU: return true; case PARSE_EVENTS__TERM_TYPE_USER: case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: @@ -846,17 +778,19 @@ config_term_avail(enum parse_events__term_type term_type, 
struct parse_events_er case PARSE_EVENTS__TERM_TYPE_OVERWRITE: case PARSE_EVENTS__TERM_TYPE_DRV_CFG: case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: + case PARSE_EVENTS__TERM_TYPE_AUX_ACTION: case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: case PARSE_EVENTS__TERM_TYPE_RAW: - case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: - case PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: default: if (!err) return false; /* term_type is validated so indexing is safe */ if (asprintf(&err_str, "'%s' is not usable in 'perf stat'", - config_term_name(term_type)) >= 0) + parse_events__term_type_str(term_type)) >= 0) parse_events_error__handle(err, -1, err_str, NULL); return false; } @@ -869,12 +803,12 @@ void parse_events__shrink_config_terms(void) static int config_term_common(struct perf_event_attr *attr, struct parse_events_term *term, - struct parse_events_error *err) + struct parse_events_state *parse_state) { -#define CHECK_TYPE_VAL(type) \ -do { \ - if (check_type_val(term, err, PARSE_EVENTS__TERM_TYPE_ ## type)) \ - return -EINVAL; \ +#define CHECK_TYPE_VAL(type) \ +do { \ + if (check_type_val(term, parse_state->error, PARSE_EVENTS__TERM_TYPE_ ## type)) \ + return -EINVAL; \ } while (0) switch (term->type_term) { @@ -894,6 +828,10 @@ do { \ CHECK_TYPE_VAL(NUM); attr->config3 = term->val.num; break; + case PARSE_EVENTS__TERM_TYPE_CONFIG4: + CHECK_TYPE_VAL(NUM); + attr->config4 = term->val.num; + break; case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: CHECK_TYPE_VAL(NUM); break; @@ -905,7 +843,7 @@ do { \ if (strcmp(term->val.str, "no") && parse_branch_str(term->val.str, &attr->branch_sample_type)) { - parse_events_error__handle(err, term->err_val, + parse_events_error__handle(parse_state->error, term->err_val, strdup("invalid branch sample type"), NULL); return -EINVAL; @@ -914,7 +852,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_TIME: CHECK_TYPE_VAL(NUM); if (term->val.num > 1) { - parse_events_error__handle(err, term->err_val, + parse_events_error__handle(parse_state->error, term->err_val, strdup("expected 0 or 1"), NULL); return -EINVAL; @@ -956,7 +894,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_PERCORE: CHECK_TYPE_VAL(NUM); if ((unsigned int)term->val.num > 1) { - parse_events_error__handle(err, term->err_val, + parse_events_error__handle(parse_state->error, term->err_val, strdup("expected 0 or 1"), NULL); return -EINVAL; @@ -965,22 +903,66 @@ do { \ case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: CHECK_TYPE_VAL(NUM); break; + case PARSE_EVENTS__TERM_TYPE_AUX_ACTION: + CHECK_TYPE_VAL(STR); + break; case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: CHECK_TYPE_VAL(NUM); if (term->val.num > UINT_MAX) { - parse_events_error__handle(err, term->err_val, + parse_events_error__handle(parse_state->error, term->err_val, strdup("too big"), NULL); return -EINVAL; } break; + case PARSE_EVENTS__TERM_TYPE_CPU: { + struct perf_cpu_map *map; + + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { + if (term->val.num >= (u64)cpu__max_present_cpu().cpu) { + parse_events_error__handle(parse_state->error, term->err_val, + strdup("too big"), + /*help=*/NULL); + return -EINVAL; + } + break; + } + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_STR); + if (perf_pmus__find(term->val.str) != NULL) + break; + + map = perf_cpu_map__new(term->val.str); + if (!map && !parse_state->fake_pmu) { + parse_events_error__handle(parse_state->error, term->err_val, + strdup("not a valid PMU or CPU number"), + /*help=*/NULL); + return 
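/*
 * The cpu term validated above accepts three spellings, tried in
 * order: a plain CPU number (bounded by cpu__max_present_cpu()), a PMU
 * name (resolved by perf_pmus__find(), useful on hybrid systems), or a
 * CPU list that perf_cpu_map__new() can parse. Hedged command-line
 * examples of each form:
 *
 *    perf stat -e 'instructions/cpu=0/' ...
 *    perf stat -e 'cycles/cpu=cpu_core/' ...
 *    perf stat -e 'cycles/cpu=0-3/' ...
 */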
-EINVAL; + } + perf_cpu_map__put(map); + break; + } + case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: + CHECK_TYPE_VAL(STR); + if (strtod(term->val.str, NULL) <= 0) { + parse_events_error__handle(parse_state->error, term->err_val, + strdup("zero or negative"), + NULL); + return -EINVAL; + } + if (errno == ERANGE) { + parse_events_error__handle(parse_state->error, term->err_val, + strdup("too big"), + NULL); + return -EINVAL; + } + break; case PARSE_EVENTS__TERM_TYPE_DRV_CFG: case PARSE_EVENTS__TERM_TYPE_USER: - case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: - case PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: default: - parse_events_error__handle(err, term->err_term, - strdup(config_term_name(term->type_term)), + parse_events_error__handle(parse_state->error, term->err_term, + strdup(parse_events__term_type_str(term->type_term)), parse_events_formats_error_string(NULL)); return -EINVAL; } @@ -994,66 +976,72 @@ do { \ * if an invalid config term is provided for legacy events * (for example, instructions/badterm/...), which is confusing. */ - if (!config_term_avail(term->type_term, err)) + if (!config_term_avail(term->type_term, parse_state->error)) return -EINVAL; return 0; #undef CHECK_TYPE_VAL } -static int config_term_pmu(struct perf_event_attr *attr, - struct parse_events_term *term, - struct parse_events_error *err) +static bool check_pmu_is_core(__u32 type, const struct parse_events_term *term, + struct parse_events_error *err) { - if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) { - struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + struct perf_pmu *pmu = NULL; - if (!pmu) { - char *err_str; + /* Avoid loading all PMUs with perf_pmus__find_by_type, just scan the core ones. */ + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + if (pmu->type == type) + return true; + } + parse_events_error__handle(err, term->err_val, + strdup("needs a core PMU"), + NULL); + return false; +} - if (asprintf(&err_str, "Failed to find PMU for type %d", attr->type) >= 0) - parse_events_error__handle(err, term->err_term, - err_str, /*help=*/NULL); +static int config_term_pmu(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_state *parse_state) +{ + if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG) { + if (check_type_val(term, parse_state->error, PARSE_EVENTS__TERM_TYPE_NUM)) + return -EINVAL; + if (term->val.num >= PERF_COUNT_HW_MAX) { + parse_events_error__handle(parse_state->error, term->err_val, + strdup("too big"), + NULL); return -EINVAL; } - /* - * Rewrite the PMU event to a legacy cache one unless the PMU - * doesn't support legacy cache events or the event is present - * within the PMU. 
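/*
 * Portability note on the ratio-to-prev check above: strtod() sets
 * errno only on overflow and never clears it, so a stale ERANGE left
 * by an earlier call could trip the "too big" branch. The conventional
 * idiom clears errno first:
 *
 *    errno = 0;
 *    double val = strtod(term->val.str, NULL);
 *    if (errno == ERANGE)
 *        return -EINVAL; // genuinely out of range
 *    if (val <= 0)
 *        return -EINVAL; // zero or negative ratio
 */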
- */ - if (perf_pmu__supports_legacy_cache(pmu) && - !perf_pmu__have_event(pmu, term->config)) { - attr->type = PERF_TYPE_HW_CACHE; - return parse_events__decode_legacy_cache(term->config, pmu->type, - &attr->config); - } else { - term->type_term = PARSE_EVENTS__TERM_TYPE_USER; - term->no_value = true; - } + if (!check_pmu_is_core(attr->type, term, parse_state->error)) + return -EINVAL; + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)attr->type << PERF_PMU_TYPE_SHIFT; + attr->type = PERF_TYPE_HARDWARE; + return 0; } - if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) { - struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); - - if (!pmu) { - char *err_str; + if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG) { + int cache_type, cache_op, cache_result; - if (asprintf(&err_str, "Failed to find PMU for type %d", attr->type) >= 0) - parse_events_error__handle(err, term->err_term, - err_str, /*help=*/NULL); + if (check_type_val(term, parse_state->error, PARSE_EVENTS__TERM_TYPE_NUM)) + return -EINVAL; + cache_type = term->val.num & 0xFF; + cache_op = (term->val.num >> 8) & 0xFF; + cache_result = (term->val.num >> 16) & 0xFF; + if ((term->val.num & ~0xFFFFFF) || + cache_type >= PERF_COUNT_HW_CACHE_MAX || + cache_op >= PERF_COUNT_HW_CACHE_OP_MAX || + cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) { + parse_events_error__handle(parse_state->error, term->err_val, + strdup("too big"), + NULL); return -EINVAL; } - /* - * If the PMU has a sysfs or json event prefer it over - * legacy. ARM requires this. - */ - if (perf_pmu__have_event(pmu, term->config)) { - term->type_term = PARSE_EVENTS__TERM_TYPE_USER; - term->no_value = true; - } else { - attr->type = PERF_TYPE_HARDWARE; - attr->config = term->val.num; - if (perf_pmus__supports_extended_type()) - attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; - } + if (!check_pmu_is_core(attr->type, term, parse_state->error)) + return -EINVAL; + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)attr->type << PERF_PMU_TYPE_SHIFT; + attr->type = PERF_TYPE_HW_CACHE; return 0; } if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER || @@ -1064,13 +1052,12 @@ static int config_term_pmu(struct perf_event_attr *attr, */ return 0; } - return config_term_common(attr, term, err); + return config_term_common(attr, term, parse_state); } -#ifdef HAVE_LIBTRACEEVENT static int config_term_tracepoint(struct perf_event_attr *attr, struct parse_events_term *term, - struct parse_events_error *err) + struct parse_events_state *parse_state) { switch (term->type_term) { case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: @@ -1082,13 +1069,17 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_OVERWRITE: case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: + case PARSE_EVENTS__TERM_TYPE_AUX_ACTION: case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: - return config_term_common(attr, term, err); + return config_term_common(attr, term, parse_state); case PARSE_EVENTS__TERM_TYPE_USER: case PARSE_EVENTS__TERM_TYPE_CONFIG: case PARSE_EVENTS__TERM_TYPE_CONFIG1: case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_CONFIG3: + case PARSE_EVENTS__TERM_TYPE_CONFIG4: + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: case PARSE_EVENTS__TERM_TYPE_NAME: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: @@ 
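/*
 * The legacy-cache-config decode above is the PERF_TYPE_HW_CACHE ABI
 * packing: config = type | (op << 8) | (result << 16), one byte per
 * field. Worked example, L1 data-cache read misses:
 *
 *    __u64 config = PERF_COUNT_HW_CACHE_L1D             // 0
 *        | (PERF_COUNT_HW_CACHE_OP_READ << 8)           // 0
 *        | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);     // 1 << 16
 *    // config == 0x10000, i.e. L1-dcache-load-misses
 *
 * Where extended types are supported, the core PMU's type is then
 * folded into the upper 32 bits via PERF_PMU_TYPE_SHIFT so the kernel
 * can route the event to the right PMU on hybrid systems.
 */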
-1098,30 +1089,28 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_PERCORE: case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_RAW: - case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: - case PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_CPU: + case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: default: - if (err) { - parse_events_error__handle(err, term->err_term, - strdup(config_term_name(term->type_term)), - strdup("valid terms: call-graph,stack-size\n")); - } + parse_events_error__handle(parse_state->error, term->err_term, + strdup(parse_events__term_type_str(term->type_term)), + strdup("valid terms: call-graph,stack-size\n") + ); return -EINVAL; } return 0; } -#endif static int config_attr(struct perf_event_attr *attr, const struct parse_events_terms *head, - struct parse_events_error *err, + struct parse_events_state *parse_state, config_term_func_t config_term) { struct parse_events_term *term; list_for_each_entry(term, &head->terms, list) - if (config_term(attr, term, err)) + if (config_term(attr, term, parse_state)) return -EINVAL; return 0; @@ -1217,20 +1206,28 @@ do { \ ADD_CONFIG_TERM_VAL(AUX_OUTPUT, aux_output, term->val.num ? 1 : 0, term->weak); break; + case PARSE_EVENTS__TERM_TYPE_AUX_ACTION: + ADD_CONFIG_TERM_STR(AUX_ACTION, term->val.str, term->weak); + break; case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size, term->val.num, term->weak); break; + case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: + ADD_CONFIG_TERM_STR(RATIO_TO_PREV, term->val.str, term->weak); + break; case PARSE_EVENTS__TERM_TYPE_USER: case PARSE_EVENTS__TERM_TYPE_CONFIG: case PARSE_EVENTS__TERM_TYPE_CONFIG1: case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_CONFIG3: + case PARSE_EVENTS__TERM_TYPE_CONFIG4: + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: case PARSE_EVENTS__TERM_TYPE_NAME: case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_RAW: - case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: - case PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_CPU: default: break; } @@ -1263,6 +1260,9 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head case PARSE_EVENTS__TERM_TYPE_CONFIG1: case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_CONFIG3: + case PARSE_EVENTS__TERM_TYPE_CONFIG4: + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: case PARSE_EVENTS__TERM_TYPE_NAME: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: @@ -1279,11 +1279,12 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head case PARSE_EVENTS__TERM_TYPE_DRV_CFG: case PARSE_EVENTS__TERM_TYPE_PERCORE: case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: + case PARSE_EVENTS__TERM_TYPE_AUX_ACTION: case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_RAW: - case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: - case PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_CPU: + case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: default: break; } @@ -1303,41 +1304,28 @@ int parse_events_add_tracepoint(struct parse_events_state *parse_state, struct parse_events_terms *head_config, void *loc_) { YYLTYPE *loc = loc_; -#ifdef HAVE_LIBTRACEEVENT + if (head_config) { struct perf_event_attr attr; - if (config_attr(&attr, head_config, err, - 
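/*
 * aux-action is a string term consumed by AUX-area tracing (e.g.
 * intel_pt) to pause and resume the trace from other events. A hedged
 * example assuming an Intel PT capable system, with the trace starting
 * paused and a tracepoint resuming it:
 *
 *    perf record -e intel_pt/aux-action=start-paused/u \
 *        -e syscalls:sys_enter_execve/aux-action=resume/ -- workload
 */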
config_term_tracepoint)) + if (config_attr(&attr, head_config, parse_state, config_term_tracepoint)) return -EINVAL; } - if (strpbrk(sys, "*?")) - return add_tracepoint_multi_sys(parse_state, list, sys, event, - err, head_config, loc); - else - return add_tracepoint_event(parse_state, list, sys, event, - err, head_config, loc); -#else - (void)parse_state; - (void)list; - (void)sys; - (void)event; - (void)head_config; - parse_events_error__handle(err, loc->first_column, strdup("unsupported tracepoint"), - strdup("libtraceevent is necessary for tracepoint support")); - return -1; -#endif + return add_tracepoint_multi_sys(parse_state, list, sys, event, + err, head_config, loc); } static int __parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, u32 type, u32 extended_type, - u64 config, const struct parse_events_terms *head_config) + u64 config, const struct parse_events_terms *head_config, + struct evsel *first_wildcard_match) { struct perf_event_attr attr; LIST_HEAD(config_terms); const char *name, *metric_id; + struct perf_cpu_map *cpus; int ret; memset(&attr, 0, sizeof(attr)); @@ -1349,8 +1337,7 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state, } if (head_config) { - if (config_attr(&attr, head_config, parse_state->error, - config_term_common)) + if (config_attr(&attr, head_config, parse_state, config_term_common)) return -EINVAL; if (get_config_terms(head_config, &config_terms)) @@ -1359,9 +1346,11 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state, name = get_config_name(head_config); metric_id = get_config_metric_id(head_config); + cpus = get_config_cpu(head_config, parse_state->fake_pmu); ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name, - metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL) ? 0 : -ENOMEM; + metric_id, pmu, &config_terms, first_wildcard_match, + cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) ? 0 : -ENOMEM; + perf_cpu_map__put(cpus); free_config_terms(&config_terms); return ret; } @@ -1377,6 +1366,7 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, /* Wildcards on numeric values are only supported by core PMUs. 
*/ if (wildcard && perf_pmus__supports_extended_type()) { + struct evsel *first_wildcard_match = NULL; while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { int ret; @@ -1386,22 +1376,20 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, ret = __parse_events_add_numeric(parse_state, list, pmu, type, pmu->type, - config, head_config); + config, head_config, + first_wildcard_match); if (ret) return ret; + if (first_wildcard_match == NULL) + first_wildcard_match = + container_of(list->prev, struct evsel, core.node); } if (found_supported) return 0; } return __parse_events_add_numeric(parse_state, list, perf_pmus__find_by_type(type), - type, /*extended_type=*/0, config, head_config); -} - -int parse_events_add_tool(struct parse_events_state *parse_state, - struct list_head *list, - int tool_event) -{ - return add_event_tool(list, &parse_state->idx, tool_event); + type, /*extended_type=*/0, config, head_config, + /*first_wildcard_match=*/NULL); } static bool config_term_percore(struct list_head *config_terms) @@ -1419,8 +1407,9 @@ static bool config_term_percore(struct list_head *config_terms) static int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, const struct parse_events_terms *const_parsed_terms, - bool auto_merge_stats) + struct evsel *first_wildcard_match) { + u64 alternate_hw_config = PERF_COUNT_HW_MAX; struct perf_event_attr attr; struct perf_pmu_info info; struct evsel *evsel; @@ -1428,6 +1417,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, LIST_HEAD(config_terms); struct parse_events_terms parsed_terms; bool alias_rewrote_terms = false; + struct perf_cpu_map *term_cpu = NULL; if (verbose > 1) { struct strbuf sb; @@ -1455,8 +1445,8 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, /*name=*/NULL, /*metric_id=*/NULL, pmu, - /*config_terms=*/NULL, auto_merge_stats, - /*cpu_list=*/NULL); + /*config_terms=*/NULL, first_wildcard_match, + /*cpu_list=*/NULL, alternate_hw_config); return evsel ? 0 : -ENOMEM; } @@ -1470,14 +1460,15 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, fix_raw(&parsed_terms, pmu); /* Configure attr/terms with a known PMU, this will set hardcoded terms. */ - if (config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { + if (config_attr(&attr, &parsed_terms, parse_state, config_term_pmu)) { parse_events_terms__exit(&parsed_terms); return -EINVAL; } /* Look for event names in the terms and rewrite into format based terms. */ - if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, &parsed_terms, - &info, &alias_rewrote_terms, err)) { + if (perf_pmu__check_alias(pmu, &parsed_terms, + &info, &alias_rewrote_terms, + &alternate_hw_config, err)) { parse_events_terms__exit(&parsed_terms); return -EINVAL; } @@ -1493,7 +1484,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, /* Configure attr/terms again if an alias was expanded. 
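/*
 * first_wildcard_match relies on a standard kernel-list idiom: right
 * after list_add_tail(), list->prev is the tail node, so the evsel
 * just appended can be recovered with container_of(). Minimal sketch
 * (only valid when the list is known to be non-empty):
 *
 *    static struct evsel *last_added(struct list_head *list)
 *    {
 *        return container_of(list->prev, struct evsel, core.node);
 *    }
 *
 * Subsequent wildcard matches then point back at that first evsel so
 * their counts can be merged across the core PMUs.
 */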
*/ if (alias_rewrote_terms && - config_attr(&attr, &parsed_terms, parse_state->error, config_term_pmu)) { + config_attr(&attr, &parsed_terms, parse_state, config_term_pmu)) { parse_events_terms__exit(&parsed_terms); return -EINVAL; } @@ -1513,17 +1504,20 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, return -ENOMEM; } - if (!parse_state->fake_pmu && - perf_pmu__config(pmu, &attr, &parsed_terms, parse_state->error)) { + /* Skip configuring hard coded terms that were applied by config_attr. */ + if (perf_pmu__config(pmu, &attr, &parsed_terms, /*apply_hardcoded=*/false, + parse_state->error)) { free_config_terms(&config_terms); parse_events_terms__exit(&parsed_terms); return -EINVAL; } + term_cpu = get_config_cpu(&parsed_terms, parse_state->fake_pmu); evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, get_config_name(&parsed_terms), get_config_metric_id(&parsed_terms), pmu, - &config_terms, auto_merge_stats, /*cpu_list=*/NULL); + &config_terms, first_wildcard_match, term_cpu, alternate_hw_config); + perf_cpu_map__put(term_cpu); if (!evsel) { parse_events_terms__exit(&parsed_terms); return -ENOMEM; @@ -1534,17 +1528,16 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, evsel->percore = config_term_percore(&evsel->config_terms); - if (parse_state->fake_pmu) { - parse_events_terms__exit(&parsed_terms); - return 0; - } - parse_events_terms__exit(&parsed_terms); free((char *)evsel->unit); evsel->unit = strdup(info.unit); evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; + evsel->retirement_latency.mean = info.retirement_latency_mean; + evsel->retirement_latency.min = info.retirement_latency_min; + evsel->retirement_latency.max = info.retirement_latency_max; + return 0; } @@ -1560,6 +1553,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, int ok = 0; const char *config; struct parse_events_terms parsed_terms; + struct evsel *first_wildcard_match = NULL; *listp = NULL; @@ -1591,8 +1585,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, INIT_LIST_HEAD(list); - while ((pmu = perf_pmus__scan(pmu)) != NULL) { - bool auto_merge_stats; + while ((pmu = perf_pmus__scan_for_event(pmu, event_name)) != NULL) { if (parse_events__filter_pmu(parse_state, pmu)) continue; @@ -1600,9 +1593,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, if (!perf_pmu__have_event(pmu, event_name)) continue; - auto_merge_stats = perf_pmu__auto_merge_stats(pmu); if (!parse_events_add_pmu(parse_state, list, pmu, - &parsed_terms, auto_merge_stats)) { + &parsed_terms, first_wildcard_match)) { struct strbuf sb; strbuf_init(&sb, /*hint=*/ 0); @@ -1611,16 +1603,18 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, strbuf_release(&sb); ok++; } + if (first_wildcard_match == NULL) + first_wildcard_match = container_of(list->prev, struct evsel, core.node); } if (parse_state->fake_pmu) { - if (!parse_events_add_pmu(parse_state, list, parse_state->fake_pmu, &parsed_terms, - /*auto_merge_stats=*/true)) { + if (!parse_events_add_pmu(parse_state, list, perf_pmus__fake_pmu(), &parsed_terms, + first_wildcard_match)) { struct strbuf sb; strbuf_init(&sb, /*hint=*/ 0); parse_events_terms__to_strbuf(&parsed_terms, &sb); - pr_debug("%s -> %s/%s/\n", event_name, "fake_pmu", sb.buf); + pr_debug("%s -> fake/%s/\n", event_name, sb.buf); strbuf_release(&sb); ok++; } @@ -1646,6 +1640,7 @@ int parse_events_multi_pmu_add_or_add_pmu(struct 
parse_events_state *parse_state struct perf_pmu *pmu; int ok = 0; char *help; + struct evsel *first_wildcard_match = NULL; *listp = malloc(sizeof(**listp)); if (!*listp) @@ -1654,24 +1649,34 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state INIT_LIST_HEAD(*listp); /* Attempt to add to list assuming event_or_pmu is a PMU name. */ - pmu = parse_state->fake_pmu ?: perf_pmus__find(event_or_pmu); + pmu = perf_pmus__find(event_or_pmu); if (pmu && !parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms, - /*auto_merge_stats=*/false)) + first_wildcard_match)) return 0; + if (parse_state->fake_pmu) { + if (!parse_events_add_pmu(parse_state, *listp, perf_pmus__fake_pmu(), + const_parsed_terms, + first_wildcard_match)) + return 0; + } + pmu = NULL; /* Failed to add, try wildcard expansion of event_or_pmu as a PMU name. */ - while ((pmu = perf_pmus__scan(pmu)) != NULL) { - if (!parse_events__filter_pmu(parse_state, pmu) && - perf_pmu__match(pmu, event_or_pmu)) { - bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu); - - if (!parse_events_add_pmu(parse_state, *listp, pmu, - const_parsed_terms, - auto_merge_stats)) { - ok++; - parse_state->wild_card_pmus = true; - } + while ((pmu = perf_pmus__scan_matching_wildcard(pmu, event_or_pmu)) != NULL) { + + if (parse_events__filter_pmu(parse_state, pmu)) + continue; + + if (!parse_events_add_pmu(parse_state, *listp, pmu, + const_parsed_terms, + first_wildcard_match)) { + ok++; + parse_state->wild_card_pmus = true; + } + if (first_wildcard_match == NULL) { + first_wildcard_match = + container_of((*listp)->prev, struct evsel, core.node); } } if (ok) @@ -1679,7 +1684,8 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state /* Failure to add, assume event_or_pmu is an event name. */ zfree(listp); - if (!parse_events_multi_pmu_add(parse_state, event_or_pmu, const_parsed_terms, listp, loc)) + if (!parse_events_multi_pmu_add(parse_state, event_or_pmu, + const_parsed_terms, listp, loc)) return 0; if (asprintf(&help, "Unable to find PMU or event on a PMU of '%s'", event_or_pmu) < 0) @@ -1728,17 +1734,11 @@ static int parse_events__modifier_list(struct parse_events_state *parse_state, int eH = group ? evsel->core.attr.exclude_host : 0; int eG = group ? evsel->core.attr.exclude_guest : 0; int exclude = eu | ek | eh; - int exclude_GH = group ? 
evsel->exclude_GH : 0; + int exclude_GH = eG | eH; - if (mod.precise) { - /* use of precise requires exclude_guest */ - eG = 1; - } if (mod.user) { if (!exclude) exclude = eu = ek = eh = 1; - if (!exclude_GH && !perf_guest) - eG = 1; eu = 0; } if (mod.kernel) { @@ -1761,6 +1761,13 @@ static int parse_events__modifier_list(struct parse_events_state *parse_state, exclude_GH = eG = eH = 1; eH = 0; } + if (!exclude_GH && exclude_GH_default) { + if (perf_host) + eG = 1; + else if (perf_guest) + eH = 1; + } + evsel->core.attr.exclude_user = eu; evsel->core.attr.exclude_kernel = ek; evsel->core.attr.exclude_hv = eh; @@ -1809,6 +1816,10 @@ static int parse_events__modifier_list(struct parse_events_state *parse_state, evsel->weak_group = true; if (mod.bpf) evsel->bpf_counter = true; + if (mod.retire_lat) + evsel->retire_lat = true; + if (mod.dont_regroup) + evsel->dont_regroup = true; } return 0; } @@ -1846,7 +1857,6 @@ int parse_events__set_default_name(struct list_head *list, char *name) } static int parse_events__scanner(const char *str, - FILE *input, struct parse_events_state *parse_state) { YY_BUFFER_STATE buffer; @@ -1857,10 +1867,7 @@ static int parse_events__scanner(const char *str, if (ret) return ret; - if (str) - buffer = parse_events__scan_string(str, scanner); - else - parse_events_set_in(input, scanner); + buffer = parse_events__scan_string(str, scanner); #ifdef PARSER_DEBUG parse_events_debug = 1; @@ -1868,10 +1875,8 @@ static int parse_events__scanner(const char *str, #endif ret = parse_events_parse(parse_state, scanner); - if (str) { - parse_events__flush_buffer(buffer, scanner); - parse_events__delete_buffer(buffer, scanner); - } + parse_events__flush_buffer(buffer, scanner); + parse_events__delete_buffer(buffer, scanner); parse_events_lex_destroy(scanner); return ret; } @@ -1879,7 +1884,7 @@ static int parse_events__scanner(const char *str, /* * parse event config string, return a list of event terms. */ -int parse_events_terms(struct parse_events_terms *terms, const char *str, FILE *input) +int parse_events_terms(struct parse_events_terms *terms, const char *str) { struct parse_events_state parse_state = { .terms = NULL, @@ -1887,7 +1892,7 @@ int parse_events_terms(struct parse_events_terms *terms, const char *str, FILE * }; int ret; - ret = parse_events__scanner(str, input, &parse_state); + ret = parse_events__scanner(str, &parse_state); if (!ret) list_splice(&parse_state.terms->terms, &terms->terms); @@ -1957,8 +1962,8 @@ static int evsel__compute_group_pmu_name(struct evsel *evsel, } } } - /* Assign the actual name taking care that the fake PMU lacks a name. */ - evsel->group_pmu_name = strdup(group_pmu_name ?: "fake"); + /* Record computed name. */ + evsel->group_pmu_name = strdup(group_pmu_name); return evsel->group_pmu_name ? 0 : -ENOMEM; } @@ -1977,59 +1982,82 @@ static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct li int *force_grouped_idx = _fg_idx; int lhs_sort_idx, rhs_sort_idx, ret; const char *lhs_pmu_name, *rhs_pmu_name; - bool lhs_has_group, rhs_has_group; /* - * First sort by grouping/leader. Read the leader idx only if the evsel - * is part of a group, by default ungrouped events will be sorted - * relative to grouped events based on where the first ungrouped event - * occurs. If both events don't have a group we want to fall-through to - * the arch specific sorting, that can reorder and fix things like - * Intel's topdown events. + * Get the indexes of the 2 events to sort. 
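/*
 * Worked example of the exclude-bit resolution above for "cycles:u":
 * mod.user is set and no exclude_* bit is set yet, so the first branch
 * sets exclude = eu = ek = eh = 1 and then clears eu, leaving
 *
 *    attr.exclude_user   = 0;   // count user space
 *    attr.exclude_kernel = 1;
 *    attr.exclude_hv     = 1;
 *
 * Stacking modifiers, as in "cycles:uk", clears both eu and ek and
 * excludes only the hypervisor.
 */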
If the events are + * in groups then the leader's index is used otherwise the + * event's index is used. An index may be forced for events that + * must be in the same group, namely Intel topdown events. */ - if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) { - lhs_has_group = true; - lhs_sort_idx = lhs_core->leader->idx; + if (lhs->dont_regroup) { + lhs_sort_idx = lhs_core->idx; + } else if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)) { + lhs_sort_idx = *force_grouped_idx; } else { - lhs_has_group = false; - lhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs) - ? *force_grouped_idx - : lhs_core->idx; - } - if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) { - rhs_has_group = true; - rhs_sort_idx = rhs_core->leader->idx; + bool lhs_has_group = lhs_core->leader != lhs_core || lhs_core->nr_members > 1; + + lhs_sort_idx = lhs_has_group ? lhs_core->leader->idx : lhs_core->idx; + } + if (rhs->dont_regroup) { + rhs_sort_idx = rhs_core->idx; + } else if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)) { + rhs_sort_idx = *force_grouped_idx; } else { - rhs_has_group = false; - rhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs) - ? *force_grouped_idx - : rhs_core->idx; + bool rhs_has_group = rhs_core->leader != rhs_core || rhs_core->nr_members > 1; + + rhs_sort_idx = rhs_has_group ? rhs_core->leader->idx : rhs_core->idx; } + /* If the indices differ then respect the insertion order. */ if (lhs_sort_idx != rhs_sort_idx) return lhs_sort_idx - rhs_sort_idx; - /* Group by PMU if there is a group. Groups can't span PMUs. */ - if (lhs_has_group && rhs_has_group) { - lhs_pmu_name = lhs->group_pmu_name; - rhs_pmu_name = rhs->group_pmu_name; - ret = strcmp(lhs_pmu_name, rhs_pmu_name); - if (ret) - return ret; - } + /* + * Ignoring forcing, lhs_sort_idx == rhs_sort_idx so lhs and rhs should + * be in the same group. Events in the same group need to be ordered by + * their grouping PMU name as the group will be broken to ensure only + * events on the same PMU are programmed together. + * + * With forcing the lhs_sort_idx == rhs_sort_idx shows that one or both + * events are being forced to be at force_group_index. If only one event + * is being forced then the other event is the group leader of the group + * we're trying to force the event into. Ensure for the force grouped + * case that the PMU name ordering is also respected. + */ + lhs_pmu_name = lhs->group_pmu_name; + rhs_pmu_name = rhs->group_pmu_name; + ret = strcmp(lhs_pmu_name, rhs_pmu_name); + if (ret) + return ret; - /* Architecture specific sorting. */ + /* + * Architecture specific sorting, by default sort events in the same + * group with the same PMU by their insertion index. On Intel topdown + * constraints must be adhered to - slots first, etc. + */ return arch_evlist__cmp(lhs, rhs); } +int __weak arch_evlist__add_required_events(struct list_head *list __always_unused) +{ + return 0; +} + static int parse_events__sort_events_and_fix_groups(struct list_head *list) { int idx = 0, force_grouped_idx = -1; struct evsel *pos, *cur_leader = NULL; struct perf_evsel *cur_leaders_grp = NULL; - bool idx_changed = false, cur_leader_force_grouped = false; + bool idx_changed = false; int orig_num_leaders = 0, num_leaders = 0; int ret; + struct evsel *force_grouped_leader = NULL; + bool last_event_was_forced_leader = false; + + /* On x86 topdown metrics events require a slots event. 
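/*
 * evlist__cmp() follows the list_sort() comparator contract (<0, 0, >0)
 * and, because the kernel's list_sort() is stable, events with equal
 * keys keep their insertion order. The three-stage key reads as a
 * lexicographic compare:
 *
 *    if (lhs_sort_idx != rhs_sort_idx)
 *        return lhs_sort_idx - rhs_sort_idx;   // group/insertion index
 *    ret = strcmp(lhs_pmu_name, rhs_pmu_name); // group PMU name
 *    if (ret)
 *        return ret;
 *    return arch_evlist__cmp(lhs, rhs);        // arch tie-breaker
 */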
*/ + ret = arch_evlist__add_required_events(list); + if (ret) + return ret; /* * Compute index to insert ungrouped events at. Place them where the @@ -2052,10 +2080,13 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list) */ pos->core.idx = idx++; - /* Remember an index to sort all forced grouped events together to. */ - if (force_grouped_idx == -1 && pos == pos_leader && pos->core.nr_members < 2 && - arch_evsel__must_be_in_group(pos)) - force_grouped_idx = pos->core.idx; + /* + * Remember an index to sort all forced grouped events + * together to. Use the group leader as some events + * must appear first within the group. + */ + if (force_grouped_idx == -1 && arch_evsel__must_be_in_group(pos)) + force_grouped_idx = pos_leader->core.idx; } /* Sort events. */ @@ -2067,10 +2098,10 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list) */ idx = 0; list_for_each_entry(pos, list, core.node) { - const struct evsel *pos_leader = evsel__leader(pos); + struct evsel *pos_leader = evsel__leader(pos); const char *pos_pmu_name = pos->group_pmu_name; const char *cur_leader_pmu_name; - bool pos_force_grouped = force_grouped_idx != -1 && + bool pos_force_grouped = force_grouped_idx != -1 && !pos->dont_regroup && arch_evsel__must_be_in_group(pos); /* Reset index and nr_members. */ @@ -2083,31 +2114,65 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list) * Set the group leader respecting the given groupings and that * groups can't span PMUs. */ - if (!cur_leader) - cur_leader = pos; - + if (!cur_leader || pos->dont_regroup) { + cur_leader = pos->dont_regroup ? pos_leader : pos; + cur_leaders_grp = &cur_leader->core; + if (pos_force_grouped) + force_grouped_leader = pos; + } cur_leader_pmu_name = cur_leader->group_pmu_name; - if ((cur_leaders_grp != pos->core.leader && - (!pos_force_grouped || !cur_leader_force_grouped)) || - strcmp(cur_leader_pmu_name, pos_pmu_name)) { - /* Event is for a different group/PMU than last. */ + if (strcmp(cur_leader_pmu_name, pos_pmu_name)) { + /* PMU changed so the group/leader must change. */ cur_leader = pos; - /* - * Remember the leader's group before it is overwritten, - * so that later events match as being in the same - * group. - */ cur_leaders_grp = pos->core.leader; + if (pos_force_grouped && force_grouped_leader == NULL) + force_grouped_leader = pos; + } else if (cur_leaders_grp != pos->core.leader) { + bool split_even_if_last_leader_was_forced = true; + /* - * Avoid forcing events into groups with events that - * don't need to be in the group. + * Event is for a different group. If the last event was + * the forced group leader then subsequent group events + * and forced events should be in the same group. If + * there are no other forced group events then the + * forced group leader wasn't really being forced into a + * group, it just set arch_evsel__must_be_in_group, and + * we don't want the group to split here. */ - cur_leader_force_grouped = pos_force_grouped; + if (force_grouped_idx != -1 && last_event_was_forced_leader) { + struct evsel *pos2 = pos; + /* + * Search the whole list as the group leaders + * aren't currently valid. 
+ */ + list_for_each_entry_continue(pos2, list, core.node) { + if (pos->core.leader == pos2->core.leader && + arch_evsel__must_be_in_group(pos2)) { + split_even_if_last_leader_was_forced = false; + break; + } + } + } + if (!last_event_was_forced_leader || split_even_if_last_leader_was_forced) { + if (pos_force_grouped) { + if (force_grouped_leader) { + cur_leader = force_grouped_leader; + cur_leaders_grp = force_grouped_leader->core.leader; + } else { + cur_leader = force_grouped_leader = pos; + cur_leaders_grp = &pos->core; + } + } else { + cur_leader = pos; + cur_leaders_grp = pos->core.leader; + } + } } if (pos_leader != cur_leader) { /* The leader changed so update it. */ evsel__set_leader(pos, cur_leader); } + last_event_was_forced_leader = (force_grouped_leader == pos); } list_for_each_entry(pos, list, core.node) { struct evsel *pos_leader = evsel__leader(pos); @@ -2120,7 +2185,7 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list) } int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter, - struct parse_events_error *err, struct perf_pmu *fake_pmu, + struct parse_events_error *err, bool fake_pmu, bool warn_if_reordered, bool fake_tp) { struct parse_events_state parse_state = { @@ -2135,7 +2200,7 @@ int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filte }; int ret, ret2; - ret = parse_events__scanner(str, /*input=*/ NULL, &parse_state); + ret = parse_events__scanner(str, &parse_state); if (!ret && list_empty(&parse_state.list)) { WARN_ONCE(true, "WARNING: event parser found nothing\n"); @@ -2146,14 +2211,23 @@ int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filte if (ret2 < 0) return ret; - if (ret2 && warn_if_reordered && !parse_state.wild_card_pmus) - pr_warning("WARNING: events were regrouped to match PMUs\n"); - /* * Add list to the evlist even with errors to allow callers to clean up. */ evlist__splice_list_tail(evlist, &parse_state.list); + if (ret2 && warn_if_reordered && !parse_state.wild_card_pmus) { + pr_warning("WARNING: events were regrouped to match PMUs\n"); + + if (verbose > 0) { + struct strbuf sb = STRBUF_INIT; + + evlist__uniquify_evsel_names(evlist, &stat_config); + evlist__format_evsels(evlist, &sb, 2048); + pr_debug("evlist after sorting/fixing: '%s'\n", sb.buf); + strbuf_release(&sb); + } + } if (!ret) { struct evsel *last; @@ -2178,6 +2252,8 @@ int parse_event(struct evlist *evlist, const char *str) parse_events_error__init(&err); ret = parse_events(evlist, str, &err); + if (ret && verbose > 0) + parse_events_error__print(&err, str); parse_events_error__exit(&err); return ret; } @@ -2339,7 +2415,7 @@ int parse_events_option(const struct option *opt, const char *str, parse_events_error__init(&err); ret = __parse_events(*args->evlistp, str, args->pmu_filter, &err, - /*fake_pmu=*/NULL, /*warn_if_reordered=*/true, + /*fake_pmu=*/false, /*warn_if_reordered=*/true, /*fake_tp=*/false); if (ret) { @@ -2406,12 +2482,17 @@ foreach_evsel_in_last_glob(struct evlist *evlist, return 0; } +/* Will a tracepoint filter work for str or should a BPF filter be used? 
*/ +static bool is_possible_tp_filter(const char *str) +{ + return strstr(str, "uid") == NULL; +} + static int set_filter(struct evsel *evsel, const void *arg) { const char *str = arg; - bool found = false; int nr_addr_filters = 0; - struct perf_pmu *pmu = NULL; + struct perf_pmu *pmu; if (evsel == NULL) { fprintf(stderr, @@ -2419,7 +2500,7 @@ static int set_filter(struct evsel *evsel, const void *arg) return -1; } - if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) { + if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT && is_possible_tp_filter(str)) { if (evsel__append_tp_filter(evsel, str) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); @@ -2429,16 +2510,11 @@ static int set_filter(struct evsel *evsel, const void *arg) return 0; } - while ((pmu = perf_pmus__scan(pmu)) != NULL) - if (pmu->type == evsel->core.attr.type) { - found = true; - break; - } - - if (found) + pmu = evsel__find_pmu(evsel); + if (pmu) { perf_pmu__scan_file(pmu, "nr_addr_filters", "%d", &nr_addr_filters); - + } if (!nr_addr_filters) return perf_bpf_filter__parse(&evsel->bpf_filters, str); @@ -2460,6 +2536,30 @@ int parse_filter(const struct option *opt, const char *str, (const void *)str); } +int parse_uid_filter(struct evlist *evlist, uid_t uid) +{ + struct option opt = { + .value = &evlist, + }; + char buf[128]; + int ret; + + snprintf(buf, sizeof(buf), "uid == %d", uid); + ret = parse_filter(&opt, buf, /*unset=*/0); + if (ret) { + if (use_browser >= 1) { + /* + * Use ui__warning so a pop up appears above the + * underlying BPF error message. + */ + ui__warning("Failed to add UID filtering that uses BPF filtering.\n"); + } else { + fprintf(stderr, "Failed to add UID filtering that uses BPF filtering.\n"); + } + } + return ret; +} + static int add_exclude_perf_filter(struct evsel *evsel, const void *arg __maybe_unused) { @@ -2539,7 +2639,7 @@ int parse_events_term__num(struct parse_events_term **term, struct parse_events_term temp = { .type_val = PARSE_EVENTS__TERM_TYPE_NUM, .type_term = type_term, - .config = config ? : strdup(config_term_name(type_term)), + .config = config ? : strdup(parse_events__term_type_str(type_term)), .no_value = no_value, .err_term = loc_term ? loc_term->first_column : 0, .err_val = loc_val ? 
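/*
 * parse_uid_filter() formats a "uid == N" expression and pushes it down
 * the BPF filter path; is_possible_tp_filter() screens such strings out
 * of the tracepoint-filter route since the kernel's tracepoint filters
 * cannot evaluate uid. Hedged usage sketch, restricting an evlist to
 * the invoking user:
 *
 *    if (parse_uid_filter(evlist, getuid()))
 *        return -1; // BPF filter setup failed, warning already shown
 */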
loc_val->first_column : 0, @@ -2573,7 +2673,7 @@ int parse_events_term__term(struct parse_events_term **term, void *loc_term, void *loc_val) { return parse_events_term__str(term, term_lhs, NULL, - strdup(config_term_name(term_rhs)), + strdup(parse_events__term_type_str(term_rhs)), loc_term, loc_val); } @@ -2650,7 +2750,7 @@ void parse_events_terms__delete(struct parse_events_terms *terms) free(terms); } -int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct strbuf *sb) +static int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct strbuf *sb) { struct parse_events_term *term; bool first = true; @@ -2680,7 +2780,8 @@ int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct if (ret < 0) return ret; } else if ((unsigned int)term->type_term < __PARSE_EVENTS__TERM_TYPE_NR) { - ret = strbuf_addf(sb, "%s=", config_term_name(term->type_term)); + ret = strbuf_addf(sb, "%s=", + parse_events__term_type_str(term->type_term)); if (ret < 0) return ret; } @@ -2700,7 +2801,7 @@ static void config_terms_list(char *buf, size_t buf_sz) buf[0] = '\0'; for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) { - const char *name = config_term_name(i); + const char *name = parse_events__term_type_str(i); if (!config_term_avail(i, NULL)) continue; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index e13de2c8b706..3577ab213730 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -9,8 +9,8 @@ #include <stdbool.h> #include <linux/types.h> #include <linux/perf_event.h> -#include <stdio.h> #include <string.h> +#include <sys/types.h> struct evsel; struct evlist; @@ -20,7 +20,7 @@ struct option; struct perf_pmu; struct strbuf; -const char *event_type(int type); +const char *event_type(size_t type); /* Arguments encoded in opt->value. 
*/ struct parse_events_option_args { @@ -31,20 +31,21 @@ int parse_events_option(const struct option *opt, const char *str, int unset); int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset); __attribute__((nonnull(1, 2, 4))) int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filter, - struct parse_events_error *error, struct perf_pmu *fake_pmu, + struct parse_events_error *error, bool fake_pmu, bool warn_if_reordered, bool fake_tp); __attribute__((nonnull(1, 2, 3))) static inline int parse_events(struct evlist *evlist, const char *str, struct parse_events_error *err) { - return __parse_events(evlist, str, /*pmu_filter=*/NULL, err, /*fake_pmu=*/NULL, + return __parse_events(evlist, str, /*pmu_filter=*/NULL, err, /*fake_pmu=*/false, /*warn_if_reordered=*/true, /*fake_tp=*/false); } int parse_event(struct evlist *evlist, const char *str); int parse_filter(const struct option *opt, const char *str, int unset); +int parse_uid_filter(struct evlist *evlist, uid_t uid); int exclude_perf(const struct option *opt, const char *arg, int unset); enum parse_events__term_val_type { @@ -58,6 +59,7 @@ enum parse_events__term_type { PARSE_EVENTS__TERM_TYPE_CONFIG1, PARSE_EVENTS__TERM_TYPE_CONFIG2, PARSE_EVENTS__TERM_TYPE_CONFIG3, + PARSE_EVENTS__TERM_TYPE_CONFIG4, PARSE_EVENTS__TERM_TYPE_NAME, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ, @@ -74,12 +76,15 @@ enum parse_events__term_type { PARSE_EVENTS__TERM_TYPE_DRV_CFG, PARSE_EVENTS__TERM_TYPE_PERCORE, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT, + PARSE_EVENTS__TERM_TYPE_AUX_ACTION, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE, PARSE_EVENTS__TERM_TYPE_METRIC_ID, PARSE_EVENTS__TERM_TYPE_RAW, - PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, - PARSE_EVENTS__TERM_TYPE_HARDWARE, -#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_HARDWARE + 1) + PARSE_EVENTS__TERM_TYPE_CPU, + PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV, + PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG, + PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG, +#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG + 1) }; struct parse_events_term { @@ -150,8 +155,8 @@ struct parse_events_state { struct parse_events_terms *terms; /* Start token. */ int stoken; - /* Special fake PMU marker for testing. */ - struct perf_pmu *fake_pmu; + /* Use the fake PMU marker for testing. */ + bool fake_pmu; /* Skip actual tracepoint processing for testing. */ bool fake_tp; /* If non-null, when wildcard matching only match the given PMU. */ @@ -162,6 +167,8 @@ struct parse_events_state { bool wild_card_pmus; }; +const char *parse_events__term_type_str(enum parse_events__term_type term_type); + bool parse_events__filter_pmu(const struct parse_events_state *parse_state, const struct perf_pmu *pmu); void parse_events__shrink_config_terms(void); @@ -186,8 +193,7 @@ void parse_events_term__delete(struct parse_events_term *term); void parse_events_terms__delete(struct parse_events_terms *terms); void parse_events_terms__init(struct parse_events_terms *terms); void parse_events_terms__exit(struct parse_events_terms *terms); -int parse_events_terms(struct parse_events_terms *terms, const char *str, FILE *input); -int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct strbuf *sb); +int parse_events_terms(struct parse_events_terms *terms, const char *str); struct parse_events_modifier { u8 precise; /* Number of repeated 'p' for precision. 
*/ @@ -203,6 +209,8 @@ struct parse_events_modifier { bool hypervisor : 1; /* 'h' */ bool guest : 1; /* 'G' */ bool host : 1; /* 'H' */ + bool retire_lat : 1; /* 'R' */ + bool dont_regroup : 1; /* 'X' */ }; int parse_events__modifier_event(struct parse_events_state *parse_state, void *loc, @@ -220,12 +228,6 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, u32 type, u64 config, const struct parse_events_terms *head_config, bool wildcard); -int parse_events_add_tool(struct parse_events_state *parse_state, - struct list_head *list, - int tool_event); -int parse_events_add_cache(struct list_head *list, int *idx, const char *name, - struct parse_events_state *parse_state, - struct parse_events_terms *parsed_terms); int parse_events__decode_legacy_cache(const char *name, int pmu_type, __u64 *config); int parse_events_add_breakpoint(struct parse_events_state *parse_state, struct list_head *list, @@ -253,8 +255,6 @@ struct event_symbol { const char *symbol; const char *alias; }; -extern const struct event_symbol event_symbols_hw[]; -extern const struct event_symbol event_symbols_sw[]; char *parse_events_formats_error_string(char *additional_terms); @@ -285,4 +285,6 @@ static inline bool is_sdt_event(char *str __maybe_unused) } #endif /* HAVE_LIBELF_SUPPORT */ +size_t default_breakpoint_len(void); + #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 16045c383ada..251ce4321878 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -5,16 +5,14 @@ %option stack %option bison-locations %option yylineno -%option reject +%option noyywrap %{ #include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> #include "parse-events.h" #include "parse-events-bison.h" -#include "evsel.h" char *parse_events_get_text(yyscan_t yyscanner); YYSTYPE *parse_events_get_lval(yyscan_t yyscanner); @@ -53,27 +51,26 @@ static int str(yyscan_t scanner, int token) YYSTYPE *yylval = parse_events_get_lval(scanner); char *text = parse_events_get_text(scanner); - if (text[0] != '\'') { - yylval->str = strdup(text); - } else { - /* - * If a text tag specified on the command line - * contains opening single quite ' then it is - * expected that the tag ends with single quote - * as well, like this: - * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\' - * quotes need to be escaped to bypass shell - * processing. - */ - yylval->str = strndup(&text[1], strlen(text) - 2); - } - + yylval->str = strdup(text); return token; } -static int lc_str(yyscan_t scanner, const struct parse_events_state *state) +static int quoted_str(yyscan_t scanner, int token) { - return str(scanner, state->match_legacy_cache_terms ? PE_LEGACY_CACHE : PE_NAME); + YYSTYPE *yylval = parse_events_get_lval(scanner); + char *text = parse_events_get_text(scanner); + + /* + * If a text tag specified on the command line + * contains opening single quite ' then it is + * expected that the tag ends with single quote + * as well, like this: + * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\' + * quotes need to be escaped to bypass shell + * processing. + */ + yylval->str = strndup(&text[1], strlen(text) - 2); + return token; } /* @@ -113,22 +110,6 @@ do { \ yyless(0); \ } while (0) -static int sym(yyscan_t scanner, int type, int config) -{ - YYSTYPE *yylval = parse_events_get_lval(scanner); - - yylval->num = (type << 16) + config; - return type == PERF_TYPE_HARDWARE ? 
PE_VALUE_SYM_HW : PE_VALUE_SYM_SW; -} - -static int tool(yyscan_t scanner, enum perf_tool_event event) -{ - YYSTYPE *yylval = parse_events_get_lval(scanner); - - yylval->num = event; - return PE_VALUE_SYM_TOOL; -} - static int term(yyscan_t scanner, enum parse_events__term_type type) { YYSTYPE *yylval = parse_events_get_lval(scanner); @@ -137,16 +118,6 @@ static int term(yyscan_t scanner, enum parse_events__term_type type) return PE_TERM; } -static int hw_term(yyscan_t scanner, int config) -{ - YYSTYPE *yylval = parse_events_get_lval(scanner); - char *text = parse_events_get_text(scanner); - - yylval->hardware_term.str = strdup(text); - yylval->hardware_term.num = PERF_TYPE_HARDWARE + config; - return PE_TERM_HW; -} - static void modifiers_error(struct parse_events_state *parse_state, yyscan_t scanner, int pos, char mod_char, const char *mod_name) { @@ -209,6 +180,8 @@ static int modifiers(struct parse_events_state *parse_state, yyscan_t scanner) CASE('W', weak); CASE('e', exclusive); CASE('b', bpf); + CASE('R', retire_lat); + CASE('X', dont_regroup); default: return PE_ERROR; } @@ -225,10 +198,6 @@ do { \ yycolumn += yyleng; \ } while (0); -#define USER_REJECT \ - yycolumn -= yyleng; \ - REJECT - %} %x mem @@ -242,18 +211,23 @@ event [^,{}/]+ num_dec [0-9]+ num_hex 0x[a-fA-F0-9]{1,16} num_raw_hex [a-fA-F0-9]{1,16} -name [a-zA-Z0-9_*?\[\]][a-zA-Z0-9_*?.\[\]!\-]* -name_tag [\'][a-zA-Z0-9_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\'] -name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* +/* Regular pattern to match the token PE_NAME. */ +name_start [a-zA-Z0-9_*?\[\]] +name {name_start}[a-zA-Z0-9_*?.\[\]!\-]* +/* PE_NAME token when inside a config term list, allows ':'. */ +term_name {name_start}[a-zA-Z0-9_*?.\[\]!\-:]* +/* + * PE_NAME token when quoted, allows ':,.='. + * Matches the RHS of terms like: name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'. + */ +quoted_name [\']{name_start}[a-zA-Z0-9_*?.\[\]!\-:,\.=]*[\'] drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? /* - * If you add a modifier you need to update check_modifier(). + * If you add a modifier you need to update modifiers(). * Also, the letters in modifier_event must not be in modifier_bp. 
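
The quoted_str() helper above peels exactly one layer of single quotes off the token; a standalone sketch of the same strndup() trick, using the example string from the comment:

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Strip the surrounding single quotes, as quoted_str() does. */
    static char *trim_quotes(const char *text)
    {
            /* The quoted_name lexer pattern guarantees a leading and trailing '. */
            return strndup(&text[1], strlen(text) - 2);
    }

    int main(void)
    {
            char *s = trim_quotes("'CPU_CLK_UNHALTED.THREAD:cmask=1'");

            printf("%s\n", s); /* CPU_CLK_UNHALTED.THREAD:cmask=1 */
            free(s);
            return 0;
    }
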
*/ -modifier_event [ukhpPGHSDIWeb]{1,15} +modifier_event [ukhpPGHSDIWebRX]{1,17} modifier_bp [rwx]{1,3} -lc_type (L1-dcache|l1-d|l1d|L1-data|L1-icache|l1-i|l1i|L1-instruction|LLC|L2|dTLB|d-tlb|Data-TLB|iTLB|i-tlb|Instruction-TLB|branch|branches|bpu|btb|bpc|node) -lc_op_result (load|loads|read|store|stores|write|prefetch|prefetches|speculative-read|speculative-load|refs|Reference|ops|access|misses|miss) digit [0-9] non_digit [^0-9] @@ -313,6 +287,7 @@ config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } config3 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG3); } +config4 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG4); } name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } freq { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); } @@ -328,26 +303,20 @@ overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); } no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } +aux-action { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); } -cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); } -stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } -stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } -instructions { return hw_term(yyscanner, PERF_COUNT_HW_INSTRUCTIONS); } -cache-references { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_REFERENCES); } -cache-misses { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_MISSES); } -branch-instructions|branches { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); } -branch-misses { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_MISSES); } -bus-cycles { return hw_term(yyscanner, PERF_COUNT_HW_BUS_CYCLES); } -ref-cycles { return hw_term(yyscanner, PERF_COUNT_HW_REF_CPU_CYCLES); } +cpu { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CPU); } +ratio-to-prev { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV); } +legacy-hardware-config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG); } +legacy-cache-config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG); } r{num_raw_hex} { return str(yyscanner, PE_RAW); } r0x{num_raw_hex} { return str(yyscanner, PE_RAW); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } -{lc_type} { return lc_str(yyscanner, _parse_state); } -{lc_type}-{lc_op_result} { return lc_str(yyscanner, _parse_state); } -{lc_type}-{lc_op_result}-{lc_op_result} { return lc_str(yyscanner, _parse_state); } -{name_minus} { return str(yyscanner, PE_NAME); } +{num_dec} { return value(_parse_state, yyscanner, 10); } +{num_hex} { return value(_parse_state, yyscanner, 16); } +{term_name} { return str(yyscanner, PE_NAME); } @{drv_cfg_term} { return drv_str(yyscanner, PE_DRV_CFG_TERM); } } @@ -383,35 +352,6 @@ r0x{num_raw_hex} { return str(yyscanner, PE_RAW); } <<EOF>> { BEGIN(INITIAL); } } -cpu-cycles|cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, 
PERF_COUNT_HW_CPU_CYCLES); } -stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } -stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } -instructions { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); } -cache-references { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); } -cache-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); } -branch-instructions|branches { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); } -branch-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); } -bus-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); } -ref-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); } -cpu-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); } -task-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); } -page-faults|faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); } -minor-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); } -major-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); } -context-switches|cs { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); } -cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); } -alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } -emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } -dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } -duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); } -user_time { return tool(yyscanner, PERF_TOOL_USER_TIME); } -system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); } -bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } -cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); } - -{lc_type} { return str(yyscanner, PE_LEGACY_CACHE); } -{lc_type}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); } -{lc_type}-{lc_op_result}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); } mem: { BEGIN(mem); return PE_PREFIX_MEM; } r{num_raw_hex} { return str(yyscanner, PE_RAW); } {num_dec} { return value(_parse_state, yyscanner, 10); } @@ -419,7 +359,7 @@ r{num_raw_hex} { return str(yyscanner, PE_RAW); } {modifier_event} { return modifiers(_parse_state, yyscanner); } {name} { return str(yyscanner, PE_NAME); } -{name_tag} { return str(yyscanner, PE_NAME); } +{quoted_name} { return quoted_str(yyscanner, PE_NAME); } "/" { BEGIN(config); return '/'; } , { BEGIN(event); return ','; } : { return ':'; } @@ -430,8 +370,3 @@ r{num_raw_hex} { return str(yyscanner, PE_RAW); } . 
{ } %% - -int parse_events_wrap(void *scanner __maybe_unused) -{ - return 1; -} diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index b3c51f06cbdc..c194de5ec1ec 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -55,26 +55,18 @@ static void free_list_evsel(struct list_head* list_evsel) %} %token PE_START_EVENTS PE_START_TERMS -%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_TERM -%token PE_VALUE_SYM_TOOL +%token PE_VALUE PE_TERM %token PE_EVENT_NAME %token PE_RAW PE_NAME %token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH -%token PE_LEGACY_CACHE %token PE_PREFIX_MEM %token PE_ERROR %token PE_DRV_CFG_TERM -%token PE_TERM_HW %type <num> PE_VALUE -%type <num> PE_VALUE_SYM_HW -%type <num> PE_VALUE_SYM_SW -%type <num> PE_VALUE_SYM_TOOL %type <mod> PE_MODIFIER_EVENT %type <term_type> PE_TERM -%type <num> value_sym %type <str> PE_RAW %type <str> PE_NAME -%type <str> PE_LEGACY_CACHE %type <str> PE_MODIFIER_BP %type <str> PE_EVENT_NAME %type <str> PE_DRV_CFG_TERM @@ -87,8 +79,6 @@ static void free_list_evsel(struct list_head* list_evsel) %type <list_terms> opt_pmu_config %destructor { parse_events_terms__delete ($$); } <list_terms> %type <list_evsel> event_pmu -%type <list_evsel> event_legacy_symbol -%type <list_evsel> event_legacy_cache %type <list_evsel> event_legacy_mem %type <list_evsel> event_legacy_tracepoint %type <list_evsel> event_legacy_numeric @@ -104,8 +94,6 @@ static void free_list_evsel(struct list_head* list_evsel) %destructor { free_list_evsel ($$); } <list_evsel> %type <tracepoint_name> tracepoint_name %destructor { free ($$.sys); free ($$.event); } <tracepoint_name> -%type <hardware_term> PE_TERM_HW -%destructor { free ($$.str); } <hardware_term> %union { @@ -120,10 +108,6 @@ static void free_list_evsel(struct list_head* list_evsel) char *sys; char *event; } tracepoint_name; - struct hardware_term { - char *str; - u64 num; - } hardware_term; } %% @@ -266,8 +250,6 @@ PE_EVENT_NAME event_def event_def event_def: event_pmu | - event_legacy_symbol | - event_legacy_cache sep_dc | event_legacy_mem sep_dc | event_legacy_tracepoint sep_dc | event_legacy_numeric sep_dc | @@ -292,7 +274,7 @@ PE_NAME sep_dc struct list_head *list; int err; - err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list, &@1); + err = parse_events_multi_pmu_add(_parse_state, $1, /*const_parsed_terms*/NULL, &list, &@1); if (err < 0) { struct parse_events_state *parse_state = _parse_state; struct parse_events_error *error = parse_state->error; @@ -308,85 +290,6 @@ PE_NAME sep_dc $$ = list; } -value_sym: -PE_VALUE_SYM_HW -| -PE_VALUE_SYM_SW - -event_legacy_symbol: -value_sym '/' event_config '/' -{ - struct list_head *list; - int type = $1 >> 16; - int config = $1 & 255; - int err; - bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE); - - list = alloc_list(); - if (!list) - YYNOMEM; - err = parse_events_add_numeric(_parse_state, list, type, config, $3, wildcard); - parse_events_terms__delete($3); - if (err) { - free_list_evsel(list); - PE_ABORT(err); - } - $$ = list; -} -| -value_sym sep_slash_slash_dc -{ - struct list_head *list; - int type = $1 >> 16; - int config = $1 & 255; - bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE); - int err; - - list = alloc_list(); - if (!list) - YYNOMEM; - err = parse_events_add_numeric(_parse_state, list, type, config, /*head_config=*/NULL, wildcard); - if (err) - PE_ABORT(err); - $$ = list; -} -| -PE_VALUE_SYM_TOOL sep_slash_slash_dc -{ - struct 
list_head *list; - int err; - - list = alloc_list(); - if (!list) - YYNOMEM; - err = parse_events_add_tool(_parse_state, list, $1); - if (err) - YYNOMEM; - $$ = list; -} - -event_legacy_cache: -PE_LEGACY_CACHE opt_event_config -{ - struct parse_events_state *parse_state = _parse_state; - struct list_head *list; - int err; - - list = alloc_list(); - if (!list) - YYNOMEM; - - err = parse_events_add_cache(list, &parse_state->idx, $1, parse_state, $2); - - parse_events_terms__delete($2); - free($1); - if (err) { - free_list_evsel(list); - PE_ABORT(err); - } - $$ = list; -} - event_legacy_mem: PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config { @@ -605,12 +508,7 @@ event_term $$ = head; } -name_or_raw: PE_RAW | PE_NAME | PE_LEGACY_CACHE -| -PE_TERM_HW -{ - $$ = $1.str; -} +name_or_raw: PE_RAW | PE_NAME event_term: PE_RAW @@ -652,19 +550,6 @@ name_or_raw '=' PE_VALUE $$ = term; } | -PE_LEGACY_CACHE -{ - struct parse_events_term *term; - int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, - $1, /*num=*/1, /*novalue=*/true, &@1, /*loc_val=*/NULL); - - if (err) { - free($1); - PE_ABORT(err); - } - $$ = term; -} -| PE_NAME { struct parse_events_term *term; @@ -678,20 +563,6 @@ PE_NAME $$ = term; } | -PE_TERM_HW -{ - struct parse_events_term *term; - int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE, - $1.str, $1.num & 255, /*novalue=*/false, - &@1, /*loc_val=*/NULL); - - if (err) { - free($1.str); - PE_ABORT(err); - } - $$ = term; -} -| PE_TERM '=' name_or_raw { struct parse_events_term *term; @@ -760,8 +631,6 @@ PE_DRV_CFG_TERM sep_dc: ':' | -sep_slash_slash_dc: '/' '/' | ':' | - %% void parse_events_error(YYLTYPE *loc, void *_parse_state, diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 00adf872bf00..2e62f272fda8 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -68,14 +68,12 @@ bool is_directory(const char *base_path, const struct dirent *dent) return S_ISDIR(st.st_mode); } -bool is_executable_file(const char *base_path, const struct dirent *dent) +bool is_directory_at(int dir_fd, const char *path) { - char path[PATH_MAX]; struct stat st; - snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name); - if (stat(path, &st)) + if (fstatat(dir_fd, path, &st, /*flags=*/0)) return false; - return !S_ISDIR(st.st_mode) && (st.st_mode & S_IXUSR); + return S_ISDIR(st.st_mode); } diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h index d94902c22222..fb850fb55c60 100644 --- a/tools/perf/util/path.h +++ b/tools/perf/util/path.h @@ -12,6 +12,6 @@ int path__join3(char *bf, size_t size, const char *path1, const char *path2, con bool is_regular_file(const char *file); bool is_directory(const char *base_path, const struct dirent *dent); -bool is_executable_file(const char *base_path, const struct dirent *dent); +bool is_directory_at(int dir_fd, const char *path); #endif /* _PERF_PATH_H */ diff --git a/tools/perf/util/perf-regs-arch/Build b/tools/perf/util/perf-regs-arch/Build index d9d596d330a7..be95402aa540 100644 --- a/tools/perf/util/perf-regs-arch/Build +++ b/tools/perf/util/perf-regs-arch/Build @@ -1,9 +1,9 @@ -perf-y += perf_regs_aarch64.o -perf-y += perf_regs_arm.o -perf-y += perf_regs_csky.o -perf-y += perf_regs_loongarch.o -perf-y += perf_regs_mips.o -perf-y += perf_regs_powerpc.o -perf-y += perf_regs_riscv.o -perf-y += perf_regs_s390.o -perf-y += perf_regs_x86.o +perf-util-y += perf_regs_aarch64.o +perf-util-y += perf_regs_arm.o +perf-util-y += perf_regs_csky.o 
+perf-util-y += perf_regs_loongarch.o +perf-util-y += perf_regs_mips.o +perf-util-y += perf_regs_powerpc.o +perf-util-y += perf_regs_riscv.o +perf-util-y += perf_regs_s390.o +perf-util-y += perf_regs_x86.o diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index 1de3b69cdf4a..6ecf38314f01 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -59,10 +59,10 @@ out_delete: static bool perf_probe_api(setup_probe_fn_t fn) { - const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL}; + struct perf_pmu *pmu; struct perf_cpu_map *cpus; struct perf_cpu cpu; - int ret, i = 0; + int ret = 0; cpus = perf_cpu_map__new_online_cpus(); if (!cpus) @@ -70,12 +70,23 @@ static bool perf_probe_api(setup_probe_fn_t fn) cpu = perf_cpu_map__cpu(cpus, 0); perf_cpu_map__put(cpus); - do { - ret = perf_do_probe_api(fn, cpu, try[i++]); - if (!ret) - return true; - } while (ret == -EAGAIN && try[i]); - + ret = perf_do_probe_api(fn, cpu, "software/cpu-clock/u"); + if (!ret) + return true; + + pmu = perf_pmus__scan_core(/*pmu=*/NULL); + if (pmu) { + const char *try[] = {"cycles", "instructions", NULL}; + char buf[256]; + int i = 0; + + while (ret == -EAGAIN && try[i]) { + snprintf(buf, sizeof(buf), "%s/%s/u", pmu->name, try[i++]); + ret = perf_do_probe_api(fn, cpu, buf); + if (!ret) + return true; + } + } return false; } diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 59fbbba79697..741c3d657a8b 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -79,24 +79,22 @@ static void __p_read_format(char *buf, size_t size, u64 value) #define ENUM_ID_TO_STR_CASE(x) case x: return (#x); static const char *stringify_perf_type_id(struct perf_pmu *pmu, u32 type) { - if (pmu) - return pmu->name; - switch (type) { ENUM_ID_TO_STR_CASE(PERF_TYPE_HARDWARE) ENUM_ID_TO_STR_CASE(PERF_TYPE_SOFTWARE) ENUM_ID_TO_STR_CASE(PERF_TYPE_TRACEPOINT) ENUM_ID_TO_STR_CASE(PERF_TYPE_HW_CACHE) - ENUM_ID_TO_STR_CASE(PERF_TYPE_RAW) ENUM_ID_TO_STR_CASE(PERF_TYPE_BREAKPOINT) + case PERF_TYPE_RAW: + return pmu ? pmu->name : "PERF_TYPE_RAW"; default: - return NULL; + return pmu ? 
pmu->name : NULL; } } static const char *stringify_perf_hw_id(u64 value) { - switch (value) { + switch (value & PERF_HW_EVENT_MASK) { ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CPU_CYCLES) ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_INSTRUCTIONS) ENUM_ID_TO_STR_CASE(PERF_COUNT_HW_CACHE_REFERENCES) @@ -169,83 +167,100 @@ static const char *stringify_perf_sw_id(u64 value) } #undef ENUM_ID_TO_STR_CASE -#define PRINT_ID(_s, _f) \ -do { \ - const char *__s = _s; \ - if (__s == NULL) \ - snprintf(buf, size, _f, value); \ - else \ - snprintf(buf, size, _f" (%s)", value, __s); \ -} while (0) -#define print_id_unsigned(_s) PRINT_ID(_s, "%"PRIu64) -#define print_id_hex(_s) PRINT_ID(_s, "%#"PRIx64) +static void print_id_unsigned(char *buf, size_t size, u64 value, const char *s) +{ + if (s == NULL) + snprintf(buf, size, "%"PRIu64, value); + else + snprintf(buf, size, "%"PRIu64" (%s)", value, s); +} + +static void print_id_hex(char *buf, size_t size, u64 value, const char *s) +{ + if (s == NULL) + snprintf(buf, size, "%#"PRIx64, value); + else + snprintf(buf, size, "%#"PRIx64" (%s)", value, s); +} -static void __p_type_id(struct perf_pmu *pmu, char *buf, size_t size, u64 value) +static void __p_type_id(char *buf, size_t size, struct perf_pmu *pmu, u32 type) { - print_id_unsigned(stringify_perf_type_id(pmu, value)); + print_id_unsigned(buf, size, type, stringify_perf_type_id(pmu, type)); } -static void __p_config_hw_id(char *buf, size_t size, u64 value) +static void __p_config_hw_id(char *buf, size_t size, struct perf_pmu *pmu, u64 config) { - print_id_hex(stringify_perf_hw_id(value)); + const char *name = stringify_perf_hw_id(config); + + if (name == NULL) { + if (pmu == NULL) { + snprintf(buf, size, "%#"PRIx64, config); + } else { + snprintf(buf, size, "%#"PRIx64" (%s/config=%#"PRIx64"/)", config, pmu->name, + config); + } + } else { + if (pmu == NULL) + snprintf(buf, size, "%#"PRIx64" (%s)", config, name); + else + snprintf(buf, size, "%#"PRIx64" (%s/%s/)", config, pmu->name, name); + } } -static void __p_config_sw_id(char *buf, size_t size, u64 value) +static void __p_config_sw_id(char *buf, size_t size, u64 id) { - print_id_hex(stringify_perf_sw_id(value)); + print_id_hex(buf, size, id, stringify_perf_sw_id(id)); } -static void __p_config_hw_cache_id(char *buf, size_t size, u64 value) +static void __p_config_hw_cache_id(char *buf, size_t size, struct perf_pmu *pmu, u64 config) { - const char *hw_cache_str = stringify_perf_hw_cache_id(value & 0xff); + const char *hw_cache_str = stringify_perf_hw_cache_id(config & 0xff); const char *hw_cache_op_str = - stringify_perf_hw_cache_op_id((value & 0xff00) >> 8); + stringify_perf_hw_cache_op_id((config & 0xff00) >> 8); const char *hw_cache_op_result_str = - stringify_perf_hw_cache_op_result_id((value & 0xff0000) >> 16); - - if (hw_cache_str == NULL || hw_cache_op_str == NULL || - hw_cache_op_result_str == NULL) { - snprintf(buf, size, "%#"PRIx64, value); + stringify_perf_hw_cache_op_result_id((config & 0xff0000) >> 16); + + if (hw_cache_str == NULL || hw_cache_op_str == NULL || hw_cache_op_result_str == NULL) { + if (pmu == NULL) { + snprintf(buf, size, "%#"PRIx64, config); + } else { + snprintf(buf, size, "%#"PRIx64" (%s/config=%#"PRIx64"/)", config, pmu->name, + config); + } } else { - snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", value, - hw_cache_op_result_str, hw_cache_op_str, hw_cache_str); + if (pmu == NULL) { + snprintf(buf, size, "%#"PRIx64" (%s | %s | %s)", config, + hw_cache_op_result_str, hw_cache_op_str, hw_cache_str); + } else { + snprintf(buf, size, 
"%#"PRIx64" (%s/%s | %s | %s/)", config, pmu->name, + hw_cache_op_result_str, hw_cache_op_str, hw_cache_str); + } } } -#ifdef HAVE_LIBTRACEEVENT -static void __p_config_tracepoint_id(char *buf, size_t size, u64 value) +static void __p_config_tracepoint_id(char *buf, size_t size, u64 id) { - char *str = tracepoint_id_to_name(value); + char *str = tracepoint_id_to_name(id); - print_id_hex(str); + print_id_hex(buf, size, id, str); free(str); } -#endif -static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 value) +static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 config) { - const char *name = perf_pmu__name_from_config(pmu, value); - - if (name) { - print_id_hex(name); - return; - } switch (type) { case PERF_TYPE_HARDWARE: - return __p_config_hw_id(buf, size, value); + return __p_config_hw_id(buf, size, pmu, config); case PERF_TYPE_SOFTWARE: - return __p_config_sw_id(buf, size, value); + return __p_config_sw_id(buf, size, config); case PERF_TYPE_HW_CACHE: - return __p_config_hw_cache_id(buf, size, value); + return __p_config_hw_cache_id(buf, size, pmu, config); case PERF_TYPE_TRACEPOINT: -#ifdef HAVE_LIBTRACEEVENT - return __p_config_tracepoint_id(buf, size, value); -#endif + return __p_config_tracepoint_id(buf, size, config); case PERF_TYPE_RAW: case PERF_TYPE_BREAKPOINT: default: - snprintf(buf, size, "%#"PRIx64, value); - return; + return print_id_hex(buf, size, config, perf_pmu__name_from_config(pmu, config)); } } @@ -257,7 +272,7 @@ static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type #define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) #define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val) #define p_read_format(val) __p_read_format(buf, BUF_SIZE, val) -#define p_type_id(val) __p_type_id(pmu, buf, BUF_SIZE, val) +#define p_type_id(val) __p_type_id(buf, BUF_SIZE, pmu, val) #define p_config_id(val) __p_config_id(pmu, buf, BUF_SIZE, attr->type, val) #define PRINT_ATTRn(_n, _f, _p, _a) \ @@ -277,6 +292,13 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, char buf[BUF_SIZE]; int ret = 0; + if (!pmu && (attr->type == PERF_TYPE_HARDWARE || attr->type == PERF_TYPE_HW_CACHE)) { + u32 extended_type = attr->config >> PERF_PMU_TYPE_SHIFT; + + if (extended_type) + pmu = perf_pmus__find_by_type(extended_type); + } + PRINT_ATTRn("type", type, p_type_id, true); PRINT_ATTRf(size, p_unsigned); PRINT_ATTRn("config", config, p_config_id, true); @@ -321,6 +343,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(inherit_thread, p_unsigned); PRINT_ATTRf(remove_on_exec, p_unsigned); PRINT_ATTRf(sigtrap, p_unsigned); + PRINT_ATTRf(defer_callchain, p_unsigned); + PRINT_ATTRf(defer_output, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned, false); PRINT_ATTRf(bp_type, p_unsigned); @@ -335,6 +359,9 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(sample_max_stack, p_unsigned); PRINT_ATTRf(aux_sample_size, p_unsigned); PRINT_ATTRf(sig_data, p_unsigned); + PRINT_ATTRf(aux_start_paused, p_unsigned); + PRINT_ATTRf(aux_pause, p_unsigned); + PRINT_ATTRf(aux_resume, p_unsigned); return ret; } diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index 5ccfe4b64cdf..d9043f4afbe7 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -15,6 +15,7 @@ #include "util/strbuf.h" #include "util/thread_map.h" +#include <errno.h> #include <string.h> #include 
<linux/kernel.h> #include <perfmon/pfmlib_perf_event.h> @@ -47,10 +48,6 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, p_orig = p = strdup(str); if (!p) return -1; - /* - * force loading of the PMU list - */ - perf_pmus__scan(NULL); for (q = p; strsep(&p, ",{}"); q = p) { sep = p ? str + (p - p_orig - 1) : ""; @@ -233,7 +230,8 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, } if (is_libpfm_event_supported(name, cpus, threads)) { - print_cb->print_event(print_state, pinfo->name, topic, + print_cb->print_event(print_state, topic, pinfo->name, + /*pmu_type=*/PERF_TYPE_RAW, name, info->equiv, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", @@ -267,8 +265,9 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, continue; print_cb->print_event(print_state, - pinfo->name, topic, + pinfo->name, + /*pmu_type=*/PERF_TYPE_RAW, name, /*alias=*/NULL, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 888ce9912275..956ea273c2c7 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -12,17 +12,24 @@ #include <stdbool.h> #include <dirent.h> #include <api/fs/fs.h> +#include <api/io.h> +#include <api/io_dir.h> #include <locale.h> #include <fnmatch.h> #include <math.h> #include "debug.h" #include "evsel.h" #include "pmu.h" +#include "drm_pmu.h" +#include "hwmon_pmu.h" #include "pmus.h" +#include "tool_pmu.h" +#include "tp_pmu.h" #include <util/pmu-bison.h> #include <util/pmu-flex.h> #include "parse-events.h" #include "print-events.h" +#include "hashmap.h" #include "header.h" #include "string2.h" #include "strbuf.h" @@ -30,19 +37,15 @@ #include "util/evsel_config.h" #include <regex.h> -struct perf_pmu perf_pmu__fake = { - .name = "fake", -}; - #define UNIT_MAX_LEN 31 /* max length for event unit name */ enum event_source { - /* An event loaded from /sys/devices/<pmu>/events. */ + /* An event loaded from /sys/bus/event_source/devices/<pmu>/events. */ EVENT_SRC_SYSFS, /* An event loaded from a CPUID matched json file. */ EVENT_SRC_CPU_JSON, /* - * An event loaded from a /sys/devices/<pmu>/identifier matched json + * An event loaded from a /sys/bus/event_source/devices/<pmu>/identifier matched json * file. */ EVENT_SRC_SYS_JSON, @@ -64,10 +67,13 @@ struct perf_pmu_alias { * json events. */ char *topic; - /** @terms: Owned list of the original parsed parameters. */ - struct parse_events_terms terms; - /** @list: List element of struct perf_pmu aliases. */ - struct list_head list; + /** @terms: Owned copy of the event terms. */ + char *terms; + /** + * @legacy_terms: If the event aliases a legacy event, holds a copy + * of the legacy event string. + */ + char *legacy_terms; /** * @pmu_name: The name copied from the json struct pmu_event. This can * differ from the PMU name as it won't have suffixes. */ char *pmu_name; char unit[UNIT_MAX_LEN+1]; /** @scale: Value to scale read counter values by. */ double scale; + /** @retirement_latency_mean: Value to be given for unsampled retirement latency mean. */ + double retirement_latency_mean; + /** @retirement_latency_min: Value to be given for unsampled retirement latency min. */ + double retirement_latency_min; + /** @retirement_latency_max: Value to be given for unsampled retirement latency max.
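
The retirement-latency fields above hold decimal values parsed from json strings; a hedged, standalone sketch of locale-safe parsing in the spirit of the parse_double() helper this patch factors out below (the helper name here is illustrative, not the in-tree one):

    #include <locale.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Parse a double with '.' as the separator regardless of the user's locale. */
    static int parse_double_c_locale(const char *str, double *val)
    {
            char *saved = strdup(setlocale(LC_NUMERIC, NULL));
            char *end;

            if (!saved)
                    return -1;
            setlocale(LC_NUMERIC, "C"); /* json always uses a '.' decimal point */
            *val = strtod(str, &end);
            setlocale(LC_NUMERIC, saved);
            free(saved);
            return end == str ? -1 : 0;
    }

    int main(void)
    {
            double d;

            if (!parse_double_c_locale("0.5", &d))
                    printf("%f\n", d); /* 0.500000 */
            return 0;
    }
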
*/ + double retirement_latency_max; /** * @per_pkg: Does the file * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.per-pkg or @@ -94,6 +106,12 @@ struct perf_pmu_alias { * default. */ bool deprecated; + /** + * @legacy_deprecated_checked: Legacy events may not be supported by the + * PMU and need to be checked. If they aren't supported they are marked + * deprecated. + */ + bool legacy_deprecated_checked; /** @from_sysfs: Was the alias from sysfs or a json event? */ bool from_sysfs; /** @info_loaded: Have the scale, unit and other values been read from disk? */ @@ -196,19 +214,17 @@ static void perf_pmu_format__load(const struct perf_pmu *pmu, struct perf_pmu_fo */ static int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_load) { - struct dirent *evt_ent; - DIR *format_dir; + struct io_dirent64 *evt_ent; + struct io_dir format_dir; int ret = 0; - format_dir = fdopendir(dirfd); - if (!format_dir) - return -EINVAL; + io_dir__init(&format_dir, dirfd); - while ((evt_ent = readdir(format_dir)) != NULL) { + while ((evt_ent = io_dir__readdir(&format_dir)) != NULL) { struct perf_pmu_format *format; char *name = evt_ent->d_name; - if (!strcmp(name, ".") || !strcmp(name, "..")) + if (io_dir__is_dir(&format_dir, evt_ent)) continue; format = perf_pmu__new_format(&pmu->format, name); @@ -235,7 +251,7 @@ static int perf_pmu__format_parse(struct perf_pmu *pmu, int dirfd, bool eager_lo } } - closedir(format_dir); + close(format_dir.dirfd); return ret; } @@ -259,7 +275,7 @@ static int pmu_format(struct perf_pmu *pmu, int dirfd, const char *name, bool ea return 0; } -int perf_pmu__convert_scale(const char *scale, char **end, double *sval) +static int parse_double(const char *scale, char **end, double *sval) { char *lc; int ret = 0; @@ -296,6 +312,11 @@ out: return ret; } +int perf_pmu__convert_scale(const char *scale, char **end, double *sval) +{ + return parse_double(scale, end, sval); +} + static int perf_pmu__parse_scale(struct perf_pmu *pmu, struct perf_pmu_alias *alias) { struct stat st; @@ -367,8 +388,8 @@ error: return -1; } -static int -perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias) +static bool perf_pmu__parse_event_source_bool(const char *pmu_name, const char *event_name, + const char *suffix) { char path[PATH_MAX]; size_t len; @@ -376,59 +397,67 @@ perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias) len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); if (!len) - return 0; - scnprintf(path + len, sizeof(path) - len, "%s/events/%s.per-pkg", pmu->name, alias->name); + return false; + + scnprintf(path + len, sizeof(path) - len, "%s/events/%s.%s", pmu_name, event_name, suffix); fd = open(path, O_RDONLY); if (fd == -1) - return -1; + return false; - close(fd); +#ifndef NDEBUG + { + char buf[8]; - alias->per_pkg = true; - return 0; + len = read(fd, buf, sizeof(buf)); + assert(len == 1 || len == 2); + assert(buf[0] == '1'); + } +#endif + + close(fd); + return true; } -static int perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias *alias) +static void perf_pmu__parse_per_pkg(struct perf_pmu *pmu, struct perf_pmu_alias *alias) { - char path[PATH_MAX]; - size_t len; - int fd; - - len = perf_pmu__event_source_devices_scnprintf(path, sizeof(path)); - if (!len) - return 0; - scnprintf(path + len, sizeof(path) - len, "%s/events/%s.snapshot", pmu->name, alias->name); - - fd = open(path, O_RDONLY); - if (fd == -1) - return -1; + alias->per_pkg = perf_pmu__parse_event_source_bool(pmu->name, alias->name,
"per-pkg"); +} - alias->snapshot = true; - close(fd); - return 0; +static void perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias *alias) +{ + alias->snapshot = perf_pmu__parse_event_source_bool(pmu->name, alias->name, "snapshot"); } /* Delete an alias entry. */ -static void perf_pmu_free_alias(struct perf_pmu_alias *newalias) +static void perf_pmu_free_alias(struct perf_pmu_alias *alias) { - zfree(&newalias->name); - zfree(&newalias->desc); - zfree(&newalias->long_desc); - zfree(&newalias->topic); - zfree(&newalias->pmu_name); - parse_events_terms__exit(&newalias->terms); - free(newalias); + if (!alias) + return; + + zfree(&alias->name); + zfree(&alias->desc); + zfree(&alias->long_desc); + zfree(&alias->topic); + zfree(&alias->pmu_name); + zfree(&alias->terms); + zfree(&alias->legacy_terms); + free(alias); } static void perf_pmu__del_aliases(struct perf_pmu *pmu) { - struct perf_pmu_alias *alias, *tmp; + struct hashmap_entry *entry; + size_t bkt; - list_for_each_entry_safe(alias, tmp, &pmu->aliases, list) { - list_del(&alias->list); - perf_pmu_free_alias(alias); - } + if (!pmu->aliases) + return; + + hashmap__for_each_entry(pmu->aliases, entry, bkt) + perf_pmu_free_alias(entry->pvalue); + + hashmap__free(pmu->aliases); + pmu->aliases = NULL; } static struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu, @@ -436,35 +465,37 @@ static struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu, bool load) { struct perf_pmu_alias *alias; + bool has_sysfs_event; + char event_file_name[NAME_MAX + 8]; - if (load && !pmu->sysfs_aliases_loaded) { - bool has_sysfs_event; - char event_file_name[FILENAME_MAX + 8]; + if (hashmap__find(pmu->aliases, name, &alias)) + return alias; - /* - * Test if alias/event 'name' exists in the PMU's sysfs/events - * directory. If not skip parsing the sysfs aliases. Sysfs event - * name must be all lower or all upper case. - */ - scnprintf(event_file_name, sizeof(event_file_name), "events/%s", name); - for (size_t i = 7, n = 7 + strlen(name); i < n; i++) - event_file_name[i] = tolower(event_file_name[i]); + if (!load || pmu->sysfs_aliases_loaded) + return NULL; - has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); - if (!has_sysfs_event) { - for (size_t i = 7, n = 7 + strlen(name); i < n; i++) - event_file_name[i] = toupper(event_file_name[i]); + /* + * Test if alias/event 'name' exists in the PMU's sysfs/events + * directory. If not skip parsing the sysfs aliases. Sysfs event + * name must be all lower or all upper case. 
+ */ + scnprintf(event_file_name, sizeof(event_file_name), "events/%s", name); + for (size_t i = 7, n = 7 + strlen(name); i < n; i++) + event_file_name[i] = tolower(event_file_name[i]); - has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); - } - if (has_sysfs_event) - pmu_aliases_parse(pmu); + has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); + if (!has_sysfs_event) { + for (size_t i = 7, n = 7 + strlen(name); i < n; i++) + event_file_name[i] = toupper(event_file_name[i]); + has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); } - list_for_each_entry(alias, &pmu->aliases, list) { - if (!strcasecmp(alias->name, name)) + if (has_sysfs_event) { + pmu_aliases_parse(pmu); + if (hashmap__find(pmu->aliases, name, &alias)) return alias; } + return NULL; } @@ -503,6 +534,7 @@ static void read_alias_info(struct perf_pmu *pmu, struct perf_pmu_alias *alias) struct update_alias_data { struct perf_pmu *pmu; struct perf_pmu_alias *alias; + bool legacy; }; static int update_alias(const struct pmu_event *pe, @@ -518,8 +550,13 @@ static int update_alias(const struct pmu_event *pe, assign_str(pe->name, "topic", &data->alias->topic, pe->topic); data->alias->per_pkg = pe->perpkg; if (pe->event) { - parse_events_terms__exit(&data->alias->terms); - ret = parse_events_terms(&data->alias->terms, pe->event, /*input=*/NULL); + if (data->legacy) { + zfree(&data->alias->legacy_terms); + data->alias->legacy_terms = strdup(pe->event); + } else { + zfree(&data->alias->terms); + data->alias->terms = strdup(pe->event); + } } if (!ret && pe->unit) { char *unit; @@ -528,15 +565,27 @@ static int update_alias(const struct pmu_event *pe, if (!ret) snprintf(data->alias->unit, sizeof(data->alias->unit), "%s", unit); } + if (!ret && pe->retirement_latency_mean) { + ret = parse_double(pe->retirement_latency_mean, NULL, + &data->alias->retirement_latency_mean); + } + if (!ret && pe->retirement_latency_min) { + ret = parse_double(pe->retirement_latency_min, NULL, + &data->alias->retirement_latency_min); + } + if (!ret && pe->retirement_latency_max) { + ret = parse_double(pe->retirement_latency_max, NULL, + &data->alias->retirement_latency_max); + } return ret; } static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, - const char *desc, const char *val, FILE *val_fd, + const char *desc, const char *val, int val_fd, const struct pmu_event *pe, enum event_source src) { - struct perf_pmu_alias *alias; - int ret; + struct perf_pmu_alias *alias, *old_alias; + int ret = 0; const char *long_desc = NULL, *topic = NULL, *unit = NULL, *pmu_name = NULL; bool deprecated = false, perpkg = false; @@ -559,24 +608,49 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, if (!alias) return -ENOMEM; - parse_events_terms__init(&alias->terms); alias->scale = 1.0; alias->unit[0] = '\0'; alias->per_pkg = perpkg; alias->snapshot = false; alias->deprecated = deprecated; + alias->retirement_latency_mean = 0.0; + alias->retirement_latency_min = 0.0; + alias->retirement_latency_max = 0.0; - ret = parse_events_terms(&alias->terms, val, val_fd); - if (ret) { - pr_err("Cannot parse alias %s: %d\n", val, ret); - free(alias); - return ret; + if (!ret && pe && pe->retirement_latency_mean) { + ret = parse_double(pe->retirement_latency_mean, NULL, + &alias->retirement_latency_mean); } + if (!ret && pe && pe->retirement_latency_min) { + ret = parse_double(pe->retirement_latency_min, NULL, + &alias->retirement_latency_min); + } + if (!ret && pe && pe->retirement_latency_max) { + ret = 
parse_double(pe->retirement_latency_max, NULL, + &alias->retirement_latency_max); + } + if (ret) + return ret; + if (val_fd < 0) { + alias->terms = strdup(val); + } else { + char buf[256]; + struct io io; + size_t line_len; + + io__init(&io, val_fd, buf, sizeof(buf)); + ret = io__getline(&io, &alias->terms, &line_len) < 0 ? -errno : 0; + if (ret) { + pr_err("Failed to read alias %s\n", name); + return ret; + } + if (line_len >= 1 && alias->terms[line_len - 1] == '\n') + alias->terms[line_len - 1] = '\0'; + } alias->name = strdup(name); alias->desc = desc ? strdup(desc) : NULL; - alias->long_desc = long_desc ? strdup(long_desc) : - desc ? strdup(desc) : NULL; + alias->long_desc = long_desc ? strdup(long_desc) : NULL; alias->topic = topic ? strdup(topic) : NULL; alias->pmu_name = pmu_name ? strdup(pmu_name) : NULL; if (unit) { @@ -590,15 +664,29 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, default: case EVENT_SRC_SYSFS: alias->from_sysfs = true; - if (pmu->events_table) { + if (pmu->events_table || pmu->is_core) { /* Update an event from sysfs with json data. */ struct update_alias_data data = { .pmu = pmu, .alias = alias, + .legacy = false, }; - if (pmu_events_table__find_event(pmu->events_table, pmu, name, - update_alias, &data) == 0) - pmu->cpu_json_aliases++; + if ((pmu_events_table__find_event(pmu->events_table, pmu, name, + update_alias, &data) == 0)) { + /* + * Override sysfs encodings with json encodings + * specific to the cpuid. + */ + pmu->cpu_common_json_aliases++; + } + if (pmu->is_core) { + /* Add in legacy encodings. */ + data.legacy = true; + if (pmu_events_table__find_event( + perf_pmu__default_core_events_table(), + pmu, name, update_alias, &data) == 0) + pmu->cpu_common_json_aliases++; + } } pmu->sysfs_aliases++; break; @@ -610,7 +698,8 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, break; } - list_add_tail(&alias->list, &pmu->aliases); + hashmap__set(pmu->aliases, alias->name, alias, /*old_key=*/ NULL, &old_alias); + perf_pmu_free_alias(old_alias); return 0; } @@ -637,17 +726,14 @@ static inline bool pmu_alias_info_file(const char *name) */ static int __pmu_aliases_parse(struct perf_pmu *pmu, int events_dir_fd) { - struct dirent *evt_ent; - DIR *event_dir; + struct io_dirent64 *evt_ent; + struct io_dir event_dir; - event_dir = fdopendir(events_dir_fd); - if (!event_dir) - return -EINVAL; + io_dir__init(&event_dir, events_dir_fd); - while ((evt_ent = readdir(event_dir))) { + while ((evt_ent = io_dir__readdir(&event_dir))) { char *name = evt_ent->d_name; int fd; - FILE *file; if (!strcmp(name, ".") || !strcmp(name, "..")) continue; @@ -663,20 +749,14 @@ static int __pmu_aliases_parse(struct perf_pmu *pmu, int events_dir_fd) pr_debug("Cannot open %s\n", name); continue; } - file = fdopen(fd, "r"); - if (!file) { - close(fd); - continue; - } if (perf_pmu__new_alias(pmu, name, /*desc=*/ NULL, - /*val=*/ NULL, file, /*pe=*/ NULL, + /*val=*/ NULL, fd, /*pe=*/ NULL, EVENT_SRC_SYSFS) < 0) pr_debug("Cannot set up %s\n", name); - fclose(file); + close(fd); } - closedir(event_dir); pmu->sysfs_aliases_loaded = true; return 0; } @@ -707,7 +787,7 @@ static int pmu_aliases_parse(struct perf_pmu *pmu) static int pmu_aliases_parse_eager(struct perf_pmu *pmu, int sysfs_fd) { - char path[FILENAME_MAX + 7]; + char path[NAME_MAX + 8]; int ret, events_dir_fd; scnprintf(path, sizeof(path), "%s/events", pmu->name); @@ -721,29 +801,29 @@ static int pmu_aliases_parse_eager(struct perf_pmu *pmu, int sysfs_fd) return ret; } -static int 
pmu_alias_terms(struct perf_pmu_alias *alias, int err_loc, struct list_head *terms) +static int pmu_alias_terms(struct perf_pmu_alias *alias, struct list_head *terms) { - struct parse_events_term *term, *cloned; - struct parse_events_terms clone_terms; - - parse_events_terms__init(&clone_terms); - list_for_each_entry(term, &alias->terms.terms, list) { - int ret = parse_events_term__clone(&cloned, term); + struct parse_events_terms alias_terms; + struct parse_events_term *term; + int ret; - if (ret) { - parse_events_terms__exit(&clone_terms); - return ret; - } + parse_events_terms__init(&alias_terms); + ret = parse_events_terms(&alias_terms, alias->terms); + if (ret) { + pr_err("Cannot parse '%s' terms '%s': %d\n", + alias->name, alias->terms, ret); + parse_events_terms__exit(&alias_terms); + return ret; + } + list_for_each_entry(term, &alias_terms.terms, list) { /* * Weak terms don't override command line options, * which we don't want for implicit terms in aliases. */ - cloned->weak = true; - cloned->err_term = cloned->err_val = err_loc; - list_add_tail(&cloned->list, &clone_terms.terms); + term->weak = true; } - list_splice_init(&clone_terms.terms, terms); - parse_events_terms__exit(&clone_terms); + list_splice_init(&alias_terms.terms, terms); + parse_events_terms__exit(&alias_terms); return 0; } @@ -751,32 +831,41 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, int err_loc, struct lis * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64) * may have a "cpus" file. */ -static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *name, bool is_core) +static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *pmu_name, bool is_core) { - struct perf_cpu_map *cpus; const char *templates[] = { "cpumask", "cpus", NULL }; const char **template; - char pmu_name[PATH_MAX]; - struct perf_pmu pmu = {.name = pmu_name}; - FILE *file; - strlcpy(pmu_name, name, sizeof(pmu_name)); for (template = templates; *template; template++) { - file = perf_pmu__open_file_at(&pmu, dirfd, *template); - if (!file) + struct io io; + char buf[128]; + char *cpumask = NULL; + size_t cpumask_len; + ssize_t ret; + struct perf_cpu_map *cpus; + + io.fd = perf_pmu__pathname_fd(dirfd, pmu_name, *template, O_RDONLY); + if (io.fd < 0) continue; - cpus = perf_cpu_map__read(file); - fclose(file); + + io__init(&io, io.fd, buf, sizeof(buf)); + ret = io__getline(&io, &cpumask, &cpumask_len); + close(io.fd); + if (ret < 0) + continue; + + cpus = perf_cpu_map__new(cpumask); + free(cpumask); if (cpus) return cpus; } /* Nothing found, for core PMUs assume this means all CPUs. */ - return is_core ? perf_cpu_map__get(cpu_map__online()) : NULL; + return is_core ? cpu_map__online() : NULL; } static bool pmu_is_uncore(int dirfd, const char *name) @@ -822,118 +911,144 @@ static int is_sysfs_pmu_core(const char *name) return file_available(path); } -char *perf_pmu__getcpuid(struct perf_pmu *pmu) -{ - char *cpuid; - static bool printed; - - cpuid = getenv("PERF_CPUID"); - if (cpuid) - cpuid = strdup(cpuid); - if (!cpuid) - cpuid = get_cpuid_str(pmu); - if (!cpuid) - return NULL; - - if (!printed) { - pr_debug("Using CPUID %s\n", cpuid); - printed = true; - } - return cpuid; -} - -__weak const struct pmu_metrics_table *pmu_metrics_table__find(void) +/** + * Return the length of the PMU name not including the suffix for uncore PMUs. 
+ * + * We want to deduplicate many similar uncore PMUs by stripping their suffixes, + * but there are never going to be too many core PMUs and the suffixes might be + * interesting. "arm_cortex_a53" vs "arm_cortex_a57" or "cpum_cf" for example. + * + * @skip_duplicate_pmus: False in verbose mode so all uncore PMUs are visible + */ +static size_t pmu_deduped_name_len(const struct perf_pmu *pmu, const char *name, + bool skip_duplicate_pmus) { - return perf_pmu__find_metrics_table(NULL); + return skip_duplicate_pmus && !pmu->is_core + ? pmu_name_len_no_suffix(name) + : strlen(name); } /** - * perf_pmu__match_ignoring_suffix - Does the pmu_name match tok ignoring any - * trailing suffix? The Suffix must be in form - * tok_{digits}, or tok{digits}. + * perf_pmu__match_wildcard - Does the pmu_name start with tok and is then only + * followed by nothing or a suffix? tok may contain + * part of a suffix. * @pmu_name: The pmu_name with possible suffix. - * @tok: The possible match to pmu_name without suffix. + * @tok: The wildcard argument to match. */ -static bool perf_pmu__match_ignoring_suffix(const char *pmu_name, const char *tok) +static bool perf_pmu__match_wildcard(const char *pmu_name, const char *tok) { - const char *p; + const char *p, *suffix; + bool has_hex = false; + size_t tok_len = strlen(tok); - if (strncmp(pmu_name, tok, strlen(tok))) + /* Check start of pmu_name for equality. */ + if (strncmp(pmu_name, tok, tok_len)) return false; - p = pmu_name + strlen(tok); + suffix = p = pmu_name + tok_len; if (*p == 0) return true; - if (*p == '_') + if (*p == '_') { ++p; + ++suffix; + } /* Ensure we end in a number */ while (1) { - if (!isdigit(*p)) + if (!isxdigit(*p)) return false; + if (!has_hex) + has_hex = !isdigit(*p); if (*(++p) == 0) break; } + if (has_hex) + return (p - suffix) > 2; + return true; } /** - * pmu_uncore_alias_match - does name match the PMU name? - * @pmu_name: the json struct pmu_event name. This may lack a suffix (which + * perf_pmu__match_ignoring_suffix_uncore - Does the pmu_name match tok ignoring + * any trailing suffix on pmu_name and + * tok? The suffix must be in the form + * tok_{digits} or tok{digits}. + * @pmu_name: The pmu_name with possible suffix. + * @tok: The possible match to pmu_name. + */ +static bool perf_pmu__match_ignoring_suffix_uncore(const char *pmu_name, const char *tok) +{ + size_t pmu_name_len, tok_len; + + /* For robustness, check for NULL. */ + if (pmu_name == NULL) + return tok == NULL; + + /* uncore_ prefixes are ignored. */ + if (!strncmp(pmu_name, "uncore_", 7)) + pmu_name += 7; + if (!strncmp(tok, "uncore_", 7)) + tok += 7; + + pmu_name_len = pmu_name_len_no_suffix(pmu_name); + tok_len = pmu_name_len_no_suffix(tok); + if (pmu_name_len != tok_len) + return false; + + return strncmp(pmu_name, tok, pmu_name_len) == 0; +} + + +/** + * perf_pmu__match_wildcard_uncore - does to_match match the PMU's name? + * @pmu_name: The pmu->name or pmu->alias to match against. + * @to_match: the json struct pmu_event name. This may lack a suffix (which * matches) or be of the form "socket,pmuname" which will match * "socketX_pmunameY". - * @name: a real full PMU name as from sysfs.
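
A simplified, decimal-only sketch of the wildcard-suffix rule implemented above (the in-tree perf_pmu__match_wildcard() additionally accepts hex suffixes when they are longer than two characters):

    #include <ctype.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* Does pmu_name equal tok, or tok plus an optional '_' and a decimal suffix? */
    static bool match_wildcard(const char *pmu_name, const char *tok)
    {
            size_t tok_len = strlen(tok);
            const char *p;

            if (strncmp(pmu_name, tok, tok_len))
                    return false;
            p = pmu_name + tok_len;
            if (*p == '\0')
                    return true; /* exact match */
            if (*p == '_')
                    p++;
            if (*p == '\0')
                    return false; /* a bare trailing '_' is not a suffix */
            while (isdigit((unsigned char)*p))
                    p++;
            return *p == '\0'; /* the suffix must be all digits */
    }

    int main(void)
    {
            printf("%d %d %d\n",
                   match_wildcard("uncore_cha_2", "uncore_cha"), /* 1 */
                   match_wildcard("uncore_cha", "uncore_cha"),   /* 1 */
                   match_wildcard("uncore_chax", "uncore_cha")); /* 0 */
            return 0;
    }
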
*/ -static bool pmu_uncore_alias_match(const char *pmu_name, const char *name) +static bool perf_pmu__match_wildcard_uncore(const char *pmu_name, const char *to_match) { - char *tmp = NULL, *tok, *str; - bool res; - - if (strchr(pmu_name, ',') == NULL) - return perf_pmu__match_ignoring_suffix(name, pmu_name); + char *mutable_to_match, *tok, *tmp; - str = strdup(pmu_name); - if (!str) + if (!pmu_name) return false; - /* - * uncore alias may be from different PMU with common prefix - */ - tok = strtok_r(str, ",", &tmp); - if (strncmp(pmu_name, tok, strlen(tok))) { - res = false; - goto out; - } + /* uncore_ prefixes are ignored. */ + if (!strncmp(pmu_name, "uncore_", 7)) + pmu_name += 7; + if (!strncmp(to_match, "uncore_", 7)) + to_match += 7; - /* - * Match more complex aliases where the alias name is a comma-delimited - * list of tokens, orderly contained in the matching PMU name. - * - * Example: For alias "socket,pmuname" and PMU "socketX_pmunameY", we - * match "socket" in "socketX_pmunameY" and then "pmuname" in - * "pmunameY". - */ - while (1) { - char *next_tok = strtok_r(NULL, ",", &tmp); + if (strchr(to_match, ',') == NULL) + return perf_pmu__match_wildcard(pmu_name, to_match); - name = strstr(name, tok); - if (!name || - (!next_tok && !perf_pmu__match_ignoring_suffix(name, tok))) { - res = false; - goto out; + /* Process comma separated list of PMU name components. */ + mutable_to_match = strdup(to_match); + if (!mutable_to_match) + return false; + + tok = strtok_r(mutable_to_match, ",", &tmp); + while (tok) { + size_t tok_len = strlen(tok); + + if (strncmp(pmu_name, tok, tok_len)) { + /* Mismatch between part of pmu_name and tok. */ + free(mutable_to_match); + return false; } - if (!next_tok) - break; - tok = next_tok; - name += strlen(tok); + /* Move pmu_name forward over tok and suffix. */ + pmu_name += tok_len; + while (*pmu_name != '\0' && isdigit(*pmu_name)) + pmu_name++; + if (*pmu_name == '_') + pmu_name++; + + tok = strtok_r(NULL, ",", &tmp); } - - res = true; -out: - free(str); - return res; + free(mutable_to_match); + return *pmu_name == '\0'; } bool pmu_uncore_identifier_match(const char *compat, const char *id) @@ -964,7 +1079,7 @@ static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe, { struct perf_pmu *pmu = vdata; - perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, /*val_fd=*/ NULL, + perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, /*val_fd=*/ -1, pe, EVENT_SRC_CPU_JSON); return 0; } @@ -980,13 +1095,16 @@ void pmu_add_cpu_aliases_table(struct perf_pmu *pmu, const struct pmu_events_tab static void pmu_add_cpu_aliases(struct perf_pmu *pmu) { - if (!pmu->events_table) + if (!pmu->events_table && !pmu->is_core) return; if (pmu->cpu_aliases_added) return; pmu_add_cpu_aliases_table(pmu, pmu->events_table); + if (pmu->is_core) + pmu_add_cpu_aliases_table(pmu, perf_pmu__default_core_events_table()); + pmu->cpu_aliases_added = true; } @@ -996,20 +1114,27 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe, { struct perf_pmu *pmu = vdata; - if (!pe->compat || !pe->pmu) + if (!pe->compat || !pe->pmu) { + /* No data to match. */ + return 0; + } + + if (!perf_pmu__match_wildcard_uncore(pmu->name, pe->pmu) && + !perf_pmu__match_wildcard_uncore(pmu->alias_name, pe->pmu)) { + /* PMU name/alias_name don't match. */ return 0; + } - if (pmu_uncore_alias_match(pe->pmu, pmu->name) && - pmu_uncore_identifier_match(pe->compat, pmu->id)) { + if (pmu_uncore_identifier_match(pe->compat, pmu->id)) { + /* Id matched. 
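
The comma-separated form handled by perf_pmu__match_wildcard_uncore() above walks the PMU name one component at a time, e.g. "socket,pmuname" against "socket0_pmuname1"; a condensed standalone sketch of that walk:

    #include <ctype.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Match "comp1,comp2,..." against "comp1<suffix>_comp2<suffix>...". */
    static bool match_uncore_components(const char *pmu_name, const char *to_match)
    {
            char *dup = strdup(to_match), *tmp = NULL, *tok;
            bool ok = true;

            if (!dup)
                    return false;
            for (tok = strtok_r(dup, ",", &tmp); tok; tok = strtok_r(NULL, ",", &tmp)) {
                    size_t tok_len = strlen(tok);

                    if (strncmp(pmu_name, tok, tok_len)) {
                            ok = false;
                            break;
                    }
                    pmu_name += tok_len;
                    while (isdigit((unsigned char)*pmu_name)) /* per-component suffix */
                            pmu_name++;
                    if (*pmu_name == '_')
                            pmu_name++;
            }
            ok = ok && *pmu_name == '\0';
            free(dup);
            return ok;
    }

    int main(void)
    {
            printf("%d\n", match_uncore_components("socket0_pmuname1", "socket,pmuname")); /* 1 */
            return 0;
    }
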
*/ perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, - /*val_fd=*/ NULL, + /*val_fd=*/ -1, pe, EVENT_SRC_SYS_JSON); } - return 0; } @@ -1059,43 +1184,107 @@ perf_pmu__arch_init(struct perf_pmu *pmu) pmu->mem_events = perf_mem_events; } +/* Variant of str_hash that does tolower on each character. */ +static size_t aliases__hash(long key, void *ctx __maybe_unused) +{ + const char *s = (const char *)key; + size_t h = 0; + + while (*s) { + h = h * 31 + tolower(*s); + s++; + } + return h; +} + +static bool aliases__equal(long key1, long key2, void *ctx __maybe_unused) +{ + return strcasecmp((const char *)key1, (const char *)key2) == 0; +} + +int perf_pmu__init(struct perf_pmu *pmu, __u32 type, const char *name) +{ + pmu->type = type; + INIT_LIST_HEAD(&pmu->format); + INIT_LIST_HEAD(&pmu->caps); + + pmu->name = strdup(name); + if (!pmu->name) + return -ENOMEM; + + pmu->aliases = hashmap__new(aliases__hash, aliases__equal, /*ctx=*/ NULL); + if (!pmu->aliases) + return -ENOMEM; + + return 0; +} + +static __u32 wellknown_pmu_type(const char *pmu_name) +{ + struct { + const char *pmu_name; + __u32 type; + } wellknown_pmus[] = { + { + "software", + PERF_TYPE_SOFTWARE + }, + { + "tracepoint", + PERF_TYPE_TRACEPOINT + }, + { + "breakpoint", + PERF_TYPE_BREAKPOINT + }, + }; + for (size_t i = 0; i < ARRAY_SIZE(wellknown_pmus); i++) { + if (!strcmp(wellknown_pmus[i].pmu_name, pmu_name)) + return wellknown_pmus[i].type; + } + return PERF_TYPE_MAX; +} + struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *name, bool eager_load) { struct perf_pmu *pmu; - __u32 type; pmu = zalloc(sizeof(*pmu)); if (!pmu) return NULL; - pmu->name = strdup(name); - if (!pmu->name) - goto err; + if (perf_pmu__init(pmu, PERF_PMU_TYPE_FAKE, name) != 0) { + perf_pmu__delete(pmu); + return NULL; + } /* * Read type early to fail fast if a lookup name isn't a PMU. Ensure * that type value is successfully assigned (return 1). */ - if (perf_pmu__scan_file_at(pmu, dirfd, "type", "%u", &type) != 1) - goto err; - - INIT_LIST_HEAD(&pmu->format); - INIT_LIST_HEAD(&pmu->aliases); - INIT_LIST_HEAD(&pmu->caps); + if (perf_pmu__scan_file_at(pmu, dirfd, "type", "%u", &pmu->type) != 1) { + /* Double check the PMU's name isn't wellknown. */ + pmu->type = wellknown_pmu_type(name); + if (pmu->type == PERF_TYPE_MAX) { + perf_pmu__delete(pmu); + return NULL; + } + } /* * The pmu data we store & need consists of the pmu * type value and format definitions. Load both right * now. */ - if (pmu_format(pmu, dirfd, name, eager_load)) - goto err; + if (pmu_format(pmu, dirfd, name, eager_load)) { + perf_pmu__delete(pmu); + return NULL; + } pmu->is_core = is_pmu_core(name); pmu->cpus = pmu_cpumask(dirfd, name, pmu->is_core); - pmu->type = type; pmu->is_uncore = pmu_is_uncore(dirfd, name); if (pmu->is_uncore) pmu->id = pmu_id(name); @@ -1117,10 +1306,6 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char pmu_aliases_parse_eager(pmu, dirfd); return pmu; -err: - zfree(&pmu->name); - free(pmu); - return NULL; } /* Creates the PMU when sysfs scanning fails. 
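
The aliases hashmap above pairs a tolower()-folding hash with strcasecmp() equality; the invariant to preserve is that any two keys that compare equal must also hash equally. A tiny standalone check:

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>
    #include <strings.h> /* strcasecmp */

    /* Case-insensitive hash: must agree with strcasecmp()-based equality. */
    static size_t ci_hash(const char *s)
    {
            size_t h = 0;

            while (*s)
                    h = h * 31 + tolower((unsigned char)*s++);
            return h;
    }

    int main(void)
    {
            printf("%d\n", ci_hash("Cycles") == ci_hash("cycles") &&
                           !strcasecmp("Cycles", "cycles")); /* 1 */
            return 0;
    }
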
*/ @@ -1142,12 +1327,17 @@ struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pm pmu->cpus = cpu_map__online(); INIT_LIST_HEAD(&pmu->format); - INIT_LIST_HEAD(&pmu->aliases); + pmu->aliases = hashmap__new(aliases__hash, aliases__equal, /*ctx=*/ NULL); INIT_LIST_HEAD(&pmu->caps); list_add_tail(&pmu->list, core_pmus); return pmu; } +bool perf_pmu__is_fake(const struct perf_pmu *pmu) +{ + return pmu->type == PERF_PMU_TYPE_FAKE; +} + void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu) { struct perf_pmu_format *format; @@ -1158,7 +1348,7 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu) pmu->formats_checked = true; /* fake pmu doesn't have format list */ - if (pmu == &perf_pmu__fake) + if (perf_pmu__is_fake(pmu)) return; list_for_each_entry(format, &pmu->format, list) { @@ -1174,8 +1364,12 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu) bool evsel__is_aux_event(const struct evsel *evsel) { - struct perf_pmu *pmu = evsel__find_pmu(evsel); + struct perf_pmu *pmu; + if (evsel->needs_auxtrace_mmap) + return true; + + pmu = evsel__find_pmu(evsel); return pmu && pmu->auxtrace; } @@ -1337,7 +1531,8 @@ static int pmu_config_term(const struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_term *term, struct parse_events_terms *head_terms, - bool zero, struct parse_events_error *err) + bool zero, bool apply_hardcoded, + struct parse_events_error *err) { struct perf_pmu_format *format; __u64 *vp; @@ -1351,11 +1546,78 @@ static int pmu_config_term(const struct perf_pmu *pmu, return 0; /* - * Hardcoded terms should be already in, so nothing - * to be done for them. + * Hardcoded terms are generally handled in event parsing, which + * traditionally have had to handle not having a PMU. An alias may + * have hard coded config values, optionally apply them below. */ - if (parse_events__is_hardcoded_term(term)) + if (parse_events__is_hardcoded_term(term)) { + /* Config terms set all bits in the config. 
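
The legacy hardware and cache encodings below rely on the extended-type convention from the perf UAPI: when attr->type is PERF_TYPE_HARDWARE or PERF_TYPE_HW_CACHE, the high 32 bits of attr->config may carry the PMU type. A standalone sketch of the packing; the shift and mask match linux/perf_event.h, while the PMU type value 8 is a made-up example:

    #include <stdint.h>
    #include <stdio.h>

    #define PERF_PMU_TYPE_SHIFT 32 /* as in linux/perf_event.h */
    #define PERF_HW_EVENT_MASK  0xffffffffULL

    int main(void)
    {
            uint64_t config = 0;   /* PERF_COUNT_HW_CPU_CYCLES */
            uint32_t pmu_type = 8; /* hypothetical core PMU type */

            config |= (uint64_t)pmu_type << PERF_PMU_TYPE_SHIFT;
            printf("config=%#llx hw_id=%llu pmu_type=%u\n",
                   (unsigned long long)config,
                   (unsigned long long)(config & PERF_HW_EVENT_MASK),
                   (unsigned int)(config >> PERF_PMU_TYPE_SHIFT));
            return 0;
    }
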
*/ + DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS); + + if (!apply_hardcoded) + return 0; + + bitmap_fill(bits, PERF_PMU_FORMAT_BITS); + + switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_CONFIG: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG1: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config1, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG2: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config2, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG3: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config3, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG4: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config4, zero); + break; + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + assert(term->val.num < PERF_COUNT_HW_MAX); + assert(pmu->is_core); + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + attr->type = PERF_TYPE_HARDWARE; + break; + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: { +#ifndef NDEBUG + int cache_type = term->val.num & 0xFF; + int cache_op = (term->val.num >> 8) & 0xFF; + int cache_result = (term->val.num >> 16) & 0xFF; + + assert(cache_type < PERF_COUNT_HW_CACHE_MAX); + assert(cache_op < PERF_COUNT_HW_CACHE_OP_MAX); + assert(cache_result < PERF_COUNT_HW_CACHE_RESULT_MAX); +#endif + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + assert((term->val.num & ~0xFFFFFF) == 0); + assert(pmu->is_core); + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + attr->type = PERF_TYPE_HW_CACHE; + break; + } + case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */ + return -EINVAL; + case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: + /* Skip non-config terms. */ + break; + default: + break; + } return 0; + } format = pmu_find_format(&pmu->format, term->config); if (!format) { @@ -1393,6 +1655,9 @@ static int pmu_config_term(const struct perf_pmu *pmu, case PERF_PMU_FORMAT_VALUE_CONFIG3: vp = &attr->config3; break; + case PERF_PMU_FORMAT_VALUE_CONFIG4: + vp = &attr->config4; + break; default: return -EINVAL; } @@ -1437,13 +1702,12 @@ static int pmu_config_term(const struct perf_pmu *pmu, if (err) { char *err_str; - parse_events_error__handle(err, term->err_val, - asprintf(&err_str, - "value too big for format (%s), maximum is %llu", - format->name, (unsigned long long)max_val) < 0 - ? 
strdup("value too big for format") - : err_str, - NULL); + if (asprintf(&err_str, + "value too big for format (%s), maximum is %llu", + format->name, (unsigned long long)max_val) < 0) { + err_str = strdup("value too big for format"); + } + parse_events_error__handle(err, term->err_val, err_str, /*help=*/NULL); return -EINVAL; } /* @@ -1459,12 +1723,18 @@ static int pmu_config_term(const struct perf_pmu *pmu, int perf_pmu__config_terms(const struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *terms, - bool zero, struct parse_events_error *err) + bool zero, bool apply_hardcoded, + struct parse_events_error *err) { struct parse_events_term *term; + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__config_terms(pmu, attr, terms, err); + if (perf_pmu__is_drm(pmu)) + return drm_pmu__config_terms(pmu, attr, terms, err); + list_for_each_entry(term, &terms->terms, list) { - if (pmu_config_term(pmu, attr, term, terms, zero, err)) + if (pmu_config_term(pmu, attr, term, terms, zero, apply_hardcoded, err)) return -EINVAL; } @@ -1478,11 +1748,16 @@ int perf_pmu__config_terms(const struct perf_pmu *pmu, */ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *head_terms, + bool apply_hardcoded, struct parse_events_error *err) { bool zero = !!pmu->perf_event_attr_init_default; - return perf_pmu__config_terms(pmu, attr, head_terms, zero, err); + /* Fake PMU doesn't have proper terms so nothing to configure in attr. */ + if (perf_pmu__is_fake(pmu)) + return 0; + + return perf_pmu__config_terms(pmu, attr, head_terms, zero, apply_hardcoded, err); } static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, @@ -1514,10 +1789,14 @@ static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, return alias; /* Alias doesn't exist, try to get it from the json events. 
*/ - if (pmu->events_table && - pmu_events_table__find_event(pmu->events_table, pmu, name, - pmu_add_cpu_aliases_map_callback, - pmu) == 0) { + if ((pmu_events_table__find_event(pmu->events_table, pmu, name, + pmu_add_cpu_aliases_map_callback, + pmu) == 0) || + (pmu->is_core && + pmu_events_table__find_event(perf_pmu__default_core_events_table(), + pmu, name, + pmu_add_cpu_aliases_map_callback, + pmu) == 0)) { alias = perf_pmu__find_alias(pmu, name, /*load=*/ false); } return alias; @@ -1567,13 +1846,31 @@ static int check_info_data(struct perf_pmu *pmu, return 0; } +static int perf_pmu__parse_terms_to_attr(struct perf_pmu *pmu, const char *terms_str, + struct perf_event_attr *attr) +{ + struct parse_events_terms terms; + int ret; + + parse_events_terms__init(&terms); + ret = parse_events_terms(&terms, terms_str); + if (ret) { + pr_debug("Failed to parse terms '%s': %d\n", terms_str, ret); + parse_events_terms__exit(&terms); + return ret; + } + ret = perf_pmu__config(pmu, attr, &terms, /*apply_hardcoded=*/true, /*err=*/NULL); + parse_events_terms__exit(&terms); + return ret; +} + /* * Find alias in the terms list and replace it with the terms * defined for the alias */ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, struct perf_pmu_info *info, bool *rewrote_terms, - struct parse_events_error *err) + u64 *alternate_hw_config, struct parse_events_error *err) { struct parse_events_term *term, *h; struct perf_pmu_alias *alias; @@ -1589,30 +1886,65 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ info->unit = NULL; info->scale = 0.0; info->snapshot = false; + info->retirement_latency_mean = 0.0; + info->retirement_latency_min = 0.0; + info->retirement_latency_max = 0.0; + + if (perf_pmu__is_hwmon(pmu)) { + ret = hwmon_pmu__check_alias(head_terms, info, err); + goto out; + } + if (perf_pmu__is_drm(pmu)) { + ret = drm_pmu__check_alias(pmu, head_terms, info, err); + goto out; + } + + /* Fake PMU doesn't rewrite terms. 
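 *
 * The perf_pmu__parse_terms_to_attr() helper above gives a reusable
 * string-to-perf_event_attr round trip; a usage sketch (the term names
 * are hypothetical, real ones come from the PMU's sysfs format
 * directory):
 *
 *	struct perf_event_attr attr = { .config = 0, };
 *
 *	if (perf_pmu__parse_terms_to_attr(pmu, "event=0x3c,umask=0x1", &attr) == 0)
 *		pr_debug("config=0x%llx\n", (unsigned long long)attr.config);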
*/ + if (perf_pmu__is_fake(pmu)) + goto out; list_for_each_entry_safe(term, h, &head_terms->terms, list) { alias = pmu_find_alias(pmu, term); if (!alias) continue; - ret = pmu_alias_terms(alias, term->err_term, &term->list); + ret = pmu_alias_terms(alias, &term->list); if (ret) { parse_events_error__handle(err, term->err_term, - strdup("Failure to duplicate terms"), + strdup("Failed to parse terms"), NULL); return ret; } + *rewrote_terms = true; ret = check_info_data(pmu, alias, info, err, term->err_term); if (ret) return ret; + if (alias->legacy_terms) { + struct perf_event_attr attr = {.config = 0,}; + + ret = perf_pmu__parse_terms_to_attr(pmu, alias->legacy_terms, &attr); + if (ret) { + parse_events_error__handle(err, term->err_term, + strdup("Error evaluating legacy terms"), + NULL); + return ret; + } + if (attr.type == PERF_TYPE_HARDWARE) + *alternate_hw_config = attr.config & PERF_HW_EVENT_MASK; + } + if (alias->per_pkg) info->per_pkg = true; + info->retirement_latency_mean = alias->retirement_latency_mean; + info->retirement_latency_min = alias->retirement_latency_min; + info->retirement_latency_max = alias->retirement_latency_max; + list_del_init(&term->list); parse_events_term__delete(term); } - +out: /* * if no unit or scale found in aliases, then * set defaults as for evsel @@ -1685,6 +2017,9 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call "config1=0..0xffffffffffffffff", "config2=0..0xffffffffffffffff", "config3=0..0xffffffffffffffff", + "config4=0..0xffffffffffffffff", + "legacy-hardware-config=0..9,", + "legacy-cache-config=0..0xffffff,", "name=string", "period=number", "freq=number", @@ -1700,17 +2035,20 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call "no-overwrite", "percore", "aux-output", + "aux-action=(pause|resume|start-paused)", "aux-sample-size=number", + "cpu=number", + "ratio-to-prev=string", }; struct perf_pmu_format *format; int ret; /* * max-events and driver-config are missing above as are the internal - * types user, metric-id, raw, legacy cache and hardware. Assert against - * the enum parse_events__term_type so they are kept in sync. + * types user, metric-id, and raw. Assert against the enum + * parse_events__term_type so they are kept in sync. 
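 *
 * The legacy_terms evaluation earlier in this hunk is what feeds
 * *alternate_hw_config. For example (hypothetical alias), JSON supplying
 * legacy_terms = "legacy-hardware-config=1" evaluates to
 * attr.type == PERF_TYPE_HARDWARE with attr.config ==
 * PERF_COUNT_HW_INSTRUCTIONS, so parsing can fall back to the legacy
 * instructions encoding:
 *
 *	if (attr.type == PERF_TYPE_HARDWARE)
 *		*alternate_hw_config = attr.config & PERF_HW_EVENT_MASK;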
 */
-	_Static_assert(ARRAY_SIZE(terms) == __PARSE_EVENTS__TERM_TYPE_NR - 6,
+	_Static_assert(ARRAY_SIZE(terms) == __PARSE_EVENTS__TERM_TYPE_NR - 4,
 		       "perf_pmu__for_each_format()'s terms must be kept in sync with enum parse_events__term_type");
 	list_for_each_entry(format, &pmu->format, list) {
 		perf_pmu_format__load(pmu, format);
@@ -1753,26 +2091,54 @@ bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name)
 {
 	if (!name)
 		return false;
+	if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(name))
+		return false;
+	if (perf_pmu__is_tracepoint(pmu))
+		return tp_pmu__have_event(pmu, name);
+	if (perf_pmu__is_hwmon(pmu))
+		return hwmon_pmu__have_event(pmu, name);
+	if (perf_pmu__is_drm(pmu))
+		return drm_pmu__have_event(pmu, name);
 	if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL)
 		return true;
-	if (pmu->cpu_aliases_added || !pmu->events_table)
+	if (pmu->cpu_aliases_added || (!pmu->events_table && !pmu->is_core))
 		return false;
-	return pmu_events_table__find_event(pmu->events_table, pmu, name, NULL, NULL) == 0;
+	if (pmu_events_table__find_event(pmu->events_table, pmu, name, NULL, NULL) == 0)
+		return true;
+	return pmu->is_core &&
+		pmu_events_table__find_event(perf_pmu__default_core_events_table(),
+					     pmu, name, NULL, NULL) == 0;
 }
 
 size_t perf_pmu__num_events(struct perf_pmu *pmu)
 {
 	size_t nr;
 
+	if (perf_pmu__is_tracepoint(pmu))
+		return tp_pmu__num_events(pmu);
+	if (perf_pmu__is_hwmon(pmu))
+		return hwmon_pmu__num_events(pmu);
+	if (perf_pmu__is_drm(pmu))
+		return drm_pmu__num_events(pmu);
+
 	pmu_aliases_parse(pmu);
-	nr = pmu->sysfs_aliases + pmu->sys_json_aliases;;
+	nr = pmu->sysfs_aliases + pmu->sys_json_aliases;
+
+	if (pmu->cpu_aliases_added) {
+		nr += pmu->cpu_json_aliases;
+	} else if (pmu->events_table || pmu->is_core) {
+		nr += pmu_events_table__num_events(pmu->events_table, pmu);
+		if (pmu->is_core) {
+			nr += pmu_events_table__num_events(
+				perf_pmu__default_core_events_table(), pmu);
+		}
+		nr -= pmu->cpu_common_json_aliases;
+	} else {
+		assert(pmu->cpu_json_aliases == 0 && pmu->cpu_common_json_aliases == 0);
+	}
 
-	if (pmu->cpu_aliases_added)
-		nr += pmu->cpu_json_aliases;
-	else if (pmu->events_table)
-		nr += pmu_events_table__num_events(pmu->events_table, pmu) - pmu->cpu_json_aliases;
-	else
-		assert(pmu->cpu_json_aliases == 0);
+	if (perf_pmu__is_tool(pmu))
+		nr -= tool_pmu__num_skip_events();
 
 	return pmu->selectable ? nr + 1 : nr;
 }
@@ -1787,19 +2153,37 @@ static int sub_non_neg(int a, int b)
 static char *format_alias(char *buf, int len, const struct perf_pmu *pmu,
 			  const struct perf_pmu_alias *alias, bool skip_duplicate_pmus)
 {
+	struct parse_events_terms terms;
 	struct parse_events_term *term;
-	int pmu_name_len = skip_duplicate_pmus
-		? pmu_name_len_no_suffix(pmu->name, /*num=*/NULL)
-		: (int)strlen(pmu->name);
-	int used = snprintf(buf, len, "%.*s/%s", pmu_name_len, pmu->name, alias->name);
+	int ret, used;
+	size_t pmu_name_len = pmu_deduped_name_len(pmu, pmu->name,
						   skip_duplicate_pmus);
+
+	/* Parameterized events have the parameters shown. */
+	if (strstr(alias->terms, "=?")) {
+		/* No parameters.
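 *
 * For illustration (hypothetical alias "mem_bw" on a PMU "uncore_imc"):
 * terms containing "=?" such as "chan=?,event=0x4" make the event list
 * show just "uncore_imc/mem_bw/"; the placeholder is only resolved when
 * the user supplies a value, as in "uncore_imc/mem_bw,chan=2/".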
*/ + snprintf(buf, len, "%.*s/%s/", (int)pmu_name_len, pmu->name, alias->name); + return buf; + } - list_for_each_entry(term, &alias->terms.terms, list) { + parse_events_terms__init(&terms); + ret = parse_events_terms(&terms, alias->terms); + if (ret) { + pr_err("Failure to parse '%s' terms '%s': %d\n", + alias->name, alias->terms, ret); + parse_events_terms__exit(&terms); + snprintf(buf, len, "%.*s/%s/", (int)pmu_name_len, pmu->name, alias->name); + return buf; + } + used = snprintf(buf, len, "%.*s/%s", (int)pmu_name_len, pmu->name, alias->name); + + list_for_each_entry(term, &terms.terms, list) { if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) used += snprintf(buf + used, sub_non_neg(len, used), ",%s=%s", term->config, term->val.str); } - + parse_events_terms__exit(&terms); if (sub_non_neg(len, used) > 0) { buf[used] = '/'; used++; @@ -1813,28 +2197,73 @@ static char *format_alias(char *buf, int len, const struct perf_pmu *pmu, return buf; } +static bool perf_pmu_alias__check_deprecated(struct perf_pmu *pmu, struct perf_pmu_alias *alias) +{ + struct perf_event_attr attr = {.config = 0,}; + const char *check_terms; + bool has_legacy_config; + + if (alias->legacy_deprecated_checked) + return alias->deprecated; + + alias->legacy_deprecated_checked = true; + if (alias->deprecated) + return true; + + check_terms = alias->terms; + has_legacy_config = + strstr(check_terms, "legacy-hardware-config=") != NULL || + strstr(check_terms, "legacy-cache-config=") != NULL; + if (!has_legacy_config && alias->legacy_terms) { + check_terms = alias->legacy_terms; + has_legacy_config = + strstr(check_terms, "legacy-hardware-config=") != NULL || + strstr(check_terms, "legacy-cache-config=") != NULL; + } + if (!has_legacy_config) + return false; + + if (perf_pmu__parse_terms_to_attr(pmu, check_terms, &attr) != 0) { + /* Parsing failed, set as deprecated. */ + alias->deprecated = true; + } else if (attr.type < PERF_TYPE_MAX) { + /* Flag unsupported legacy events as deprecated. */ + alias->deprecated = !is_event_supported(attr.type, attr.config); + } + return alias->deprecated; +} + int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, void *state, pmu_event_callback cb) { char buf[1024]; - struct perf_pmu_alias *event; struct pmu_event_info info = { .pmu = pmu, + .event_type_desc = "Kernel PMU event", }; int ret = 0; - struct strbuf sb; + struct hashmap_entry *entry; + size_t bkt; + + if (perf_pmu__is_tracepoint(pmu)) + return tp_pmu__for_each_event(pmu, state, cb); + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__for_each_event(pmu, state, cb); + if (perf_pmu__is_drm(pmu)) + return drm_pmu__for_each_event(pmu, state, cb); - strbuf_init(&sb, /*hint=*/ 0); pmu_aliases_parse(pmu); pmu_add_cpu_aliases(pmu); - list_for_each_entry(event, &pmu->aliases, list) { - size_t buf_used; - int pmu_name_len; + hashmap__for_each_entry(pmu->aliases, entry, bkt) { + struct perf_pmu_alias *event = entry->pvalue; + size_t buf_used, pmu_name_len; + + if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(event->name)) + continue; info.pmu_name = event->pmu_name ?: pmu->name; - pmu_name_len = skip_duplicate_pmus - ? 
pmu_name_len_no_suffix(info.pmu_name, /*num=*/NULL) - : (int)strlen(info.pmu_name); + pmu_name_len = pmu_deduped_name_len(pmu, info.pmu_name, + skip_duplicate_pmus); info.alias = NULL; if (event->desc) { info.name = event->name; @@ -1857,16 +2286,14 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, info.desc = event->desc; info.long_desc = event->long_desc; info.encoding_desc = buf + buf_used; - parse_events_terms__to_strbuf(&event->terms, &sb); buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, - "%.*s/%s/", pmu_name_len, info.pmu_name, sb.buf) + 1; + "%.*s/%s/", (int)pmu_name_len, info.pmu_name, event->terms) + 1; + info.str = event->terms; info.topic = event->topic; - info.str = sb.buf; - info.deprecated = event->deprecated; + info.deprecated = perf_pmu_alias__check_deprecated(pmu, event); ret = cb(state, &info); if (ret) goto out; - strbuf_setlen(&sb, /*len=*/ 0); } if (pmu->selectable) { info.name = buf; @@ -1882,19 +2309,88 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, ret = cb(state, &info); } out: - strbuf_release(&sb); return ret; } -bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name) +static bool perf_pmu___name_match(const struct perf_pmu *pmu, const char *to_match, bool wildcard) { - return !strcmp(pmu->name, pmu_name) || - (pmu->is_uncore && pmu_uncore_alias_match(pmu_name, pmu->name)) || + const char *names[2] = { + pmu->name, + pmu->alias_name, + }; + if (pmu->is_core) { + for (size_t i = 0; i < ARRAY_SIZE(names); i++) { + const char *name = names[i]; + + if (!name) + continue; + + if (!strcmp(name, to_match)) { + /* Exact name match. */ + return true; + } + } + if (!strcmp(to_match, "default_core")) { + /* + * jevents and tests use default_core as a marker for any core + * PMU as the PMU name varies across architectures. + */ + return true; + } + return false; + } + if (!pmu->is_uncore) { /* - * jevents and tests use default_core as a marker for any core - * PMU as the PMU name varies across architectures. + * PMU isn't core or uncore, some kind of broken CPU mask + * situation. Only match exact name. */ - (pmu->is_core && !strcmp(pmu_name, "default_core")); + for (size_t i = 0; i < ARRAY_SIZE(names); i++) { + const char *name = names[i]; + + if (!name) + continue; + + if (!strcmp(name, to_match)) { + /* Exact name match. */ + return true; + } + } + return false; + } + for (size_t i = 0; i < ARRAY_SIZE(names); i++) { + const char *name = names[i]; + + if (!name) + continue; + + if (wildcard && perf_pmu__match_wildcard_uncore(name, to_match)) + return true; + if (!wildcard && perf_pmu__match_ignoring_suffix_uncore(name, to_match)) + return true; + } + return false; +} + +/** + * perf_pmu__name_wildcard_match - Called by the jevents generated code to see + * if pmu matches the json to_match string. + * @pmu: The pmu whose name/alias to match. + * @to_match: The possible match to pmu_name. + */ +bool perf_pmu__name_wildcard_match(const struct perf_pmu *pmu, const char *to_match) +{ + return perf_pmu___name_match(pmu, to_match, /*wildcard=*/true); +} + +/** + * perf_pmu__name_no_suffix_match - Does pmu's name match to_match ignoring any + * trailing suffix on the pmu_name and/or tok? + * @pmu: The pmu whose name/alias to match. + * @to_match: The possible match to pmu_name. 
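 *
 * Illustrative expectations for the two matchers (hypothetical PMU
 * names; a sketch, not an exhaustive contract):
 *
 *	uncore PMU "uncore_imc_2":
 *	  perf_pmu__name_wildcard_match(pmu, "uncore_imc_*")  -> true
 *	  perf_pmu__name_no_suffix_match(pmu, "uncore_imc")   -> true
 *	core PMU "cpu":
 *	  perf_pmu__name_no_suffix_match(pmu, "default_core") -> true
 *
 * PMUs that are neither core nor uncore only match their exact name.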
+ */ +bool perf_pmu__name_no_suffix_match(const struct perf_pmu *pmu, const char *to_match) +{ + return perf_pmu___name_match(pmu, to_match, /*wildcard=*/false); } bool perf_pmu__is_software(const struct perf_pmu *pmu) @@ -1914,6 +2410,7 @@ bool perf_pmu__is_software(const struct perf_pmu *pmu) case PERF_TYPE_HW_CACHE: return false; case PERF_TYPE_RAW: return false; case PERF_TYPE_BREAKPOINT: return true; + case PERF_PMU_TYPE_TOOL: return true; default: break; } for (size_t i = 0; i < ARRAY_SIZE(known_sw_pmus); i++) { @@ -2025,6 +2522,17 @@ static void perf_pmu__del_caps(struct perf_pmu *pmu) } } +struct perf_pmu_caps *perf_pmu__get_cap(struct perf_pmu *pmu, const char *name) +{ + struct perf_pmu_caps *caps; + + list_for_each_entry(caps, &pmu->caps, list) { + if (!strcmp(caps->name, name)) + return caps; + } + return NULL; +} + /* * Reading/parsing the given pmu capabilities, which should be located at: * /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes. @@ -2032,10 +2540,9 @@ static void perf_pmu__del_caps(struct perf_pmu *pmu) */ int perf_pmu__caps_parse(struct perf_pmu *pmu) { - struct stat st; char caps_path[PATH_MAX]; - DIR *caps_dir; - struct dirent *evt_ent; + struct io_dir caps_dir; + struct io_dirent64 *evt_ent; int caps_fd; if (pmu->caps_initialized) @@ -2046,24 +2553,21 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) if (!perf_pmu__pathname_scnprintf(caps_path, sizeof(caps_path), pmu->name, "caps")) return -1; - if (stat(caps_path, &st) < 0) { + caps_fd = open(caps_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY); + if (caps_fd == -1) { pmu->caps_initialized = true; return 0; /* no error if caps does not exist */ } - caps_dir = opendir(caps_path); - if (!caps_dir) - return -EINVAL; - - caps_fd = dirfd(caps_dir); + io_dir__init(&caps_dir, caps_fd); - while ((evt_ent = readdir(caps_dir)) != NULL) { + while ((evt_ent = io_dir__readdir(&caps_dir)) != NULL) { char *name = evt_ent->d_name; char value[128]; FILE *file; int fd; - if (!strcmp(name, ".") || !strcmp(name, "..")) + if (io_dir__is_dir(&caps_dir, evt_ent)) continue; fd = openat(caps_fd, name, O_RDONLY); @@ -2085,7 +2589,7 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) fclose(file); } - closedir(caps_dir); + close(caps_fd); pmu->caps_initialized = true; return pmu->nr_caps; @@ -2140,34 +2644,31 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, name ?: "N/A", buf, config_name, config); } -bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok) +bool perf_pmu__wildcard_match(const struct perf_pmu *pmu, const char *wildcard_to_match) { - const char *name = pmu->name; - bool need_fnmatch = strchr(tok, '*') != NULL; - - if (!strncmp(tok, "uncore_", 7)) - tok += 7; - if (!strncmp(name, "uncore_", 7)) - name += 7; + const char *names[2] = { + pmu->name, + pmu->alias_name, + }; + bool need_fnmatch = strisglob(wildcard_to_match); - if (perf_pmu__match_ignoring_suffix(name, tok) || - (need_fnmatch && !fnmatch(tok, name, 0))) - return true; + if (!strncmp(wildcard_to_match, "uncore_", 7)) + wildcard_to_match += 7; - name = pmu->alias_name; - if (!name) - return false; + for (size_t i = 0; i < ARRAY_SIZE(names); i++) { + const char *pmu_name = names[i]; - if (!strncmp(name, "uncore_", 7)) - name += 7; + if (!pmu_name) + continue; - return perf_pmu__match_ignoring_suffix(name, tok) || - (need_fnmatch && !fnmatch(tok, name, 0)); -} + if (!strncmp(pmu_name, "uncore_", 7)) + pmu_name += 7; -double __weak perf_pmu__cpu_slots_per_cycle(void) -{ - return NAN; + if 
(perf_pmu__match_wildcard(pmu_name, wildcard_to_match) || + (need_fnmatch && !fnmatch(wildcard_to_match, pmu_name, 0))) + return true; + } + return false; } int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size) @@ -2222,6 +2723,14 @@ int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, void perf_pmu__delete(struct perf_pmu *pmu) { + if (!pmu) + return; + + if (perf_pmu__is_hwmon(pmu)) + hwmon_pmu__exit(pmu); + else if (perf_pmu__is_drm(pmu)) + drm_pmu__exit(pmu); + perf_pmu__del_formats(&pmu->format); perf_pmu__del_aliases(pmu); perf_pmu__del_caps(pmu); @@ -2236,16 +2745,18 @@ void perf_pmu__delete(struct perf_pmu *pmu) const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config) { - struct perf_pmu_alias *event; + struct hashmap_entry *entry; + size_t bkt; if (!pmu) return NULL; pmu_aliases_parse(pmu); pmu_add_cpu_aliases(pmu); - list_for_each_entry(event, &pmu->aliases, list) { + hashmap__for_each_entry(pmu->aliases, entry, bkt) { + struct perf_pmu_alias *event = entry->pvalue; struct perf_event_attr attr = {.config = 0,}; - int ret = perf_pmu__config(pmu, &attr, &event->terms, NULL); + int ret = perf_pmu__parse_terms_to_attr(pmu, event->terms, &attr); if (ret == 0 && config == attr.config) return event->name; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index b2d3fd291f02..8f11bfe8ed6d 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -14,6 +14,7 @@ #include "mem-events.h" struct evsel_config_term; +struct hashmap; struct perf_cpu_map; struct print_callbacks; @@ -22,6 +23,7 @@ enum { PERF_PMU_FORMAT_VALUE_CONFIG1, PERF_PMU_FORMAT_VALUE_CONFIG2, PERF_PMU_FORMAT_VALUE_CONFIG3, + PERF_PMU_FORMAT_VALUE_CONFIG4, PERF_PMU_FORMAT_VALUE_CONFIG_END, }; @@ -36,6 +38,30 @@ struct perf_pmu_caps { struct list_head list; }; +enum pmu_kind { + /* A perf event syscall PMU. */ + PERF_PMU_KIND_PE, + /* A perf tool provided DRM PMU. */ + PERF_PMU_KIND_DRM, + /* A perf tool provided HWMON PMU. */ + PERF_PMU_KIND_HWMON, + /* Perf tool provided PMU for tool events like time. */ + PERF_PMU_KIND_TOOL, + /* A testing PMU kind. */ + PERF_PMU_KIND_FAKE +}; + +enum { + PERF_PMU_TYPE_PE_START = 0, + PERF_PMU_TYPE_PE_END = 0xFFFDFFFF, + PERF_PMU_TYPE_DRM_START = 0xFFFE0000, + PERF_PMU_TYPE_DRM_END = 0xFFFEFFFF, + PERF_PMU_TYPE_HWMON_START = 0xFFFF0000, + PERF_PMU_TYPE_HWMON_END = 0xFFFFFFFD, + PERF_PMU_TYPE_TOOL = 0xFFFFFFFE, + PERF_PMU_TYPE_FAKE = 0xFFFFFFFF, +}; + /** * struct perf_pmu */ @@ -116,7 +142,7 @@ struct perf_pmu { * event read from <sysfs>/bus/event_source/devices/<name>/events/ or * from json events in pmu-events.c. */ - struct list_head aliases; + struct hashmap *aliases; /** * @events_table: The events table for json events in pmu-events.c. */ @@ -127,6 +153,11 @@ struct perf_pmu { uint32_t cpu_json_aliases; /** @sys_json_aliases: Number of json event aliases loaded matching the PMU's identifier. */ uint32_t sys_json_aliases; + /** + * @cpu_common_json_aliases: Number of json events that overlapped with sysfs when + * loading all sysfs events. + */ + uint32_t cpu_common_json_aliases; /** @sysfs_aliases_loaded: Are sysfs aliases loaded from disk? */ bool sysfs_aliases_loaded; /** @@ -165,6 +196,10 @@ struct perf_pmu { * exclude_host. */ bool exclude_guest; + /** + * @checked: Are the missing features checked? + */ + bool checked; } missing_features; /** @@ -173,12 +208,12 @@ struct perf_pmu { struct perf_mem_event *mem_events; }; -/** @perf_pmu__fake: A special global PMU used for testing. 
*/ -extern struct perf_pmu perf_pmu__fake; - struct perf_pmu_info { const char *unit; double scale; + double retirement_latency_mean; + double retirement_latency_min; + double retirement_latency_max; bool per_pkg; bool snapshot; }; @@ -193,6 +228,7 @@ struct pmu_event_info { const char *encoding_desc; const char *topic; const char *pmu_name; + const char *event_type_desc; const char *str; bool deprecated; }; @@ -204,16 +240,18 @@ typedef int (*pmu_format_callback)(void *state, const char *name, int config, void pmu_add_sys_aliases(struct perf_pmu *pmu); int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *head_terms, + bool apply_hardcoded, struct parse_events_error *error); int perf_pmu__config_terms(const struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *terms, - bool zero, struct parse_events_error *error); + bool zero, bool apply_hardcoded, + struct parse_events_error *error); __u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name); int perf_pmu__format_type(struct perf_pmu *pmu, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, struct perf_pmu_info *info, bool *rewrote_terms, - struct parse_events_error *err); + u64 *alternate_hw_config, struct parse_events_error *err); int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb); void perf_pmu_format__set_value(void *format, int config, unsigned long *bits); @@ -227,7 +265,8 @@ bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name); size_t perf_pmu__num_events(struct perf_pmu *pmu); int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, void *state, pmu_event_callback cb); -bool pmu__name_match(const struct perf_pmu *pmu, const char *pmu_name); +bool perf_pmu__name_wildcard_match(const struct perf_pmu *pmu, const char *to_match); +bool perf_pmu__name_no_suffix_match(const struct perf_pmu *pmu, const char *to_match); /** * perf_pmu_is_software - is the PMU a software PMU as in it uses the @@ -251,12 +290,12 @@ void perf_pmu__arch_init(struct perf_pmu *pmu); void pmu_add_cpu_aliases_table(struct perf_pmu *pmu, const struct pmu_events_table *table); -char *perf_pmu__getcpuid(struct perf_pmu *pmu); -const struct pmu_metrics_table *pmu_metrics_table__find(void); bool pmu_uncore_identifier_match(const char *compat, const char *id); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); +struct perf_pmu_caps *perf_pmu__get_cap(struct perf_pmu *pmu, const char *name); + int perf_pmu__caps_parse(struct perf_pmu *pmu); void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, @@ -264,20 +303,40 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, const char *config_name); void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu); -bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok); +bool perf_pmu__wildcard_match(const struct perf_pmu *pmu, const char *wildcard_to_match); -double perf_pmu__cpu_slots_per_cycle(void); int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size); int perf_pmu__pathname_scnprintf(char *buf, size_t size, const char *pmu_name, const char *filename); int perf_pmu__event_source_devices_fd(void); int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, int flags); +int perf_pmu__init(struct perf_pmu *pmu, __u32 type, const char *name); struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, 
const char *lookup_name, bool eager_load); struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus); void perf_pmu__delete(struct perf_pmu *pmu); -struct perf_pmu *perf_pmus__find_core_pmu(void); + const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config); +bool perf_pmu__is_fake(const struct perf_pmu *pmu); + +static inline enum pmu_kind perf_pmu__kind(const struct perf_pmu *pmu) +{ + __u32 type; + + if (!pmu) + return PERF_PMU_KIND_PE; + + type = pmu->type; + if (type <= PERF_PMU_TYPE_PE_END) + return PERF_PMU_KIND_PE; + if (type <= PERF_PMU_TYPE_DRM_END) + return PERF_PMU_KIND_DRM; + if (type <= PERF_PMU_TYPE_HWMON_END) + return PERF_PMU_KIND_HWMON; + if (type == PERF_PMU_TYPE_TOOL) + return PERF_PMU_KIND_TOOL; + return PERF_PMU_KIND_FAKE; +} #endif /* __PMU_H */ diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index b9b4c5eb5002..98be2eb8f1f0 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -3,20 +3,24 @@ #include <linux/list_sort.h> #include <linux/string.h> #include <linux/zalloc.h> +#include <api/io_dir.h> #include <subcmd/pager.h> #include <sys/types.h> #include <ctype.h> -#include <dirent.h> #include <pthread.h> #include <string.h> #include <unistd.h> #include "cpumap.h" #include "debug.h" +#include "drm_pmu.h" #include "evsel.h" #include "pmus.h" #include "pmu.h" +#include "hwmon_pmu.h" +#include "tool_pmu.h" #include "print-events.h" #include "strbuf.h" +#include "string2.h" /* * core_pmus: A PMU belongs to core_pmus if it's name is "cpu" or it's sysfs @@ -35,36 +39,75 @@ */ static LIST_HEAD(core_pmus); static LIST_HEAD(other_pmus); -static bool read_sysfs_core_pmus; -static bool read_sysfs_all_pmus; +enum perf_tool_pmu_type { + PERF_TOOL_PMU_TYPE_PE_CORE, + PERF_TOOL_PMU_TYPE_PE_OTHER, + PERF_TOOL_PMU_TYPE_TOOL, + PERF_TOOL_PMU_TYPE_HWMON, + PERF_TOOL_PMU_TYPE_DRM, + +#define PERF_TOOL_PMU_TYPE_PE_CORE_MASK (1 << PERF_TOOL_PMU_TYPE_PE_CORE) +#define PERF_TOOL_PMU_TYPE_PE_OTHER_MASK (1 << PERF_TOOL_PMU_TYPE_PE_OTHER) +#define PERF_TOOL_PMU_TYPE_TOOL_MASK (1 << PERF_TOOL_PMU_TYPE_TOOL) +#define PERF_TOOL_PMU_TYPE_HWMON_MASK (1 << PERF_TOOL_PMU_TYPE_HWMON) +#define PERF_TOOL_PMU_TYPE_DRM_MASK (1 << PERF_TOOL_PMU_TYPE_DRM) + +#define PERF_TOOL_PMU_TYPE_ALL_MASK (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | \ + PERF_TOOL_PMU_TYPE_PE_OTHER_MASK | \ + PERF_TOOL_PMU_TYPE_TOOL_MASK | \ + PERF_TOOL_PMU_TYPE_HWMON_MASK | \ + PERF_TOOL_PMU_TYPE_DRM_MASK) +}; +static unsigned int read_pmu_types; -static void pmu_read_sysfs(bool core_only); +static void pmu_read_sysfs(unsigned int to_read_pmus); -int pmu_name_len_no_suffix(const char *str, unsigned long *num) +size_t pmu_name_len_no_suffix(const char *str) { int orig_len, len; + bool has_hex_digits = false; orig_len = len = strlen(str); - /* Non-uncore PMUs have their full length, for example, i915. */ - if (!strstarts(str, "uncore_")) - return len; - - /* - * Count trailing digits and '_', if '_{num}' suffix isn't present use - * the full length. - */ - while (len > 0 && isdigit(str[len - 1])) + /* Count trailing digits. */ + while (len > 0 && isxdigit(str[len - 1])) { + if (!isdigit(str[len - 1])) + has_hex_digits = true; len--; + } if (len > 0 && len != orig_len && str[len - 1] == '_') { - if (num) - *num = strtoul(&str[len], NULL, 10); - return len - 1; + /* + * There is a '_{num}' suffix. For decimal suffixes any length + * will do, for hexadecimal ensure more than 2 hex digits so + * that S390's cpum_cf PMU doesn't match. 
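 *
 * Worked examples of the suffix rule (values follow from the code
 * here): "uncore_imc_3" has a decimal suffix, so the function returns
 * strlen("uncore_imc"); "cpum_cf" ends in only two hex digits ("cf"),
 * so its full length is kept; and pmu_name_cmp() below orders
 * "uncore_imc_2" before "uncore_imc_10" because suffixes compare
 * numerically (0x2 < 0x10) rather than as strings.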
+ */ + if (!has_hex_digits || (orig_len - len) > 2) + return len - 1; } + /* Use the full length. */ return orig_len; } +int pmu_name_cmp(const char *lhs_pmu_name, const char *rhs_pmu_name) +{ + unsigned long long lhs_num = 0, rhs_num = 0; + size_t lhs_pmu_name_len = pmu_name_len_no_suffix(lhs_pmu_name); + size_t rhs_pmu_name_len = pmu_name_len_no_suffix(rhs_pmu_name); + int ret = strncmp(lhs_pmu_name, rhs_pmu_name, + lhs_pmu_name_len < rhs_pmu_name_len ? lhs_pmu_name_len : rhs_pmu_name_len); + + if (lhs_pmu_name_len != rhs_pmu_name_len || ret != 0 || lhs_pmu_name_len == 0) + return ret; + + if (lhs_pmu_name_len + 1 < strlen(lhs_pmu_name)) + lhs_num = strtoull(&lhs_pmu_name[lhs_pmu_name_len + 1], NULL, 16); + if (rhs_pmu_name_len + 1 < strlen(rhs_pmu_name)) + rhs_num = strtoull(&rhs_pmu_name[rhs_pmu_name_len + 1], NULL, 16); + + return lhs_num < rhs_num ? -1 : (lhs_num > rhs_num ? 1 : 0); +} + void perf_pmus__destroy(void) { struct perf_pmu *pmu, *tmp; @@ -79,8 +122,7 @@ void perf_pmus__destroy(void) perf_pmu__delete(pmu); } - read_sysfs_core_pmus = false; - read_sysfs_all_pmus = false; + read_pmu_types = 0; } static struct perf_pmu *pmu_find(const char *name) @@ -106,6 +148,7 @@ struct perf_pmu *perf_pmus__find(const char *name) struct perf_pmu *pmu; int dirfd; bool core_pmu; + unsigned int to_read_pmus = 0; /* * Once PMU is loaded it stays in the list, @@ -116,11 +159,11 @@ struct perf_pmu *perf_pmus__find(const char *name) if (pmu) return pmu; - if (read_sysfs_all_pmus) + if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK) return NULL; core_pmu = is_pmu_core(name); - if (core_pmu && read_sysfs_core_pmus) + if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK)) return NULL; dirfd = perf_pmu__event_source_devices_fd(); @@ -128,15 +171,29 @@ struct perf_pmu *perf_pmus__find(const char *name) /*eager_load=*/false); close(dirfd); - if (!pmu) { - /* - * Looking up an inidividual PMU failed. This may mean name is - * an alias, so read the PMUs from sysfs and try to find again. - */ - pmu_read_sysfs(core_pmu); + if (pmu) + return pmu; + + /* Looking up an individual perf event PMU failed, check if a tool PMU should be read. */ + if (!strncmp(name, "hwmon_", 6)) + to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK; + else if (!strncmp(name, "drm_", 4)) + to_read_pmus |= PERF_TOOL_PMU_TYPE_DRM_MASK; + else if (!strcmp(name, "tool")) + to_read_pmus |= PERF_TOOL_PMU_TYPE_TOOL_MASK; + + if (to_read_pmus) { + pmu_read_sysfs(to_read_pmus); pmu = pmu_find(name); + if (pmu) + return pmu; } - return pmu; + /* Read all necessary PMUs from sysfs and see if the PMU is found. */ + to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK; + if (!core_pmu) + to_read_pmus |= PERF_TOOL_PMU_TYPE_PE_OTHER_MASK; + pmu_read_sysfs(to_read_pmus); + return pmu_find(name); } static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name) @@ -153,11 +210,11 @@ static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name) if (pmu) return pmu; - if (read_sysfs_all_pmus) + if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK) return NULL; core_pmu = is_pmu_core(name); - if (core_pmu && read_sysfs_core_pmus) + if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK)) return NULL; return perf_pmu__lookup(core_pmu ? 
&core_pmus : &other_pmus, dirfd, name, @@ -167,63 +224,68 @@ static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name) static int pmus_cmp(void *priv __maybe_unused, const struct list_head *lhs, const struct list_head *rhs) { - unsigned long lhs_num = 0, rhs_num = 0; struct perf_pmu *lhs_pmu = container_of(lhs, struct perf_pmu, list); struct perf_pmu *rhs_pmu = container_of(rhs, struct perf_pmu, list); - const char *lhs_pmu_name = lhs_pmu->name ?: ""; - const char *rhs_pmu_name = rhs_pmu->name ?: ""; - int lhs_pmu_name_len = pmu_name_len_no_suffix(lhs_pmu_name, &lhs_num); - int rhs_pmu_name_len = pmu_name_len_no_suffix(rhs_pmu_name, &rhs_num); - int ret = strncmp(lhs_pmu_name, rhs_pmu_name, - lhs_pmu_name_len < rhs_pmu_name_len ? lhs_pmu_name_len : rhs_pmu_name_len); - if (lhs_pmu_name_len != rhs_pmu_name_len || ret != 0 || lhs_pmu_name_len == 0) - return ret; - - return lhs_num < rhs_num ? -1 : (lhs_num > rhs_num ? 1 : 0); + return pmu_name_cmp(lhs_pmu->name ?: "", rhs_pmu->name ?: ""); } /* Add all pmus in sysfs to pmu list: */ -static void pmu_read_sysfs(bool core_only) +static void pmu_read_sysfs(unsigned int to_read_types) { - int fd; - DIR *dir; - struct dirent *dent; + struct perf_pmu *tool_pmu; - if (read_sysfs_all_pmus || (core_only && read_sysfs_core_pmus)) + if ((read_pmu_types & to_read_types) == to_read_types) { + /* All requested PMU types have been read. */ return; + } - fd = perf_pmu__event_source_devices_fd(); - if (fd < 0) - return; + if (to_read_types & (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | PERF_TOOL_PMU_TYPE_PE_OTHER_MASK)) { + int fd = perf_pmu__event_source_devices_fd(); + struct io_dir dir; + struct io_dirent64 *dent; + bool core_only = (to_read_types & PERF_TOOL_PMU_TYPE_PE_OTHER_MASK) == 0; - dir = fdopendir(fd); - if (!dir) { - close(fd); - return; - } + if (fd < 0) + goto skip_pe_pmus; - while ((dent = readdir(dir))) { - if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) - continue; - if (core_only && !is_pmu_core(dent->d_name)) - continue; - /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */ - perf_pmu__find2(fd, dent->d_name); - } + io_dir__init(&dir, fd); - closedir(dir); - if (list_empty(&core_pmus)) { + while ((dent = io_dir__readdir(&dir)) != NULL) { + if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) + continue; + if (core_only && !is_pmu_core(dent->d_name)) + continue; + /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */ + perf_pmu__find2(fd, dent->d_name); + } + + close(fd); + } +skip_pe_pmus: + if ((to_read_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK) && list_empty(&core_pmus)) { if (!perf_pmu__create_placeholder_core_pmu(&core_pmus)) pr_err("Failure to set up any core PMUs\n"); } list_sort(NULL, &core_pmus, pmus_cmp); - list_sort(NULL, &other_pmus, pmus_cmp); - if (!list_empty(&core_pmus)) { - read_sysfs_core_pmus = true; - if (!core_only) - read_sysfs_all_pmus = true; + + if ((to_read_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) != 0 && + (read_pmu_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) == 0) { + tool_pmu = tool_pmu__new(); + if (tool_pmu) + list_add_tail(&tool_pmu->list, &other_pmus); } + if ((to_read_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) != 0 && + (read_pmu_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) == 0) + perf_pmus__read_hwmon_pmus(&other_pmus); + + if ((to_read_types & PERF_TOOL_PMU_TYPE_DRM_MASK) != 0 && + (read_pmu_types & PERF_TOOL_PMU_TYPE_DRM_MASK) == 0) + perf_pmus__read_drm_pmus(&other_pmus); + + list_sort(NULL, &other_pmus, pmus_cmp); + + read_pmu_types |= to_read_types; } static struct 
perf_pmu *__perf_pmus__find_by_type(unsigned int type)
@@ -244,12 +306,23 @@ static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type)
 
 struct perf_pmu *perf_pmus__find_by_type(unsigned int type)
 {
+	unsigned int to_read_pmus;
 	struct perf_pmu *pmu = __perf_pmus__find_by_type(type);
 
-	if (pmu || read_sysfs_all_pmus)
+	if (pmu || (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK))
 		return pmu;
 
-	pmu_read_sysfs(/*core_only=*/false);
+	if (type >= PERF_PMU_TYPE_PE_START && type <= PERF_PMU_TYPE_PE_END) {
+		to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK |
+			PERF_TOOL_PMU_TYPE_PE_OTHER_MASK;
+	} else if (type >= PERF_PMU_TYPE_DRM_START && type <= PERF_PMU_TYPE_DRM_END) {
+		to_read_pmus = PERF_TOOL_PMU_TYPE_DRM_MASK;
+	} else if (type >= PERF_PMU_TYPE_HWMON_START && type <= PERF_PMU_TYPE_HWMON_END) {
+		to_read_pmus = PERF_TOOL_PMU_TYPE_HWMON_MASK;
+	} else {
+		to_read_pmus = PERF_TOOL_PMU_TYPE_TOOL_MASK;
+	}
+	pmu_read_sysfs(to_read_pmus);
 	pmu = __perf_pmus__find_by_type(type);
 	return pmu;
 }
@@ -263,7 +336,7 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu)
 	bool use_core_pmus = !pmu || pmu->is_core;
 
 	if (!pmu) {
-		pmu_read_sysfs(/*core_only=*/false);
+		pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK);
 		pmu = list_prepare_entry(pmu, &core_pmus, list);
 	}
 	if (use_core_pmus) {
@@ -281,7 +354,7 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu)
 struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu)
 {
 	if (!pmu) {
-		pmu_read_sysfs(/*core_only=*/true);
+		pmu_read_sysfs(PERF_TOOL_PMU_TYPE_PE_CORE_MASK);
 		return list_first_entry_or_null(&core_pmus, typeof(*pmu), list);
 	}
 	list_for_each_entry_continue(pmu, &core_pmus, list)
@@ -290,6 +363,92 @@ struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu)
 	return NULL;
 }
 
+struct perf_pmu *perf_pmus__scan_for_event(struct perf_pmu *pmu, const char *event)
+{
+	bool use_core_pmus = !pmu || pmu->is_core;
+
+	if (!pmu) {
+		/* Hwmon filename values that aren't used. */
+		enum hwmon_type type;
+		int number;
+		/*
+		 * Core PMUs, other sysfs PMUs and tool PMU can take all event
+		 * types or aren't worth optimizing for.
+		 */
+		unsigned int to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK |
+			PERF_TOOL_PMU_TYPE_PE_OTHER_MASK |
+			PERF_TOOL_PMU_TYPE_TOOL_MASK;
+
+		/* Could the event be a hwmon event? */
+		if (parse_hwmon_filename(event, &type, &number, /*item=*/NULL, /*alarm=*/NULL))
+			to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK;
+
+		/* Could the event be a DRM event? */
+		if (strlen(event) > 4 && strncmp("drm-", event, 4) == 0)
+			to_read_pmus |= PERF_TOOL_PMU_TYPE_DRM_MASK;
+
+		pmu_read_sysfs(to_read_pmus);
+		pmu = list_prepare_entry(pmu, &core_pmus, list);
+	}
+	if (use_core_pmus) {
+		list_for_each_entry_continue(pmu, &core_pmus, list)
+			return pmu;
+
+		pmu = NULL;
+		pmu = list_prepare_entry(pmu, &other_pmus, list);
+	}
+	list_for_each_entry_continue(pmu, &other_pmus, list)
+		return pmu;
+	return NULL;
+}
+
+struct perf_pmu *perf_pmus__scan_matching_wildcard(struct perf_pmu *pmu, const char *wildcard)
+{
+	bool use_core_pmus = !pmu || pmu->is_core;
+
+	if (!pmu) {
+		/*
+		 * Core PMUs, other sysfs PMUs and tool PMU can have any name or
+		 * aren't worth optimizing for.
+		 */
+		unsigned int to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK |
+			PERF_TOOL_PMU_TYPE_PE_OTHER_MASK |
+			PERF_TOOL_PMU_TYPE_TOOL_MASK;
+
+		/*
+		 * Hwmon PMUs have an alias from a sysfs name like hwmon0,
+		 * hwmon1, etc. or have a name of hwmon_<name>. They therefore
+		 * can only have a wildcard match if the wildcard begins with
+		 * "hwmon".
Similarly drm PMUs must start "drm_", avoid reading + * such events unless the PMU could match. + */ + if (strisglob(wildcard)) { + to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK | + PERF_TOOL_PMU_TYPE_DRM_MASK; + } else if (strlen(wildcard) >= 4 && strncmp("drm_", wildcard, 4) == 0) { + to_read_pmus |= PERF_TOOL_PMU_TYPE_DRM_MASK; + } else if (strlen(wildcard) >= 5 && strncmp("hwmon", wildcard, 5) == 0) { + to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK; + } + + pmu_read_sysfs(to_read_pmus); + pmu = list_prepare_entry(pmu, &core_pmus, list); + } + if (use_core_pmus) { + list_for_each_entry_continue(pmu, &core_pmus, list) { + if (perf_pmu__wildcard_match(pmu, wildcard)) + return pmu; + } + pmu = NULL; + pmu = list_prepare_entry(pmu, &other_pmus, list); + } + list_for_each_entry_continue(pmu, &other_pmus, list) { + if (perf_pmu__wildcard_match(pmu, wildcard)) + return pmu; + } + return NULL; +} + static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu) { bool use_core_pmus = !pmu || pmu->is_core; @@ -297,14 +456,14 @@ static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu) const char *last_pmu_name = (pmu && pmu->name) ? pmu->name : ""; if (!pmu) { - pmu_read_sysfs(/*core_only=*/false); + pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK); pmu = list_prepare_entry(pmu, &core_pmus, list); } else - last_pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "", NULL); + last_pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: ""); if (use_core_pmus) { list_for_each_entry_continue(pmu, &core_pmus, list) { - int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "", /*num=*/NULL); + int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: ""); if (last_pmu_name_len == pmu_name_len && !strncmp(last_pmu_name, pmu->name ?: "", pmu_name_len)) @@ -316,7 +475,7 @@ static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu) pmu = list_prepare_entry(pmu, &other_pmus, list); } list_for_each_entry_continue(pmu, &other_pmus, list) { - int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "", /*num=*/NULL); + int pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: ""); if (last_pmu_name_len == pmu_name_len && !strncmp(last_pmu_name, pmu->name ?: "", pmu_name_len)) @@ -360,6 +519,7 @@ struct sevent { const char *encoding_desc; const char *topic; const char *pmu_name; + const char *event_type_desc; bool deprecated; }; @@ -422,6 +582,7 @@ static int perf_pmus__print_pmu_events__callback(void *vstate, pr_err("Unexpected event %s/%s/\n", info->pmu->name, info->name); return 1; } + assert(info->pmu != NULL || info->name != NULL); s = &state->aliases[state->index]; s->pmu = info->pmu; #define COPY_STR(str) s->str = info->str ? 
strdup(info->str) : NULL @@ -433,6 +594,7 @@ static int perf_pmus__print_pmu_events__callback(void *vstate, COPY_STR(encoding_desc); COPY_STR(topic); COPY_STR(pmu_name); + COPY_STR(event_type_desc); #undef COPY_STR s->deprecated = info->deprecated; state->index++; @@ -477,20 +639,22 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p qsort(aliases, len, sizeof(struct sevent), cmp_sevent); for (int j = 0; j < len; j++) { /* Skip duplicates */ - if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1])) - continue; + if (j < len - 1 && pmu_alias_is_duplicate(&aliases[j], &aliases[j + 1])) + goto free; print_cb->print_event(print_state, - aliases[j].pmu_name, aliases[j].topic, + aliases[j].pmu_name, + aliases[j].pmu->type, aliases[j].name, aliases[j].alias, aliases[j].scale_unit, aliases[j].deprecated, - "Kernel PMU event", + aliases[j].event_type_desc, aliases[j].desc, aliases[j].long_desc, aliases[j].encoding_desc); +free: zfree(&aliases[j].name); zfree(&aliases[j].alias); zfree(&aliases[j].scale_unit); @@ -499,6 +663,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p zfree(&aliases[j].encoding_desc); zfree(&aliases[j].topic); zfree(&aliases[j].pmu_name); + zfree(&aliases[j].event_type_desc); } if (printed && pager_in_use()) printf("\n"); @@ -566,7 +731,7 @@ void perf_pmus__print_raw_pmu_events(const struct print_callbacks *print_cb, voi .long_string = STRBUF_INIT, .num_formats = 0, }; - int len = pmu_name_len_no_suffix(pmu->name, /*num=*/NULL); + int len = pmu_name_len_no_suffix(pmu->name); const char *desc = "(see 'man perf-list' or 'man perf-record' on how to encode it)"; if (!pmu->is_core) @@ -585,6 +750,7 @@ void perf_pmus__print_raw_pmu_events(const struct print_callbacks *print_cb, voi print_cb->print_event(print_state, /*topic=*/NULL, /*pmu_name=*/NULL, + pmu->type, format_args.short_string.buf, /*event_alias=*/NULL, /*scale_unit=*/NULL, @@ -650,47 +816,40 @@ bool perf_pmus__supports_extended_type(void) return perf_pmus__do_support_extended_type; } -char *perf_pmus__default_pmu_name(void) +struct perf_pmu *perf_pmus__find_by_attr(const struct perf_event_attr *attr) { - int fd; - DIR *dir; - struct dirent *dent; - char *result = NULL; + struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); + u32 type = attr->type; + bool legacy_core_type = type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE; - if (!list_empty(&core_pmus)) - return strdup(list_first_entry(&core_pmus, struct perf_pmu, list)->name); + if (!pmu && legacy_core_type && perf_pmus__supports_extended_type()) { + type = attr->config >> PERF_PMU_TYPE_SHIFT; - fd = perf_pmu__event_source_devices_fd(); - if (fd < 0) - return strdup("cpu"); - - dir = fdopendir(fd); - if (!dir) { - close(fd); - return strdup("cpu"); + pmu = perf_pmus__find_by_type(type); } - - while ((dent = readdir(dir))) { - if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) - continue; - if (is_pmu_core(dent->d_name)) { - result = strdup(dent->d_name); - break; - } + if (!pmu && (legacy_core_type || type == PERF_TYPE_RAW)) { + /* + * For legacy events, if there was no extended type info then + * assume the PMU is the first core PMU. + * + * On architectures like ARM there is no sysfs PMU with type + * PERF_TYPE_RAW, assume the RAW events are going to be handled + * by the first core PMU. 
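 *
 * Encoding sketch (PERF_PMU_TYPE_SHIFT is 32 in the perf UAPI): a legacy
 * hardware event aimed at a specific core PMU arrives as
 *
 *	attr.type   = PERF_TYPE_HARDWARE;
 *	attr.config = ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT)
 *		    | PERF_COUNT_HW_INSTRUCTIONS;
 *
 * and perf_pmus__find_by_attr() recovers the PMU from
 * attr.config >> PERF_PMU_TYPE_SHIFT when the direct type lookup fails.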
+ */ + pmu = perf_pmus__find_core_pmu(); } - - closedir(dir); - return result ?: strdup("cpu"); + return pmu; } struct perf_pmu *evsel__find_pmu(const struct evsel *evsel) { struct perf_pmu *pmu = evsel->pmu; - if (!pmu) { - pmu = perf_pmus__find_by_type(evsel->core.attr.type); - ((struct evsel *)evsel)->pmu = pmu; - } + if (pmu) + return pmu; + + pmu = perf_pmus__find_by_attr(&evsel->core.attr); + ((struct evsel *)evsel)->pmu = pmu; return pmu; } @@ -708,3 +867,21 @@ struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name) */ return perf_pmu__lookup(&other_pmus, test_sysfs_dirfd, name, /*eager_load=*/true); } + +struct perf_pmu *perf_pmus__add_test_hwmon_pmu(const char *hwmon_dir, + const char *sysfs_name, + const char *name) +{ + return hwmon_pmu__new(&other_pmus, hwmon_dir, sysfs_name, name); +} + +struct perf_pmu *perf_pmus__fake_pmu(void) +{ + static struct perf_pmu fake = { + .name = "fake", + .type = PERF_PMU_TYPE_FAKE, + .format = LIST_HEAD_INIT(fake.format), + }; + + return &fake; +} diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h index 9d4ded80b8e9..7cb36863711a 100644 --- a/tools/perf/util/pmus.h +++ b/tools/perf/util/pmus.h @@ -2,18 +2,27 @@ #ifndef __PMUS_H #define __PMUS_H +#include <stdbool.h> +#include <stddef.h> + +struct perf_event_attr; struct perf_pmu; struct print_callbacks; -int pmu_name_len_no_suffix(const char *str, unsigned long *num); +size_t pmu_name_len_no_suffix(const char *str); +/* Exposed for testing only. */ +int pmu_name_cmp(const char *lhs_pmu_name, const char *rhs_pmu_name); void perf_pmus__destroy(void); struct perf_pmu *perf_pmus__find(const char *name); struct perf_pmu *perf_pmus__find_by_type(unsigned int type); +struct perf_pmu *perf_pmus__find_by_attr(const struct perf_event_attr *attr); struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu); struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu); +struct perf_pmu *perf_pmus__scan_for_event(struct perf_pmu *pmu, const char *event); +struct perf_pmu *perf_pmus__scan_matching_wildcard(struct perf_pmu *pmu, const char *wildcard); const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str); @@ -22,8 +31,12 @@ void perf_pmus__print_raw_pmu_events(const struct print_callbacks *print_cb, voi bool perf_pmus__have_event(const char *pname, const char *name); int perf_pmus__num_core_pmus(void); bool perf_pmus__supports_extended_type(void); -char *perf_pmus__default_pmu_name(void); struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name); +struct perf_pmu *perf_pmus__add_test_hwmon_pmu(const char *hwmon_dir, + const char *sysfs_name, + const char *name); +struct perf_pmu *perf_pmus__fake_pmu(void); +struct perf_pmu *perf_pmus__find_core_pmu(void); #endif /* __PMUS_H */ diff --git a/tools/perf/util/powerpc-vpadtl.c b/tools/perf/util/powerpc-vpadtl.c new file mode 100644 index 000000000000..d1c3396f182f --- /dev/null +++ b/tools/perf/util/powerpc-vpadtl.c @@ -0,0 +1,733 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * VPA DTL PMU support + */ + +#include <linux/string.h> +#include <errno.h> +#include <inttypes.h> +#include "color.h" +#include "evlist.h" +#include "session.h" +#include "auxtrace.h" +#include "data.h" +#include "machine.h" +#include "debug.h" +#include "powerpc-vpadtl.h" +#include "sample.h" +#include "tool.h" + +/* + * Structure to save the auxtrace queue + */ +struct powerpc_vpadtl { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + u32 auxtrace_type; + struct perf_session 
*session;
+	struct machine *machine;
+	u32 pmu_type;
+	u64 sample_id;
+};
+
+struct boottb_freq {
+	u64 boot_tb;
+	u64 tb_freq;
+	u64 timebase;
+	u64 padded[3];
+};
+
+struct powerpc_vpadtl_queue {
+	struct powerpc_vpadtl *vpa;
+	unsigned int queue_nr;
+	struct auxtrace_buffer *buffer;
+	struct thread *thread;
+	bool on_heap;
+	struct powerpc_vpadtl_entry *dtl;
+	u64 timestamp;
+	unsigned long pkt_len;
+	unsigned long buf_len;
+	u64 boot_tb;
+	u64 tb_freq;
+	unsigned int tb_buffer;
+	unsigned int size;
+	bool done;
+	pid_t pid;
+	pid_t tid;
+	int cpu;
+};
+
+const char *dispatch_reasons[11] = {
+	"external_interrupt",
+	"firmware_internal_event",
+	"H_PROD",
+	"decrementer_interrupt",
+	"system_reset",
+	"firmware_internal_event",
+	"conferred_cycles",
+	"time_slice",
+	"virtual_memory_page_fault",
+	"expropriated_adjunct",
+	"priv_doorbell"};
+
+const char *preempt_reasons[10] = {
+	"unused",
+	"firmware_internal_event",
+	"H_CEDE",
+	"H_CONFER",
+	"time_slice",
+	"migration_hibernation_page_fault",
+	"virtual_memory_page_fault",
+	"H_CONFER_ADJUNCT",
+	"hcall_adjunct",
+	"HDEC_adjunct"};
+
+#define dtl_entry_size sizeof(struct powerpc_vpadtl_entry)
+
+/*
+ * Function to dump the dispatch trace data when perf report
+ * is invoked with -D
+ */
+static void powerpc_vpadtl_dump(struct powerpc_vpadtl *vpa __maybe_unused,
+				unsigned char *buf, size_t len)
+{
+	struct powerpc_vpadtl_entry *dtl;
+	int pkt_len, pos = 0;
+	const char *color = PERF_COLOR_BLUE;
+
+	color_fprintf(stdout, color,
+		      ". ... VPA DTL PMU data: size %zu bytes, entries is %zu\n",
+		      len, len/dtl_entry_size);
+
+	if (len % dtl_entry_size)
+		len = len - (len % dtl_entry_size);
+
+	while (len) {
+		pkt_len = dtl_entry_size;
+		printf(".");
+		color_fprintf(stdout, color, " %08x: ", pos);
+		dtl = (struct powerpc_vpadtl_entry *)buf;
+		if (dtl->timebase != 0) {
+			printf("dispatch_reason:%s, preempt_reason:%s, "
+			       "enqueue_to_dispatch_time:%d, ready_to_enqueue_time:%d, "
+			       "waiting_to_ready_time:%d\n",
+			       dispatch_reasons[dtl->dispatch_reason],
+			       preempt_reasons[dtl->preempt_reason],
+			       be32_to_cpu(dtl->enqueue_to_dispatch_time),
+			       be32_to_cpu(dtl->ready_to_enqueue_time),
+			       be32_to_cpu(dtl->waiting_to_ready_time));
+		} else {
+			struct boottb_freq *boot_tb = (struct boottb_freq *)buf;
+
+			printf("boot_tb: %" PRIu64 ", tb_freq: %" PRIu64 "\n",
+			       boot_tb->boot_tb, boot_tb->tb_freq);
+		}
+
+		pos += pkt_len;
+		buf += pkt_len;
+		len -= pkt_len;
+	}
+}
+
+static unsigned long long powerpc_vpadtl_timestamp(struct powerpc_vpadtl_queue *vpaq)
+{
+	struct powerpc_vpadtl_entry *record = vpaq->dtl;
+	unsigned long long timestamp = 0;
+	unsigned long long boot_tb;
+	unsigned long long diff;
+	double result, div;
+	double boot_freq;
+	/*
+	 * Formula used to get a timestamp that can be correlated with
+	 * other perf events:
+	 * ((timebase from DTL entry - boot time) / frequency) * 1000000000
+	 */
+	if (record->timebase) {
+		boot_tb = vpaq->boot_tb;
+		boot_freq = vpaq->tb_freq;
+		diff = be64_to_cpu(record->timebase) - boot_tb;
+		div = diff / boot_freq;
+		result = div;
+		result = result * 1000000000;
+		timestamp = result;
+	}
+
+	return timestamp;
+}
+
+static struct powerpc_vpadtl *session_to_vpa(struct perf_session *session)
+{
+	return container_of(session->auxtrace, struct powerpc_vpadtl, auxtrace);
+}
+
+static void powerpc_vpadtl_dump_event(struct powerpc_vpadtl *vpa, unsigned char *buf,
+				      size_t len)
+{
+	printf(".\n");
+	powerpc_vpadtl_dump(vpa, buf, len);
+}
+
+/*
+ * Generate perf sample for each entry in the dispatch trace log.
+ * - sample ip is picked from srr0 field of powerpc_vpadtl_entry + * - sample cpu is logical cpu. + * - cpumode is set to PERF_RECORD_MISC_KERNEL + * - Additionally save the details in raw_data of sample. This + * is to print the relevant fields in perf_sample__fprintf_synth() + * when called from builtin-script + */ +static int powerpc_vpadtl_sample(struct powerpc_vpadtl_entry *record, + struct powerpc_vpadtl *vpa, u64 save, int cpu) +{ + struct perf_sample sample; + union perf_event event; + + sample.ip = be64_to_cpu(record->srr0); + sample.period = 1; + sample.cpu = cpu; + sample.id = vpa->sample_id; + sample.callchain = NULL; + sample.branch_stack = NULL; + memset(&event, 0, sizeof(event)); + sample.cpumode = PERF_RECORD_MISC_KERNEL; + sample.time = save; + sample.raw_data = record; + sample.raw_size = sizeof(record); + event.sample.header.type = PERF_RECORD_SAMPLE; + event.sample.header.misc = sample.cpumode; + event.sample.header.size = sizeof(struct perf_event_header); + + if (perf_session__deliver_synth_event(vpa->session, &event, &sample)) { + pr_debug("Failed to create sample for dtl entry\n"); + return -1; + } + + return 0; +} + +static int powerpc_vpadtl_get_buffer(struct powerpc_vpadtl_queue *vpaq) +{ + struct auxtrace_buffer *buffer = vpaq->buffer; + struct auxtrace_queues *queues = &vpaq->vpa->queues; + struct auxtrace_queue *queue; + + queue = &queues->queue_array[vpaq->queue_nr]; + buffer = auxtrace_buffer__next(queue, buffer); + + if (!buffer) + return 0; + + vpaq->buffer = buffer; + vpaq->size = buffer->size; + + /* If the aux_buffer doesn't have data associated, try to load it */ + if (!buffer->data) { + /* get the file desc associated with the perf data file */ + int fd = perf_data__fd(vpaq->vpa->session->data); + + buffer->data = auxtrace_buffer__get_data(buffer, fd); + if (!buffer->data) + return -ENOMEM; + } + + vpaq->buf_len = buffer->size; + + if (buffer->size % dtl_entry_size) + vpaq->buf_len = buffer->size - (buffer->size % dtl_entry_size); + + if (vpaq->tb_buffer != buffer->buffer_nr) { + vpaq->pkt_len = 0; + vpaq->tb_buffer = 0; + } + + return 1; +} + +/* + * The first entry in the queue for VPA DTL PMU has the boot timebase, + * frequency details which are needed to get timestamp which is required to + * correlate with other events. Save the boot_tb and tb_freq as part of + * powerpc_vpadtl_queue. The very next entry is the actual trace data to + * be returned. 
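 *
 * A worked instance of the conversion in powerpc_vpadtl_timestamp()
 * (made-up numbers; the POWER timebase typically ticks at 512 MHz):
 * with boot_tb = 1000, tb_freq = 512000000 and an entry timebase of
 * 512001000, the difference is 512000000 ticks, i.e. 1.0 s, giving a
 * timestamp of 1000000000 ns that merges cleanly with other perf events.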
+ */
+static int powerpc_vpadtl_decode(struct powerpc_vpadtl_queue *vpaq)
+{
+	int ret;
+	char *buf;
+	struct boottb_freq *boottb;
+
+	ret = powerpc_vpadtl_get_buffer(vpaq);
+	if (ret <= 0)
+		return ret;
+
+	boottb = (struct boottb_freq *)vpaq->buffer->data;
+	if (boottb->timebase == 0) {
+		vpaq->boot_tb = boottb->boot_tb;
+		vpaq->tb_freq = boottb->tb_freq;
+		vpaq->pkt_len += dtl_entry_size;
+	}
+
+	buf = vpaq->buffer->data;
+	buf += vpaq->pkt_len;
+	vpaq->dtl = (struct powerpc_vpadtl_entry *)buf;
+
+	vpaq->tb_buffer = vpaq->buffer->buffer_nr;
+	vpaq->buffer = NULL;
+	vpaq->buf_len = 0;
+
+	return 1;
+}
+
+static int powerpc_vpadtl_decode_all(struct powerpc_vpadtl_queue *vpaq)
+{
+	int ret;
+	unsigned char *buf;
+
+	if (!vpaq->buf_len || vpaq->pkt_len == vpaq->size) {
+		ret = powerpc_vpadtl_get_buffer(vpaq);
+		if (ret <= 0)
+			return ret;
+	}
+
+	if (vpaq->buffer) {
+		buf = vpaq->buffer->data;
+		buf += vpaq->pkt_len;
+		vpaq->dtl = (struct powerpc_vpadtl_entry *)buf;
+		if ((long long)be64_to_cpu(vpaq->dtl->timebase) <= 0) {
+			if (vpaq->pkt_len != dtl_entry_size && vpaq->buf_len) {
+				vpaq->pkt_len += dtl_entry_size;
+				vpaq->buf_len -= dtl_entry_size;
+			}
+			return -1;
+		}
+		vpaq->pkt_len += dtl_entry_size;
+		vpaq->buf_len -= dtl_entry_size;
+	} else {
+		return 0;
+	}
+
+	return 1;
+}
+
+static int powerpc_vpadtl_run_decoder(struct powerpc_vpadtl_queue *vpaq, u64 *timestamp)
+{
+	struct powerpc_vpadtl *vpa = vpaq->vpa;
+	struct powerpc_vpadtl_entry *record;
+	int ret;
+	unsigned long long vpaq_timestamp;
+
+	while (1) {
+		ret = powerpc_vpadtl_decode_all(vpaq);
+		if (!ret) {
+			pr_debug("All data in the queue has been processed.\n");
+			return 1;
+		}
+
+		/*
+		 * An error was detected while decoding the VPA PMU trace.
+		 * Continue with the next trace data to find more DTL entries.
+		 */
+		if (ret < 0)
+			continue;
+
+		record = vpaq->dtl;
+
+		vpaq_timestamp = powerpc_vpadtl_timestamp(vpaq);
+
+		/* Update timestamp for the last record */
+		if (vpaq_timestamp > vpaq->timestamp)
+			vpaq->timestamp = vpaq_timestamp;
+
+		/*
+		 * If the timestamp of the queue is later than the timestamp
+		 * of the incoming perf event, bail out so that the perf
+		 * event can be processed first.
+		 */
+		if (vpaq->timestamp >= *timestamp) {
+			*timestamp = vpaq->timestamp;
+			vpaq->pkt_len -= dtl_entry_size;
+			vpaq->buf_len += dtl_entry_size;
+			return 0;
+		}
+
+		ret = powerpc_vpadtl_sample(record, vpa, vpaq_timestamp, vpaq->cpu);
+		if (ret)
+			continue;
+	}
+	return 0;
+}
+
+/*
+ * For each PERF_RECORD_XX record, compare the timestamp of the perf record
+ * with the timestamp of the top element in the auxtrace heap. Process the
+ * auxtrace queue if the timestamp of the element from the heap is lower
+ * than the timestamp from the entry in the perf record.
+ *
+ * Update the timestamp of the auxtrace heap with the timestamp
+ * of the last processed entry from the auxtrace buffer.
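+ *
+ * For example, if the heap top has ordinal 1000 and the incoming perf
+ * record carries time 1500, DTL samples are synthesized until an entry
+ * with a timestamp >= 1500 is decoded; that entry is pushed back and the
+ * queue is re-added to the heap with the new ordinal.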
+ */
+static int powerpc_vpadtl_process_queues(struct powerpc_vpadtl *vpa, u64 timestamp)
+{
+	unsigned int queue_nr;
+	u64 ts;
+	int ret;
+
+	while (1) {
+		struct auxtrace_queue *queue;
+		struct powerpc_vpadtl_queue *vpaq;
+
+		if (!vpa->heap.heap_cnt)
+			return 0;
+
+		if (vpa->heap.heap_array[0].ordinal >= timestamp)
+			return 0;
+
+		queue_nr = vpa->heap.heap_array[0].queue_nr;
+		queue = &vpa->queues.queue_array[queue_nr];
+		vpaq = queue->priv;
+
+		auxtrace_heap__pop(&vpa->heap);
+
+		if (vpa->heap.heap_cnt) {
+			ts = vpa->heap.heap_array[0].ordinal + 1;
+			if (ts > timestamp)
+				ts = timestamp;
+		} else {
+			ts = timestamp;
+		}
+
+		ret = powerpc_vpadtl_run_decoder(vpaq, &ts);
+		if (ret < 0) {
+			auxtrace_heap__add(&vpa->heap, queue_nr, ts);
+			return ret;
+		}
+
+		if (!ret) {
+			ret = auxtrace_heap__add(&vpa->heap, queue_nr, ts);
+			if (ret < 0)
+				return ret;
+		} else {
+			vpaq->on_heap = false;
+		}
+	}
+	return 0;
+}
+
+static struct powerpc_vpadtl_queue *powerpc_vpadtl__alloc_queue(struct powerpc_vpadtl *vpa,
+		unsigned int queue_nr)
+{
+	struct powerpc_vpadtl_queue *vpaq;
+
+	vpaq = zalloc(sizeof(*vpaq));
+	if (!vpaq)
+		return NULL;
+
+	vpaq->vpa = vpa;
+	vpaq->queue_nr = queue_nr;
+
+	return vpaq;
+}
+
+/*
+ * When the Dispatch Trace Log data is collected along with other events
+ * like sched tracepoint events, it needs to be correlated and presented
+ * interleaved with these events. Perf events can be collected in parallel
+ * across the CPUs.
+ *
+ * An auxtrace_queue is created for each CPU. Data within each queue is in
+ * increasing order of timestamp. Allocate and set up the auxtrace queues
+ * here. All auxtrace queues are maintained in the auxtrace heap in
+ * increasing order of timestamp, so the lowest timestamp (the entries to
+ * be processed first) is always on top of the heap.
+ *
+ * To add to the auxtrace heap, fetch the timestamp from the first DTL
+ * entry of each queue.
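+ *
+ * For example, with two CPUs:
+ *
+ *   cpu0 queue: entries at t=10, t=40, ...   heap: [(cpu0, 10), (cpu1, 25)]
+ *   cpu1 queue: entries at t=25, t=30, ...
+ *
+ * so cpu0's entry at t=10 is processed first.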
+ */ +static int powerpc_vpadtl__setup_queue(struct powerpc_vpadtl *vpa, + struct auxtrace_queue *queue, + unsigned int queue_nr) +{ + struct powerpc_vpadtl_queue *vpaq = queue->priv; + + if (list_empty(&queue->head) || vpaq) + return 0; + + vpaq = powerpc_vpadtl__alloc_queue(vpa, queue_nr); + if (!vpaq) + return -ENOMEM; + + queue->priv = vpaq; + + if (queue->cpu != -1) + vpaq->cpu = queue->cpu; + + if (!vpaq->on_heap) { + int ret; +retry: + ret = powerpc_vpadtl_decode(vpaq); + if (!ret) + return 0; + + if (ret < 0) + goto retry; + + vpaq->timestamp = powerpc_vpadtl_timestamp(vpaq); + + ret = auxtrace_heap__add(&vpa->heap, queue_nr, vpaq->timestamp); + if (ret) + return ret; + vpaq->on_heap = true; + } + + return 0; +} + +static int powerpc_vpadtl__setup_queues(struct powerpc_vpadtl *vpa) +{ + unsigned int i; + int ret; + + for (i = 0; i < vpa->queues.nr_queues; i++) { + ret = powerpc_vpadtl__setup_queue(vpa, &vpa->queues.queue_array[i], i); + if (ret) + return ret; + } + + return 0; +} + +static int powerpc_vpadtl__update_queues(struct powerpc_vpadtl *vpa) +{ + if (vpa->queues.new_data) { + vpa->queues.new_data = false; + return powerpc_vpadtl__setup_queues(vpa); + } + + return 0; +} + +static int powerpc_vpadtl_process_event(struct perf_session *session, + union perf_event *event __maybe_unused, + struct perf_sample *sample, + const struct perf_tool *tool) +{ + struct powerpc_vpadtl *vpa = session_to_vpa(session); + int err = 0; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) { + pr_err("VPA requires ordered events\n"); + return -EINVAL; + } + + if (sample->time) { + err = powerpc_vpadtl__update_queues(vpa); + if (err) + return err; + + err = powerpc_vpadtl_process_queues(vpa, sample->time); + } + + return err; +} + +/* + * Process PERF_RECORD_AUXTRACE records + */ +static int powerpc_vpadtl_process_auxtrace_event(struct perf_session *session, + union perf_event *event, + const struct perf_tool *tool __maybe_unused) +{ + struct powerpc_vpadtl *vpa = session_to_vpa(session); + struct auxtrace_buffer *buffer; + int fd = perf_data__fd(session->data); + off_t data_offset; + int err; + + if (!dump_trace) + return 0; + + if (perf_data__is_pipe(session->data)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&vpa->queues, session, event, + data_offset, &buffer); + + if (err) + return err; + + /* Dump here now we have copied a piped trace out of the pipe */ + if (auxtrace_buffer__get_data(buffer, fd)) { + powerpc_vpadtl_dump_event(vpa, buffer->data, buffer->size); + auxtrace_buffer__put_data(buffer); + } + + return 0; +} + +static int powerpc_vpadtl_flush(struct perf_session *session __maybe_unused, + const struct perf_tool *tool __maybe_unused) +{ + return 0; +} + +static void powerpc_vpadtl_free_events(struct perf_session *session) +{ + struct powerpc_vpadtl *vpa = session_to_vpa(session); + struct auxtrace_queues *queues = &vpa->queues; + + for (unsigned int i = 0; i < queues->nr_queues; i++) + zfree(&queues->queue_array[i].priv); + + auxtrace_queues__free(queues); +} + +static void powerpc_vpadtl_free(struct perf_session *session) +{ + struct powerpc_vpadtl *vpa = session_to_vpa(session); + + auxtrace_heap__free(&vpa->heap); + powerpc_vpadtl_free_events(session); + session->auxtrace = NULL; + free(vpa); +} + +static const char * const powerpc_vpadtl_info_fmts[] = { + [POWERPC_VPADTL_TYPE] = " PMU Type %"PRId64"\n", +}; + +static void powerpc_vpadtl_print_info(__u64 *arr) 
+{ + if (!dump_trace) + return; + + fprintf(stdout, powerpc_vpadtl_info_fmts[POWERPC_VPADTL_TYPE], arr[POWERPC_VPADTL_TYPE]); +} + +static void set_event_name(struct evlist *evlist, u64 id, + const char *name) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->core.id && evsel->core.id[0] == id) { + if (evsel->name) + zfree(&evsel->name); + evsel->name = strdup(name); + break; + } + } +} + +static int +powerpc_vpadtl_synth_events(struct powerpc_vpadtl *vpa, struct perf_session *session) +{ + struct evlist *evlist = session->evlist; + struct evsel *evsel; + struct perf_event_attr attr; + bool found = false; + u64 id; + int err; + + evlist__for_each_entry(evlist, evsel) { + if (strstarts(evsel->name, "vpa_dtl")) { + found = true; + break; + } + } + + if (!found) { + pr_debug("No selected events with VPA trace data\n"); + return 0; + } + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.size = sizeof(struct perf_event_attr); + attr.sample_type = evsel->core.attr.sample_type; + attr.sample_id_all = evsel->core.attr.sample_id_all; + attr.type = PERF_TYPE_SYNTH; + attr.config = PERF_SYNTH_POWERPC_VPA_DTL; + + /* create new id val to be a fixed offset from evsel id */ + id = auxtrace_synth_id_range_start(evsel); + + err = perf_session__deliver_synth_attr_event(session, &attr, id); + if (err) + return err; + + vpa->sample_id = id; + set_event_name(evlist, id, "vpa-dtl"); + + return 0; +} + +/* + * Process the PERF_RECORD_AUXTRACE_INFO records and setup + * the infrastructure to process auxtrace events. PERF_RECORD_AUXTRACE_INFO + * is processed first since it is of type perf_user_event_type. + * Initialise the aux buffer queues using auxtrace_queues__init(). + * auxtrace_queue is created for each CPU. + */ +int powerpc_vpadtl_process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; + size_t min_sz = sizeof(u64) * POWERPC_VPADTL_TYPE; + struct powerpc_vpadtl *vpa; + int err; + + if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + + min_sz) + return -EINVAL; + + vpa = zalloc(sizeof(struct powerpc_vpadtl)); + if (!vpa) + return -ENOMEM; + + err = auxtrace_queues__init(&vpa->queues); + if (err) + goto err_free; + + vpa->session = session; + vpa->machine = &session->machines.host; /* No kvm support */ + vpa->auxtrace_type = auxtrace_info->type; + vpa->pmu_type = auxtrace_info->priv[POWERPC_VPADTL_TYPE]; + + vpa->auxtrace.process_event = powerpc_vpadtl_process_event; + vpa->auxtrace.process_auxtrace_event = powerpc_vpadtl_process_auxtrace_event; + vpa->auxtrace.flush_events = powerpc_vpadtl_flush; + vpa->auxtrace.free_events = powerpc_vpadtl_free_events; + vpa->auxtrace.free = powerpc_vpadtl_free; + session->auxtrace = &vpa->auxtrace; + + powerpc_vpadtl_print_info(&auxtrace_info->priv[0]); + + if (dump_trace) + return 0; + + err = powerpc_vpadtl_synth_events(vpa, session); + if (err) + goto err_free_queues; + + err = auxtrace_queues__process_index(&vpa->queues, session); + if (err) + goto err_free_queues; + + return 0; + +err_free_queues: + auxtrace_queues__free(&vpa->queues); + session->auxtrace = NULL; + +err_free: + free(vpa); + return err; +} diff --git a/tools/perf/util/powerpc-vpadtl.h b/tools/perf/util/powerpc-vpadtl.h new file mode 100644 index 000000000000..ca809660b9bb --- /dev/null +++ b/tools/perf/util/powerpc-vpadtl.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * VPA DTL PMU Support + */ + +#ifndef 
INCLUDE__PERF_POWERPC_VPADTL_H__ +#define INCLUDE__PERF_POWERPC_VPADTL_H__ + +enum { + POWERPC_VPADTL_TYPE, + VPADTL_AUXTRACE_PRIV_MAX, +}; + +#define VPADTL_AUXTRACE_PRIV_SIZE (VPADTL_AUXTRACE_PRIV_MAX * sizeof(u64)) + +union perf_event; +struct perf_session; +struct perf_pmu; + +int powerpc_vpadtl_process_auxtrace_info(union perf_event *event, + struct perf_session *session); + +#endif diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index 3f38c27f0157..8f3ed83853a9 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -29,6 +29,7 @@ #include "tracepoint.h" #include "pfm.h" #include "thread_map.h" +#include "tool_pmu.h" #include "util.h" #define MAX_NAME_LEN 100 @@ -43,111 +44,6 @@ static const char * const event_type_descriptors[] = { "Hardware breakpoint", }; -static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = { - [PERF_TOOL_DURATION_TIME] = { - .symbol = "duration_time", - .alias = "", - }, - [PERF_TOOL_USER_TIME] = { - .symbol = "user_time", - .alias = "", - }, - [PERF_TOOL_SYSTEM_TIME] = { - .symbol = "system_time", - .alias = "", - }, -}; - -/* - * Print the events from <debugfs_mount_point>/tracing/events - */ -void print_tracepoint_events(const struct print_callbacks *print_cb __maybe_unused, void *print_state __maybe_unused) -{ - char *events_path = get_tracing_file("events"); - int events_fd = open(events_path, O_PATH); - struct dirent **sys_namelist = NULL; - int sys_items; - - put_tracing_file(events_path); - if (events_fd < 0) { - pr_err("Error: failed to open tracing events directory\n"); - return; - } - - sys_items = tracing_events__scandir_alphasort(&sys_namelist); - - for (int i = 0; i < sys_items; i++) { - struct dirent *sys_dirent = sys_namelist[i]; - struct dirent **evt_namelist = NULL; - int dir_fd; - int evt_items; - - if (sys_dirent->d_type != DT_DIR || - !strcmp(sys_dirent->d_name, ".") || - !strcmp(sys_dirent->d_name, "..")) - goto next_sys; - - dir_fd = openat(events_fd, sys_dirent->d_name, O_PATH); - if (dir_fd < 0) - goto next_sys; - - evt_items = scandirat(events_fd, sys_dirent->d_name, &evt_namelist, NULL, alphasort); - for (int j = 0; j < evt_items; j++) { - /* - * Buffer sized at twice the max filename length + 1 - * separator + 1 \0 terminator. - */ - char buf[NAME_MAX * 2 + 2]; - /* 16 possible hex digits and 22 other characters and \0. 
*/ - char encoding[16 + 22]; - struct dirent *evt_dirent = evt_namelist[j]; - struct io id; - __u64 config; - - if (evt_dirent->d_type != DT_DIR || - !strcmp(evt_dirent->d_name, ".") || - !strcmp(evt_dirent->d_name, "..")) - goto next_evt; - - snprintf(buf, sizeof(buf), "%s/id", evt_dirent->d_name); - io__init(&id, openat(dir_fd, buf, O_RDONLY), buf, sizeof(buf)); - - if (id.fd < 0) - goto next_evt; - - if (io__get_dec(&id, &config) < 0) { - close(id.fd); - goto next_evt; - } - close(id.fd); - - snprintf(buf, sizeof(buf), "%s:%s", - sys_dirent->d_name, evt_dirent->d_name); - snprintf(encoding, sizeof(encoding), "tracepoint/config=0x%llx/", config); - print_cb->print_event(print_state, - /*topic=*/NULL, - /*pmu_name=*/NULL, /* really "tracepoint" */ - /*event_name=*/buf, - /*event_alias=*/NULL, - /*scale_unit=*/NULL, - /*deprecated=*/false, - "Tracepoint event", - /*desc=*/NULL, - /*long_desc=*/NULL, - encoding); -next_evt: - free(evt_namelist[j]); - } - close(dir_fd); - free(evt_namelist); -next_sys: - free(sys_namelist[i]); - } - - free(sys_namelist); - close(events_fd); -} - void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) { struct strlist *bidlist, *sdtlist; @@ -225,6 +121,7 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) print_cb->print_event(print_state, /*topic=*/NULL, /*pmu_name=*/NULL, + PERF_TYPE_TRACEPOINT, evt_name ?: sdt_name->s, /*event_alias=*/NULL, /*deprecated=*/false, @@ -281,6 +178,7 @@ bool is_event_supported(u8 type, u64 config) ret = evsel__open(evsel, NULL, tmap) >= 0; } + evsel__close(evsel); evsel__delete(evsel); } @@ -288,127 +186,137 @@ bool is_event_supported(u8 type, u64 config) return ret; } -int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state) +/** struct mep - RB-tree node for building printing information. */ +struct mep { + /** nd - RB-tree element. */ + struct rb_node nd; + /** @metric_group: Owned metric group name, separated others with ';'. */ + char *metric_group; + const char *metric_name; + const char *metric_desc; + const char *metric_long_desc; + const char *metric_expr; + const char *metric_threshold; + const char *metric_unit; + const char *pmu_name; +}; + +static int mep_cmp(struct rb_node *rb_node, const void *entry) { - struct perf_pmu *pmu = NULL; - const char *event_type_descriptor = event_type_descriptors[PERF_TYPE_HW_CACHE]; + struct mep *a = container_of(rb_node, struct mep, nd); + struct mep *b = (struct mep *)entry; + int ret; - /* - * Only print core PMUs, skipping uncore for performance and - * PERF_TYPE_SOFTWARE that can succeed in opening legacy cache evenst. 
- */ - while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { - if (pmu->is_uncore || pmu->type == PERF_TYPE_SOFTWARE) - continue; + ret = strcmp(a->metric_group, b->metric_group); + if (ret) + return ret; - for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { - for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { - /* skip invalid cache type */ - if (!evsel__is_cache_op_valid(type, op)) - continue; - - for (int res = 0; res < PERF_COUNT_HW_CACHE_RESULT_MAX; res++) { - char name[64]; - char alias_name[128]; - __u64 config; - int ret; - - __evsel__hw_cache_type_op_res_name(type, op, res, - name, sizeof(name)); - - ret = parse_events__decode_legacy_cache(name, pmu->type, - &config); - if (ret || !is_event_supported(PERF_TYPE_HW_CACHE, config)) - continue; - snprintf(alias_name, sizeof(alias_name), "%s/%s/", - pmu->name, name); - print_cb->print_event(print_state, - "cache", - pmu->name, - name, - alias_name, - /*scale_unit=*/NULL, - /*deprecated=*/false, - event_type_descriptor, - /*desc=*/NULL, - /*long_desc=*/NULL, - /*encoding_desc=*/NULL); - } - } - } - } - return 0; + return strcmp(a->metric_name, b->metric_name); } -void print_tool_events(const struct print_callbacks *print_cb, void *print_state) +static struct rb_node *mep_new(struct rblist *rl __maybe_unused, const void *entry) { - // Start at 1 because the first enum entry means no tool event. - for (int i = 1; i < PERF_TOOL_MAX; ++i) { - print_cb->print_event(print_state, - "tool", - /*pmu_name=*/NULL, - event_symbols_tool[i].symbol, - event_symbols_tool[i].alias, - /*scale_unit=*/NULL, - /*deprecated=*/false, - "Tool event", - /*desc=*/NULL, - /*long_desc=*/NULL, - /*encoding_desc=*/NULL); - } + struct mep *me = malloc(sizeof(struct mep)); + + if (!me) + return NULL; + + memcpy(me, entry, sizeof(struct mep)); + return &me->nd; } -void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, - unsigned int type, const struct event_symbol *syms, - unsigned int max) +static void mep_delete(struct rblist *rl __maybe_unused, + struct rb_node *nd) { - struct strlist *evt_name_list = strlist__new(NULL, NULL); - struct str_node *nd; - - if (!evt_name_list) { - pr_debug("Failed to allocate new strlist for symbol events\n"); - return; - } - for (unsigned int i = 0; i < max; i++) { - /* - * New attr.config still not supported here, the latest - * example was PERF_COUNT_SW_CGROUP_SWITCHES - */ - if (syms[i].symbol == NULL) - continue; + struct mep *me = container_of(nd, struct mep, nd); - if (!is_event_supported(type, i)) - continue; + zfree(&me->metric_group); + free(me); +} - if (strlen(syms[i].alias)) { - char name[MAX_NAME_LEN]; +static struct mep *mep_lookup(struct rblist *groups, const char *metric_group, + const char *metric_name) +{ + struct rb_node *nd; + struct mep me = { + .metric_group = strdup(metric_group), + .metric_name = metric_name, + }; + nd = rblist__find(groups, &me); + if (nd) { + free(me.metric_group); + return container_of(nd, struct mep, nd); + } + rblist__add_node(groups, &me); + nd = rblist__find(groups, &me); + if (nd) + return container_of(nd, struct mep, nd); + return NULL; +} - snprintf(name, MAX_NAME_LEN, "%s OR %s", syms[i].symbol, syms[i].alias); - strlist__add(evt_name_list, name); - } else - strlist__add(evt_name_list, syms[i].symbol); +static int metricgroup__add_to_mep_groups_callback(const struct pmu_metric *pm, + const struct pmu_metrics_table *table __maybe_unused, + void *vdata) +{ + struct rblist *groups = vdata; + const char *g; + char *omg, *mg; + + mg = 
strdup(pm->metric_group ?: pm->metric_name); + if (!mg) + return -ENOMEM; + omg = mg; + while ((g = strsep(&mg, ";")) != NULL) { + struct mep *me; + + g = skip_spaces(g); + if (strlen(g)) + me = mep_lookup(groups, g, pm->metric_name); + else + me = mep_lookup(groups, pm->metric_name, pm->metric_name); + + if (me) { + me->metric_desc = pm->desc; + me->metric_long_desc = pm->long_desc; + me->metric_expr = pm->metric_expr; + me->metric_threshold = pm->metric_threshold; + me->metric_unit = pm->unit; + me->pmu_name = pm->pmu; + } } + free(omg); - strlist__for_each_entry(nd, evt_name_list) { - char *alias = strstr(nd->s, " OR "); + return 0; +} - if (alias) { - *alias = '\0'; - alias += 4; - } - print_cb->print_event(print_state, - /*topic=*/NULL, - /*pmu_name=*/NULL, - nd->s, - alias, - /*scale_unit=*/NULL, - /*deprecated=*/false, - event_type_descriptors[type], - /*desc=*/NULL, - /*long_desc=*/NULL, - /*encoding_desc=*/NULL); +void metricgroup__print(const struct print_callbacks *print_cb, void *print_state) +{ + struct rblist groups; + struct rb_node *node, *next; + const struct pmu_metrics_table *table = pmu_metrics_table__find(); + + rblist__init(&groups); + groups.node_new = mep_new; + groups.node_cmp = mep_cmp; + groups.node_delete = mep_delete; + + metricgroup__for_each_metric(table, metricgroup__add_to_mep_groups_callback, &groups); + + for (node = rb_first_cached(&groups.entries); node; node = next) { + struct mep *me = container_of(node, struct mep, nd); + + print_cb->print_metric(print_state, + me->metric_group, + me->metric_name, + me->metric_desc, + me->metric_long_desc, + me->metric_expr, + me->metric_threshold, + me->metric_unit, + me->pmu_name); + next = rb_next(node); + rblist__remove_node(&groups, node); } - strlist__delete(evt_name_list); } /* @@ -416,20 +324,12 @@ void print_symbol_events(const struct print_callbacks *print_cb, void *print_sta */ void print_events(const struct print_callbacks *print_cb, void *print_state) { - print_symbol_events(print_cb, print_state, PERF_TYPE_HARDWARE, - event_symbols_hw, PERF_COUNT_HW_MAX); - print_symbol_events(print_cb, print_state, PERF_TYPE_SOFTWARE, - event_symbols_sw, PERF_COUNT_SW_MAX); - - print_tool_events(print_cb, print_state); - - print_hwcache_events(print_cb, print_state); - perf_pmus__print_pmu_events(print_cb, print_state); print_cb->print_event(print_state, /*topic=*/NULL, /*pmu_name=*/NULL, + PERF_TYPE_RAW, "rNNN", /*event_alias=*/NULL, /*scale_unit=*/NULL, @@ -444,6 +344,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) print_cb->print_event(print_state, /*topic=*/NULL, /*pmu_name=*/NULL, + PERF_TYPE_BREAKPOINT, "mem:<addr>[/len][:access]", /*scale_unit=*/NULL, /*event_alias=*/NULL, @@ -453,8 +354,6 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) /*long_desc=*/NULL, /*encoding_desc=*/NULL); - print_tracepoint_events(print_cb, print_state); - print_sdt_events(print_cb, print_state); metricgroup__print(print_cb, print_state); diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index bf4290bef0cd..eabba5d4a1fd 100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -12,7 +12,7 @@ struct print_callbacks { void (*print_start)(void *print_state); void (*print_end)(void *print_state); void (*print_event)(void *print_state, const char *topic, - const char *pmu_name, + const char *pmu_name, u32 pmu_type, const char *event_name, const char *event_alias, const char *scale_unit, bool deprecated, const char 
*event_type_desc, @@ -25,19 +25,15 @@ struct print_callbacks { const char *long_desc, const char *expr, const char *threshold, - const char *unit); + const char *unit, + const char *pmu_name); bool (*skip_duplicate_pmus)(void *print_state); }; /** Print all events, the default when no options are specified. */ void print_events(const struct print_callbacks *print_cb, void *print_state); -int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state); void print_sdt_events(const struct print_callbacks *print_cb, void *print_state); -void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, - unsigned int type, const struct event_symbol *syms, - unsigned int max); -void print_tool_events(const struct print_callbacks *print_cb, void *print_state); -void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state); +void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); bool is_event_supported(u8 type, u64 config); #endif /* __PERF_PRINT_EVENTS_H */ diff --git a/tools/perf/util/print_insn.c b/tools/perf/util/print_insn.c index a950e9157d2d..02e6fbb8ca04 100644 --- a/tools/perf/util/print_insn.c +++ b/tools/perf/util/print_insn.c @@ -7,6 +7,7 @@ #include <inttypes.h> #include <string.h> #include <stdbool.h> +#include "capstone.h" #include "debug.h" #include "sample.h" #include "symbol.h" @@ -29,76 +30,6 @@ size_t sample__fprintf_insn_raw(struct perf_sample *sample, FILE *fp) return printed; } -#ifdef HAVE_LIBCAPSTONE_SUPPORT -#include <capstone/capstone.h> - -static int capstone_init(struct machine *machine, csh *cs_handle, bool is64) -{ - cs_arch arch; - cs_mode mode; - - if (machine__is(machine, "x86_64") && is64) { - arch = CS_ARCH_X86; - mode = CS_MODE_64; - } else if (machine__normalized_is(machine, "x86")) { - arch = CS_ARCH_X86; - mode = CS_MODE_32; - } else if (machine__normalized_is(machine, "arm64")) { - arch = CS_ARCH_ARM64; - mode = CS_MODE_ARM; - } else if (machine__normalized_is(machine, "arm")) { - arch = CS_ARCH_ARM; - mode = CS_MODE_ARM + CS_MODE_V8; - } else if (machine__normalized_is(machine, "s390")) { - arch = CS_ARCH_SYSZ; - mode = CS_MODE_BIG_ENDIAN; - } else { - return -1; - } - - if (cs_open(arch, mode, cs_handle) != CS_ERR_OK) { - pr_warning_once("cs_open failed\n"); - return -1; - } - - if (machine__normalized_is(machine, "x86")) { - cs_option(*cs_handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT); - /* - * Resolving address operands to symbols is implemented - * on x86 by investigating instruction details. - */ - cs_option(*cs_handle, CS_OPT_DETAIL, CS_OPT_ON); - } - - return 0; -} - -static size_t print_insn_x86(struct thread *thread, u8 cpumode, cs_insn *insn, - int print_opts, FILE *fp) -{ - struct addr_location al; - size_t printed = 0; - - if (insn->detail && insn->detail->x86.op_count == 1) { - cs_x86_op *op = &insn->detail->x86.operands[0]; - - addr_location__init(&al); - if (op->type == X86_OP_IMM && - thread__find_symbol(thread, cpumode, op->imm, &al)) { - printed += fprintf(fp, "%s ", insn[0].mnemonic); - printed += symbol__fprintf_symname_offs(al.sym, &al, fp); - if (print_opts & PRINT_INSN_IMM_HEX) - printed += fprintf(fp, " [%#" PRIx64 "]", op->imm); - addr_location__exit(&al); - return printed; - } - addr_location__exit(&al); - } - - printed += fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str); - return printed; -} - static bool is64bitip(struct machine *machine, struct addr_location *al) { const struct dso *dso = al->map ? 
map__dso(al->map) : NULL; @@ -115,32 +46,8 @@ ssize_t fprintf_insn_asm(struct machine *machine, struct thread *thread, u8 cpum bool is64bit, const uint8_t *code, size_t code_size, uint64_t ip, int *lenp, int print_opts, FILE *fp) { - size_t printed; - cs_insn *insn; - csh cs_handle; - size_t count; - int ret; - - /* TODO: Try to initiate capstone only once but need a proper place. */ - ret = capstone_init(machine, &cs_handle, is64bit); - if (ret < 0) - return ret; - - count = cs_disasm(cs_handle, code, code_size, ip, 1, &insn); - if (count > 0) { - if (machine__normalized_is(machine, "x86")) - printed = print_insn_x86(thread, cpumode, &insn[0], print_opts, fp); - else - printed = fprintf(fp, "%s %s", insn[0].mnemonic, insn[0].op_str); - if (lenp) - *lenp = insn->size; - cs_free(insn, count); - } else { - printed = -1; - } - - cs_close(&cs_handle); - return printed; + return capstone__fprintf_insn_asm(machine, thread, cpumode, is64bit, code, code_size, + ip, lenp, print_opts, fp); } size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *thread, @@ -158,13 +65,3 @@ size_t sample__fprintf_insn_asm(struct perf_sample *sample, struct thread *threa return printed; } -#else -size_t sample__fprintf_insn_asm(struct perf_sample *sample __maybe_unused, - struct thread *thread __maybe_unused, - struct machine *machine __maybe_unused, - FILE *fp __maybe_unused, - struct addr_location *al __maybe_unused) -{ - return 0; -} -#endif diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a17c9b8a7a79..710e4620923e 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -40,6 +40,7 @@ #include "session.h" #include "string2.h" #include "strbuf.h" +#include "parse-events.h" #include <subcmd/pager.h> #include <linux/ctype.h> @@ -51,6 +52,9 @@ #define PERFPROBE_GROUP "probe" +/* Defined in kernel/trace/trace.h */ +#define MAX_EVENT_NAME_LEN 64 + bool probe_event_dry_run; /* Dry run flag */ struct probe_conf probe_conf = { .magic_num = DEFAULT_PROBE_MAGIC_NUM }; @@ -71,12 +75,14 @@ int e_snprintf(char *str, size_t size, const char *format, ...) 
} static struct machine *host_machine; +static struct perf_env host_env; /* Initialize symbol maps and path of vmlinux/modules */ int init_probe_symbol_maps(bool user_only) { int ret; + perf_env__init(&host_env); symbol_conf.allow_aliases = true; ret = symbol__init(NULL); if (ret < 0) { @@ -90,7 +96,7 @@ int init_probe_symbol_maps(bool user_only) if (symbol_conf.vmlinux_name) pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name); - host_machine = machine__new_host(); + host_machine = machine__new_host(&host_env); if (!host_machine) { pr_debug("machine__new_host() failed.\n"); symbol__exit(); @@ -107,6 +113,7 @@ void exit_probe_symbol_maps(void) machine__delete(host_machine); host_machine = NULL; symbol__exit(); + perf_env__exit(&host_env); } static struct ref_reloc_sym *kernel_get_ref_reloc_sym(struct map **pmap) @@ -342,7 +349,7 @@ elf_err: return mod_name; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT static int kernel_get_module_dso(const char *module, struct dso **pdso) { @@ -498,7 +505,7 @@ static struct debuginfo *open_from_debuginfod(struct dso *dso, struct nsinfo *ns if (!c) return NULL; - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); fd = debuginfod_find_debuginfo(c, (const unsigned char *)sbuild_id, 0, &path); if (fd >= 0) @@ -1036,6 +1043,17 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) return rv; } +static int sprint_line_description(char *sbuf, size_t size, struct line_range *lr) +{ + if (!lr->function) + return snprintf(sbuf, size, "file: %s, line: %d", lr->file, lr->start); + + if (lr->file) + return snprintf(sbuf, size, "function: %s, file:%s, line: %d", lr->function, lr->file, lr->start); + + return snprintf(sbuf, size, "function: %s, line:%d", lr->function, lr->start); +} + #define show_one_line_with_num(f,l) _show_one_line(f,l,false,true) #define show_one_line(f,l) _show_one_line(f,l,false,false) #define skip_one_line(f,l) _show_one_line(f,l,true,false) @@ -1048,7 +1066,6 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) static int __show_line_range(struct line_range *lr, const char *module, bool user) { - struct build_id bid; int l = 1; struct int_node *ln; struct debuginfo *dinfo; @@ -1065,17 +1082,23 @@ static int __show_line_range(struct line_range *lr, const char *module, ret = debuginfo__find_line_range(dinfo, lr); if (!ret) { /* Not found, retry with an alternative */ + pr_debug2("Failed to find line range in debuginfo. Fallback to alternative\n"); ret = get_alternative_line_range(dinfo, lr, module, user); if (!ret) ret = debuginfo__find_line_range(dinfo, lr); + else /* Ignore error, we just failed to find it. 
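+			 * The -ENOENT set below makes the caller print the
+			 * "Specified source line (%s) is not found" warning.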
*/
+			ret = -ENOENT;
+	}
 	if (dinfo->build_id) {
+		struct build_id bid;
+
 		build_id__init(&bid, dinfo->build_id, BUILD_ID_SIZE);
-		build_id__sprintf(&bid, sbuild_id);
+		build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id));
 	}
 	debuginfo__delete(dinfo);
 	if (ret == 0 || ret == -ENOENT) {
-		pr_warning("Specified source line is not found.\n");
+		sprint_line_description(sbuf, sizeof(sbuf), lr);
+		pr_warning("Specified source line (%s) is not found.\n", sbuf);
 		return -ENOENT;
 	} else if (ret < 0) {
 		pr_warning("Debuginfo analysis failed.\n");
@@ -1250,7 +1273,7 @@ out:
 	return ret;
 }
 
-#else	/* !HAVE_DWARF_SUPPORT */
+#else	/* !HAVE_LIBDW_SUPPORT */
 
 static void debuginfo_cache__exit(void)
 {
@@ -1343,32 +1366,41 @@ static bool is_c_func_name(const char *name)
  *
  *         SRC[:SLN[+NUM|-ELN]]
  *         FNC[@SRC][:SLN[+NUM|-ELN]]
+ *
+ * FNC@SRC accepts `FNC@*`, which forcibly specifies FNC as a function name.
+ * SRC and FNC can be quoted with double or single quotes.
  */
 int parse_line_range_desc(const char *arg, struct line_range *lr)
 {
-	char *range, *file, *name = strdup(arg);
-	int err;
+	char *buf = strdup(arg);
+	char *p;
+	int err = 0;
 
-	if (!name)
+	if (!buf)
 		return -ENOMEM;
 
 	lr->start = 0;
 	lr->end = INT_MAX;
 
-	range = strchr(name, ':');
-	if (range) {
-		*range++ = '\0';
+	p = strpbrk_esq(buf, ":");
+	if (p) {
+		if (p == buf) {
+			semantic_error("No file/function name in '%s'.\n", p);
+			err = -EINVAL;
+			goto out;
+		}
+		*(p++) = '\0';
 
-		err = parse_line_num(&range, &lr->start, "start line");
+		err = parse_line_num(&p, &lr->start, "start line");
 		if (err)
-			goto err;
+			goto out;
 
-		if (*range == '+' || *range == '-') {
-			const char c = *range++;
+		if (*p == '+' || *p == '-') {
+			const char c = *(p++);
 
-			err = parse_line_num(&range, &lr->end, "end line");
+			err = parse_line_num(&p, &lr->end, "end line");
 			if (err)
-				goto err;
+				goto out;
 
 			if (c == '+') {
 				lr->end += lr->start;
@@ -1388,36 +1420,43 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
 		if (lr->start > lr->end) {
 			semantic_error("Start line must be smaller"
 				       " than end line.\n");
-			goto err;
+			goto out;
 		}
-		if (*range != '\0') {
-			semantic_error("Tailing with invalid str '%s'.\n", range);
-			goto err;
+		if (*p != '\0') {
+			semantic_error("Trailing with invalid str '%s'.\n", p);
+			goto out;
 		}
 	}
 
-	file = strchr(name, '@');
-	if (file) {
-		*file = '\0';
-		lr->file = strdup(++file);
-		if (lr->file == NULL) {
-			err = -ENOMEM;
-			goto err;
+	p = strpbrk_esq(buf, "@");
+	if (p) {
+		*p++ = '\0';
+		if (strcmp(p, "*")) {
+			lr->file = strdup_esq(p);
+			if (lr->file == NULL) {
+				err = -ENOMEM;
+				goto out;
+			}
+		}
+		if (*buf != '\0')
+			lr->function = strdup_esq(buf);
+		if (!lr->function && !lr->file) {
+			semantic_error("'@*' alone is not allowed.\n");
+			err = -EINVAL;
+			goto out;
 		}
-		lr->function = name;
-	} else if (strchr(name, '/') || strchr(name, '.'))
-		lr->file = name;
-	else if (is_c_func_name(name))/* We reuse it for checking funcname */
-		lr->function = name;
+	} else if (strpbrk_esq(buf, "/."))
		lr->file = strdup_esq(buf);
+	else if (is_c_func_name(buf))/* We reuse it for checking funcname */
+		lr->function = strdup_esq(buf);
 	else {	/* Invalid name */
-		semantic_error("'%s' is not a valid function name.\n", name);
+		semantic_error("'%s' is not a valid function name.\n", buf);
 		err = -EINVAL;
-		goto err;
+		goto out;
 	}
 
-	return 0;
-err:
-	free(name);
+out:
+	free(buf);
 	return err;
 }
 
@@ -1425,19 +1464,19 @@ static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
 {
 	char *ptr;
 
-	ptr = strpbrk_esc(*arg, ":");
+	ptr = strpbrk_esq(*arg, ":");
 	if
(ptr) { *ptr = '\0'; if (!pev->sdt && !is_c_func_name(*arg)) goto ng_name; - pev->group = strdup_esc(*arg); + pev->group = strdup_esq(*arg); if (!pev->group) return -ENOMEM; *arg = ptr + 1; } else pev->group = NULL; - pev->event = strdup_esc(*arg); + pev->event = strdup_esq(*arg); if (pev->event == NULL) return -ENOMEM; @@ -1476,7 +1515,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg++; } - ptr = strpbrk_esc(arg, ";=@+%"); + ptr = strpbrk_esq(arg, ";=@+%"); if (pev->sdt) { if (ptr) { if (*ptr != '@') { @@ -1490,7 +1529,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) pev->target = build_id_cache__origname(tmp); free(tmp); } else - pev->target = strdup_esc(ptr + 1); + pev->target = strdup_esq(ptr + 1); if (!pev->target) return -ENOMEM; *ptr = '\0'; @@ -1531,7 +1570,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) file_spec = true; } - ptr = strpbrk_esc(arg, ";:+@%"); + ptr = strpbrk_esq(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1540,7 +1579,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (arg[0] == '\0') tmp = NULL; else { - tmp = strdup_esc(arg); + tmp = strdup_esq(arg); if (tmp == NULL) return -ENOMEM; } @@ -1578,7 +1617,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) return -ENOMEM; break; } - ptr = strpbrk_esc(arg, ";:+@%"); + ptr = strpbrk_esq(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1605,7 +1644,9 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) semantic_error("SRC@SRC is not allowed.\n"); return -EINVAL; } - pp->file = strdup_esc(arg); + if (!strcmp(arg, "*")) + break; + pp->file = strdup_esq(arg); if (pp->file == NULL) return -ENOMEM; break; @@ -2378,6 +2419,7 @@ void clear_perf_probe_event(struct perf_probe_event *pev) } pev->nargs = 0; zfree(&pev->args); + nsinfo__zput(pev->nsi); } #define strdup_or_goto(str, label) \ @@ -2738,7 +2780,7 @@ int show_perf_probe_events(struct strfilter *filter) static int get_new_event_name(char *buf, size_t len, const char *base, struct strlist *namelist, bool ret_event, - bool allow_suffix) + bool allow_suffix, bool not_C_symname) { int i, ret; char *p, *nbase; @@ -2749,15 +2791,32 @@ static int get_new_event_name(char *buf, size_t len, const char *base, if (!nbase) return -ENOMEM; - /* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */ - p = strpbrk(nbase, ".@"); - if (p && p != nbase) - *p = '\0'; + if (not_C_symname) { + /* Replace non-alnum with '_' */ + char *s, *d; + + s = d = nbase; + do { + if (*s && !isalnum(*s)) { + if (d != nbase && *(d - 1) != '_') + *d++ = '_'; + } else + *d++ = *s; + } while (*s++); + } else { + /* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */ + p = strpbrk(nbase, ".@"); + if (p && p != nbase) + *p = '\0'; + } /* Try no suffix number */ ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? 
"__return" : ""); if (ret < 0) { - pr_warning("snprintf() failed: %d; the event name nbase='%s' is too long\n", ret, nbase); + pr_warning("snprintf() failed: %d; the event name '%s' is too long\n" + " Hint: Set a shorter event with syntax \"EVENT=PROBEDEF\"\n" + " EVENT: Event name (max length: %d bytes).\n", + ret, nbase, MAX_EVENT_NAME_LEN); goto out; } if (!strlist__has_entry(namelist, buf)) @@ -2777,7 +2836,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base, for (i = 1; i < MAX_EVENT_INDEX; i++) { ret = e_snprintf(buf, len, "%s_%d", nbase, i); if (ret < 0) { - pr_debug("snprintf() failed: %d\n", ret); + pr_warning("Add suffix failed: %d; the event name '%s' is too long\n" + " Hint: Set a shorter event with syntax \"EVENT=PROBEDEF\"\n" + " EVENT: Event name (max length: %d bytes).\n", + ret, nbase, MAX_EVENT_NAME_LEN); goto out; } if (!strlist__has_entry(namelist, buf)) @@ -2841,7 +2903,8 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, bool allow_suffix) { const char *event, *group; - char buf[64]; + bool not_C_symname = true; + char buf[MAX_EVENT_NAME_LEN]; int ret; /* If probe_event or trace_event already have the name, reuse it */ @@ -2855,8 +2918,10 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, (strncmp(pev->point.function, "0x", 2) != 0) && !strisglob(pev->point.function)) event = pev->point.function; - else + else { event = tev->point.realname; + not_C_symname = !is_known_C_lang(tev->lang); + } } if (pev->group && !pev->sdt) group = pev->group; @@ -2865,9 +2930,16 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, else group = PERFPROBE_GROUP; + if (strlen(group) >= MAX_EVENT_NAME_LEN) { + pr_err("Probe group string='%s' is too long (>= %d bytes)\n", + group, MAX_EVENT_NAME_LEN); + return -ENOMEM; + } + /* Get an unused new event name */ ret = get_new_event_name(buf, sizeof(buf), event, namelist, - tev->point.retprobe, allow_suffix); + tev->point.retprobe, allow_suffix, + not_C_symname); if (ret < 0) return ret; @@ -3696,66 +3768,12 @@ void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs) /* Loop 3: cleanup and free trace events */ for (i = 0; i < npevs; i++) { pev = &pevs[i]; - for (j = 0; j < pevs[i].ntevs; j++) - clear_probe_trace_event(&pevs[i].tevs[j]); - zfree(&pevs[i].tevs); - pevs[i].ntevs = 0; - nsinfo__zput(pev->nsi); - clear_perf_probe_event(&pevs[i]); - } -} - -int add_perf_probe_events(struct perf_probe_event *pevs, int npevs) -{ - int ret; - - ret = init_probe_symbol_maps(pevs->uprobes); - if (ret < 0) - return ret; - - ret = convert_perf_probe_events(pevs, npevs); - if (ret == 0) - ret = apply_perf_probe_events(pevs, npevs); - - cleanup_perf_probe_events(pevs, npevs); - - exit_probe_symbol_maps(); - return ret; -} - -int del_perf_probe_events(struct strfilter *filter) -{ - int ret, ret2, ufd = -1, kfd = -1; - char *str = strfilter__string(filter); - - if (!str) - return -EINVAL; - - /* Get current event names */ - ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW); - if (ret < 0) - goto out; - - ret = probe_file__del_events(kfd, filter); - if (ret < 0 && ret != -ENOENT) - goto error; - - ret2 = probe_file__del_events(ufd, filter); - if (ret2 < 0 && ret2 != -ENOENT) { - ret = ret2; - goto error; + for (j = 0; j < pev->ntevs; j++) + clear_probe_trace_event(&pev->tevs[j]); + zfree(&pev->tevs); + pev->ntevs = 0; + clear_perf_probe_event(pev); } - ret = 0; - -error: - if (kfd >= 0) - close(kfd); - if (ufd >= 0) - close(ufd); -out: - free(str); - - return 
ret; } int show_available_funcs(const char *target, struct nsinfo *nsi, diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 7e3b6c3d1f74..71905ede0207 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -58,6 +58,7 @@ struct probe_trace_event { char *group; /* Group name */ struct probe_trace_point point; /* Trace point */ int nargs; /* Number of args */ + int lang; /* Dwarf language code */ bool uprobes; /* uprobes only */ struct probe_trace_arg *args; /* Arguments */ }; @@ -159,7 +160,6 @@ void line_range__clear(struct line_range *lr); /* Initialize line range */ int line_range__init(struct line_range *lr); -int add_perf_probe_events(struct perf_probe_event *pevs, int npevs); int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); int show_probe_trace_events(struct perf_probe_event *pevs, int npevs); @@ -168,8 +168,6 @@ void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); struct strfilter; -int del_perf_probe_events(struct strfilter *filter); - int show_perf_probe_event(const char *group, const char *event, struct perf_probe_event *pev, const char *module, bool use_stdout); diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 3d50de3217d5..5069fb61f48c 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -366,25 +366,6 @@ int probe_file__del_strlist(int fd, struct strlist *namelist) return ret; } -int probe_file__del_events(int fd, struct strfilter *filter) -{ - struct strlist *namelist; - int ret; - - namelist = strlist__new(NULL, NULL); - if (!namelist) - return -ENOMEM; - - ret = probe_file__get_events(fd, filter, namelist); - if (ret < 0) - goto out; - - ret = probe_file__del_strlist(fd, namelist); -out: - strlist__delete(namelist); - return ret; -} - /* Caller must ensure to remove this entry from list */ static void probe_cache_entry__delete(struct probe_cache_entry *entry) { @@ -467,10 +448,10 @@ static int probe_cache__open(struct probe_cache *pcache, const char *target, if (!target || !strcmp(target, DSO__NAME_KALLSYMS)) { target = DSO__NAME_KALLSYMS; is_kallsyms = true; - ret = sysfs__sprintf_build_id("/", sbuildid); + ret = sysfs__snprintf_build_id("/", sbuildid, sizeof(sbuildid)); } else { nsinfo__mountns_enter(nsi, &nsc); - ret = filename__sprintf_build_id(target, sbuildid); + ret = filename__snprintf_build_id(target, sbuildid, sizeof(sbuildid)); nsinfo__mountns_exit(&nsc); } diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index 0dba88c0f5f0..c2bb6a5b9dcc 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -44,7 +44,6 @@ struct strlist *probe_file__get_namelist(int fd); struct strlist *probe_file__get_rawlist(int fd); int probe_file__add_event(int fd, struct probe_trace_event *tev); -int probe_file__del_events(int fd, struct strfilter *filter); int probe_file__get_events(int fd, struct strfilter *filter, struct strlist *plist); int probe_file__del_strlist(int fd, struct strlist *namelist); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 630e16c54ed5..5ffd97ee4898 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -35,6 +35,19 @@ /* Kprobe tracer basic type is up to u64 */ #define MAX_BASIC_TYPE_BITS 64 +bool is_known_C_lang(int lang) +{ + switch (lang) { + case DW_LANG_C89: + case DW_LANG_C: + case DW_LANG_C99: + case DW_LANG_C11: + return 
true; + default: + return false; + } +} + /* * Probe finder related functions */ @@ -56,7 +69,7 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs) */ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, Dwarf_Op *fb_ops, Dwarf_Die *sp_die, - unsigned int machine, + const struct probe_finder *pf, struct probe_trace_arg *tvar) { Dwarf_Attribute attr; @@ -166,7 +179,7 @@ static_var: if (!tvar) return ret2; - regs = get_dwarf_regstr(regn, machine); + regs = get_dwarf_regstr(regn, pf->e_machine, pf->e_flags); if (!regs) { /* This should be a bug in DWARF or this tool */ pr_warning("Mapping for the register number %u " @@ -451,7 +464,7 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) dwarf_diename(vr_die)); ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops, - &pf->sp_die, pf->machine, pf->tvar); + &pf->sp_die, pf, pf->tvar); if (ret == -ENOENT && pf->skip_empty_arg) /* This can be found in other place. skip it */ return 0; @@ -602,7 +615,6 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); if (ret <= 0 || nops == 0) { pf->fb_ops = NULL; -#ifdef HAVE_DWARF_CFI_SUPPORT } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa && (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) { if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 && @@ -613,7 +625,6 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) free(frame); return -ENOENT; } -#endif /* HAVE_DWARF_CFI_SUPPORT */ } /* Call finder's callback handler */ @@ -837,7 +848,6 @@ static int probe_point_lazy_walker(const char *fname, int lineno, /* Find probe points from lazy pattern */ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) { - struct build_id bid; char sbuild_id[SBUILD_ID_SIZE] = ""; int ret = 0; char *fpath; @@ -847,8 +857,10 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) comp_dir = cu_get_comp_dir(&pf->cu_die); if (pf->dbg->build_id) { + struct build_id bid; + build_id__init(&bid, pf->dbg->build_id, BUILD_ID_SIZE); - build_id__sprintf(&bid, sbuild_id); + build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id)); } ret = find_source_path(pf->fname, sbuild_id, comp_dir, &fpath); if (ret < 0) { @@ -962,6 +974,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die), (unsigned long)dwarf_dieoffset(sp_die)); pf->fname = fname; + pf->abstrace_dieoffset = dwarf_dieoffset(sp_die); if (pp->line) { /* Function relative line */ dwarf_decl_line(sp_die, &pf->lno); pf->lno += pp->line; @@ -1136,9 +1149,9 @@ static int debuginfo__find_probes(struct debuginfo *dbg, if (gelf_getehdr(elf, &ehdr) == NULL) return -EINVAL; - pf->machine = ehdr.e_machine; + pf->e_machine = ehdr.e_machine; + pf->e_flags = ehdr.e_flags; -#ifdef HAVE_DWARF_CFI_SUPPORT do { GElf_Shdr shdr; @@ -1148,7 +1161,6 @@ static int debuginfo__find_probes(struct debuginfo *dbg, pf->cfi_dbg = dwarf_getcfi(dbg->dbg); } while (0); -#endif /* HAVE_DWARF_CFI_SUPPORT */ ret = debuginfo__find_probe_location(dbg, pf); return ret; @@ -1169,13 +1181,15 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data) struct local_vars_finder *vf = data; struct probe_finder *pf = vf->pf; int tag; + Dwarf_Attribute attr; + Dwarf_Die var_die; tag = dwarf_tag(die_mem); if (tag == DW_TAG_formal_parameter || (tag == DW_TAG_variable && vf->vars)) { if 
(convert_variable_location(die_mem, vf->pf->addr,
 					       vf->pf->fb_ops, &pf->sp_die,
-					       pf->machine, NULL) == 0) {
+					       pf, /*tvar=*/NULL) == 0) {
 			vf->args[vf->nargs].var = (char *)dwarf_diename(die_mem);
 			if (vf->args[vf->nargs].var == NULL) {
 				vf->ret = -ENOMEM;
@@ -1186,10 +1200,22 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
 		}
 	}
 
-	if (dwarf_haspc(die_mem, vf->pf->addr))
+	if (dwarf_haspc(die_mem, vf->pf->addr)) {
+		/*
+		 * When DW_AT_entry_pc contains an instruction address,
+		 * also check whether the DW_AT_abstract_origin of die_mem
+		 * points to the correct DIE.
+		 */
+		if (dwarf_attr(die_mem, DW_AT_abstract_origin, &attr)) {
+			dwarf_formref_die(&attr, &var_die);
+			if (pf->abstrace_dieoffset != dwarf_dieoffset(&var_die))
+				goto out;
+		}
 		return DIE_FIND_CB_CONTINUE;
-	else
-		return DIE_FIND_CB_SIBLING;
+	}
+
+out:
+	return DIE_FIND_CB_SIBLING;
 }
 
 static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf,
@@ -1273,6 +1299,8 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
 		goto end;
 	}
 
+	tev->lang = dwarf_srclang(dwarf_diecu(sc_die, &pf->cu_die, NULL, NULL));
+
 	pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
 		 tev->point.offset);
 
@@ -1379,6 +1407,8 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
 	if (ret >= 0 && tf.pf.skip_empty_arg)
 		ret = fill_empty_trace_arg(pev, tf.tevs, tf.ntevs);
 
+	dwarf_cfi_end(tf.pf.cfi_eh);
+
 	if (ret < 0 || tf.ntevs == 0) {
 		for (i = 0; i < tf.ntevs; i++)
 			clear_probe_trace_event(&tf.tevs[i]);
@@ -1404,7 +1434,7 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
 	    tag == DW_TAG_variable) {
 		ret = convert_variable_location(die_mem, af->pf.addr,
 						af->pf.fb_ops, &af->pf.sp_die,
-						af->pf.machine, NULL);
+						&af->pf, /*tvar=*/NULL);
 		if (ret == 0 || ret == -ERANGE) {
 			int ret2;
 			bool externs = !af->child;
@@ -1583,8 +1613,21 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr,
 
 	/* Find a corresponding function (name, baseline and baseaddr) */
 	if (die_find_realfunc(&cudie, (Dwarf_Addr)addr, &spdie)) {
-		/* Get function entry information */
-		func = basefunc = dwarf_diename(&spdie);
+		/*
+		 * Get function entry information.
+		 *
+		 * As described in the document DWARF Debugging Information
+		 * Format Version 5, section 2.22 Linkage Names, "mangled names,
+		 * are used in various ways, ... to distinguish multiple
+		 * entities that have the same name".
+		 *
+		 * First try to get the distinct linkage name; if that fails,
+		 * fall back to the name associated with the DIE.
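+		 *
+		 * For example, a C++ function "int foo(int)" typically has
+		 * the linkage name "_Z3fooi" (the mangled symbol), whereas
+		 * dwarf_diename() only returns "foo".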
+ */ + func = basefunc = die_get_linkage_name(&spdie); + if (!func) + func = basefunc = dwarf_diename(&spdie); + if (!func || die_entrypc(&spdie, &baseaddr) != 0 || dwarf_decl_line(&spdie, &baseline) != 0) { @@ -1863,7 +1906,11 @@ int find_source_path(const char *raw_path, const char *sbuild_id, const char *prefix = symbol_conf.source_prefix; if (sbuild_id && !prefix) { - if (!get_source_from_debuginfod(raw_path, sbuild_id, new_path)) + char prefixed_raw_path[PATH_MAX]; + + path__join(prefixed_raw_path, sizeof(prefixed_raw_path), comp_dir, raw_path); + + if (!get_source_from_debuginfod(prefixed_raw_path, sbuild_id, new_path)) return 0; } diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 3add5ff516e1..ecd6d937c592 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -21,11 +21,14 @@ static inline int is_c_varname(const char *name) return isalpha(name[0]) || name[0] == '_'; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include "dwarf-aux.h" #include "debuginfo.h" +/* Check the language code is known C */ +bool is_known_C_lang(int lang); + /* Find probe_trace_events specified by perf_probe_event from debuginfo */ int debuginfo__find_trace_events(struct debuginfo *dbg, struct perf_probe_event *pev, @@ -60,17 +63,17 @@ struct probe_finder { const char *fname; /* Real file name */ Dwarf_Die cu_die; /* Current CU */ Dwarf_Die sp_die; + Dwarf_Off abstrace_dieoffset; struct intlist *lcache; /* Line cache for lazy match */ /* For variable searching */ -#if _ELFUTILS_PREREQ(0, 142) - /* Call Frame Information from .eh_frame */ + /* Call Frame Information from .eh_frame. Owned by this struct. */ Dwarf_CFI *cfi_eh; - /* Call Frame Information from .debug_frame */ + /* Call Frame Information from .debug_frame. Not owned. 
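+	 * It comes from dwarf_getcfi() and is owned by the libdw Dwarf
+	 * handle, unlike cfi_eh, which must be freed with dwarf_cfi_end().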
*/ Dwarf_CFI *cfi_dbg; -#endif Dwarf_Op *fb_ops; /* Frame base attribute */ - unsigned int machine; /* Target machine arch */ + unsigned int e_machine; /* ELF target machine arch */ + unsigned int e_flags; /* ELF target machine flags */ struct perf_probe_arg *pvar; /* Current target variable */ struct probe_trace_arg *tvar; /* Current result variable */ bool skip_empty_arg; /* Skip non-exist args */ @@ -104,6 +107,8 @@ struct line_finder { int found; }; -#endif /* HAVE_DWARF_SUPPORT */ +#else +#define is_known_C_lang(lang) (false) +#endif /* HAVE_LIBDW_SUPPORT */ #endif /*_PROBE_FINDER_H */ diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c index a1d1e4ef6257..141ffa129c69 100644 --- a/tools/perf/util/pstack.c +++ b/tools/perf/util/pstack.c @@ -63,20 +63,6 @@ void pstack__push(struct pstack *pstack, void *key) pstack->entries[pstack->top++] = key; } -void *pstack__pop(struct pstack *pstack) -{ - void *ret; - - if (pstack->top == 0) { - pr_err("%s: underflow!\n", __func__); - return NULL; - } - - ret = pstack->entries[--pstack->top]; - pstack->entries[pstack->top] = NULL; - return ret; -} - void *pstack__peek(struct pstack *pstack) { if (pstack->top == 0) diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h index 8729b8be061d..712051b8130f 100644 --- a/tools/perf/util/pstack.h +++ b/tools/perf/util/pstack.h @@ -10,7 +10,6 @@ void pstack__delete(struct pstack *pstack); bool pstack__empty(const struct pstack *pstack); void pstack__remove(struct pstack *pstack, void *key); void pstack__push(struct pstack *pstack, void *key); -void *pstack__pop(struct pstack *pstack); void *pstack__peek(struct pstack *pstack); #endif /* _PERF_PSTACK_ */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources deleted file mode 100644 index 1bec945f4838..000000000000 --- a/tools/perf/util/python-ext-sources +++ /dev/null @@ -1,53 +0,0 @@ -# -# List of files needed by perf python extension -# -# Each source file must be placed on its own line so that it can be -# processed by Makefile and util/setup.py accordingly. 
-# - -util/python.c -../lib/ctype.c -util/cap.c -util/evlist.c -util/evsel.c -util/evsel_fprintf.c -util/perf_event_attr_fprintf.c -util/cpumap.c -util/memswap.c -util/mmap.c -util/namespaces.c -../lib/bitmap.c -../lib/find_bit.c -../lib/list_sort.c -../lib/hweight.c -../lib/string.c -../lib/vsprintf.c -util/thread_map.c -util/util.c -util/cgroup.c -util/parse-branch-options.c -util/rblist.c -util/counts.c -util/print_binary.c -util/strlist.c -util/trace-event.c -util/trace-event-parse.c -../lib/rbtree.c -util/string.c -util/symbol_fprintf.c -util/units.c -util/affinity.c -util/rwsem.c -util/hashmap.c -util/perf_regs.c -util/fncache.c -util/rlimit.c -util/perf-regs-arch/perf_regs_aarch64.c -util/perf-regs-arch/perf_regs_arm.c -util/perf-regs-arch/perf_regs_csky.c -util/perf-regs-arch/perf_regs_loongarch.c -util/perf-regs-arch/perf_regs_mips.c -util/perf-regs-arch/perf_regs_powerpc.c -util/perf-regs-arch/perf_regs_riscv.c -util/perf-regs-arch/perf_regs_s390.c -util/perf-regs-arch/perf_regs_x86.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 0aeb97c11c03..cc1019d29a5d 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -6,220 +6,27 @@ #include <linux/err.h> #include <perf/cpumap.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #include <perf/mmap.h> -#include "evlist.h" #include "callchain.h" +#include "counts.h" +#include "evlist.h" #include "evsel.h" #include "event.h" +#include "expr.h" #include "print_binary.h" +#include "record.h" +#include "strbuf.h" #include "thread_map.h" +#include "tp_pmu.h" #include "trace-event.h" -#include "mmap.h" -#include "stat.h" #include "metricgroup.h" -#include "util/bpf-filter.h" -#include "util/env.h" -#include "util/pmu.h" -#include "util/pmus.h" +#include "mmap.h" +#include "util/sample.h" #include <internal/lib.h> -#include "util.h" - -#if PY_MAJOR_VERSION < 3 -#define _PyUnicode_FromString(arg) \ - PyString_FromString(arg) -#define _PyUnicode_AsString(arg) \ - PyString_AsString(arg) -#define _PyUnicode_FromFormat(...) \ - PyString_FromFormat(__VA_ARGS__) -#define _PyLong_FromLong(arg) \ - PyInt_FromLong(arg) - -#else - -#define _PyUnicode_FromString(arg) \ - PyUnicode_FromString(arg) -#define _PyUnicode_FromFormat(...) \ - PyUnicode_FromFormat(__VA_ARGS__) -#define _PyLong_FromLong(arg) \ - PyLong_FromLong(arg) -#endif - -#ifndef Py_TYPE -#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) -#endif - -/* - * Avoid bringing in event parsing. - */ -int parse_event(struct evlist *evlist __maybe_unused, const char *str __maybe_unused) -{ - return 0; -} - -/* - * Provide these two so that we don't have to link against callchain.c and - * start dragging hist.c, etc. 
- */ -struct callchain_param callchain_param; - -int parse_callchain_record(const char *arg __maybe_unused, - struct callchain_param *param __maybe_unused) -{ - return 0; -} - -/* - * Add these not to drag util/env.c - */ -struct perf_env perf_env; - -const char *perf_env__cpuid(struct perf_env *env __maybe_unused) -{ - return NULL; -} - -// This one is a bit easier, wouldn't drag too much, but leave it as a stub we need it here -const char *perf_env__arch(struct perf_env *env __maybe_unused) -{ - return NULL; -} - -/* - * These ones are needed not to drag the PMU bandwagon, jevents generated - * pmu_sys_event_tables, etc and evsel__find_pmu() is used so far just for - * doing per PMU perf_event_attr.exclude_guest handling, not really needed, so - * far, for the perf python binding known usecases, revisit if this become - * necessary. - */ -struct perf_pmu *evsel__find_pmu(const struct evsel *evsel __maybe_unused) -{ - return NULL; -} - -int perf_pmu__scan_file(const struct perf_pmu *pmu, const char *name, const char *fmt, ...) -{ - return EOF; -} - -const char *perf_pmu__name_from_config(struct perf_pmu *pmu __maybe_unused, u64 config __maybe_unused) -{ - return NULL; -} - -struct perf_pmu *perf_pmus__find_by_type(unsigned int type __maybe_unused) -{ - return NULL; -} - -int perf_pmus__num_core_pmus(void) -{ - return 1; -} - -bool evsel__is_aux_event(const struct evsel *evsel __maybe_unused) -{ - return false; -} - -bool perf_pmus__supports_extended_type(void) -{ - return false; -} - -/* - * Add this one here not to drag util/metricgroup.c - */ -int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, - struct rblist *new_metric_events, - struct rblist *old_metric_events) -{ - return 0; -} - -/* - * Add this one here not to drag util/trace-event-info.c - */ -char *tracepoint_id_to_name(u64 config) -{ - return NULL; -} - -/* - * XXX: All these evsel destructors need some better mechanism, like a linked - * list of destructors registered when the relevant code indeed is used instead - * of having more and more calls in perf_evsel__delete(). -- acme - * - * For now, add some more: - * - * Not to drag the BPF bandwagon... - */ -void bpf_counter__destroy(struct evsel *evsel); -int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); -int bpf_counter__disable(struct evsel *evsel); - -void bpf_counter__destroy(struct evsel *evsel __maybe_unused) -{ -} - -int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, int cpu __maybe_unused, int fd __maybe_unused) -{ - return 0; -} - -int bpf_counter__disable(struct evsel *evsel __maybe_unused) -{ - return 0; -} - -// not to drag util/bpf-filter.c -#ifdef HAVE_BPF_SKEL -int perf_bpf_filter__prepare(struct evsel *evsel __maybe_unused) -{ - return 0; -} - -int perf_bpf_filter__destroy(struct evsel *evsel __maybe_unused) -{ - return 0; -} -#endif - -/* - * Support debug printing even though util/debug.c is not linked. That means - * implementing 'verbose' and 'eprintf'. - */ -int verbose; -int debug_kmaps; -int debug_peo_args; - -int eprintf(int level, int var, const char *fmt, ...); - -int eprintf(int level, int var, const char *fmt, ...) 
-{ - va_list args; - int ret = 0; - - if (var >= level) { - va_start(args, fmt); - ret = vfprintf(stderr, fmt, args); - va_end(args); - } - return ret; -} - -/* Define PyVarObject_HEAD_INIT for python 2.5 */ -#ifndef PyVarObject_HEAD_INIT -# define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size, -#endif - -#if PY_MAJOR_VERSION < 3 -PyMODINIT_FUNC initperf(void); -#else PyMODINIT_FUNC PyInit_perf(void); -#endif #define member_def(type, member, ptype, help) \ { #member, ptype, \ @@ -239,7 +46,7 @@ struct pyrf_event { }; #define sample_members \ - sample_member_def(sample_ip, ip, T_ULONGLONG, "event type"), \ + sample_member_def(sample_ip, ip, T_ULONGLONG, "event ip"), \ sample_member_def(sample_pid, pid, T_INT, "event pid"), \ sample_member_def(sample_tid, tid, T_INT, "event tid"), \ sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"), \ @@ -249,7 +56,7 @@ struct pyrf_event { sample_member_def(sample_period, period, T_ULONGLONG, "event period"), \ sample_member_def(sample_cpu, cpu, T_UINT, "event cpu"), -static char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object."); +static const char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object."); static PyMemberDef pyrf_mmap_event__members[] = { sample_members @@ -264,7 +71,7 @@ static PyMemberDef pyrf_mmap_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent) +static PyObject *pyrf_mmap_event__repr(const struct pyrf_event *pevent) { PyObject *ret; char *s; @@ -277,7 +84,7 @@ static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent) pevent->event.mmap.pgoff, pevent->event.mmap.filename) < 0) { ret = PyErr_NoMemory(); } else { - ret = _PyUnicode_FromString(s); + ret = PyUnicode_FromString(s); free(s); } return ret; @@ -293,7 +100,7 @@ static PyTypeObject pyrf_mmap_event__type = { .tp_repr = (reprfunc)pyrf_mmap_event__repr, }; -static char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object."); +static const char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object."); static PyMemberDef pyrf_task_event__members[] = { sample_members @@ -306,9 +113,9 @@ static PyMemberDef pyrf_task_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent) +static PyObject *pyrf_task_event__repr(const struct pyrf_event *pevent) { - return _PyUnicode_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, " + return PyUnicode_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, " "ptid: %u, time: %" PRI_lu64 "}", pevent->event.header.type == PERF_RECORD_FORK ? 
"fork" : "exit", pevent->event.fork.pid, @@ -328,7 +135,7 @@ static PyTypeObject pyrf_task_event__type = { .tp_repr = (reprfunc)pyrf_task_event__repr, }; -static char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object."); +static const char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object."); static PyMemberDef pyrf_comm_event__members[] = { sample_members @@ -339,9 +146,9 @@ static PyMemberDef pyrf_comm_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent) +static PyObject *pyrf_comm_event__repr(const struct pyrf_event *pevent) { - return _PyUnicode_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }", + return PyUnicode_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }", pevent->event.comm.pid, pevent->event.comm.tid, pevent->event.comm.comm); @@ -357,7 +164,7 @@ static PyTypeObject pyrf_comm_event__type = { .tp_repr = (reprfunc)pyrf_comm_event__repr, }; -static char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object."); +static const char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object."); static PyMemberDef pyrf_throttle_event__members[] = { sample_members @@ -368,11 +175,12 @@ static PyMemberDef pyrf_throttle_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent) +static PyObject *pyrf_throttle_event__repr(const struct pyrf_event *pevent) { - struct perf_record_throttle *te = (struct perf_record_throttle *)(&pevent->event.header + 1); + const struct perf_record_throttle *te = (const struct perf_record_throttle *) + (&pevent->event.header + 1); - return _PyUnicode_FromFormat("{ type: %sthrottle, time: %" PRI_lu64 ", id: %" PRI_lu64 + return PyUnicode_FromFormat("{ type: %sthrottle, time: %" PRI_lu64 ", id: %" PRI_lu64 ", stream_id: %" PRI_lu64 " }", pevent->event.header.type == PERF_RECORD_THROTTLE ? 
"" : "un", te->time, te->id, te->stream_id); @@ -388,7 +196,7 @@ static PyTypeObject pyrf_throttle_event__type = { .tp_repr = (reprfunc)pyrf_throttle_event__repr, }; -static char pyrf_lost_event__doc[] = PyDoc_STR("perf lost event object."); +static const char pyrf_lost_event__doc[] = PyDoc_STR("perf lost event object."); static PyMemberDef pyrf_lost_event__members[] = { sample_members @@ -397,7 +205,7 @@ static PyMemberDef pyrf_lost_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent) +static PyObject *pyrf_lost_event__repr(const struct pyrf_event *pevent) { PyObject *ret; char *s; @@ -407,7 +215,7 @@ static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent) pevent->event.lost.id, pevent->event.lost.lost) < 0) { ret = PyErr_NoMemory(); } else { - ret = _PyUnicode_FromString(s); + ret = PyUnicode_FromString(s); free(s); } return ret; @@ -423,7 +231,7 @@ static PyTypeObject pyrf_lost_event__type = { .tp_repr = (reprfunc)pyrf_lost_event__repr, }; -static char pyrf_read_event__doc[] = PyDoc_STR("perf read event object."); +static const char pyrf_read_event__doc[] = PyDoc_STR("perf read event object."); static PyMemberDef pyrf_read_event__members[] = { sample_members @@ -432,9 +240,9 @@ static PyMemberDef pyrf_read_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_read_event__repr(struct pyrf_event *pevent) +static PyObject *pyrf_read_event__repr(const struct pyrf_event *pevent) { - return _PyUnicode_FromFormat("{ type: read, pid: %u, tid: %u }", + return PyUnicode_FromFormat("{ type: read, pid: %u, tid: %u }", pevent->event.read.pid, pevent->event.read.tid); /* @@ -453,7 +261,7 @@ static PyTypeObject pyrf_read_event__type = { .tp_repr = (reprfunc)pyrf_read_event__repr, }; -static char pyrf_sample_event__doc[] = PyDoc_STR("perf sample event object."); +static const char pyrf_sample_event__doc[] = PyDoc_STR("perf sample event object."); static PyMemberDef pyrf_sample_event__members[] = { sample_members @@ -461,7 +269,13 @@ static PyMemberDef pyrf_sample_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent) +static void pyrf_sample_event__delete(struct pyrf_event *pevent) +{ + perf_sample__exit(&pevent->sample); + Py_TYPE(pevent)->tp_free((PyObject*)pevent); +} + +static PyObject *pyrf_sample_event__repr(const struct pyrf_event *pevent) { PyObject *ret; char *s; @@ -469,20 +283,20 @@ static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent) if (asprintf(&s, "{ type: sample }") < 0) { ret = PyErr_NoMemory(); } else { - ret = _PyUnicode_FromString(s); + ret = PyUnicode_FromString(s); free(s); } return ret; } #ifdef HAVE_LIBTRACEEVENT -static bool is_tracepoint(struct pyrf_event *pevent) +static bool is_tracepoint(const struct pyrf_event *pevent) { return pevent->evsel->core.attr.type == PERF_TYPE_TRACEPOINT; } static PyObject* -tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) +tracepoint_field(const struct pyrf_event *pe, struct tep_format_field *field) { struct tep_handle *pevent = field->event->tep; void *data = pe->sample.raw_data; @@ -503,7 +317,7 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { - ret = _PyUnicode_FromString((char *)data + offset); + ret = PyUnicode_FromString((char *)data + offset); } else { ret = PyByteArray_FromStringAndSize((const char *) data + offset, len); field->flags &= 
~TEP_FIELD_IS_STRING; @@ -525,25 +339,26 @@ static PyObject* get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) { - const char *str = _PyUnicode_AsString(PyObject_Str(attr_name)); struct evsel *evsel = pevent->evsel; + struct tep_event *tp_format = evsel__tp_format(evsel); struct tep_format_field *field; - if (!evsel->tp_format) { - struct tep_event *tp_format; - - tp_format = trace_event__tp_format_id(evsel->core.attr.config); - if (IS_ERR_OR_NULL(tp_format)) - return NULL; + if (IS_ERR_OR_NULL(tp_format)) + return NULL; - evsel->tp_format = tp_format; - } + PyObject *obj = PyObject_Str(attr_name); + if (obj == NULL) + return NULL; - field = tep_find_any_field(evsel->tp_format, str); - if (!field) + const char *str = PyUnicode_AsUTF8(obj); + if (str == NULL) { + Py_DECREF(obj); return NULL; + } - return tracepoint_field(pevent, field); + field = tep_find_any_field(tp_format, str); + Py_DECREF(obj); + return field ? tracepoint_field(pevent, field) : NULL; } #endif /* HAVE_LIBTRACEEVENT */ @@ -571,7 +386,7 @@ static PyTypeObject pyrf_sample_event__type = { .tp_getattro = (getattrofunc) pyrf_sample_event__getattro, }; -static char pyrf_context_switch_event__doc[] = PyDoc_STR("perf context_switch event object."); +static const char pyrf_context_switch_event__doc[] = PyDoc_STR("perf context_switch event object."); static PyMemberDef pyrf_context_switch_event__members[] = { sample_members @@ -581,7 +396,7 @@ static PyMemberDef pyrf_context_switch_event__members[] = { { .name = NULL, }, }; -static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent) +static PyObject *pyrf_context_switch_event__repr(const struct pyrf_event *pevent) { PyObject *ret; char *s; @@ -592,7 +407,7 @@ static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent) !!(pevent->event.header.misc & PERF_RECORD_MISC_SWITCH_OUT)) < 0) { ret = PyErr_NoMemory(); } else { - ret = _PyUnicode_FromString(s); + ret = PyUnicode_FromString(s); free(s); } return ret; @@ -619,6 +434,9 @@ static int pyrf_event__setup_types(void) pyrf_sample_event__type.tp_new = pyrf_context_switch_event__type.tp_new = pyrf_throttle_event__type.tp_new = PyType_GenericNew; + + pyrf_sample_event__type.tp_dealloc = (destructor)pyrf_sample_event__delete; + err = PyType_Ready(&pyrf_mmap_event__type); if (err < 0) goto out; @@ -661,7 +479,7 @@ static PyTypeObject *pyrf_event__type[] = { [PERF_RECORD_SWITCH_CPU_WIDE] = &pyrf_context_switch_event__type, }; -static PyObject *pyrf_event__new(union perf_event *event) +static PyObject *pyrf_event__new(const union perf_event *event) { struct pyrf_event *pevent; PyTypeObject *ptype; @@ -669,8 +487,19 @@ static PyObject *pyrf_event__new(union perf_event *event) if ((event->header.type < PERF_RECORD_MMAP || event->header.type > PERF_RECORD_SAMPLE) && !(event->header.type == PERF_RECORD_SWITCH || - event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)) + event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)) { + PyErr_Format(PyExc_TypeError, "Unexpected header type %u", + event->header.type); + return NULL; + } + + // FIXME this better be dynamic or we need to parse everything + // before calling perf_mmap__consume(), including tracepoint fields.
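/*
 * [Editor's sketch, not part of the patch: the check that follows bounds the
 * copy of the ring-buffer event into the fixed-size 'event' member of struct
 * pyrf_event. The dynamic alternative the FIXME hints at could instead size
 * the allocation from the event header, e.g. via a variable-size object:
 *
 *	pevent = PyObject_NewVar(struct pyrf_event, ptype, event->header.size);
 *
 * Illustrative only: PyObject_NewVar() sizes the trailing storage as
 * nitems * tp_itemsize, so this assumes tp_itemsize == 1, while the
 * pyrf_event types today use plain PyObject_New() with tp_itemsize == 0.]
 */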
+ if (sizeof(pevent->event) < event->header.size) { + PyErr_Format(PyExc_TypeError, "Unexpected event size: %zu < %u", + sizeof(pevent->event), event->header.size); return NULL; + } ptype = pyrf_event__type[event->header.type]; pevent = PyObject_New(struct pyrf_event, ptype); @@ -718,8 +547,10 @@ static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i) { struct pyrf_cpu_map *pcpus = (void *)obj; - if (i >= perf_cpu_map__nr(pcpus->cpus)) + if (i >= perf_cpu_map__nr(pcpus->cpus)) { + PyErr_SetString(PyExc_IndexError, "Index out of range"); return NULL; + } return Py_BuildValue("i", perf_cpu_map__cpu(pcpus->cpus, i).cpu); } @@ -729,7 +560,7 @@ static PySequenceMethods pyrf_cpu_map__sequence_methods = { .sq_item = pyrf_cpu_map__item, }; -static char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object."); +static const char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object."); static PyTypeObject pyrf_cpu_map__type = { PyVarObject_HEAD_INIT(NULL, 0) @@ -757,14 +588,14 @@ struct pyrf_thread_map { static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = { "pid", "tid", "uid", NULL }; - int pid = -1, tid = -1, uid = UINT_MAX; + static char *kwlist[] = { "pid", "tid", NULL }; + int pid = -1, tid = -1; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii", - kwlist, &pid, &tid, &uid)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", + kwlist, &pid, &tid)) return -1; - pthreads->threads = thread_map__new(pid, tid, uid); + pthreads->threads = thread_map__new(pid, tid); if (pthreads->threads == NULL) return -1; return 0; @@ -787,8 +618,10 @@ static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i) { struct pyrf_thread_map *pthreads = (void *)obj; - if (i >= perf_thread_map__nr(pthreads->threads)) + if (i >= perf_thread_map__nr(pthreads->threads)) { + PyErr_SetString(PyExc_IndexError, "Index out of range"); return NULL; + } return Py_BuildValue("i", perf_thread_map__pid(pthreads->threads, i)); } @@ -798,7 +631,7 @@ static PySequenceMethods pyrf_thread_map__sequence_methods = { .sq_item = pyrf_thread_map__item, }; -static char pyrf_thread_map__doc[] = PyDoc_STR("thread map object."); +static const char pyrf_thread_map__doc[] = PyDoc_STR("thread map object."); static PyTypeObject pyrf_thread_map__type = { PyVarObject_HEAD_INIT(NULL, 0) @@ -817,6 +650,295 @@ static int pyrf_thread_map__setup_types(void) return PyType_Ready(&pyrf_thread_map__type); } +/** + * A python wrapper for a perf_pmu, which is globally owned by the pmus.c code.
+ */ +struct pyrf_pmu { + PyObject_HEAD + + struct perf_pmu *pmu; +}; + +static void pyrf_pmu__delete(struct pyrf_pmu *ppmu) +{ + Py_TYPE(ppmu)->tp_free((PyObject *)ppmu); +} + +static PyObject *pyrf_pmu__name(PyObject *self) +{ + struct pyrf_pmu *ppmu = (void *)self; + + return PyUnicode_FromString(ppmu->pmu->name); +} + +static bool add_to_dict(PyObject *dict, const char *key, const char *value) +{ + PyObject *pkey, *pvalue; + bool ret; + + if (value == NULL) + return true; + + pkey = PyUnicode_FromString(key); + pvalue = PyUnicode_FromString(value); + + ret = pkey && pvalue && PyDict_SetItem(dict, pkey, pvalue) == 0; + Py_XDECREF(pkey); + Py_XDECREF(pvalue); + return ret; +} + +static int pyrf_pmu__events_cb(void *state, struct pmu_event_info *info) +{ + PyObject *py_list = state; + PyObject *dict = PyDict_New(); + + if (!dict) + return -ENOMEM; + + if (!add_to_dict(dict, "name", info->name) || + !add_to_dict(dict, "alias", info->alias) || + !add_to_dict(dict, "scale_unit", info->scale_unit) || + !add_to_dict(dict, "desc", info->desc) || + !add_to_dict(dict, "long_desc", info->long_desc) || + !add_to_dict(dict, "encoding_desc", info->encoding_desc) || + !add_to_dict(dict, "topic", info->topic) || + !add_to_dict(dict, "event_type_desc", info->event_type_desc) || + !add_to_dict(dict, "str", info->str) || + !add_to_dict(dict, "deprecated", info->deprecated ? "deprecated" : NULL) || + PyList_Append(py_list, dict) != 0) { + Py_DECREF(dict); + return -ENOMEM; + } + Py_DECREF(dict); + return 0; +} + +static PyObject *pyrf_pmu__events(PyObject *self) +{ + struct pyrf_pmu *ppmu = (void *)self; + PyObject *py_list = PyList_New(0); + int ret; + + if (!py_list) + return NULL; + + ret = perf_pmu__for_each_event(ppmu->pmu, + /*skip_duplicate_pmus=*/false, + py_list, + pyrf_pmu__events_cb); + if (ret) { + Py_DECREF(py_list); + errno = -ret; + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + return py_list; +} + +static PyObject *pyrf_pmu__repr(PyObject *self) +{ + struct pyrf_pmu *ppmu = (void *)self; + + return PyUnicode_FromFormat("pmu(%s)", ppmu->pmu->name); +} + +static const char pyrf_pmu__doc[] = PyDoc_STR("perf Performance Monitoring Unit (PMU) object."); + +static PyMethodDef pyrf_pmu__methods[] = { + { + .ml_name = "events", + .ml_meth = (PyCFunction)pyrf_pmu__events, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("Returns a sequence of events encoded as dictionaries.") + }, + { + .ml_name = "name", + .ml_meth = (PyCFunction)pyrf_pmu__name, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("Name of the PMU including suffixes.") + }, + { .ml_name = NULL, } +}; + +/** The python type for a perf.pmu. */ +static PyTypeObject pyrf_pmu__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "perf.pmu", + .tp_basicsize = sizeof(struct pyrf_pmu), + .tp_dealloc = (destructor)pyrf_pmu__delete, + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + .tp_doc = pyrf_pmu__doc, + .tp_methods = pyrf_pmu__methods, + .tp_str = pyrf_pmu__name, + .tp_repr = pyrf_pmu__repr, +}; + +static int pyrf_pmu__setup_types(void) +{ + pyrf_pmu__type.tp_new = PyType_GenericNew; + return PyType_Ready(&pyrf_pmu__type); +} + + +/** A python iterator for pmus that has no equivalent in the C code.
*/ +struct pyrf_pmu_iterator { + PyObject_HEAD + struct perf_pmu *pmu; +}; + +static void pyrf_pmu_iterator__dealloc(struct pyrf_pmu_iterator *self) +{ + Py_TYPE(self)->tp_free((PyObject *) self); +} + +static PyObject *pyrf_pmu_iterator__new(PyTypeObject *type, PyObject *args __maybe_unused, + PyObject *kwds __maybe_unused) +{ + struct pyrf_pmu_iterator *itr = (void *)type->tp_alloc(type, 0); + + if (itr != NULL) + itr->pmu = perf_pmus__scan(/*pmu=*/NULL); + + return (PyObject *) itr; +} + +static PyObject *pyrf_pmu_iterator__iter(PyObject *self) +{ + Py_INCREF(self); + return self; +} + +static PyObject *pyrf_pmu_iterator__iternext(PyObject *self) +{ + struct pyrf_pmu_iterator *itr = (void *)self; + struct pyrf_pmu *ppmu; + + if (itr->pmu == NULL) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + // Create object to return. + ppmu = PyObject_New(struct pyrf_pmu, &pyrf_pmu__type); + if (ppmu) { + ppmu->pmu = itr->pmu; + // Advance iterator. + itr->pmu = perf_pmus__scan(itr->pmu); + } + return (PyObject *)ppmu; +} + +/** The python type for the PMU iterator. */ +static PyTypeObject pyrf_pmu_iterator__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "pmus.iterator", + .tp_doc = "Iterator for the pmus string sequence.", + .tp_basicsize = sizeof(struct pyrf_pmu_iterator), + .tp_itemsize = 0, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_new = pyrf_pmu_iterator__new, + .tp_dealloc = (destructor) pyrf_pmu_iterator__dealloc, + .tp_iter = pyrf_pmu_iterator__iter, + .tp_iternext = pyrf_pmu_iterator__iternext, +}; + +static int pyrf_pmu_iterator__setup_types(void) +{ + return PyType_Ready(&pyrf_pmu_iterator__type); +} + +static PyObject *pyrf__pmus(PyObject *self, PyObject *args) +{ + // Calling the class creates an instance of the iterator. + return PyObject_CallObject((PyObject *) &pyrf_pmu_iterator__type, /*args=*/NULL); +} + +struct pyrf_counts_values { + PyObject_HEAD + + struct perf_counts_values values; +}; + +static const char pyrf_counts_values__doc[] = PyDoc_STR("perf counts values object."); + +static void pyrf_counts_values__delete(struct pyrf_counts_values *pcounts_values) +{ + Py_TYPE(pcounts_values)->tp_free((PyObject *)pcounts_values); +} + +#define counts_values_member_def(member, ptype, help) \ + { #member, ptype, \ + offsetof(struct pyrf_counts_values, values.member), \ + 0, help } + +static PyMemberDef pyrf_counts_values_members[] = { + counts_values_member_def(val, T_ULONG, "Value of event"), + counts_values_member_def(ena, T_ULONG, "Time for which enabled"), + counts_values_member_def(run, T_ULONG, "Time for which running"), + counts_values_member_def(id, T_ULONG, "Unique ID for an event"), + counts_values_member_def(lost, T_ULONG, "Num of lost samples"), + { .name = NULL, }, +}; + +static PyObject *pyrf_counts_values_get_values(struct pyrf_counts_values *self, void *closure) +{ + PyObject *vals = PyList_New(5); + + if (!vals) + return NULL; + for (int i = 0; i < 5; i++) + PyList_SetItem(vals, i, PyLong_FromLong(self->values.values[i])); + + return vals; +} + +static int pyrf_counts_values_set_values(struct pyrf_counts_values *self, PyObject *list, + void *closure) +{ + Py_ssize_t size; + PyObject *item = NULL; + + if (!PyList_Check(list)) { + PyErr_SetString(PyExc_TypeError, "Value assigned must be a list"); + return -1; + } + + size = PyList_Size(list); + for (Py_ssize_t i = 0; i < size; i++) { + item = PyList_GetItem(list, i); + if (!PyLong_Check(item)) { + PyErr_SetString(PyExc_TypeError, "List members should be numbers"); + return -1; + } + 
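/*
 * [Editor's note, not part of the patch: 'values' aliases the fixed
 * five-element val/ena/run/id/lost array inside struct perf_counts_values
 * (see the member definitions above), so the assigned list is assumed to
 * hold at most five entries; the assignment below indexes the array
 * directly with the list position.]
 */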
self->values.values[i] = PyLong_AsLong(item); + } + + return 0; +} + +static PyGetSetDef pyrf_counts_values_getset[] = { + {"values", (getter)pyrf_counts_values_get_values, (setter)pyrf_counts_values_set_values, + "Counter values list (val, ena, run, id, lost)", NULL}, + { .name = NULL, }, +}; + +static PyTypeObject pyrf_counts_values__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "perf.counts_values", + .tp_basicsize = sizeof(struct pyrf_counts_values), + .tp_dealloc = (destructor)pyrf_counts_values__delete, + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + .tp_doc = pyrf_counts_values__doc, + .tp_members = pyrf_counts_values_members, + .tp_getset = pyrf_counts_values_getset, +}; + +static int pyrf_counts_values__setup_types(void) +{ + pyrf_counts_values__type.tp_new = PyType_GenericNew; + return PyType_Ready(&pyrf_counts_values__type); +} + struct pyrf_evsel { PyObject_HEAD @@ -972,6 +1094,106 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel, return Py_None; } +static PyObject *pyrf_evsel__cpus(struct pyrf_evsel *pevsel) +{ + struct pyrf_cpu_map *pcpu_map = PyObject_New(struct pyrf_cpu_map, &pyrf_cpu_map__type); + + if (pcpu_map) + pcpu_map->cpus = perf_cpu_map__get(pevsel->evsel.core.cpus); + + return (PyObject *)pcpu_map; +} + +static PyObject *pyrf_evsel__threads(struct pyrf_evsel *pevsel) +{ + struct pyrf_thread_map *pthread_map = + PyObject_New(struct pyrf_thread_map, &pyrf_thread_map__type); + + if (pthread_map) + pthread_map->threads = perf_thread_map__get(pevsel->evsel.core.threads); + + return (PyObject *)pthread_map; +} + +/* + * Ensure evsel's counts and prev_raw_counts are allocated, the latter + * used by tool PMUs to compute the cumulative count as expected by + * stat's process_counter_values. + */ +static int evsel__ensure_counts(struct evsel *evsel) +{ + int nthreads, ncpus; + + if (evsel->counts != NULL) + return 0; + + nthreads = perf_thread_map__nr(evsel->core.threads); + ncpus = perf_cpu_map__nr(evsel->core.cpus); + + evsel->counts = perf_counts__new(ncpus, nthreads); + if (evsel->counts == NULL) + return -ENOMEM; + + evsel->prev_raw_counts = perf_counts__new(ncpus, nthreads); + if (evsel->prev_raw_counts == NULL) + return -ENOMEM; + + return 0; +} + +static PyObject *pyrf_evsel__read(struct pyrf_evsel *pevsel, + PyObject *args, PyObject *kwargs) +{ + struct evsel *evsel = &pevsel->evsel; + int cpu = 0, cpu_idx, thread = 0, thread_idx; + struct perf_counts_values *old_count, *new_count; + struct pyrf_counts_values *count_values = PyObject_New(struct pyrf_counts_values, + &pyrf_counts_values__type); + + if (!count_values) + return NULL; + + if (!PyArg_ParseTuple(args, "ii", &cpu, &thread)) + return NULL; + + cpu_idx = perf_cpu_map__idx(evsel->core.cpus, (struct perf_cpu){.cpu = cpu}); + if (cpu_idx < 0) { + PyErr_Format(PyExc_TypeError, "CPU %d is not part of evsel's CPUs", cpu); + return NULL; + } + thread_idx = perf_thread_map__idx(evsel->core.threads, thread); + if (thread_idx < 0) { + PyErr_Format(PyExc_TypeError, "Thread %d is not part of evsel's threads", + thread); + return NULL; + } + + if (evsel__ensure_counts(evsel)) + return PyErr_NoMemory(); + + /* Set up pointers to the old and newly read counter values. */ + old_count = perf_counts(evsel->prev_raw_counts, cpu_idx, thread_idx); + new_count = perf_counts(evsel->counts, cpu_idx, thread_idx); + /* Update the value in evsel->counts. */ + evsel__read_counter(evsel, cpu_idx, thread_idx); + /* Copy the value and turn it into the delta from old_count.
*/ + count_values->values = *new_count; + count_values->values.val -= old_count->val; + count_values->values.ena -= old_count->ena; + count_values->values.run -= old_count->run; + /* Save the new count over the old_count for the next read. */ + *old_count = *new_count; + return (PyObject *)count_values; +} + +static PyObject *pyrf_evsel__str(PyObject *self) +{ + struct pyrf_evsel *pevsel = (void *)self; + struct evsel *evsel = &pevsel->evsel; + + return PyUnicode_FromFormat("evsel(%s/%s/)", evsel__pmu_name(evsel), evsel__name(evsel)); +} + static PyMethodDef pyrf_evsel__methods[] = { { .ml_name = "open", @@ -979,10 +1201,50 @@ static PyMethodDef pyrf_evsel__methods[] = { .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = PyDoc_STR("open the event selector file descriptor table.") }, + { + .ml_name = "cpus", + .ml_meth = (PyCFunction)pyrf_evsel__cpus, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("CPUs the event is to be used with.") + }, + { + .ml_name = "threads", + .ml_meth = (PyCFunction)pyrf_evsel__threads, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("threads the event is to be used with.") + }, + { + .ml_name = "read", + .ml_meth = (PyCFunction)pyrf_evsel__read, + .ml_flags = METH_VARARGS | METH_KEYWORDS, + .ml_doc = PyDoc_STR("read counters") + }, { .ml_name = NULL, } }; -static char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object."); +#define evsel_member_def(member, ptype, help) \ + { #member, ptype, \ + offsetof(struct pyrf_evsel, evsel.member), \ + 0, help } + +#define evsel_attr_member_def(member, ptype, help) \ + { #member, ptype, \ + offsetof(struct pyrf_evsel, evsel.core.attr.member), \ + 0, help } + +static PyMemberDef pyrf_evsel__members[] = { + evsel_member_def(tracking, T_BOOL, "tracking event."), + evsel_attr_member_def(type, T_UINT, "attribute type."), + evsel_attr_member_def(size, T_UINT, "attribute size."), + evsel_attr_member_def(config, T_ULONGLONG, "attribute config."), + evsel_attr_member_def(sample_period, T_ULONGLONG, "attribute sample_period."), + evsel_attr_member_def(sample_type, T_ULONGLONG, "attribute sample_type."), + evsel_attr_member_def(read_format, T_ULONGLONG, "attribute read_format."), + evsel_attr_member_def(wakeup_events, T_UINT, "attribute wakeup_events."), + { .name = NULL, }, +}; + +static const char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object."); static PyTypeObject pyrf_evsel__type = { PyVarObject_HEAD_INIT(NULL, 0) @@ -991,8 +1253,11 @@ static PyTypeObject pyrf_evsel__type = { .tp_dealloc = (destructor)pyrf_evsel__delete, .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, .tp_doc = pyrf_evsel__doc, + .tp_members = pyrf_evsel__members, .tp_methods = pyrf_evsel__methods, .tp_init = (initproc)pyrf_evsel__init, + .tp_str = pyrf_evsel__str, + .tp_repr = pyrf_evsel__str, }; static int pyrf_evsel__setup_types(void) @@ -1029,6 +1294,188 @@ static void pyrf_evlist__delete(struct pyrf_evlist *pevlist) Py_TYPE(pevlist)->tp_free((PyObject*)pevlist); } +static PyObject *pyrf_evlist__all_cpus(struct pyrf_evlist *pevlist) +{ + struct pyrf_cpu_map *pcpu_map = PyObject_New(struct pyrf_cpu_map, &pyrf_cpu_map__type); + + if (pcpu_map) + pcpu_map->cpus = perf_cpu_map__get(pevlist->evlist.core.all_cpus); + + return (PyObject *)pcpu_map; +} + +static PyObject *pyrf_evlist__metrics(struct pyrf_evlist *pevlist) +{ + PyObject *list = PyList_New(/*len=*/0); + struct rb_node *node; + + if (!list) + return NULL; + + for (node = rb_first_cached(&pevlist->evlist.metric_events.entries); node; + node = rb_next(node)) { + struct 
metric_event *me = container_of(node, struct metric_event, nd); + struct list_head *pos; + + list_for_each(pos, &me->head) { + struct metric_expr *expr = container_of(pos, struct metric_expr, nd); + PyObject *str = PyUnicode_FromString(expr->metric_name); + + if (!str || PyList_Append(list, str) != 0) { + Py_DECREF(list); + return NULL; + } + Py_DECREF(str); + } + } + return list; +} + +static int prepare_metric(const struct metric_expr *mexp, + const struct evsel *evsel, + struct expr_parse_ctx *pctx, + int cpu_idx, int thread_idx) +{ + struct evsel * const *metric_events = mexp->metric_events; + struct metric_ref *metric_refs = mexp->metric_refs; + + for (int i = 0; metric_events[i]; i++) { + struct evsel *cur = metric_events[i]; + double val, ena, run; + int ret, source_count = 0; + struct perf_counts_values *old_count, *new_count; + char *n = strdup(evsel__metric_id(cur)); + + if (!n) + return -ENOMEM; + + /* + * If there are multiple uncore PMUs and we're not reading the + * leader's stats, determine the stats for the appropriate + * uncore PMU. + */ + if (evsel && evsel->metric_leader && + evsel->pmu != evsel->metric_leader->pmu && + cur->pmu == evsel->metric_leader->pmu) { + struct evsel *pos; + + evlist__for_each_entry(evsel->evlist, pos) { + if (pos->pmu != evsel->pmu) + continue; + if (pos->metric_leader != cur) + continue; + cur = pos; + source_count = 1; + break; + } + } + + if (source_count == 0) + source_count = evsel__source_count(cur); + + ret = evsel__ensure_counts(cur); + if (ret) + return ret; + + /* Set up pointers to the old and newly read counter values. */ + old_count = perf_counts(cur->prev_raw_counts, cpu_idx, thread_idx); + new_count = perf_counts(cur->counts, cpu_idx, thread_idx); + /* Update the value in cur->counts. */ + evsel__read_counter(cur, cpu_idx, thread_idx); + + val = new_count->val - old_count->val; + ena = new_count->ena - old_count->ena; + run = new_count->run - old_count->run; + + if (ena != run && run != 0) + val = val * ena / run; + ret = expr__add_id_val_source_count(pctx, n, val, source_count); + if (ret) + return ret; + } + + for (int i = 0; metric_refs && metric_refs[i].metric_name; i++) { + int ret = expr__add_ref(pctx, &metric_refs[i]); + + if (ret) + return ret; + } + + return 0; +} + +static PyObject *pyrf_evlist__compute_metric(struct pyrf_evlist *pevlist, + PyObject *args, PyObject *kwargs) +{ + int ret, cpu = 0, cpu_idx = 0, thread = 0, thread_idx = 0; + const char *metric; + struct rb_node *node; + struct metric_expr *mexp = NULL; + struct expr_parse_ctx *pctx; + double result = 0; + struct evsel *metric_evsel = NULL; + + if (!PyArg_ParseTuple(args, "sii", &metric, &cpu, &thread)) + return NULL; + + for (node = rb_first_cached(&pevlist->evlist.metric_events.entries); + mexp == NULL && node; + node = rb_next(node)) { + struct metric_event *me = container_of(node, struct metric_event, nd); + struct list_head *pos; + + list_for_each(pos, &me->head) { + struct metric_expr *e = container_of(pos, struct metric_expr, nd); + struct evsel *pos2; + + if (strcmp(e->metric_name, metric)) + continue; + + if (e->metric_events[0] == NULL) + continue; + + evlist__for_each_entry(&pevlist->evlist, pos2) { + if (pos2->metric_leader != e->metric_events[0]) + continue; + cpu_idx = perf_cpu_map__idx(pos2->core.cpus, + (struct perf_cpu){.cpu = cpu}); + if (cpu_idx < 0) + continue; + + thread_idx = perf_thread_map__idx(pos2->core.threads, thread); + if (thread_idx < 0) + continue; + metric_evsel = pos2; + mexp = e; + goto done; + } + } + } +done: + if 
(!mexp) { + PyErr_Format(PyExc_TypeError, "Unknown metric '%s' for CPU '%d' and thread '%d'", + metric, cpu, thread); + return NULL; + } + + pctx = expr__ctx_new(); + if (!pctx) + return PyErr_NoMemory(); + + ret = prepare_metric(mexp, metric_evsel, pctx, cpu_idx, thread_idx); + if (ret) { + expr__ctx_free(pctx); + errno = -ret; + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + if (expr__parse(&result, pctx, mexp->metric_expr)) + result = 0.0; + + expr__ctx_free(pctx); + return PyFloat_FromDouble(result); +} + static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist, PyObject *args, PyObject *kwargs) { @@ -1078,17 +1525,8 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, for (i = 0; i < evlist->core.pollfd.nr; ++i) { PyObject *file; -#if PY_MAJOR_VERSION < 3 - FILE *fp = fdopen(evlist->core.pollfd.entries[i].fd, "r"); - - if (fp == NULL) - goto free_list; - - file = PyFile_FromFile(fp, "perf", "r", NULL); -#else file = PyFile_FromFd(evlist->core.pollfd.entries[i].fd, "perf", "r", -1, NULL, NULL, NULL, 0); -#endif if (file == NULL) goto free_list; @@ -1154,8 +1592,10 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, return NULL; md = get_md(evlist, cpu); - if (!md) + if (!md) { + PyErr_Format(PyExc_TypeError, "Unknown CPU '%d'", cpu); return NULL; + } if (perf_mmap__read_init(&md->core) < 0) goto end; @@ -1171,20 +1611,22 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, evsel = evlist__event2evsel(evlist, event); if (!evsel) { + Py_DECREF(pyevent); Py_INCREF(Py_None); return Py_None; } pevent->evsel = evsel; - err = evsel__parse_sample(evsel, event, &pevent->sample); - - /* Consume the even only after we parsed it out. */ perf_mmap__consume(&md->core); - if (err) + err = evsel__parse_sample(evsel, &pevent->event, &pevent->sample); + if (err) { + Py_DECREF(pyevent); return PyErr_Format(PyExc_OSError, "perf: can't parse sample, err=%d", err); + } + return pyevent; } end: @@ -1206,8 +1648,75 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist, return Py_None; } +static PyObject *pyrf_evlist__close(struct pyrf_evlist *pevlist) +{ + struct evlist *evlist = &pevlist->evlist; + + evlist__close(evlist); + + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject *pyrf_evlist__config(struct pyrf_evlist *pevlist) +{ + struct record_opts opts = { + .sample_time = true, + .mmap_pages = UINT_MAX, + .user_freq = UINT_MAX, + .user_interval = ULLONG_MAX, + .freq = 4000, + .target = { + .uses_mmap = true, + .default_per_cpu = true, + }, + .nr_threads_synthesize = 1, + .ctl_fd = -1, + .ctl_fd_ack = -1, + .no_buffering = true, + .no_inherit = true, + }; + struct evlist *evlist = &pevlist->evlist; + + evlist__config(evlist, &opts, &callchain_param); + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject *pyrf_evlist__disable(struct pyrf_evlist *pevlist) +{ + evlist__disable(&pevlist->evlist); + Py_INCREF(Py_None); + return Py_None; +} + +static PyObject *pyrf_evlist__enable(struct pyrf_evlist *pevlist) +{ + evlist__enable(&pevlist->evlist); + Py_INCREF(Py_None); + return Py_None; +} + static PyMethodDef pyrf_evlist__methods[] = { { + .ml_name = "all_cpus", + .ml_meth = (PyCFunction)pyrf_evlist__all_cpus, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("CPU map union of all evsel CPU maps.") + }, + { + .ml_name = "metrics", + .ml_meth = (PyCFunction)pyrf_evlist__metrics, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("List of metric names within the evlist.") + }, + { + .ml_name = "compute_metric", 
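/*
 * [Editor's note, not part of the patch: compute_metric() above evaluates a
 * metric expression from per-event count deltas. prepare_metric() applies
 * the usual multiplexing correction before expr__parse() runs: with val the
 * raw count delta, ena the enabled-time delta and run the running-time
 * delta,
 *
 *	val_scaled = val * ena / run	(when run != 0 and ena != run)
 *
 * so an event that counted 1000 while scheduled in for a quarter of its
 * enabled time contributes an estimated 4000 to the expression.]
 */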
+ .ml_meth = (PyCFunction)pyrf_evlist__compute_metric, + .ml_flags = METH_VARARGS | METH_KEYWORDS, + .ml_doc = PyDoc_STR("compute metric for given name, cpu and thread") + }, + { .ml_name = "mmap", .ml_meth = (PyCFunction)pyrf_evlist__mmap, .ml_flags = METH_VARARGS | METH_KEYWORDS, @@ -1220,6 +1729,12 @@ static PyMethodDef pyrf_evlist__methods[] = { .ml_doc = PyDoc_STR("open the file descriptors.") }, { + .ml_name = "close", + .ml_meth = (PyCFunction)pyrf_evlist__close, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("close the file descriptors.") + }, + { .ml_name = "poll", .ml_meth = (PyCFunction)pyrf_evlist__poll, .ml_flags = METH_VARARGS | METH_KEYWORDS, @@ -1243,6 +1758,24 @@ static PyMethodDef pyrf_evlist__methods[] = { .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = PyDoc_STR("reads an event.") }, + { + .ml_name = "config", + .ml_meth = (PyCFunction)pyrf_evlist__config, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("Apply default record options to the evlist.") + }, + { + .ml_name = "disable", + .ml_meth = (PyCFunction)pyrf_evlist__disable, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("Disable the evsels in the evlist.") + }, + { + .ml_name = "enable", + .ml_meth = (PyCFunction)pyrf_evlist__enable, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("Enable the evsels in the evlist.") + }, { .ml_name = NULL, } }; @@ -1258,8 +1791,10 @@ static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i) struct pyrf_evlist *pevlist = (void *)obj; struct evsel *pos; - if (i >= pevlist->evlist.core.nr_entries) + if (i >= pevlist->evlist.core.nr_entries) { + PyErr_SetString(PyExc_IndexError, "Index out of range"); return NULL; + } evlist__for_each_entry(&pevlist->evlist, pos) { if (i-- == 0) @@ -1269,12 +1804,36 @@ static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i) return Py_BuildValue("O", container_of(pos, struct pyrf_evsel, evsel)); } +static PyObject *pyrf_evlist__str(PyObject *self) +{ + struct pyrf_evlist *pevlist = (void *)self; + struct evsel *pos; + struct strbuf sb = STRBUF_INIT; + bool first = true; + PyObject *result; + + strbuf_addstr(&sb, "evlist(["); + evlist__for_each_entry(&pevlist->evlist, pos) { + if (!first) + strbuf_addch(&sb, ','); + if (!pos->pmu) + strbuf_addstr(&sb, evsel__name(pos)); + else + strbuf_addf(&sb, "%s/%s/", pos->pmu->name, evsel__name(pos)); + first = false; + } + strbuf_addstr(&sb, "])"); + result = PyUnicode_FromString(sb.buf); + strbuf_release(&sb); + return result; +} + static PySequenceMethods pyrf_evlist__sequence_methods = { .sq_length = pyrf_evlist__length, .sq_item = pyrf_evlist__item, }; -static char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object."); +static const char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object."); static PyTypeObject pyrf_evlist__type = { PyVarObject_HEAD_INIT(NULL, 0) @@ -1286,6 +1845,8 @@ static PyTypeObject pyrf_evlist__type = { .tp_doc = pyrf_evlist__doc, .tp_methods = pyrf_evlist__methods, .tp_init = (initproc)pyrf_evlist__init, + .tp_repr = pyrf_evlist__str, + .tp_str = pyrf_evlist__str, }; static int pyrf_evlist__setup_types(void) @@ -1296,10 +1857,12 @@ static int pyrf_evlist__setup_types(void) #define PERF_CONST(name) { #name, PERF_##name } -static struct { +struct perf_constant { const char *name; int value; -} perf__constants[] = { +}; + +static const struct perf_constant perf__constants[] = { PERF_CONST(TYPE_HARDWARE), PERF_CONST(TYPE_SOFTWARE), PERF_CONST(TYPE_TRACEPOINT), @@ -1380,10 +1943,6 @@ static struct { static PyObject *pyrf__tracepoint(struct 
pyrf_evsel *pevsel, PyObject *args, PyObject *kwargs) { -#ifndef HAVE_LIBTRACEEVENT - return NULL; -#else - struct tep_event *tp_format; static char *kwlist[] = { "sys", "name", NULL }; char *sys = NULL; char *name = NULL; @@ -1392,36 +1951,296 @@ static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel, &sys, &name)) return NULL; - tp_format = trace_event__tp_format(sys, name); - if (IS_ERR(tp_format)) - return _PyLong_FromLong(-1); + return PyLong_FromLong(tp_pmu__id(sys, name)); +} + +static PyObject *pyrf_evsel__from_evsel(struct evsel *evsel) +{ + struct pyrf_evsel *pevsel = PyObject_New(struct pyrf_evsel, &pyrf_evsel__type); + + if (!pevsel) + return NULL; + + memset(&pevsel->evsel, 0, sizeof(pevsel->evsel)); + evsel__init(&pevsel->evsel, &evsel->core.attr, evsel->core.idx); + + evsel__clone(&pevsel->evsel, evsel); + if (evsel__is_group_leader(evsel)) + evsel__set_leader(&pevsel->evsel, &pevsel->evsel); + return (PyObject *)pevsel; +} + +static int evlist__pos(struct evlist *evlist, struct evsel *evsel) +{ + struct evsel *pos; + int idx = 0; + + evlist__for_each_entry(evlist, pos) { + if (evsel == pos) + return idx; + idx++; + } + return -1; +} + +static struct evsel *evlist__at(struct evlist *evlist, int idx) +{ + struct evsel *pos; + int idx2 = 0; + + evlist__for_each_entry(evlist, pos) { + if (idx == idx2) + return pos; + idx2++; + } + return NULL; +} + +static PyObject *pyrf_evlist__from_evlist(struct evlist *evlist) +{ + struct pyrf_evlist *pevlist = PyObject_New(struct pyrf_evlist, &pyrf_evlist__type); + struct evsel *pos; + struct rb_node *node; + + if (!pevlist) + return NULL; + + memset(&pevlist->evlist, 0, sizeof(pevlist->evlist)); + evlist__init(&pevlist->evlist, evlist->core.all_cpus, evlist->core.threads); + evlist__for_each_entry(evlist, pos) { + struct pyrf_evsel *pevsel = (void *)pyrf_evsel__from_evsel(pos); + + evlist__add(&pevlist->evlist, &pevsel->evsel); + } + evlist__for_each_entry(&pevlist->evlist, pos) { + struct evsel *leader = evsel__leader(pos); - return _PyLong_FromLong(tp_format->id); -#endif // HAVE_LIBTRACEEVENT + if (pos != leader) { + int idx = evlist__pos(evlist, leader); + + if (idx >= 0) + evsel__set_leader(pos, evlist__at(&pevlist->evlist, idx)); + else if (leader == NULL) + evsel__set_leader(pos, pos); + } + + leader = pos->metric_leader; + + if (pos != leader) { + int idx = evlist__pos(evlist, leader); + + if (idx >= 0) + pos->metric_leader = evlist__at(&pevlist->evlist, idx); + else if (leader == NULL) + pos->metric_leader = pos; + } + } + metricgroup__copy_metric_events(&pevlist->evlist, /*cgrp=*/NULL, + &pevlist->evlist.metric_events, + &evlist->metric_events); + for (node = rb_first_cached(&pevlist->evlist.metric_events.entries); node; + node = rb_next(node)) { + struct metric_event *me = container_of(node, struct metric_event, nd); + struct list_head *mpos; + int idx = evlist__pos(evlist, me->evsel); + + if (idx >= 0) + me->evsel = evlist__at(&pevlist->evlist, idx); + list_for_each(mpos, &me->head) { + struct metric_expr *e = container_of(mpos, struct metric_expr, nd); + + for (int j = 0; e->metric_events[j]; j++) { + idx = evlist__pos(evlist, e->metric_events[j]); + if (idx >= 0) + e->metric_events[j] = evlist__at(&pevlist->evlist, idx); + } + } + } + return (PyObject *)pevlist; +} + +static PyObject *pyrf__parse_events(PyObject *self, PyObject *args) +{ + const char *input; + struct evlist evlist = {}; + struct parse_events_error err; + PyObject *result; + PyObject *pcpus = NULL, *pthreads = NULL; + struct perf_cpu_map *cpus; + struct 
perf_thread_map *threads; + + if (!PyArg_ParseTuple(args, "s|OO", &input, &pcpus, &pthreads)) + return NULL; + + threads = pthreads ? ((struct pyrf_thread_map *)pthreads)->threads : NULL; + cpus = pcpus ? ((struct pyrf_cpu_map *)pcpus)->cpus : NULL; + + parse_events_error__init(&err); + evlist__init(&evlist, cpus, threads); + if (parse_events(&evlist, input, &err)) { + parse_events_error__print(&err, input); + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + result = pyrf_evlist__from_evlist(&evlist); + evlist__exit(&evlist); + return result; +} + +static PyObject *pyrf__parse_metrics(PyObject *self, PyObject *args) +{ + const char *input, *pmu = NULL; + struct evlist evlist = {}; + PyObject *result; + PyObject *pcpus = NULL, *pthreads = NULL; + struct perf_cpu_map *cpus; + struct perf_thread_map *threads; + int ret; + + if (!PyArg_ParseTuple(args, "s|sOO", &input, &pmu, &pcpus, &pthreads)) + return NULL; + + threads = pthreads ? ((struct pyrf_thread_map *)pthreads)->threads : NULL; + cpus = pcpus ? ((struct pyrf_cpu_map *)pcpus)->cpus : NULL; + + evlist__init(&evlist, cpus, threads); + ret = metricgroup__parse_groups(&evlist, pmu ?: "all", input, + /*metric_no_group=*/ false, + /*metric_no_merge=*/ false, + /*metric_no_threshold=*/ true, + /*user_requested_cpu_list=*/ NULL, + /*system_wide=*/true, + /*hardware_aware_grouping=*/ false); + if (ret) { + errno = -ret; + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + result = pyrf_evlist__from_evlist(&evlist); + evlist__exit(&evlist); + return result; +} + +static PyObject *pyrf__metrics_groups(const struct pmu_metric *pm) +{ + PyObject *groups = PyList_New(/*len=*/0); + const char *mg = pm->metric_group; + + if (!groups) + return NULL; + + while (mg) { + PyObject *val = NULL; + const char *sep = strchr(mg, ';'); + size_t len = sep ? (size_t)(sep - mg) : strlen(mg); + + if (len > 0) { + val = PyUnicode_FromStringAndSize(mg, len); + if (val) + PyList_Append(groups, val); + + Py_XDECREF(val); + } + mg = sep ? sep + 1 : NULL; + } + return groups; +} + +static int pyrf__metrics_cb(const struct pmu_metric *pm, + const struct pmu_metrics_table *table __maybe_unused, + void *vdata) +{ + PyObject *py_list = vdata; + PyObject *dict = PyDict_New(); + PyObject *key = dict ? PyUnicode_FromString("MetricGroup") : NULL; + PyObject *value = key ? 
pyrf__metrics_groups(pm) : NULL; + + if (!value || PyDict_SetItem(dict, key, value) != 0) { + Py_XDECREF(key); + Py_XDECREF(value); + Py_XDECREF(dict); + return -ENOMEM; + } + + if (!add_to_dict(dict, "MetricName", pm->metric_name) || + !add_to_dict(dict, "PMU", pm->pmu) || + !add_to_dict(dict, "MetricExpr", pm->metric_expr) || + !add_to_dict(dict, "MetricThreshold", pm->metric_threshold) || + !add_to_dict(dict, "ScaleUnit", pm->unit) || + !add_to_dict(dict, "Compat", pm->compat) || + !add_to_dict(dict, "BriefDescription", pm->desc) || + !add_to_dict(dict, "PublicDescription", pm->long_desc) || + PyList_Append(py_list, dict) != 0) { + Py_DECREF(dict); + return -ENOMEM; + } + Py_DECREF(dict); + return 0; +} + +static PyObject *pyrf__metrics(PyObject *self, PyObject *args) +{ + const struct pmu_metrics_table *table = pmu_metrics_table__find(); + PyObject *list = PyList_New(/*len=*/0); + int ret; + + if (!list) + return NULL; + + ret = pmu_metrics_table__for_each_metric(table, pyrf__metrics_cb, list); + if (!ret) + ret = pmu_for_each_sys_metric(pyrf__metrics_cb, list); + + if (ret) { + Py_DECREF(list); + errno = -ret; + PyErr_SetFromErrno(PyExc_OSError); + return NULL; + } + return list; } static PyMethodDef perf__methods[] = { { + .ml_name = "metrics", + .ml_meth = (PyCFunction) pyrf__metrics, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR( + "Returns a list of metrics represented as string values in dictionaries.") + }, + { .ml_name = "tracepoint", .ml_meth = (PyCFunction) pyrf__tracepoint, .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = PyDoc_STR("Get tracepoint config.") }, + { + .ml_name = "parse_events", + .ml_meth = (PyCFunction) pyrf__parse_events, + .ml_flags = METH_VARARGS, + .ml_doc = PyDoc_STR("Parse a string of events and return an evlist.") + }, + { + .ml_name = "parse_metrics", + .ml_meth = (PyCFunction) pyrf__parse_metrics, + .ml_flags = METH_VARARGS, + .ml_doc = PyDoc_STR( + "Parse a string of metrics or metric groups and return an evlist.") + }, + { + .ml_name = "pmus", + .ml_meth = (PyCFunction) pyrf__pmus, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("Returns a sequence of pmus.") + }, { .ml_name = NULL, } }; -#if PY_MAJOR_VERSION < 3 -PyMODINIT_FUNC initperf(void) -#else PyMODINIT_FUNC PyInit_perf(void) -#endif { PyObject *obj; int i; PyObject *dict; -#if PY_MAJOR_VERSION < 3 - PyObject *module = Py_InitModule("perf", perf__methods); -#else static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "perf", /* m_name */ @@ -1434,19 +2253,17 @@ PyMODINIT_FUNC PyInit_perf(void) NULL, /* m_free */ }; PyObject *module = PyModule_Create(&moduledef); -#endif if (module == NULL || pyrf_event__setup_types() < 0 || pyrf_evlist__setup_types() < 0 || pyrf_evsel__setup_types() < 0 || pyrf_thread_map__setup_types() < 0 || - pyrf_cpu_map__setup_types() < 0) -#if PY_MAJOR_VERSION < 3 - return; -#else + pyrf_cpu_map__setup_types() < 0 || + pyrf_pmu_iterator__setup_types() < 0 || + pyrf_pmu__setup_types() < 0 || + pyrf_counts_values__setup_types() < 0) return module; -#endif /* The page_size is placed in util object. 
*/ page_size = sysconf(_SC_PAGE_SIZE); @@ -1490,12 +2307,15 @@ PyMODINIT_FUNC PyInit_perf(void) Py_INCREF(&pyrf_cpu_map__type); PyModule_AddObject(module, "cpu_map", (PyObject*)&pyrf_cpu_map__type); + Py_INCREF(&pyrf_counts_values__type); + PyModule_AddObject(module, "counts_values", (PyObject *)&pyrf_counts_values__type); + dict = PyModule_GetDict(module); if (dict == NULL) goto error; for (i = 0; perf__constants[i].name != NULL; i++) { - obj = _PyLong_FromLong(perf__constants[i].value); + obj = PyLong_FromLong(perf__constants[i].value); if (obj == NULL) goto error; PyDict_SetItemString(dict, perf__constants[i].name, obj); @@ -1505,20 +2325,5 @@ PyMODINIT_FUNC PyInit_perf(void) error: if (PyErr_Occurred()) PyErr_SetString(PyExc_ImportError, "perf: Init failed!"); -#if PY_MAJOR_VERSION >= 3 return module; -#endif -} - -/* - * Dummy, to avoid dragging all the test_attr infrastructure in the python - * binding. - */ -void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, - int fd, int group_fd, unsigned long flags) -{ -} - -void evlist__free_stats(struct evlist *evlist) -{ } diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h deleted file mode 100644 index d927a0d25052..000000000000 --- a/tools/perf/util/rb_resort.h +++ /dev/null @@ -1,146 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PERF_RESORT_RB_H_ -#define _PERF_RESORT_RB_H_ -/* - * Template for creating a class to resort an existing rb_tree according to - * a new sort criteria, that must be present in the entries of the source - * rb_tree. - * - * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Quick example, resorting threads by its shortname: - * - * First define the prefix (threads) to be used for the functions and data - * structures created, and provide an expression for the sorting, then the - * fields to be present in each of the entries in the new, sorted, rb_tree. - * - * The body of the init function should collect the fields, maybe - * pre-calculating them from multiple entries in the original 'entry' from - * the rb_tree used as a source for the entries to be sorted: - -DEFINE_RB_RESORT_RB(threads, strcmp(a->thread->shortname, - b->thread->shortname) < 0, - struct thread *thread; -) -{ - entry->thread = rb_entry(nd, struct thread, rb_node); -} - - * After this it is just a matter of instantiating it and iterating it, - * for a few data structures with existing rb_trees, such as 'struct machine', - * helpers are available to get the rb_root and the nr_entries: - - DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr); - - * This will instantiate the new rb_tree and a cursor for it, that can be used as: - - struct rb_node *nd; - - resort_rb__for_each_entry(nd, threads) { - struct thread *t = threads_entry; - printf("%s: %d\n", t->shortname, t->tid); - } - - * Then delete it: - - resort_rb__delete(threads); - - * The name of the data structures and functions will have a _sorted suffix - * right before the method names, i.e. will look like: - * - * struct threads_sorted_entry {} - * threads_sorted__insert() - */ - -#define DEFINE_RESORT_RB(__name, __comp, ...) 
\ -struct __name##_sorted_entry { \ - struct rb_node rb_node; \ - __VA_ARGS__ \ -}; \ -static void __name##_sorted__init_entry(struct rb_node *nd, \ - struct __name##_sorted_entry *entry); \ - \ -static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb) \ -{ \ - struct __name##_sorted_entry *a, *b; \ - a = rb_entry(nda, struct __name##_sorted_entry, rb_node); \ - b = rb_entry(ndb, struct __name##_sorted_entry, rb_node); \ - return __comp; \ -} \ - \ -struct __name##_sorted { \ - struct rb_root entries; \ - struct __name##_sorted_entry nd[0]; \ -}; \ - \ -static void __name##_sorted__insert(struct __name##_sorted *sorted, \ - struct rb_node *sorted_nd) \ -{ \ - struct rb_node **p = &sorted->entries.rb_node, *parent = NULL; \ - while (*p != NULL) { \ - parent = *p; \ - if (__name##_sorted__cmp(sorted_nd, parent)) \ - p = &(*p)->rb_left; \ - else \ - p = &(*p)->rb_right; \ - } \ - rb_link_node(sorted_nd, parent, p); \ - rb_insert_color(sorted_nd, &sorted->entries); \ -} \ - \ -static void __name##_sorted__sort(struct __name##_sorted *sorted, \ - struct rb_root *entries) \ -{ \ - struct rb_node *nd; \ - unsigned int i = 0; \ - for (nd = rb_first(entries); nd; nd = rb_next(nd)) { \ - struct __name##_sorted_entry *snd = &sorted->nd[i++]; \ - __name##_sorted__init_entry(nd, snd); \ - __name##_sorted__insert(sorted, &snd->rb_node); \ - } \ -} \ - \ -static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries, \ - int nr_entries) \ -{ \ - struct __name##_sorted *sorted; \ - sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries); \ - if (sorted) { \ - sorted->entries = RB_ROOT; \ - __name##_sorted__sort(sorted, entries); \ - } \ - return sorted; \ -} \ - \ -static void __name##_sorted__delete(struct __name##_sorted *sorted) \ -{ \ - free(sorted); \ -} \ - \ -static void __name##_sorted__init_entry(struct rb_node *nd, \ - struct __name##_sorted_entry *entry) - -#define DECLARE_RESORT_RB(__name) \ -struct __name##_sorted_entry *__name##_entry; \ -struct __name##_sorted *__name = __name##_sorted__new - -#define resort_rb__for_each_entry(__nd, __name) \ - for (__nd = rb_first(&__name->entries); \ - __name##_entry = rb_entry(__nd, struct __name##_sorted_entry, \ - rb_node), __nd; \ - __nd = rb_next(__nd)) - -#define resort_rb__delete(__name) \ - __name##_sorted__delete(__name), __name = NULL - -/* - * Helpers for other classes that contains both an rbtree and the - * number of entries in it: - */ - -/* For 'struct intlist' */ -#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \ - DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root, \ - __ilist->rblist.nr_entries) - -#endif /* _PERF_RESORT_RB_H_ */ diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index a6566134e09e..ea3a6c4657ee 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -28,6 +28,7 @@ struct record_opts { bool sample_time_set; bool sample_cpu; bool sample_identifier; + bool sample_data_src; bool period; bool period_set; bool running_time; @@ -79,6 +80,7 @@ struct record_opts { int synth; int threads_spec; const char *threads_user_spec; + u64 off_cpu_thresh_ns; }; extern const char * const *record_usage; diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c index 5109167f27f7..9d26832398db 100644 --- a/tools/perf/util/rwsem.c +++ b/tools/perf/util/rwsem.c @@ -27,6 +27,7 @@ int exit_rwsem(struct rw_semaphore *sem) } int down_read(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { #if RWS_ERRORCHECK mutex_lock(&sem->mtx); @@ -37,6 +38,7 @@ int 
down_read(struct rw_semaphore *sem) } int up_read(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { #if RWS_ERRORCHECK mutex_unlock(&sem->mtx); @@ -47,6 +49,7 @@ int up_read(struct rw_semaphore *sem) } int down_write(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { #if RWS_ERRORCHECK mutex_lock(&sem->mtx); @@ -57,6 +60,7 @@ int down_write(struct rw_semaphore *sem) } int up_write(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { #if RWS_ERRORCHECK mutex_unlock(&sem->mtx); diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h index ef5cbc31d967..b102d8143181 100644 --- a/tools/perf/util/rwsem.h +++ b/tools/perf/util/rwsem.h @@ -10,7 +10,7 @@ */ #define RWS_ERRORCHECK 0 -struct rw_semaphore { +struct LOCKABLE rw_semaphore { #if RWS_ERRORCHECK struct mutex mtx; #else @@ -21,10 +21,10 @@ struct rw_semaphore { int init_rwsem(struct rw_semaphore *sem); int exit_rwsem(struct rw_semaphore *sem); -int down_read(struct rw_semaphore *sem); -int up_read(struct rw_semaphore *sem); +int down_read(struct rw_semaphore *sem) SHARED_LOCK_FUNCTION(sem); +int up_read(struct rw_semaphore *sem) UNLOCK_FUNCTION(sem); -int down_write(struct rw_semaphore *sem); -int up_write(struct rw_semaphore *sem); +int down_write(struct rw_semaphore *sem) EXCLUSIVE_LOCK_FUNCTION(sem); +int up_write(struct rw_semaphore *sem) UNLOCK_FUNCTION(sem); #endif /* _PERF_RWSEM_H */ diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 6fe478b0b61b..c17dbe232c54 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -345,7 +345,7 @@ static bool s390_cpumsf_trailer_show(const char *color, size_t pos, } color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d" " dsdes:%d Overflow:%lld Time:%#llx\n" - "\t\tC:%d TOD:%#lx\n", + "\t\tC:%d TOD:%#llx\n", pos, te->f ? 'F' : ' ', te->a ? 
'A' : ' ', @@ -513,6 +513,7 @@ static bool s390_cpumsf_make_event(size_t pos, .period = 1 }; union perf_event event; + int ret; memset(&event, 0, sizeof(event)); if (basic->CL == 1) /* Native LPAR mode */ @@ -536,8 +537,9 @@ static bool s390_cpumsf_make_event(size_t pos, pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n", __func__, pos, sample.ip, basic->P, basic->CL, sample.pid, sample.tid, sample.cpumode, sample.cpu); - if (perf_session__deliver_synth_event(sfq->sf->session, &event, - &sample)) { + ret = perf_session__deliver_synth_event(sfq->sf->session, &event, &sample); + perf_sample__exit(&sample); + if (ret) { pr_err("s390 Auxiliary Trace: failed to deliver event\n"); return false; } @@ -912,7 +914,7 @@ static int s390_cpumsf_process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, - struct perf_tool *tool) + const struct perf_tool *tool) { struct s390_cpumsf *sf = container_of(session->auxtrace, struct s390_cpumsf, @@ -952,15 +954,10 @@ s390_cpumsf_process_event(struct perf_session *session, return err; } -struct s390_cpumsf_synth { - struct perf_tool cpumsf_tool; - struct perf_session *session; -}; - static int s390_cpumsf_process_auxtrace_event(struct perf_session *session, union perf_event *event __maybe_unused, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { struct s390_cpumsf *sf = container_of(session->auxtrace, struct s390_cpumsf, @@ -1003,7 +1000,7 @@ static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused) } static int s390_cpumsf_flush(struct perf_session *session __maybe_unused, - struct perf_tool *tool __maybe_unused) + const struct perf_tool *tool __maybe_unused) { return 0; } @@ -1145,7 +1142,7 @@ int s390_cpumsf_process_auxtrace_info(union perf_event *event, sf->machine = &session->machines.host; /* No kvm support */ sf->auxtrace_type = auxtrace_info->type; sf->pmu_type = PERF_TYPE_RAW; - sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid); + sf->machine_type = s390_cpumsf_get_type(perf_session__env(session)->cpuid); sf->auxtrace.process_event = s390_cpumsf_process_event; sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event; diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 53383e97ec9d..c6ae0ae8d86a 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -19,12 +19,14 @@ #include <sys/stat.h> #include <linux/compiler.h> +#include <linux/err.h> #include <asm/byteorder.h> #include "debug.h" #include "session.h" #include "evlist.h" #include "color.h" +#include "hashmap.h" #include "sample-raw.h" #include "s390-cpumcf-kernel.h" #include "util/pmu.h" @@ -98,12 +100,12 @@ static void s390_cpumcfdg_dumptrail(const char *color, size_t offset, te.res2 = be32_to_cpu(tep->res2); color_fprintf(stdout, color, " [%#08zx] Trailer:%c%c%c%c%c" - " Cfvn:%d Csvn:%d Speed:%d TOD:%#llx\n", + " Cfvn:%d Csvn:%d Speed:%d TOD:%#lx\n", offset, te.clock_base ? 'T' : ' ', te.speed ? 'S' : ' ', te.mtda ? 'M' : ' ', te.caca ? 'C' : ' ', te.lcda ? 
'L' : ' ', te.cfvn, te.csvn, te.cpu_speed, te.timestamp); - color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#llx" + color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#lx" " Type:%x\n\n", te.progusage1, te.progusage2, te.progusage3, te.tod_base, te.mach_type); @@ -132,8 +134,8 @@ static int get_counterset_start(int setnr) } struct get_counter_name_data { - int wanted; - char *result; + long wanted; + const char *result; }; static int get_counter_name_callback(void *vdata, struct pmu_event_info *info) @@ -151,12 +153,22 @@ static int get_counter_name_callback(void *vdata, struct pmu_event_info *info) rc = sscanf(event_str, "event=%x", &event_nr); if (rc == 1 && event_nr == data->wanted) { - data->result = strdup(info->name); + data->result = info->name; return 1; /* Terminate the search. */ } return 0; } +static size_t get_counter_name_hash_fn(long key, void *ctx __maybe_unused) +{ + return key; +} + +static bool get_counter_name_hashmap_equal_fn(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + /* Scan the PMU and extract the logical name of a counter from the event. Input * is the counter set and counter number with in the set. Construct the event * number and use this as key. If they match return the name of this counter. @@ -164,17 +176,50 @@ static int get_counter_name_callback(void *vdata, struct pmu_event_info *info) */ static char *get_counter_name(int set, int nr, struct perf_pmu *pmu) { + static struct hashmap *cache; + static struct perf_pmu *cache_pmu; + long cache_key = get_counterset_start(set) + nr; struct get_counter_name_data data = { - .wanted = get_counterset_start(set) + nr, + .wanted = cache_key, .result = NULL, }; + char *result = NULL; if (!pmu) return NULL; + if (cache_pmu == pmu && hashmap__find(cache, cache_key, &result)) + return strdup(result); + perf_pmu__for_each_event(pmu, /*skip_duplicate_pmus=*/ true, &data, get_counter_name_callback); - return data.result; + + result = strdup(data.result ?: "<unknown>"); + + if (cache_pmu == NULL) { + struct hashmap *tmp = hashmap__new(get_counter_name_hash_fn, + get_counter_name_hashmap_equal_fn, + /*ctx=*/NULL); + + if (!IS_ERR(tmp)) { + cache = tmp; + cache_pmu = pmu; + } + } + + if (cache_pmu == pmu && result) { + char *old_value = NULL, *new_value = strdup(result); + + if (new_value) { + hashmap__set(cache, cache_key, new_value, /*old_key=*/NULL, &old_value); + /* + * Free in case of a race, but resizing would be broken + * in that case. 
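The change above turns get_counter_name() into a memoizing lookup: the constructed event number is the key, a strdup'ed counter name is the value, and a racing second insert is resolved by freeing the displaced copy. A minimal standalone sketch of the same pattern, using only the hashmap calls visible in this hunk — lookup_or_compute() and compute() are hypothetical names for illustration, not perf API:

#include <linux/compiler.h>
#include <linux/err.h>
#include <stdlib.h>
#include <string.h>
#include "hashmap.h"

static size_t key_hash(long key, void *ctx __maybe_unused) { return key; }
static bool key_equal(long a, long b, void *ctx __maybe_unused) { return a == b; }

/* Return a caller-owned copy of the string cached for @key, computing and
 * caching it on a miss. compute() stands in for the expensive PMU scan. */
static char *lookup_or_compute(struct hashmap **cache, long key,
                               char *(*compute)(long key))
{
        char *val = NULL;

        if (*cache == NULL) {
                struct hashmap *tmp = hashmap__new(key_hash, key_equal, /*ctx=*/NULL);

                if (IS_ERR(tmp))
                        return compute(key);    /* degrade to uncached */
                *cache = tmp;
        }
        if (hashmap__find(*cache, key, &val))
                return strdup(val);             /* the cache keeps ownership */

        val = compute(key);
        if (val) {
                char *old = NULL, *copy = strdup(val);

                if (copy) {
                        hashmap__set(*cache, key, copy, /*old_key=*/NULL, &old);
                        free(old);      /* lost an insert race: drop the displaced copy */
                }
        }
        return val;
}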
+ */ + free(old_value); + } + } + return result; } static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample) @@ -205,7 +250,7 @@ static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample) char *ev_name = get_counter_name(ce.set, i, pmu); color_fprintf(stdout, color, - "\tCounter:%03d %s Value:%#018lx\n", i, + "\tCounter:%03zd %s Value:%#018"PRIx64"\n", i, ev_name ?: "<unknown>", be64_to_cpu(*p)); free(ev_name); } @@ -260,7 +305,7 @@ static void s390_pai_all_dump(struct evsel *evsel, struct perf_sample *sample) ev_name = get_counter_name(evsel->core.attr.config, pai_data.event_nr, evsel->pmu); - color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018lx\n", + color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018"PRIx64"\n", pai_data.event_nr, ev_name ?: "<unknown>", pai_data.event_val); free(ev_name); diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c index f3f6bd9d290e..bcf442574d6e 100644 --- a/tools/perf/util/sample-raw.c +++ b/tools/perf/util/sample-raw.c @@ -6,15 +6,16 @@ #include "env.h" #include "header.h" #include "sample-raw.h" +#include "session.h" /* * Check platform the perf data file was created on and perform platform * specific interpretation. */ -void evlist__init_trace_event_sample_raw(struct evlist *evlist) +void evlist__init_trace_event_sample_raw(struct evlist *evlist, struct perf_env *env) { - const char *arch_pf = perf_env__arch(evlist->env); - const char *cpuid = perf_env__cpuid(evlist->env); + const char *arch_pf = perf_env__arch(env); + const char *cpuid = perf_env__cpuid(env); if (arch_pf && !strcmp("s390", arch_pf)) evlist->trace_event_sample_raw = evlist__s390_sample_raw; diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h index ea01c5811503..896e9a87e373 100644 --- a/tools/perf/util/sample-raw.h +++ b/tools/perf/util/sample-raw.h @@ -11,5 +11,5 @@ void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, bool evlist__has_amd_ibs(struct evlist *evlist); void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); -void evlist__init_trace_event_sample_raw(struct evlist *evlist); +void evlist__init_trace_event_sample_raw(struct evlist *evlist, struct perf_env *env); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/sample.c b/tools/perf/util/sample.c new file mode 100644 index 000000000000..605fee971f55 --- /dev/null +++ b/tools/perf/util/sample.c @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "sample.h" +#include "debug.h" +#include <linux/zalloc.h> +#include <stdlib.h> +#include <string.h> + +void perf_sample__init(struct perf_sample *sample, bool all) +{ + if (all) { + memset(sample, 0, sizeof(*sample)); + } else { + sample->user_regs = NULL; + sample->intr_regs = NULL; + } +} + +void perf_sample__exit(struct perf_sample *sample) +{ + free(sample->user_regs); + free(sample->intr_regs); +} + +struct regs_dump *perf_sample__user_regs(struct perf_sample *sample) +{ + if (!sample->user_regs) { + sample->user_regs = zalloc(sizeof(*sample->user_regs)); + if (!sample->user_regs) + pr_err("Failure to allocate sample user_regs"); + } + return sample->user_regs; +} + + +struct regs_dump *perf_sample__intr_regs(struct perf_sample *sample) +{ + if (!sample->intr_regs) { + sample->intr_regs = zalloc(sizeof(*sample->intr_regs)); + if (!sample->intr_regs) + pr_err("Failure to allocate sample intr_regs"); + } + return sample->intr_regs; +} diff --git a/tools/perf/util/sample.h 
b/tools/perf/util/sample.h index 70b2c3135555..a8307b20a9ea 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -67,7 +67,7 @@ struct aux_sample { }; struct simd_flags { - u64 arch:1, /* architecture (isa) */ + u8 arch:1, /* architecture (isa) */ pred:2; /* predication */ }; @@ -104,24 +104,29 @@ struct perf_sample { u8 cpumode; u16 misc; u16 ins_lat; - union { - u16 p_stage_cyc; - u16 retire_lat; - }; + /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */ + u16 weight3; bool no_hw_idx; /* No hw_idx collected in branch_stack */ + bool deferred_callchain; /* Has deferred user callchains */ + u64 deferred_cookie; char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; struct branch_stack *branch_stack; u64 *branch_stack_cntr; - struct regs_dump user_regs; - struct regs_dump intr_regs; + struct regs_dump *user_regs; + struct regs_dump *intr_regs; struct stack_dump user_stack; struct sample_read read; struct aux_sample aux_sample; struct simd_flags simd_flags; }; +void perf_sample__init(struct perf_sample *sample, bool all); +void perf_sample__exit(struct perf_sample *sample); +struct regs_dump *perf_sample__user_regs(struct perf_sample *sample); +struct regs_dump *perf_sample__intr_regs(struct perf_sample *sample); + /* * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get * 8-byte alignment. diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build index 586b94e90f4e..24f087b0cd11 100644 --- a/tools/perf/util/scripting-engines/Build +++ b/tools/perf/util/scripting-engines/Build @@ -1,9 +1,9 @@ ifeq ($(CONFIG_LIBTRACEEVENT),y) - perf-$(CONFIG_LIBPERL) += trace-event-perl.o + perf-util-$(CONFIG_LIBPERL) += trace-event-perl.o endif -perf-$(CONFIG_LIBPYTHON) += trace-event-python.o +perf-util-$(CONFIG_LIBPYTHON) += trace-event-python.o -CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum +CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum -Wno-thread-safety-analysis # -Wno-declaration-after-statement: The python headers have mixed code with declarations (decls after asserts, for instance) CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations -Wno-switch-enum -Wno-declaration-after-statement diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index e16257d5ab2c..e261a57b87d4 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -27,7 +27,7 @@ #include <errno.h> #include <linux/bitmap.h> #include <linux/time64.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <stdbool.h> /* perl needs the following define, right after including stdbool.h */ @@ -344,7 +344,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, struct addr_location *al) { struct thread *thread = al->thread; - struct tep_event *event = evsel->tp_format; + struct tep_event *event; struct tep_format_field *field; static char handler[256]; unsigned 
long long val; @@ -362,6 +362,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) return; + event = evsel__tp_format(evsel); if (!event) { pr_debug("ug! no event found for type %" PRIu64, (u64)evsel->core.attr.config); return; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index fb00f3ad6815..6655c0bbe0d8 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -31,7 +31,7 @@ #include <linux/compiler.h> #include <linux/time64.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #include "../build-id.h" @@ -58,22 +58,6 @@ #include "mem-events.h" #include "util/perf_regs.h" -#if PY_MAJOR_VERSION < 3 -#define _PyUnicode_FromString(arg) \ - PyString_FromString(arg) -#define _PyUnicode_FromStringAndSize(arg1, arg2) \ - PyString_FromStringAndSize((arg1), (arg2)) -#define _PyBytes_FromStringAndSize(arg1, arg2) \ - PyString_FromStringAndSize((arg1), (arg2)) -#define _PyLong_FromLong(arg) \ - PyInt_FromLong(arg) -#define _PyLong_AsLong(arg) \ - PyInt_AsLong(arg) -#define _PyCapsule_New(arg1, arg2, arg3) \ - PyCObject_FromVoidPtr((arg1), (arg2)) - -PyMODINIT_FUNC initperf_trace_context(void); -#else #define _PyUnicode_FromString(arg) \ PyUnicode_FromString(arg) #define _PyUnicode_FromStringAndSize(arg1, arg2) \ @@ -88,7 +72,6 @@ PyMODINIT_FUNC initperf_trace_context(void); PyCapsule_New((arg1), (arg2), (arg3)) PyMODINIT_FUNC PyInit_perf_trace_context(void); -#endif #ifdef HAVE_LIBTRACEEVENT #define TRACE_EVENT_TYPE_MAX \ @@ -181,17 +164,7 @@ static int get_argument_count(PyObject *handler) { int arg_count = 0; - /* - * The attribute for the code object is func_code in Python 2, - * whereas it is __code__ in Python 3.0+. - */ - PyObject *code_obj = PyObject_GetAttrString(handler, - "func_code"); - if (PyErr_Occurred()) { - PyErr_Clear(); - code_obj = PyObject_GetAttrString(handler, - "__code__"); - } + PyObject *code_obj = code_obj = PyObject_GetAttrString(handler, "__code__"); PyErr_Clear(); if (code_obj) { PyObject *arg_count_obj = PyObject_GetAttrString(code_obj, @@ -762,6 +735,8 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, ch } } +#define MAX_REG_SIZE 128 + static int set_regs_in_dict(PyObject *dict, struct perf_sample *sample, struct evsel *evsel) @@ -769,27 +744,31 @@ static int set_regs_in_dict(PyObject *dict, struct perf_event_attr *attr = &evsel->core.attr; const char *arch = perf_env__arch(evsel__env(evsel)); - /* - * Here value 28 is a constant size which can be used to print - * one register value and its corresponds to: - * 16 chars is to specify 64 bit register in hexadecimal. - * 2 chars is for appending "0x" to the hexadecimal value and - * 10 chars is for register name. 
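The 28-character-per-register estimate spelled out above is what this hunk retires: it budgets only 10 characters for a register name, which a long name can overrun. The replacement below bounds each entry by a deliberately generous MAX_REG_SIZE (128). Roughly, under the same formatting contract (a worked example restating the arithmetic, not perf code):

/* One "name:0xvalue " entry per bit set in the sample_regs mask:
 *   strlen(name) + 1 (':') + 2 ("0x") + 16 (hex u64) + 1 (' ')
 * so 128 bytes per entry covers any plausible register name. */
int nr_regs = __sw_hweight64(attr->sample_regs_intr);
int size = nr_regs * MAX_REG_SIZE + 1;  /* +1 for the trailing NUL */
char *bf = malloc(size);

if (!bf)
        return -1;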
- */ - int size = __sw_hweight64(attr->sample_regs_intr) * 28; - char *bf = malloc(size); - if (!bf) - return -1; + int size = (__sw_hweight64(attr->sample_regs_intr) * MAX_REG_SIZE) + 1; + char *bf = NULL; - regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, size); + if (sample->intr_regs) { + bf = malloc(size); + if (!bf) + return -1; - pydict_set_item_string_decref(dict, "iregs", - _PyUnicode_FromString(bf)); + regs_map(sample->intr_regs, attr->sample_regs_intr, arch, bf, size); - regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, size); + pydict_set_item_string_decref(dict, "iregs", + _PyUnicode_FromString(bf)); + } - pydict_set_item_string_decref(dict, "uregs", - _PyUnicode_FromString(bf)); + if (sample->user_regs) { + if (!bf) { + bf = malloc(size); + if (!bf) + return -1; + } + regs_map(sample->user_regs, attr->sample_regs_user, arch, bf, size); + + pydict_set_item_string_decref(dict, "uregs", + _PyUnicode_FromString(bf)); + } free(bf); return 0; @@ -798,22 +777,24 @@ static int set_regs_in_dict(PyObject *dict, static void set_sym_in_dict(PyObject *dict, struct addr_location *al, const char *dso_field, const char *dso_bid_field, const char *dso_map_start, const char *dso_map_end, - const char *sym_field, const char *symoff_field) + const char *sym_field, const char *symoff_field, + const char *map_pgoff) { - char sbuild_id[SBUILD_ID_SIZE]; - if (al->map) { + char sbuild_id[SBUILD_ID_SIZE]; struct dso *dso = map__dso(al->map); pydict_set_item_string_decref(dict, dso_field, _PyUnicode_FromString(dso__name(dso))); - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); pydict_set_item_string_decref(dict, dso_bid_field, _PyUnicode_FromString(sbuild_id)); pydict_set_item_string_decref(dict, dso_map_start, PyLong_FromUnsignedLong(map__start(al->map))); pydict_set_item_string_decref(dict, dso_map_end, PyLong_FromUnsignedLong(map__end(al->map))); + pydict_set_item_string_decref(dict, map_pgoff, + PyLong_FromUnsignedLongLong(map__pgoff(al->map))); } if (al->sym) { pydict_set_item_string_decref(dict, sym_field, @@ -888,6 +869,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, set_sample_read_in_dict(dict_sample, sample, evsel); pydict_set_item_string_decref(dict_sample, "weight", PyLong_FromUnsignedLongLong(sample->weight)); + pydict_set_item_string_decref(dict_sample, "ins_lat", + PyLong_FromUnsignedLong(sample->ins_lat)); pydict_set_item_string_decref(dict_sample, "transaction", PyLong_FromUnsignedLongLong(sample->transaction)); set_sample_datasrc_in_dict(dict_sample, sample); @@ -898,7 +881,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, pydict_set_item_string_decref(dict, "comm", _PyUnicode_FromString(thread__comm_str(al->thread))); set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end", - "symbol", "symoff"); + "symbol", "symoff", "map_pgoff"); pydict_set_item_string_decref(dict, "callchain", callchain); @@ -923,7 +906,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyBool_FromLong(1)); set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid", "addr_dso_map_start", "addr_dso_map_end", - "addr_symbol", "addr_symoff"); + "addr_symbol", "addr_symoff", "addr_map_pgoff"); } if (sample->flags) @@ -949,7 +932,7 @@ static void python_process_tracepoint(struct perf_sample *sample, struct addr_location *al, struct addr_location *addr_al) { - struct tep_event *event = evsel->tp_format; + struct tep_event *event; PyObject 
*handler, *context, *t, *obj = NULL, *callchain; PyObject *dict = NULL, *all_entries_dict = NULL; static char handler_name[256]; @@ -966,6 +949,7 @@ static void python_process_tracepoint(struct perf_sample *sample, bitmap_zero(events_defined, TRACE_EVENT_TYPE_MAX); + event = evsel__tp_format(evsel); if (!event) { snprintf(handler_name, sizeof(handler_name), "ug! no event found for type %" PRIu64, (u64)evsel->core.attr.config); @@ -1253,7 +1237,7 @@ static int python_export_dso(struct db_export *dbe, struct dso *dso, char sbuild_id[SBUILD_ID_SIZE]; PyObject *t; - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); t = tuple_new(5); @@ -1317,11 +1301,11 @@ static void python_export_sample_table(struct db_export *dbe, struct tables *tables = container_of(dbe, struct tables, dbe); PyObject *t; - t = tuple_new(27); + t = tuple_new(28); tuple_set_d64(t, 0, es->db_id); tuple_set_d64(t, 1, es->evsel->db_id); - tuple_set_d64(t, 2, maps__machine(es->al->maps)->db_id); + tuple_set_d64(t, 2, maps__machine(thread__maps(es->al->thread))->db_id); tuple_set_d64(t, 3, thread__db_id(es->al->thread)); tuple_set_d64(t, 4, es->comm_db_id); tuple_set_d64(t, 5, es->dso_db_id); @@ -1346,6 +1330,7 @@ static void python_export_sample_table(struct db_export *dbe, tuple_set_s32(t, 24, es->sample->flags); tuple_set_d64(t, 25, es->sample->id); tuple_set_d64(t, 26, es->sample->stream_id); + tuple_set_u32(t, 27, es->sample->ins_lat); call_object(tables->sample_handler, t, "sample_table"); @@ -1901,12 +1886,6 @@ static void set_table_handlers(struct tables *tables) tables->synth_handler = get_handler("synth_data"); } -#if PY_MAJOR_VERSION < 3 -static void _free_command_line(const char **command_line, int num) -{ - free(command_line); -} -#else static void _free_command_line(wchar_t **command_line, int num) { int i; @@ -1914,7 +1893,6 @@ static void _free_command_line(wchar_t **command_line, int num) PyMem_RawFree(command_line[i]); free(command_line); } -#endif /* @@ -1924,30 +1902,12 @@ static int python_start_script(const char *script, int argc, const char **argv, struct perf_session *session) { struct tables *tables = &tables_global; -#if PY_MAJOR_VERSION < 3 - const char **command_line; -#else wchar_t **command_line; -#endif - /* - * Use a non-const name variable to cope with python 2.6's - * PyImport_AppendInittab prototype - */ - char buf[PATH_MAX], name[19] = "perf_trace_context"; + char buf[PATH_MAX]; int i, err = 0; FILE *fp; scripting_context->session = session; -#if PY_MAJOR_VERSION < 3 - command_line = malloc((argc + 1) * sizeof(const char *)); - if (!command_line) - return -1; - - command_line[0] = script; - for (i = 1; i < argc + 1; i++) - command_line[i] = argv[i - 1]; - PyImport_AppendInittab(name, initperf_trace_context); -#else command_line = malloc((argc + 1) * sizeof(wchar_t *)); if (!command_line) return -1; @@ -1955,15 +1915,10 @@ static int python_start_script(const char *script, int argc, const char **argv, command_line[0] = Py_DecodeLocale(script, NULL); for (i = 1; i < argc + 1; i++) command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); - PyImport_AppendInittab(name, PyInit_perf_trace_context); -#endif + PyImport_AppendInittab("perf_trace_context", PyInit_perf_trace_context); Py_Initialize(); -#if PY_MAJOR_VERSION < 3 - PySys_SetArgv(argc + 1, (char **)command_line); -#else PySys_SetArgv(argc + 1, command_line); -#endif fp = fopen(script, "r"); if (!fp) { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 
a10343b9dcd4..4236503c8f6c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -12,6 +12,7 @@ #include <sys/types.h> #include <sys/mman.h> #include <perf/cpumap.h> +#include <perf/event.h> #include "map_symbol.h" #include "branch.h" @@ -36,81 +37,21 @@ #include "util.h" #include "arch/common.h" #include "units.h" +#include "annotate.h" +#include "perf.h" #include <internal/lib.h> -#ifdef HAVE_ZSTD_SUPPORT -static int perf_session__process_compressed_event(struct perf_session *session, - union perf_event *event, u64 file_offset, - const char *file_path) -{ - void *src; - size_t decomp_size, src_size; - u64 decomp_last_rem = 0; - size_t mmap_len, decomp_len = session->header.env.comp_mmap_len; - struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last; - - if (decomp_last) { - decomp_last_rem = decomp_last->size - decomp_last->head; - decomp_len += decomp_last_rem; - } - - mmap_len = sizeof(struct decomp) + decomp_len; - decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - if (decomp == MAP_FAILED) { - pr_err("Couldn't allocate memory for decompression\n"); - return -1; - } - - decomp->file_pos = file_offset; - decomp->file_path = file_path; - decomp->mmap_len = mmap_len; - decomp->head = 0; - - if (decomp_last_rem) { - memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem); - decomp->size = decomp_last_rem; - } - - src = (void *)event + sizeof(struct perf_record_compressed); - src_size = event->pack.header.size - sizeof(struct perf_record_compressed); - - decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size, - &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem); - if (!decomp_size) { - munmap(decomp, mmap_len); - pr_err("Couldn't decompress data\n"); - return -1; - } - - decomp->size += decomp_size; - - if (session->active_decomp->decomp == NULL) - session->active_decomp->decomp = decomp; - else - session->active_decomp->decomp_last->next = decomp; - - session->active_decomp->decomp_last = decomp; - - pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size); - - return 0; -} -#else /* !HAVE_ZSTD_SUPPORT */ -#define perf_session__process_compressed_event perf_session__process_compressed_event_stub -#endif - static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool, + const struct perf_tool *tool, u64 file_offset, const char *file_path); -static int perf_session__open(struct perf_session *session, int repipe_fd) +static int perf_session__open(struct perf_session *session) { struct perf_data *data = session->data; - if (perf_session__read_header(session, repipe_fd) < 0) { + if (perf_session__read_header(session) < 0) { pr_err("incompatible file format (rerun with -v to learn more)\n"); return -1; } @@ -196,8 +137,9 @@ static int ordered_events__deliver_event(struct ordered_events *oe, } struct perf_session *__perf_session__new(struct perf_data *data, - bool repipe, int repipe_fd, - struct perf_tool *tool) + struct perf_tool *tool, + bool trace_event_repipe, + struct perf_env *host_env) { int ret = -ENOMEM; struct perf_session *session = zalloc(sizeof(*session)); @@ -205,7 +147,7 @@ struct perf_session *__perf_session__new(struct perf_data *data, if (!session) goto out; - session->repipe = repipe; + session->trace_event_repipe = trace_event_repipe; session->tool = tool; session->decomp_data.zstd_decomp = &session->zstd_data; session->active_decomp = &session->decomp_data; @@ -223,7 
+165,7 @@ struct perf_session *__perf_session__new(struct perf_data *data, session->data = data; if (perf_data__is_read(data)) { - ret = perf_session__open(session, repipe_fd); + ret = perf_session__open(session); if (ret < 0) goto out_delete; @@ -236,7 +178,7 @@ struct perf_session *__perf_session__new(struct perf_data *data, perf_session__set_comm_exec(session); } - evlist__init_trace_event_sample_raw(session->evlist); + evlist__init_trace_event_sample_raw(session->evlist, &session->header.env); /* Open the directory data. */ if (data->is_dir) { @@ -250,8 +192,11 @@ struct perf_session *__perf_session__new(struct perf_data *data, symbol_conf.kallsyms_name = perf_data__kallsyms_name(data); } } else { - session->machines.host.env = &perf_env; + assert(host_env != NULL); + session->machines.host.env = host_env; } + if (session->evlist) + session->evlist->session = session; session->machines.host.single_address_space = perf_env__single_address_space(session->machines.host.env); @@ -304,6 +249,7 @@ void perf_session__delete(struct perf_session *session) return; auxtrace__free(session); auxtrace_index__free(&session->auxtrace_index); + debuginfo_cache__delete(); perf_session__destroy_kernel_maps(session); perf_decomp__release_events(session->decomp_data.decomp); perf_env__exit(&session->header.env); @@ -319,251 +265,6 @@ void perf_session__delete(struct perf_session *session) free(session); } -static int process_event_synth_tracing_data_stub(struct perf_session *session - __maybe_unused, - union perf_event *event - __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct evlist **pevlist - __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_event_synth_event_update_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct evlist **pevlist - __maybe_unused) -{ - if (dump_trace) - perf_event__fprintf_event_update(event, stdout); - - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_event_sample_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_sample *sample __maybe_unused, - struct evsel *evsel __maybe_unused, - struct machine *machine __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_event_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_sample *sample __maybe_unused, - struct machine *machine __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct ordered_events *oe __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -static int skipn(int fd, off_t n) -{ - char buf[4096]; - ssize_t ret; - - while (n > 0) { - ret = read(fd, buf, min(n, (off_t)sizeof(buf))); - if (ret <= 0) - return ret; - n -= ret; - } - - return 0; -} - -static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused, - union perf_event *event) -{ - dump_printf(": unhandled!\n"); - if (perf_data__is_pipe(session->data)) - skipn(perf_data__fd(session->data), event->auxtrace.size); - return event->auxtrace.size; -} - -static int process_event_op2_stub(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; 
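These stub handlers, and the perf_tool__fill_defaults() body removed below, all leave session.c: a tool is now expected to reach the session fully populated rather than being patched lazily per call. Presumably the defaulting moves behind an initializer in tool.c; a hypothetical call site, assuming a perf_tool__init() with that role (handler names are illustrative only):

/* Sketch of a consumer under that assumption: */
struct perf_tool tool;

perf_tool__init(&tool, /*ordered_events=*/true);
tool.sample = my_process_sample;   /* hypothetical: override only */
tool.mmap2  = my_process_mmap2;    /* the events you consume      */
session = perf_session__new(data, &tool);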
-} - - -static -int process_event_thread_map_stub(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused) -{ - if (dump_trace) - perf_event__fprintf_thread_map(event, stdout); - - dump_printf(": unhandled!\n"); - return 0; -} - -static -int process_event_cpu_map_stub(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused) -{ - if (dump_trace) - perf_event__fprintf_cpu_map(event, stdout); - - dump_printf(": unhandled!\n"); - return 0; -} - -static -int process_event_stat_config_stub(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused) -{ - if (dump_trace) - perf_event__fprintf_stat_config(event, stdout); - - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_stat_stub(struct perf_session *perf_session __maybe_unused, - union perf_event *event) -{ - if (dump_trace) - perf_event__fprintf_stat(event, stdout); - - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_stat_round_stub(struct perf_session *perf_session __maybe_unused, - union perf_event *event) -{ - if (dump_trace) - perf_event__fprintf_stat_round(event, stdout); - - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_event_time_conv_stub(struct perf_session *perf_session __maybe_unused, - union perf_event *event) -{ - if (dump_trace) - perf_event__fprintf_time_conv(event, stdout); - - dump_printf(": unhandled!\n"); - return 0; -} - -static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused, - u64 file_offset __maybe_unused, - const char *file_path __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -void perf_tool__fill_defaults(struct perf_tool *tool) -{ - if (tool->sample == NULL) - tool->sample = process_event_sample_stub; - if (tool->mmap == NULL) - tool->mmap = process_event_stub; - if (tool->mmap2 == NULL) - tool->mmap2 = process_event_stub; - if (tool->comm == NULL) - tool->comm = process_event_stub; - if (tool->namespaces == NULL) - tool->namespaces = process_event_stub; - if (tool->cgroup == NULL) - tool->cgroup = process_event_stub; - if (tool->fork == NULL) - tool->fork = process_event_stub; - if (tool->exit == NULL) - tool->exit = process_event_stub; - if (tool->lost == NULL) - tool->lost = perf_event__process_lost; - if (tool->lost_samples == NULL) - tool->lost_samples = perf_event__process_lost_samples; - if (tool->aux == NULL) - tool->aux = perf_event__process_aux; - if (tool->itrace_start == NULL) - tool->itrace_start = perf_event__process_itrace_start; - if (tool->context_switch == NULL) - tool->context_switch = perf_event__process_switch; - if (tool->ksymbol == NULL) - tool->ksymbol = perf_event__process_ksymbol; - if (tool->bpf == NULL) - tool->bpf = perf_event__process_bpf; - if (tool->text_poke == NULL) - tool->text_poke = perf_event__process_text_poke; - if (tool->aux_output_hw_id == NULL) - tool->aux_output_hw_id = perf_event__process_aux_output_hw_id; - if (tool->read == NULL) - tool->read = process_event_sample_stub; - if (tool->throttle == NULL) - tool->throttle = process_event_stub; - if (tool->unthrottle == NULL) - tool->unthrottle = process_event_stub; - if (tool->attr == NULL) - tool->attr = process_event_synth_attr_stub; - if (tool->event_update == NULL) - tool->event_update = process_event_synth_event_update_stub; - if (tool->tracing_data == NULL) - tool->tracing_data = process_event_synth_tracing_data_stub; - if (tool->build_id == NULL) - tool->build_id = 
process_event_op2_stub; - if (tool->finished_round == NULL) { - if (tool->ordered_events) - tool->finished_round = perf_event__process_finished_round; - else - tool->finished_round = process_finished_round_stub; - } - if (tool->id_index == NULL) - tool->id_index = process_event_op2_stub; - if (tool->auxtrace_info == NULL) - tool->auxtrace_info = process_event_op2_stub; - if (tool->auxtrace == NULL) - tool->auxtrace = process_event_auxtrace_stub; - if (tool->auxtrace_error == NULL) - tool->auxtrace_error = process_event_op2_stub; - if (tool->thread_map == NULL) - tool->thread_map = process_event_thread_map_stub; - if (tool->cpu_map == NULL) - tool->cpu_map = process_event_cpu_map_stub; - if (tool->stat_config == NULL) - tool->stat_config = process_event_stat_config_stub; - if (tool->stat == NULL) - tool->stat = process_stat_stub; - if (tool->stat_round == NULL) - tool->stat_round = process_stat_round_stub; - if (tool->time_conv == NULL) - tool->time_conv = process_event_time_conv_stub; - if (tool->feature == NULL) - tool->feature = process_event_op2_stub; - if (tool->compressed == NULL) - tool->compressed = perf_session__process_compressed_event; - if (tool->finished_init == NULL) - tool->finished_init = process_event_op2_stub; -} - static void swap_sample_id_all(union perf_event *event, void *data) { void *end = (void *) event + event->header.size; @@ -1019,6 +720,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_CGROUP] = perf_event__cgroup_swap, [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap, [PERF_RECORD_AUX_OUTPUT_HW_ID] = perf_event__all64_swap, + [PERF_RECORD_CALLCHAIN_DEFERRED] = perf_event__all64_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, @@ -1076,7 +778,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { * Flush every events below timestamp 7 * etc... */ -int perf_event__process_finished_round(struct perf_tool *tool __maybe_unused, +int perf_event__process_finished_round(const struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct ordered_events *oe) { @@ -1153,6 +855,9 @@ static void callchain__printf(struct evsel *evsel, for (i = 0; i < callchain->nr; i++) printf("..... %2d: %016" PRIx64 "\n", i, callchain->ips[i]); + + if (sample->deferred_callchain) + printf("...... (deferred)\n"); } static void branch_stack__printf(struct perf_sample *sample, @@ -1161,7 +866,6 @@ static void branch_stack__printf(struct perf_sample *sample, struct branch_entry *entries = perf_sample__branch_entries(sample); bool callstack = evsel__has_branch_callstack(evsel); u64 *branch_stack_cntr = sample->branch_stack_cntr; - struct perf_env *env = evsel__env(evsel); uint64_t i; if (!callstack) { @@ -1205,8 +909,11 @@ static void branch_stack__printf(struct perf_sample *sample, } if (branch_stack_cntr) { + unsigned int br_cntr_width, br_cntr_nr; + + perf_env__find_br_cntr_info(evsel__env(evsel), &br_cntr_nr, &br_cntr_width); printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n", - sample->branch_stack->nr, env->br_cntr_width, env->br_cntr_nr); + sample->branch_stack->nr, br_cntr_width, br_cntr_nr); for (i = 0; i < sample->branch_stack->nr; i++) printf("..... 
%2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]); } @@ -1252,7 +959,12 @@ static void regs__printf(const char *type, struct regs_dump *regs, const char *a static void regs_user__printf(struct perf_sample *sample, const char *arch) { - struct regs_dump *user_regs = &sample->user_regs; + struct regs_dump *user_regs; + + if (!sample->user_regs) + return; + + user_regs = perf_sample__user_regs(sample); if (user_regs->regs) regs__printf("user", user_regs, arch); @@ -1260,7 +972,12 @@ static void regs_user__printf(struct perf_sample *sample, const char *arch) static void regs_intr__printf(struct perf_sample *sample, const char *arch) { - struct regs_dump *intr_regs = &sample->intr_regs; + struct regs_dump *intr_regs; + + if (!sample->intr_regs) + return; + + intr_regs = perf_sample__intr_regs(sample); if (intr_regs->regs) regs__printf("intr", intr_regs, arch); @@ -1386,7 +1103,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, printf("... weight: %" PRIu64 "", sample->weight); if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { printf(",0x%"PRIx16"", sample->ins_lat); - printf(",0x%"PRIx16"", sample->p_stage_cyc); + printf(",0x%"PRIx16"", sample->weight3); } printf("\n"); } @@ -1410,6 +1127,19 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, sample_read__printf(sample, evsel->core.attr.read_format); } +static void dump_deferred_callchain(struct evsel *evsel, union perf_event *event, + struct perf_sample *sample) +{ + if (!dump_trace) + return; + + printf("(IP, 0x%x): %d/%d: %#" PRIx64 "\n", + event->header.misc, sample->pid, sample->tid, sample->deferred_cookie); + + if (evsel__has_callchain(evsel)) + callchain__printf(evsel, sample); +} + static void dump_read(struct evsel *evsel, union perf_event *event) { struct perf_record_read *read_event = &event->read; @@ -1470,22 +1200,28 @@ static struct machine *machines__find_for_cpumode(struct machines *machines, } static int deliver_sample_value(struct evlist *evlist, - struct perf_tool *tool, + const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct sample_read_value *v, - struct machine *machine) + struct machine *machine, + bool per_thread) { struct perf_sample_id *sid = evlist__id2sid(evlist, v->id); struct evsel *evsel; + u64 *storage = NULL; if (sid) { + storage = perf_sample_id__get_period_storage(sid, sample->tid, per_thread); + } + + if (storage) { sample->id = v->id; - sample->period = v->value - sid->period; - sid->period = v->value; + sample->period = v->value - *storage; + *storage = v->value; } - if (!sid || sid->evsel == NULL) { + if (!storage || sid->evsel == NULL) { ++evlist->stats.nr_unknown_id; return 0; } @@ -1502,18 +1238,23 @@ static int deliver_sample_value(struct evlist *evlist, } static int deliver_sample_group(struct evlist *evlist, - struct perf_tool *tool, + const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine, - u64 read_format) + u64 read_format, + bool per_thread) { int ret = -EINVAL; struct sample_read_value *v = sample->read.group.values; + if (tool->dont_split_sample_group) + return deliver_sample_value(evlist, tool, event, sample, v, machine, + per_thread); + sample_read_group__for_each(v, sample->read.group.nr, read_format) { ret = deliver_sample_value(evlist, tool, event, sample, v, - machine); + machine, per_thread); if (ret) break; } @@ -1521,13 +1262,14 @@ static int deliver_sample_group(struct evlist *evlist, return ret; } -static int evlist__deliver_sample(struct evlist 
*evlist, struct perf_tool *tool, +static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct evsel *evsel, struct machine *machine) { /* We know evsel != NULL. */ u64 sample_type = evsel->core.attr.sample_type; u64 read_format = evsel->core.attr.read_format; + bool per_thread = perf_evsel__attr_has_per_thread_sample_period(&evsel->core); /* Standard sample delivery. */ if (!(sample_type & PERF_SAMPLE_READ)) @@ -1536,17 +1278,118 @@ static int evlist__deliver_sample(struct evlist *evlist, struct perf_tool *tool, /* For PERF_SAMPLE_READ we have either single or group mode. */ if (read_format & PERF_FORMAT_GROUP) return deliver_sample_group(evlist, tool, event, sample, - machine, read_format); + machine, read_format, per_thread); else return deliver_sample_value(evlist, tool, event, sample, - &sample->read.one, machine); + &sample->read.one, machine, + per_thread); +} + +/* + * Samples with deferred callchains should wait for the next matching + * PERF_RECORD_CALLCHAIN_RECORD entries. Keep the events in a list and + * deliver them once it finds the callchains. + */ +struct deferred_event { + struct list_head list; + union perf_event *event; +}; + +/* + * This is called when a deferred callchain record comes up. Find all matching + * samples, merge the callchains and process them. + */ +static int evlist__deliver_deferred_callchain(struct evlist *evlist, + const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct deferred_event *de, *tmp; + struct evsel *evsel; + int ret = 0; + + if (!tool->merge_deferred_callchains) { + evsel = evlist__id2evsel(evlist, sample->id); + return tool->callchain_deferred(tool, event, sample, + evsel, machine); + } + + list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) { + struct perf_sample orig_sample; + + ret = evlist__parse_sample(evlist, de->event, &orig_sample); + if (ret < 0) { + pr_err("failed to parse original sample\n"); + break; + } + + if (sample->tid != orig_sample.tid) + continue; + + if (event->callchain_deferred.cookie == orig_sample.deferred_cookie) + sample__merge_deferred_callchain(&orig_sample, sample); + else + orig_sample.deferred_callchain = false; + + evsel = evlist__id2evsel(evlist, orig_sample.id); + ret = evlist__deliver_sample(evlist, tool, de->event, + &orig_sample, evsel, machine); + + if (orig_sample.deferred_callchain) + free(orig_sample.callchain); + + list_del(&de->list); + free(de->event); + free(de); + + if (ret) + break; + } + return ret; +} + +/* + * This is called at the end of the data processing for the session. Flush the + * remaining samples as there's no hope for matching deferred callchains. 
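The merge loop above pairs a PERF_RECORD_CALLCHAIN_DEFERRED record with queued samples by thread and cookie; a same-thread sample whose cookie disagrees is still delivered, just with its deferred flag cleared. Distilled to a predicate (a hypothetical helper over the same fields the loop reads — tid alone is not enough, since several deferred requests can be in flight for one thread):

static bool deferred_record_matches(const union perf_event *event,
                                    const struct perf_sample *record,
                                    const struct perf_sample *queued)
{
        return record->tid == queued->tid &&
               event->callchain_deferred.cookie == queued->deferred_cookie;
}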
+ */ +static int session__flush_deferred_samples(struct perf_session *session, + const struct perf_tool *tool) +{ + struct evlist *evlist = session->evlist; + struct machine *machine = &session->machines.host; + struct deferred_event *de, *tmp; + struct evsel *evsel; + int ret = 0; + + list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) { + struct perf_sample sample; + + ret = evlist__parse_sample(evlist, de->event, &sample); + if (ret < 0) { + pr_err("failed to parse original sample\n"); + break; + } + + evsel = evlist__id2evsel(evlist, sample.id); + ret = evlist__deliver_sample(evlist, tool, de->event, + &sample, evsel, machine); + + list_del(&de->list); + free(de->event); + free(de); + + if (ret) + break; + } + return ret; } static int machines__deliver_event(struct machines *machines, struct evlist *evlist, union perf_event *event, struct perf_sample *sample, - struct perf_tool *tool, u64 file_offset, + const struct perf_tool *tool, u64 file_offset, const char *file_path) { struct evsel *evsel; @@ -1570,6 +1413,22 @@ static int machines__deliver_event(struct machines *machines, return 0; } dump_sample(evsel, event, sample, perf_env__arch(machine->env)); + if (sample->deferred_callchain && tool->merge_deferred_callchains) { + struct deferred_event *de = malloc(sizeof(*de)); + size_t sz = event->header.size; + + if (de == NULL) + return -ENOMEM; + + de->event = malloc(sz); + if (de->event == NULL) { + free(de); + return -ENOMEM; + } + memcpy(de->event, event, sz); + list_add_tail(&de->list, &evlist->deferred_samples); + return 0; + } return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); @@ -1592,8 +1451,9 @@ static int machines__deliver_event(struct machines *machines, evlist->stats.total_lost += event->lost.lost; return tool->lost(tool, event, sample, machine); case PERF_RECORD_LOST_SAMPLES: - if (tool->lost_samples == perf_event__process_lost_samples && - !(event->header.misc & PERF_RECORD_MISC_LOST_SAMPLES_BPF)) + if (event->header.misc & PERF_RECORD_MISC_LOST_SAMPLES_BPF) + evlist->stats.total_dropped_samples += event->lost_samples.lost; + else if (tool->lost_samples == perf_event__process_lost_samples) evlist->stats.total_lost_samples += event->lost_samples.lost; return tool->lost_samples(tool, event, sample, machine); case PERF_RECORD_READ: @@ -1626,6 +1486,10 @@ static int machines__deliver_event(struct machines *machines, return tool->text_poke(tool, event, sample, machine); case PERF_RECORD_AUX_OUTPUT_HW_ID: return tool->aux_output_hw_id(tool, event, sample, machine); + case PERF_RECORD_CALLCHAIN_DEFERRED: + dump_deferred_callchain(evsel, event, sample); + return evlist__deliver_deferred_callchain(evlist, tool, event, + sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; @@ -1634,30 +1498,35 @@ static int machines__deliver_event(struct machines *machines, static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool, + const struct perf_tool *tool, u64 file_offset, const char *file_path) { struct perf_sample sample; - int ret = evlist__parse_sample(session->evlist, event, &sample); + int ret; + perf_sample__init(&sample, /*all=*/false); + ret = evlist__parse_sample(session->evlist, event, &sample); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); - return ret; + goto out; } ret = auxtrace__process_event(session, event, &sample, tool); if (ret < 0) - return ret; - if (ret > 0) - return 
0; + goto out; + if (ret > 0) { + ret = 0; + goto out; + } ret = machines__deliver_event(&session->machines, session->evlist, event, &sample, tool, file_offset, file_path); if (dump_trace && sample.aux_sample.size) auxtrace__dump_auxtrace_sample(session, &sample); - +out: + perf_sample__exit(&sample); return ret; } @@ -1667,13 +1536,15 @@ static s64 perf_session__process_user_event(struct perf_session *session, const char *file_path) { struct ordered_events *oe = &session->ordered_events; - struct perf_tool *tool = session->tool; - struct perf_sample sample = { .time = 0, }; + const struct perf_tool *tool = session->tool; + struct perf_sample sample; int fd = perf_data__fd(session->data); - int err; + s64 err; - if (event->header.type != PERF_RECORD_COMPRESSED || - tool->compressed == perf_session__process_compressed_event_stub) + perf_sample__init(&sample, /*all=*/true); + if ((event->header.type != PERF_RECORD_COMPRESSED && + event->header.type != PERF_RECORD_COMPRESSED2) || + perf_tool__compressed_is_stub(tool)) dump_event(session->evlist, event, file_offset, &sample, file_path); /* These events are processed right away */ @@ -1684,15 +1555,17 @@ static s64 perf_session__process_user_event(struct perf_session *session, perf_session__set_id_hdr_size(session); perf_session__set_comm_exec(session); } - return err; + break; case PERF_RECORD_EVENT_UPDATE: - return tool->event_update(tool, event, &session->evlist); + err = tool->event_update(tool, event, &session->evlist); + break; case PERF_RECORD_HEADER_EVENT_TYPE: /* * Deprecated, but we need to handle it for sake * of old data files create in pipe mode. */ - return 0; + err = 0; + break; case PERF_RECORD_HEADER_TRACING_DATA: /* * Setup for reading amidst mmap, but only when we @@ -1701,15 +1574,20 @@ static s64 perf_session__process_user_event(struct perf_session *session, */ if (!perf_data__is_pipe(session->data)) lseek(fd, file_offset, SEEK_SET); - return tool->tracing_data(session, event); + err = tool->tracing_data(tool, session, event); + break; case PERF_RECORD_HEADER_BUILD_ID: - return tool->build_id(session, event); + err = tool->build_id(tool, session, event); + break; case PERF_RECORD_FINISHED_ROUND: - return tool->finished_round(tool, event, oe); + err = tool->finished_round(tool, event, oe); + break; case PERF_RECORD_ID_INDEX: - return tool->id_index(session, event); + err = tool->id_index(tool, session, event); + break; case PERF_RECORD_AUXTRACE_INFO: - return tool->auxtrace_info(session, event); + err = tool->auxtrace_info(tool, session, event); + break; case PERF_RECORD_AUXTRACE: /* * Setup for reading amidst mmap, but only when we @@ -1718,35 +1596,52 @@ static s64 perf_session__process_user_event(struct perf_session *session, */ if (!perf_data__is_pipe(session->data)) lseek(fd, file_offset + event->header.size, SEEK_SET); - return tool->auxtrace(session, event); + err = tool->auxtrace(tool, session, event); + break; case PERF_RECORD_AUXTRACE_ERROR: perf_session__auxtrace_error_inc(session, event); - return tool->auxtrace_error(session, event); + err = tool->auxtrace_error(tool, session, event); + break; case PERF_RECORD_THREAD_MAP: - return tool->thread_map(session, event); + err = tool->thread_map(tool, session, event); + break; case PERF_RECORD_CPU_MAP: - return tool->cpu_map(session, event); + err = tool->cpu_map(tool, session, event); + break; case PERF_RECORD_STAT_CONFIG: - return tool->stat_config(session, event); + err = tool->stat_config(tool, session, event); + break; case PERF_RECORD_STAT: - return 
tool->stat(session, event); + err = tool->stat(tool, session, event); + break; case PERF_RECORD_STAT_ROUND: - return tool->stat_round(session, event); + err = tool->stat_round(tool, session, event); + break; case PERF_RECORD_TIME_CONV: session->time_conv = event->time_conv; - return tool->time_conv(session, event); + err = tool->time_conv(tool, session, event); + break; case PERF_RECORD_HEADER_FEATURE: - return tool->feature(session, event); + err = tool->feature(tool, session, event); + break; case PERF_RECORD_COMPRESSED: - err = tool->compressed(session, event, file_offset, file_path); + case PERF_RECORD_COMPRESSED2: + err = tool->compressed(tool, session, event, file_offset, file_path); if (err) dump_event(session->evlist, event, file_offset, &sample, file_path); - return err; + break; case PERF_RECORD_FINISHED_INIT: - return tool->finished_init(session, event); + err = tool->finished_init(tool, session, event); + break; + case PERF_RECORD_BPF_METADATA: + err = tool->bpf_metadata(tool, session, event); + break; default: - return -EINVAL; + err = -EINVAL; + break; } + perf_sample__exit(&sample); + return err; } int perf_session__deliver_synth_event(struct perf_session *session, @@ -1754,7 +1649,7 @@ int perf_session__deliver_synth_event(struct perf_session *session, struct perf_sample *sample) { struct evlist *evlist = session->evlist; - struct perf_tool *tool = session->tool; + const struct perf_tool *tool = session->tool; events_stats__inc(&evlist->stats, event->header.type); @@ -1764,6 +1659,30 @@ int perf_session__deliver_synth_event(struct perf_session *session, return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0, NULL); } +int perf_session__deliver_synth_attr_event(struct perf_session *session, + const struct perf_event_attr *attr, + u64 id) +{ + union { + struct { + struct perf_record_header_attr attr; + u64 ids[1]; + } attr_id; + union perf_event ev; + } ev = { + .attr_id.attr.header.type = PERF_RECORD_HEADER_ATTR, + .attr_id.attr.header.size = sizeof(ev.attr_id), + .attr_id.ids[0] = id, + }; + + if (attr->size != sizeof(ev.attr_id.attr.attr)) { + pr_debug("Unexpected perf_event_attr size\n"); + return -EINVAL; + } + ev.attr_id.attr.attr = *attr; + return perf_session__deliver_synth_event(session, &ev.ev, NULL); +} + static void event_swap(union perf_event *event, bool sample_id_all) { perf_event__swap_op swap; @@ -1862,14 +1781,23 @@ static s64 perf_session__process_event(struct perf_session *session, const char *file_path) { struct evlist *evlist = session->evlist; - struct perf_tool *tool = session->tool; + const struct perf_tool *tool = session->tool; int ret; if (session->header.needs_swap) event_swap(event, evlist__sample_id_all(evlist)); - if (event->header.type >= PERF_RECORD_HEADER_MAX) - return -EINVAL; + if (event->header.type >= PERF_RECORD_HEADER_MAX) { + /* perf should not support unaligned event, stop here. */ + if (event->header.size % sizeof(u64)) + return -EINVAL; + + /* This perf is outdated and does not support the latest event type. */ + ui__warning("Unsupported header type %u, please consider updating perf.\n", + event->header.type); + /* Skip unsupported event by returning its size. 
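Returning the record's own size (just below) is what lets an older perf step over record types it was built without: every record is self-describing via header.size, so the stream stays walkable as long as sizes keep u64 alignment. A loose sketch of the consuming loop this relies on — buf, data_size and process() are hypothetical stand-ins for the session reader, not its actual signature:

static int consume_records(void *buf, u64 data_size,
                           int (*process)(union perf_event *ev))
{
        u64 head = 0;

        while (head + sizeof(struct perf_event_header) <= data_size) {
                union perf_event *ev = (union perf_event *)((char *)buf + head);

                if (ev->header.size < sizeof(ev->header) ||
                    head + ev->header.size > data_size)
                        return -EINVAL;         /* truncated or corrupt */
                if (process(ev) < 0)            /* may warn-and-skip unknown types */
                        return -1;
                head += ev->header.size;        /* self-describing: skipping is free */
        }
        return 0;
}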
*/ + return event->header.size; + } events_stats__inc(&evlist->stats, event->header.type); @@ -2049,7 +1977,8 @@ static int __perf_session__process_decomp_events(struct perf_session *session); static int __perf_session__process_pipe_events(struct perf_session *session) { struct ordered_events *oe = &session->ordered_events; - struct perf_tool *tool = session->tool; + const struct perf_tool *tool = session->tool; + struct ui_progress prog; union perf_event *event; uint32_t size, cur_size = 0; void *buf = NULL; @@ -2057,8 +1986,18 @@ static int __perf_session__process_pipe_events(struct perf_session *session) u64 head; ssize_t err; void *p; + bool update_prog = false; - perf_tool__fill_defaults(tool); + /* + * If it's from a file saving pipe data (by redirection), it would have + * a file name other than "-". Then we can get the total size and show + * the progress. + */ + if (strcmp(session->data->path, "-") && session->data->file.size) { + ui_progress__init_size(&prog, session->data->file.size, + "Processing events..."); + update_prog = true; + } head = 0; cur_size = sizeof(union perf_event); @@ -2131,6 +2070,9 @@ more: if (err) goto out_err; + if (update_prog) + ui_progress__update(&prog, size); + if (!session_done()) goto more; done: @@ -2138,12 +2080,17 @@ done: err = ordered_events__flush(oe, OE_FLUSH__FINAL); if (err) goto out_err; + err = session__flush_deferred_samples(session, tool); + if (err) + goto out_err; err = auxtrace__flush_events(session, tool); if (err) goto out_err; err = perf_session__flush_thread_stacks(session); out_err: free(buf); + if (update_prog) + ui_progress__finish(); if (!tool->no_warn) perf_session__warn_about_errors(session); ordered_events__free(&session->ordered_events); @@ -2463,12 +2410,10 @@ static int __perf_session__process_events(struct perf_session *session) .in_place_update = session->data->in_place_update, }; struct ordered_events *oe = &session->ordered_events; - struct perf_tool *tool = session->tool; + const struct perf_tool *tool = session->tool; struct ui_progress prog; int err; - perf_tool__fill_defaults(tool); - if (rd.data_size == 0) return -1; @@ -2484,6 +2429,9 @@ static int __perf_session__process_events(struct perf_session *session) err = auxtrace__flush_events(session, tool); if (err) goto out_err; + err = session__flush_deferred_samples(session, tool); + if (err) + goto out_err; err = perf_session__flush_thread_stacks(session); out_err: ui_progress__finish(); @@ -2515,15 +2463,13 @@ out_err: static int __perf_session__process_dir_events(struct perf_session *session) { struct perf_data *data = session->data; - struct perf_tool *tool = session->tool; + const struct perf_tool *tool = session->tool; int i, ret, readers, nr_readers; struct ui_progress prog; u64 total_size = perf_data__size(session->data); struct reader *rd; - perf_tool__fill_defaults(tool); - - ui_progress__init_size(&prog, total_size, "Sorting events..."); + ui_progress__init_size(&prog, total_size, "Processing events..."); nr_readers = 1; for (i = 0; i < data->dir.nr; i++) { @@ -2606,6 +2552,10 @@ static int __perf_session__process_dir_events(struct perf_session *session) if (ret) goto out_err; + ret = session__flush_deferred_samples(session, tool); + if (ret) + goto out_err; + ret = perf_session__flush_thread_stacks(session); out_err: ui_progress__finish(); @@ -2656,6 +2606,18 @@ bool perf_session__has_traces(struct perf_session *session, const char *msg) return false; } +bool perf_session__has_switch_events(struct perf_session *session) +{ + struct evsel *evsel; + + 
evlist__for_each_entry(session->evlist, evsel) { + if (evsel->core.attr.context_switch) + return true; + } + + return false; +} + int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr) { char *bracket; @@ -2696,8 +2658,7 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp return machines__fprintf_dsos_buildid(&session->machines, fp, skip, parm); } -size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp, - bool skip_empty) +size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) { size_t ret; const char *msg = ""; @@ -2707,7 +2668,7 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp, ret = fprintf(fp, "\nAggregated stats:%s\n", msg); - ret += events_stats__fprintf(&session->evlist->stats, fp, skip_empty); + ret += events_stats__fprintf(&session->evlist->stats, fp); return ret; } @@ -2748,7 +2709,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, { int i, err = -1; struct perf_cpu_map *map; - int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS); + int nr_cpus = min(perf_session__env(session)->nr_cpus_avail, MAX_NR_CPUS); struct perf_cpu cpu; for (i = 0; i < PERF_TYPE_MAX; ++i) { @@ -2833,7 +2794,8 @@ static int perf_session__set_guest_cpu(struct perf_session *session, pid_t pid, return 0; } -int perf_event__process_id_index(struct perf_session *session, +int perf_event__process_id_index(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { struct evlist *evlist = session->evlist; @@ -2937,3 +2899,8 @@ int perf_session__dsos_hit_all(struct perf_session *session) return 0; } + +struct perf_env *perf_session__env(struct perf_session *session) +{ + return &session->header.env; +} diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 3b0256e977a6..22d3ff877e83 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -26,26 +26,68 @@ struct decomp_data { struct zstd_data *zstd_decomp; }; +/** + * struct perf_session- A Perf session holds the main state when the program is + * working with live perf events or reading data from an input file. + * + * The rough organization of a perf_session is: + * ``` + * +--------------+ +-----------+ +------------+ + * | Session |1..* ----->| Machine |1..* ----->| Thread | + * +--------------+ +-----------+ +------------+ + * ``` + */ struct perf_session { + /** + * @header: The read version of a perf_file_header, or captures global + * information from a live session. + */ struct perf_header header; + /** @machines: Machines within the session a host and 0 or more guests. */ struct machines machines; + /** @evlist: List of evsels/events of the session. */ struct evlist *evlist; - struct auxtrace *auxtrace; + /** @auxtrace: callbacks to allow AUX area data decoding. */ + const struct auxtrace *auxtrace; + /** @itrace_synth_opts: AUX area tracing synthesis options. */ struct itrace_synth_opts *itrace_synth_opts; + /** @auxtrace_index: index of AUX area tracing events within a perf.data file. */ struct list_head auxtrace_index; #ifdef HAVE_LIBTRACEEVENT + /** @tevent: handles for libtraceevent and plugins. */ struct trace_event tevent; #endif + /** @time_conv: Holds contents of last PERF_RECORD_TIME_CONV event. */ struct perf_record_time_conv time_conv; - bool repipe; + /** @trace_event_repipe: When set causes read trace events to be written to stdout. 
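+	 * It replaces the old boolean @repipe flag, which __perf_session__new()
+	 * used to take together with a repipe fd.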
*/ + bool trace_event_repipe; + /** + * @one_mmap: The reader will use a single mmap by default. There may be + * multiple data files in particular for aux events. If this is true + * then the single big mmap for the data file can be assumed. + */ bool one_mmap; + /** @one_mmap_addr: Address of initial perf data file reader mmap. */ void *one_mmap_addr; + /** @one_mmap_offset: File offset in perf.data file when mapped. */ u64 one_mmap_offset; + /** @ordered_events: Used to turn unordered events into ordered ones. */ struct ordered_events ordered_events; + /** @data: Optional perf data file being read from. */ struct perf_data *data; - struct perf_tool *tool; + /** @tool: callbacks for event handling. */ + const struct perf_tool *tool; + /** + * @bytes_transferred: Used by perf record to count written bytes before + * compression. + */ u64 bytes_transferred; + /** + * @bytes_compressed: Used by perf record to count written bytes after + * compression. + */ u64 bytes_compressed; + /** @zstd_data: Owner of global compression state, buffers, etc. */ struct zstd_data zstd_data; struct decomp_data decomp_data; struct decomp_data *active_decomp; @@ -64,13 +106,14 @@ struct decomp { struct perf_tool; struct perf_session *__perf_session__new(struct perf_data *data, - bool repipe, int repipe_fd, - struct perf_tool *tool); + struct perf_tool *tool, + bool trace_event_repipe, + struct perf_env *host_env); static inline struct perf_session *perf_session__new(struct perf_data *data, struct perf_tool *tool) { - return __perf_session__new(data, false, -1, tool); + return __perf_session__new(data, tool, /*trace_event_repipe=*/false, /*host_env=*/NULL); } void perf_session__delete(struct perf_session *session); @@ -92,8 +135,6 @@ int perf_session__process_events(struct perf_session *session); int perf_session__queue_event(struct perf_session *s, union perf_event *event, u64 timestamp, u64 file_offset, const char *file_path); -void perf_tool__fill_defaults(struct perf_tool *tool); - int perf_session__resolve_callchain(struct perf_session *session, struct evsel *evsel, struct thread *thread, @@ -101,6 +142,7 @@ int perf_session__resolve_callchain(struct perf_session *session, struct symbol **parent); bool perf_session__has_traces(struct perf_session *session, const char *msg); +bool perf_session__has_switch_events(struct perf_session *session); void perf_event__attr_swap(struct perf_event_attr *attr); @@ -130,8 +172,7 @@ size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp); size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp, bool (fn)(struct dso *dso, int parm), int parm); -size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp, - bool skip_empty); +size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp); void perf_session__dump_kmaps(struct perf_session *session); @@ -155,14 +196,20 @@ extern volatile int session_done; int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample); +int perf_session__deliver_synth_attr_event(struct perf_session *session, + const struct perf_event_attr *attr, + u64 id); int perf_session__dsos_hit_all(struct perf_session *session); -int perf_event__process_id_index(struct perf_session *session, +int perf_event__process_id_index(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); -int perf_event__process_finished_round(struct perf_tool *tool, +int perf_event__process_finished_round(const 
struct perf_tool *tool, union perf_event *event, struct ordered_events *oe); +struct perf_env *perf_session__env(struct perf_session *session); + #endif /* __PERF_SESSION_H */ diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 3107f5aa8c9a..b65b1792ca05 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -1,8 +1,10 @@ from os import getenv, path from subprocess import Popen, PIPE from re import sub +import shlex cc = getenv("CC") +assert cc, "Environment variable CC not set" # Check if CC has options, as is the case in yocto, where it uses CC="cc --sysroot..." cc_tokens = cc.split() @@ -12,12 +14,26 @@ if len(cc_tokens) > 1: else: cc_options = "" +# ignore optional stderr could be None as it is set to PIPE to avoid that. +# mypy: disable-error-code="union-attr" cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline() -src_feature_tests = getenv('srctree') + '/tools/build/feature' + +srctree = getenv('srctree') +assert srctree, "Environment variable srctree, for the Linux sources, not set" +src_feature_tests = f'{srctree}/tools/build/feature' def clang_has_option(option): - cc_output = Popen([cc, cc_options + option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines() - return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ] + error_substrings = ( + b"unknown argument", + b"is not supported", + b"unknown warning option" + ) + cmd = shlex.split(f"{cc} {cc_options} {option}") + [ + "-o", "/dev/null", + path.join(src_feature_tests, "test-hello.c") + ] + cc_output = Popen(cmd, stderr=PIPE).stderr.readlines() + return not any(any(error in line for error in error_substrings) for line in cc_output) if cc_is_clang: from sysconfig import get_config_vars @@ -60,48 +76,25 @@ class install_lib(_install_lib): cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) -cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls', '-DPYTHON_PERF' ] +cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ] if cc_is_clang: cflags += ["-Wno-unused-command-line-argument" ] + if clang_has_option("-Wno-cast-function-type-mismatch"): + cflags += ["-Wno-cast-function-type-mismatch" ] else: cflags += ['-Wno-cast-function-type' ] # The python headers have mixed code with declarations (decls after asserts, for instance) cflags += [ "-Wno-declaration-after-statement" ] -src_perf = getenv('srctree') + '/tools/perf' +src_perf = f'{srctree}/tools/perf' build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') -libtraceevent = getenv('LIBTRACEEVENT') -libapikfs = getenv('LIBAPI') -libperf = getenv('LIBPERF') - -ext_sources = [f.strip() for f in open('util/python-ext-sources') - if len(f.strip()) > 0 and f[0] != '#'] - -extra_libraries = [] - -if '-DHAVE_LIBTRACEEVENT' in cflags: - extra_libraries += [ 'traceevent' ] -else: - ext_sources.remove('util/trace-event.c') - ext_sources.remove('util/trace-event-parse.c') - -# use full paths with source files -ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)) - -if '-DHAVE_LIBNUMA_SUPPORT' in cflags: - extra_libraries += [ 'numa' ] -if '-DHAVE_LIBCAP_SUPPORT' in cflags: - extra_libraries += [ 'cap' ] perf = Extension('perf', - sources = ext_sources, - include_dirs = ['util/include'], - libraries = extra_libraries, - extra_compile_args = cflags, - extra_objects 
= [ x for x in [libtraceevent, libapikfs, libperf] - if x is not None], + sources = [ src_perf + '/util/python.c' ], + include_dirs = ['util/include'], + extra_compile_args = cflags, ) setup(name='perf', diff --git a/tools/perf/util/sha1.c b/tools/perf/util/sha1.c new file mode 100644 index 000000000000..7032fa4ff3fd --- /dev/null +++ b/tools/perf/util/sha1.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * SHA-1 message digest algorithm + * + * Copyright 2025 Google LLC + */ +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/unaligned.h> +#include <string.h> + +#include "sha1.h" + +#define SHA1_BLOCK_SIZE 64 + +static const u32 sha1_K[4] = { 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6 }; + +#define SHA1_ROUND(i, a, b, c, d, e) \ + do { \ + if ((i) >= 16) \ + w[i] = rol32(w[(i) - 16] ^ w[(i) - 14] ^ w[(i) - 8] ^ \ + w[(i) - 3], \ + 1); \ + e += w[i] + rol32(a, 5) + sha1_K[(i) / 20]; \ + if ((i) < 20) \ + e += (b & (c ^ d)) ^ d; \ + else if ((i) < 40 || (i) >= 60) \ + e += b ^ c ^ d; \ + else \ + e += (c & d) ^ (b & (c ^ d)); \ + b = rol32(b, 30); \ + /* The new (a, b, c, d, e) is the old (e, a, b, c, d). */ \ + } while (0) + +#define SHA1_5ROUNDS(i) \ + do { \ + SHA1_ROUND((i) + 0, a, b, c, d, e); \ + SHA1_ROUND((i) + 1, e, a, b, c, d); \ + SHA1_ROUND((i) + 2, d, e, a, b, c); \ + SHA1_ROUND((i) + 3, c, d, e, a, b); \ + SHA1_ROUND((i) + 4, b, c, d, e, a); \ + } while (0) + +#define SHA1_20ROUNDS(i) \ + do { \ + SHA1_5ROUNDS((i) + 0); \ + SHA1_5ROUNDS((i) + 5); \ + SHA1_5ROUNDS((i) + 10); \ + SHA1_5ROUNDS((i) + 15); \ + } while (0) + +static void sha1_blocks(u32 h[5], const u8 *data, size_t nblocks) +{ + while (nblocks--) { + u32 a = h[0]; + u32 b = h[1]; + u32 c = h[2]; + u32 d = h[3]; + u32 e = h[4]; + u32 w[80]; + + for (int i = 0; i < 16; i++) + w[i] = get_unaligned_be32(&data[i * 4]); + SHA1_20ROUNDS(0); + SHA1_20ROUNDS(20); + SHA1_20ROUNDS(40); + SHA1_20ROUNDS(60); + + h[0] += a; + h[1] += b; + h[2] += c; + h[3] += d; + h[4] += e; + data += SHA1_BLOCK_SIZE; + } +} + +/* Calculate the SHA-1 message digest of the given data. 
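+ * The tail is padded in the standard SHA-1 way (a 0x80 byte, zeroes, then
+ * the 64-bit big-endian bit count), yielding one or two final blocks for
+ * sha1_blocks(). E.g. sha1("abc", 3, out) produces the well-known test
+ * vector a9993e364706816aba3e25717850c26c9cd0d89d.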
*/ +void sha1(const void *data, size_t len, u8 out[SHA1_DIGEST_SIZE]) +{ + u32 h[5] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, + 0xC3D2E1F0 }; + u8 final_data[2 * SHA1_BLOCK_SIZE] = { 0 }; + size_t final_len = len % SHA1_BLOCK_SIZE; + + sha1_blocks(h, data, len / SHA1_BLOCK_SIZE); + + memcpy(final_data, data + len - final_len, final_len); + final_data[final_len] = 0x80; + final_len = round_up(final_len + 9, SHA1_BLOCK_SIZE); + put_unaligned_be64((u64)len * 8, &final_data[final_len - 8]); + + sha1_blocks(h, final_data, final_len / SHA1_BLOCK_SIZE); + + for (int i = 0; i < 5; i++) + put_unaligned_be32(h[i], &out[i * 4]); +} diff --git a/tools/perf/util/sha1.h b/tools/perf/util/sha1.h new file mode 100644 index 000000000000..e92c9966e1d5 --- /dev/null +++ b/tools/perf/util/sha1.h @@ -0,0 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include <linux/types.h> + +#define SHA1_DIGEST_SIZE 20 + +void sha1(const void *data, size_t len, u8 out[SHA1_DIGEST_SIZE]); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index cd39ea972193..f3a565b0e230 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -35,7 +35,7 @@ #include <linux/string.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif regex_t parent_regex; @@ -141,6 +141,43 @@ struct sort_entry sort_thread = { .se_width_idx = HISTC_THREAD, }; +/* --sort tgid */ + +static int64_t +sort__tgid_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return thread__pid(right->thread) - thread__pid(left->thread); +} + +static int hist_entry__tgid_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + int tgid = thread__pid(he->thread); + const char *comm = NULL; + + /* display comm of the thread-group leader */ + if (thread__pid(he->thread) == thread__tid(he->thread)) { + comm = thread__comm_str(he->thread); + } else { + struct maps *maps = thread__maps(he->thread); + struct thread *leader = machine__find_thread(maps__machine(maps), + tgid, tgid); + if (leader) { + comm = thread__comm_str(leader); + thread__put(leader); + } + } + width = max(7U, width) - 8; + return repsep_snprintf(bf, size, "%7d:%-*.*s", tgid, width, width, comm ?: ""); +} + +struct sort_entry sort_tgid = { + .se_header = " Tgid:Command", + .se_cmp = sort__tgid_cmp, + .se_snprintf = hist_entry__tgid_snprintf, + .se_width_idx = HISTC_TGID, +}; + /* --sort simd */ static int64_t @@ -334,7 +371,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) * comparing symbol address alone is not enough since it's a * relative address within a dso. 
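 * When "dso" is not itself a sort key, the dsos are compared here first,
 * so equal relative addresses in different objects do not collapse into
 * one hist entry.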
*/ - if (!hists__has(left->hists, dso) || hists__has(right->hists, dso)) { + if (!hists__has(left->hists, dso)) { ret = sort__dso_cmp(left, right); if (ret != 0) return ret; @@ -677,6 +714,102 @@ struct sort_entry sort_sym_ipc_null = { .se_width_idx = HISTC_SYMBOL_IPC, }; +/* --sort callchain_branch_predicted */ + +static int64_t +sort__callchain_branch_predicted_cmp(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int hist_entry__callchain_branch_predicted_snprintf( + struct hist_entry *he, char *bf, size_t size, unsigned int width) +{ + u64 branch_count, predicted_count; + double percent = 0.0; + char str[32]; + + callchain_branch_counts(he->callchain, &branch_count, + &predicted_count, NULL, NULL); + + if (branch_count) + percent = predicted_count * 100.0 / branch_count; + + snprintf(str, sizeof(str), "%.1f%%", percent); + return repsep_snprintf(bf, size, "%-*.*s", width, width, str); +} + +struct sort_entry sort_callchain_branch_predicted = { + .se_header = "Predicted", + .se_cmp = sort__callchain_branch_predicted_cmp, + .se_snprintf = hist_entry__callchain_branch_predicted_snprintf, + .se_width_idx = HISTC_CALLCHAIN_BRANCH_PREDICTED, +}; + +/* --sort callchain_branch_abort */ + +static int64_t +sort__callchain_branch_abort_cmp(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int hist_entry__callchain_branch_abort_snprintf(struct hist_entry *he, + char *bf, size_t size, + unsigned int width) +{ + u64 branch_count, abort_count; + char str[32]; + + callchain_branch_counts(he->callchain, &branch_count, + NULL, &abort_count, NULL); + + snprintf(str, sizeof(str), "%" PRId64, abort_count); + return repsep_snprintf(bf, size, "%-*.*s", width, width, str); +} + +struct sort_entry sort_callchain_branch_abort = { + .se_header = "Abort", + .se_cmp = sort__callchain_branch_abort_cmp, + .se_snprintf = hist_entry__callchain_branch_abort_snprintf, + .se_width_idx = HISTC_CALLCHAIN_BRANCH_ABORT, +}; + +/* --sort callchain_branch_cycles */ + +static int64_t +sort__callchain_branch_cycles_cmp(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int hist_entry__callchain_branch_cycles_snprintf(struct hist_entry *he, + char *bf, size_t size, + unsigned int width) +{ + u64 branch_count, cycles_count, cycles = 0; + char str[32]; + + callchain_branch_counts(he->callchain, &branch_count, + NULL, NULL, &cycles_count); + + if (branch_count) + cycles = cycles_count / branch_count; + + snprintf(str, sizeof(str), "%" PRId64 "", cycles); + return repsep_snprintf(bf, size, "%-*.*s", width, width, str); +} + +struct sort_entry sort_callchain_branch_cycles = { + .se_header = "Cycles", + .se_cmp = sort__callchain_branch_cycles_cmp, + .se_snprintf = hist_entry__callchain_branch_cycles_snprintf, + .se_width_idx = HISTC_CALLCHAIN_BRANCH_CYCLES, +}; + /* --sort srcfile */ static char no_srcfile[1]; @@ -796,6 +929,38 @@ struct sort_entry sort_cpu = { .se_width_idx = HISTC_CPU, }; +/* --sort parallelism */ + +static int64_t +sort__parallelism_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->parallelism - left->parallelism; +} + +static int hist_entry__parallelism_filter(struct hist_entry *he, int type, const void *arg) +{ + const unsigned long *parallelism_filter = arg; + + if (type != HIST_FILTER__PARALLELISM) + return -1; + + return test_bit(he->parallelism, parallelism_filter); +} + +static int 
hist_entry__parallelism_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%*d", width, he->parallelism); +} + +struct sort_entry sort_parallelism = { + .se_header = "Parallelism", + .se_cmp = sort__parallelism_cmp, + .se_filter = hist_entry__parallelism_filter, + .se_snprintf = hist_entry__parallelism_snprintf, + .se_width_idx = HISTC_PARALLELISM, +}; + /* --sort cgroup_id */ static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev) @@ -942,17 +1107,19 @@ static char *get_trace_output(struct hist_entry *he) .data = he->raw_data, .size = he->raw_size, }; + struct tep_event *tp_format; evsel = hists_to_evsel(he->hists); trace_seq_init(&seq); - if (symbol_conf.raw_trace) { - tep_print_fields(&seq, he->raw_data, he->raw_size, - evsel->tp_format); - } else { - tep_print_event(evsel->tp_format->tep, - &seq, &rec, "%s", TEP_PRINT_INFO); + tp_format = evsel__tp_format(evsel); + if (tp_format) { + if (symbol_conf.raw_trace) + tep_print_fields(&seq, he->raw_data, he->raw_size, tp_format); + else + tep_print_event(tp_format->tep, &seq, &rec, "%s", TEP_PRINT_INFO); } + /* * Trim the buffer, it starts at 4KB and we're not going to * add anything more to this buffer. @@ -1579,22 +1746,27 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) if (rc) return rc; /* - * Addresses with no major/minor numbers are assumed to be + * Addresses with no major/minor numbers or build ID are assumed to be * anonymous in userspace. Sort those on pid then address. * * The kernel and non-zero major/minor mapped areas are * assumed to be unity mapped. Sort those on address. */ + if (left->cpumode != PERF_RECORD_MISC_KERNEL && (map__flags(l_map) & MAP_SHARED) == 0) { + const struct dso_id *dso_id = dso__id_const(l_dso); - if ((left->cpumode != PERF_RECORD_MISC_KERNEL) && - (!(map__flags(l_map) & MAP_SHARED)) && !dso__id(l_dso)->maj && !dso__id(l_dso)->min && - !dso__id(l_dso)->ino && !dso__id(l_dso)->ino_generation) { - /* userspace anonymous */ + if (!dso_id->mmap2_valid) + dso_id = dso__id_const(r_dso); - if (thread__pid(left->thread) > thread__pid(right->thread)) - return -1; - if (thread__pid(left->thread) < thread__pid(right->thread)) - return 1; + if (!build_id__is_defined(&dso_id->build_id) && + (!dso_id->mmap2_valid || (dso_id->maj == 0 && dso_id->min == 0))) { + /* userspace anonymous */ + + if (thread__pid(left->thread) > thread__pid(right->thread)) + return -1; + if (thread__pid(left->thread) < thread__pid(right->thread)) + return 1; + } } addr: @@ -1619,6 +1791,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf, if (he->mem_info) { struct map *map = mem_info__daddr(he->mem_info)->ms.map; struct dso *dso = map ? map__dso(map) : NULL; + const struct dso_id *dso_id = dso ? 
dso__id_const(dso) : &dso_id_empty; addr = cl_address(mem_info__daddr(he->mem_info)->al_addr, chk_double_cl); ms = &mem_info__daddr(he->mem_info)->ms; @@ -1627,8 +1800,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf, if ((he->cpumode != PERF_RECORD_MISC_KERNEL) && map && !(map__prot(map) & PROT_EXEC) && (map__flags(map) & MAP_SHARED) && - (dso__id(dso)->maj || dso__id(dso)->min || dso__id(dso)->ino || - dso__id(dso)->ino_generation)) + (!dso_id->mmap2_valid || (dso_id->maj == 0 && dso_id->min == 0))) level = 's'; else if (!map) level = 'X'; @@ -1712,21 +1884,20 @@ struct sort_entry sort_global_ins_lat = { static int64_t sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) { - return left->p_stage_cyc - right->p_stage_cyc; + return left->weight3 - right->weight3; } static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, - he->p_stage_cyc * he->stat.nr_events); + return repsep_snprintf(bf, size, "%-*u", width, he->weight3 * he->stat.nr_events); } static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc); + return repsep_snprintf(bf, size, "%-*u", width, he->weight3); } struct sort_entry sort_local_p_stage_cyc = { @@ -2273,46 +2444,21 @@ sort__typeoff_sort(struct hist_entry *left, struct hist_entry *right) return left->mem_type_off - right->mem_type_off; } -static void fill_member_name(char *buf, size_t sz, struct annotated_member *m, - int offset, bool first) -{ - struct annotated_member *child; - - if (list_empty(&m->children)) - return; - - list_for_each_entry(child, &m->children, node) { - if (child->offset <= offset && offset < child->offset + child->size) { - int len = 0; - - /* It can have anonymous struct/union members */ - if (child->var_name) { - len = scnprintf(buf, sz, "%s%s", - first ? 
"" : ".", child->var_name); - first = false; - } - - fill_member_name(buf + len, sz - len, child, offset, first); - return; - } - } -} - static int hist_entry__typeoff_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width __maybe_unused) { struct annotated_data_type *he_type = he->mem_type; char buf[4096]; - buf[0] = '\0'; - if (list_empty(&he_type->self.children)) - snprintf(buf, sizeof(buf), "no field"); - else - fill_member_name(buf, sizeof(buf), &he_type->self, - he->mem_type_off, true); - buf[4095] = '\0'; + if (he_type == &unknown_type || he_type == &stackop_type || + he_type == &canary_type) + return repsep_snprintf(bf, size, "%s", he_type->self.type_name); + + if (!annotated_data_type__get_member_name(he_type, buf, sizeof(buf), + he->mem_type_off)) + scnprintf(buf, sizeof(buf), "no field"); - return repsep_snprintf(bf, size, "%s %+d (%s)", he_type->self.type_name, + return repsep_snprintf(bf, size, "%s +%#x (%s)", he_type->self.type_name, he->mem_type_off, buf); } @@ -2326,6 +2472,57 @@ struct sort_entry sort_type_offset = { .se_width_idx = HISTC_TYPE_OFFSET, }; +/* --sort typecln */ + +/* TODO: use actual value in the system */ +#define TYPE_CACHELINE_SIZE 64 + +static int64_t +sort__typecln_sort(struct hist_entry *left, struct hist_entry *right) +{ + struct annotated_data_type *left_type = left->mem_type; + struct annotated_data_type *right_type = right->mem_type; + int64_t left_cln, right_cln; + int64_t ret; + + if (!left_type) { + sort__type_init(left); + left_type = left->mem_type; + } + + if (!right_type) { + sort__type_init(right); + right_type = right->mem_type; + } + + ret = strcmp(left_type->self.type_name, right_type->self.type_name); + if (ret) + return ret; + + left_cln = left->mem_type_off / TYPE_CACHELINE_SIZE; + right_cln = right->mem_type_off / TYPE_CACHELINE_SIZE; + return left_cln - right_cln; +} + +static int hist_entry__typecln_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width __maybe_unused) +{ + struct annotated_data_type *he_type = he->mem_type; + + return repsep_snprintf(bf, size, "%s: cache-line %d", he_type->self.type_name, + he->mem_type_off / TYPE_CACHELINE_SIZE); +} + +struct sort_entry sort_type_cacheline = { + .se_header = "Data Type Cacheline", + .se_cmp = sort__type_cmp, + .se_collapse = sort__typecln_sort, + .se_sort = sort__typecln_sort, + .se_init = sort__type_init, + .se_snprintf = hist_entry__typecln_snprintf, + .se_width_idx = HISTC_TYPE_CACHELINE, +}; + struct sort_dimension { const char *name; @@ -2333,25 +2530,51 @@ struct sort_dimension { int taken; }; -int __weak arch_support_sort_key(const char *sort_key __maybe_unused) +static int arch_support_sort_key(const char *sort_key, struct perf_env *env) { + const char *arch = perf_env__arch(env); + + if (!strcmp("x86", arch) || !strcmp("powerpc", arch)) { + if (!strcmp(sort_key, "p_stage_cyc")) + return 1; + if (!strcmp(sort_key, "local_p_stage_cyc")) + return 1; + } return 0; } -const char * __weak arch_perf_header_entry(const char *se_header) -{ +static const char *arch_perf_header_entry(const char *se_header, struct perf_env *env) +{ + const char *arch = perf_env__arch(env); + + if (!strcmp("x86", arch)) { + if (!strcmp(se_header, "Local Pipeline Stage Cycle")) + return "Local Retire Latency"; + else if (!strcmp(se_header, "Pipeline Stage Cycle")) + return "Retire Latency"; + } else if (!strcmp("powerpc", arch)) { + if (!strcmp(se_header, "Local INSTR Latency")) + return "Finish Cyc"; + else if (!strcmp(se_header, "INSTR Latency")) + return 
"Global Finish_cyc"; + else if (!strcmp(se_header, "Local Pipeline Stage Cycle")) + return "Dispatch Cyc"; + else if (!strcmp(se_header, "Pipeline Stage Cycle")) + return "Global Dispatch_cyc"; + } return se_header; } -static void sort_dimension_add_dynamic_header(struct sort_dimension *sd) +static void sort_dimension_add_dynamic_header(struct sort_dimension *sd, struct perf_env *env) { - sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header); + sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header, env); } #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_PID, "pid", sort_thread), + DIM(SORT_TGID, "tgid", sort_tgid), DIM(SORT_COMM, "comm", sort_comm), DIM(SORT_DSO, "dso", sort_dso), DIM(SORT_SYM, "symbol", sort_sym), @@ -2384,6 +2607,8 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_ANNOTATE_DATA_TYPE, "type", sort_type), DIM(SORT_ANNOTATE_DATA_TYPE_OFFSET, "typeoff", sort_type_offset), DIM(SORT_SYM_OFFSET, "symoff", sort_sym_offset), + DIM(SORT_ANNOTATE_DATA_TYPE_CACHELINE, "typecln", sort_type_cacheline), + DIM(SORT_PARALLELISM, "parallelism", sort_parallelism), }; #undef DIM @@ -2404,6 +2629,15 @@ static struct sort_dimension bstack_sort_dimensions[] = { DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc), DIM(SORT_ADDR_FROM, "addr_from", sort_addr_from), DIM(SORT_ADDR_TO, "addr_to", sort_addr_to), + DIM(SORT_CALLCHAIN_BRANCH_PREDICTED, + "callchain_branch_predicted", + sort_callchain_branch_predicted), + DIM(SORT_CALLCHAIN_BRANCH_ABORT, + "callchain_branch_abort", + sort_callchain_branch_abort), + DIM(SORT_CALLCHAIN_BRANCH_CYCLES, + "callchain_branch_cycles", + sort_callchain_branch_cycles) }; #undef DIM @@ -2430,17 +2664,22 @@ struct hpp_dimension { const char *name; struct perf_hpp_fmt *fmt; int taken; + int was_taken; + int mem_mode; }; #define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], } +#define DIM_MEM(d, n) { .name = n, .fmt = &perf_hpp__format[d], .mem_mode = 1, } static struct hpp_dimension hpp_sort_dimensions[] = { DIM(PERF_HPP__OVERHEAD, "overhead"), + DIM(PERF_HPP__LATENCY, "latency"), DIM(PERF_HPP__OVERHEAD_SYS, "overhead_sys"), DIM(PERF_HPP__OVERHEAD_US, "overhead_us"), DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"), DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"), DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"), + DIM(PERF_HPP__LATENCY_ACC, "latency_children"), DIM(PERF_HPP__SAMPLES, "sample"), DIM(PERF_HPP__PERIOD, "period"), DIM(PERF_HPP__WEIGHT1, "weight1"), @@ -2450,8 +2689,15 @@ static struct hpp_dimension hpp_sort_dimensions[] = { DIM(PERF_HPP__WEIGHT2, "ins_lat"), DIM(PERF_HPP__WEIGHT3, "retire_lat"), DIM(PERF_HPP__WEIGHT3, "p_stage_cyc"), + /* used for output only when SORT_MODE__MEM */ + DIM_MEM(PERF_HPP__MEM_STAT_OP, "op"), + DIM_MEM(PERF_HPP__MEM_STAT_CACHE, "cache"), + DIM_MEM(PERF_HPP__MEM_STAT_MEMORY, "memory"), + DIM_MEM(PERF_HPP__MEM_STAT_SNOOP, "snoop"), + DIM_MEM(PERF_HPP__MEM_STAT_DTLB, "dtlb"), }; +#undef DIM_MEM #undef DIM struct hpp_sort_entry { @@ -2471,18 +2717,22 @@ void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists) } static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hists *hists, int line __maybe_unused, + struct hists *hists, int line, int *span __maybe_unused) { struct hpp_sort_entry *hse; size_t len = fmt->user_len; + const char *hdr = ""; + + if (line == hists->hpp_list->nr_header_lines - 1) + hdr = fmt->name; hse = container_of(fmt, 
struct hpp_sort_entry, hpp); if (!len) len = hists__col_len(hists, hse->se->se_width_idx); - return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, fmt->name); + return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, hdr); } static int __sort__hpp_width(struct perf_hpp_fmt *fmt, @@ -2576,6 +2826,7 @@ MK_SORT_ENTRY_CHK(thread) MK_SORT_ENTRY_CHK(comm) MK_SORT_ENTRY_CHK(dso) MK_SORT_ENTRY_CHK(sym) +MK_SORT_ENTRY_CHK(parallelism) static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) @@ -2713,9 +2964,10 @@ static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd, } static int __sort_dimension__add_hpp_output(struct sort_dimension *sd, - struct perf_hpp_list *list) + struct perf_hpp_list *list, + int level) { - struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0); + struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level); if (hse == NULL) return -1; @@ -3136,9 +3388,8 @@ static int __dynamic_dimension__add(struct evsel *evsel, static int add_evsel_fields(struct evsel *evsel, bool raw_trace, int level) { int ret; - struct tep_format_field *field; - - field = evsel->tp_format->format.fields; + struct tep_event *tp_format = evsel__tp_format(evsel); + struct tep_format_field *field = tp_format ? tp_format->format.fields : NULL; while (field) { ret = __dynamic_dimension__add(evsel, field, raw_trace, level); if (ret < 0) @@ -3171,13 +3422,19 @@ static int add_all_matching_fields(struct evlist *evlist, { int ret = -ESRCH; struct evsel *evsel; - struct tep_format_field *field; evlist__for_each_entry(evlist, evsel) { + struct tep_event *tp_format; + struct tep_format_field *field; + if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) continue; - field = tep_find_any_field(evsel->tp_format, field_name); + tp_format = evsel__tp_format(evsel); + if (tp_format == NULL) + continue; + + field = tep_find_any_field(tp_format, field_name); if (field == NULL) continue; @@ -3259,7 +3516,9 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok, if (!strcmp(field_name, "*")) { ret = add_evsel_fields(evsel, raw_trace, level); } else { - struct tep_format_field *field = tep_find_any_field(evsel->tp_format, field_name); + struct tep_event *tp_format = evsel__tp_format(evsel); + struct tep_format_field *field = + tp_format ? 
tep_find_any_field(tp_format, field_name) : NULL; if (field == NULL) { pr_debug("Cannot find event field for %s.%s\n", @@ -3311,17 +3570,19 @@ static int __hpp_dimension__add(struct hpp_dimension *hd, return -1; hd->taken = 1; + hd->was_taken = 1; perf_hpp_list__register_sort_field(list, fmt); return 0; } static int __sort_dimension__add_output(struct perf_hpp_list *list, - struct sort_dimension *sd) + struct sort_dimension *sd, + int level) { if (sd->taken) return 0; - if (__sort_dimension__add_hpp_output(sd, list) < 0) + if (__sort_dimension__add_hpp_output(sd, list, level) < 0) return -1; sd->taken = 1; @@ -3329,14 +3590,15 @@ static int __sort_dimension__add_output(struct perf_hpp_list *list, } static int __hpp_dimension__add_output(struct perf_hpp_list *list, - struct hpp_dimension *hd) + struct hpp_dimension *hd, + int level) { struct perf_hpp_fmt *fmt; if (hd->taken) return 0; - fmt = __hpp_dimension__alloc_hpp(hd, 0); + fmt = __hpp_dimension__alloc_hpp(hd, level); if (!fmt) return -1; @@ -3345,14 +3607,19 @@ static int __hpp_dimension__add_output(struct perf_hpp_list *list, return 0; } -int hpp_dimension__add_output(unsigned col) +int hpp_dimension__add_output(unsigned col, bool implicit) { + struct hpp_dimension *hd; + BUG_ON(col >= PERF_HPP__MAX_INDEX); - return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]); + hd = &hpp_sort_dimensions[col]; + if (implicit && !hd->was_taken) + return 0; + return __hpp_dimension__add_output(&perf_hpp_list, hd, /*level=*/0); } int sort_dimension__add(struct perf_hpp_list *list, const char *tok, - struct evlist *evlist, + struct evlist *evlist, struct perf_env *env, int level) { unsigned int i, j; @@ -3365,7 +3632,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, */ for (j = 0; j < ARRAY_SIZE(arch_specific_sort_keys); j++) { if (!strcmp(arch_specific_sort_keys[j], tok) && - !arch_support_sort_key(tok)) { + !arch_support_sort_key(tok, env)) { return 0; } } @@ -3378,7 +3645,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) { if (sd->name && !strcmp(dynamic_headers[j], sd->name)) - sort_dimension_add_dynamic_header(sd); + sort_dimension_add_dynamic_header(sd, env); } if (sd->entry == &sort_parent && parent_pattern) { @@ -3417,22 +3684,19 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, return __sort_dimension__add(sd, list, level); } - for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { - struct hpp_dimension *hd = &hpp_sort_dimensions[i]; - - if (strncasecmp(tok, hd->name, strlen(tok))) - continue; - - return __hpp_dimension__add(hd, list, level); - } - for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { struct sort_dimension *sd = &bstack_sort_dimensions[i]; if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sort__mode != SORT_MODE__BRANCH) + if ((sort__mode != SORT_MODE__BRANCH) && + strncasecmp(tok, "callchain_branch_predicted", + strlen(tok)) && + strncasecmp(tok, "callchain_branch_abort", + strlen(tok)) && + strncasecmp(tok, "callchain_branch_cycles", + strlen(tok))) return -EINVAL; if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) @@ -3461,20 +3725,59 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, return 0; } + for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { + struct hpp_dimension *hd = &hpp_sort_dimensions[i]; + + if (strncasecmp(tok, hd->name, strlen(tok))) + continue; + + return __hpp_dimension__add(hd, list, level); + } 
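Key lookup in sort_dimension__add() is case-insensitive and prefix-based, and the first matching table entry wins; the hpp-dimension probe now runs last, so the branch-stack and memory tables get first chance at a shared prefix. A stand-alone model of the matching rule, for illustration only (matches() is not a perf function):

```c
#include <stdio.h>
#include <string.h>
#include <strings.h>	/* strncasecmp() */

/* The same test the dimension tables apply: the user's token must be a
 * case-insensitive prefix of the dimension name. */
static int matches(const char *tok, const char *name)
{
	return strncasecmp(tok, name, strlen(tok)) == 0;
}

int main(void)
{
	printf("%d\n", matches("sym", "symbol"));	/* 1: prefix match */
	printf("%d\n", matches("SYM", "symbol"));	/* 1: case-insensitive */
	printf("%d\n", matches("typeoff", "typecln"));	/* 0: not a prefix */
	return 0;
}
```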
+ if (!add_dynamic_entry(evlist, tok, level)) return 0; return -ESRCH; } +/* This should match with sort_dimension__add() above */ +static bool is_hpp_sort_key(const char *key, struct perf_env *env) +{ + unsigned i; + + for (i = 0; i < ARRAY_SIZE(arch_specific_sort_keys); i++) { + if (!strcmp(arch_specific_sort_keys[i], key) && + !arch_support_sort_key(key, env)) { + return false; + } + } + + for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { + struct sort_dimension *sd = &common_sort_dimensions[i]; + + if (sd->name && !strncasecmp(key, sd->name, strlen(key))) + return false; + } + + for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { + struct hpp_dimension *hd = &hpp_sort_dimensions[i]; + + if (!strncasecmp(key, hd->name, strlen(key))) + return true; + } + return false; +} + static int setup_sort_list(struct perf_hpp_list *list, char *str, - struct evlist *evlist) + struct evlist *evlist, struct perf_env *env) { char *tmp, *tok; int ret = 0; int level = 0; int next_level = 1; + int prev_level = 0; bool in_group = false; + bool prev_was_hpp = false; do { tok = str; @@ -3495,7 +3798,20 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str, } if (*tok) { - ret = sort_dimension__add(list, tok, evlist, level); + if (is_hpp_sort_key(tok, env)) { + /* keep output (hpp) sort keys in the same level */ + if (prev_was_hpp) { + bool next_same = (level == next_level); + + level = prev_level; + next_level = next_same ? level : level+1; + } + prev_was_hpp = true; + } else { + prev_was_hpp = false; + } + + ret = sort_dimension__add(list, tok, evlist, env, level); if (ret == -EINVAL) { if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok))) ui__error("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system"); @@ -3506,6 +3822,7 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str, ui__error("Unknown --sort key: `%s'", tok); break; } + prev_level = level; } level = next_level; @@ -3601,15 +3918,29 @@ static char *setup_overhead(char *keys) if (sort__mode == SORT_MODE__DIFF) return keys; - keys = prefix_if_not_in("overhead", keys); - - if (symbol_conf.cumulate_callchain) - keys = prefix_if_not_in("overhead_children", keys); + if (symbol_conf.prefer_latency) { + keys = prefix_if_not_in("overhead", keys); + keys = prefix_if_not_in("latency", keys); + if (symbol_conf.cumulate_callchain) { + keys = prefix_if_not_in("overhead_children", keys); + keys = prefix_if_not_in("latency_children", keys); + } + } else if (!keys || (!strstr(keys, "overhead") && + !strstr(keys, "latency"))) { + if (symbol_conf.enable_latency) + keys = prefix_if_not_in("latency", keys); + keys = prefix_if_not_in("overhead", keys); + if (symbol_conf.cumulate_callchain) { + if (symbol_conf.enable_latency) + keys = prefix_if_not_in("latency_children", keys); + keys = prefix_if_not_in("overhead_children", keys); + } + } return keys; } -static int __setup_sorting(struct evlist *evlist) +static int __setup_sorting(struct evlist *evlist, struct perf_env *env) { char *str; const char *sort_keys; @@ -3649,7 +3980,7 @@ static int __setup_sorting(struct evlist *evlist) } } - ret = setup_sort_list(&perf_hpp_list, str, evlist); + ret = setup_sort_list(&perf_hpp_list, str, evlist, env); free(str); return ret; @@ -3752,7 +4083,7 @@ void sort__setup_elide(FILE *output) } } -int output_field_add(struct perf_hpp_list *list, const char *tok) +int output_field_add(struct perf_hpp_list *list, const char *tok, int *level) { unsigned int i; @@ -3765,16 
+4096,25 @@ int output_field_add(struct perf_hpp_list *list, const char *tok) if (!strcasecmp(tok, "weight")) ui__warning("--fields weight shows the average value unlike in the --sort key.\n"); - return __hpp_dimension__add_output(list, hd); + if (hd->mem_mode && sort__mode != SORT_MODE__MEMORY) + continue; + + return __hpp_dimension__add_output(list, hd, *level); } + /* + * A non-output field will increase level so that it can be in a + * different hierarchy. + */ + (*level)++; + for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { struct sort_dimension *sd = &common_sort_dimensions[i]; if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; - return __sort_dimension__add_output(list, sd); + return __sort_dimension__add_output(list, sd, *level); } for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { @@ -3786,7 +4126,7 @@ int output_field_add(struct perf_hpp_list *list, const char *tok) if (sort__mode != SORT_MODE__BRANCH) return -EINVAL; - return __sort_dimension__add_output(list, sd); + return __sort_dimension__add_output(list, sd, *level); } for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) { @@ -3798,7 +4138,7 @@ int output_field_add(struct perf_hpp_list *list, const char *tok) if (sort__mode != SORT_MODE__MEMORY) return -EINVAL; - return __sort_dimension__add_output(list, sd); + return __sort_dimension__add_output(list, sd, *level); } return -ESRCH; @@ -3808,10 +4148,11 @@ static int setup_output_list(struct perf_hpp_list *list, char *str) { char *tmp, *tok; int ret = 0; + int level = 0; for (tok = strtok_r(str, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { - ret = output_field_add(list, tok); + ret = output_field_add(list, tok, &level); if (ret == -EINVAL) { ui__error("Invalid --fields key: `%s'", tok); break; @@ -3875,16 +4216,16 @@ out: return ret; } -int setup_sorting(struct evlist *evlist) +int setup_sorting(struct evlist *evlist, struct perf_env *env) { int err; - err = __setup_sorting(evlist); + err = __setup_sorting(evlist, env); if (err < 0) return err; if (parent_pattern != default_parent_pattern) { - err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1); + err = sort_dimension__add(&perf_hpp_list, "parent", evlist, env, -1); if (err < 0) return err; } @@ -3901,6 +4242,10 @@ int setup_sorting(struct evlist *evlist) if (err < 0) return err; + err = perf_hpp__alloc_mem_stats(&perf_hpp_list, evlist); + if (err < 0) + return err; + /* copy sort keys to output fields */ perf_hpp__setup_output_field(&perf_hpp_list); /* and then copy output fields to sort keys */ @@ -3960,7 +4305,7 @@ static void add_hpp_sort_string(struct strbuf *sb, struct hpp_dimension *s, int add_key(sb, s[i].name, llen); } -char *sort_help(const char *prefix) +char *sort_help(const char *prefix, enum sort_mode mode) { struct strbuf sb; char *s; @@ -3972,10 +4317,12 @@ char *sort_help(const char *prefix) ARRAY_SIZE(hpp_sort_dimensions), &len); add_sort_string(&sb, common_sort_dimensions, ARRAY_SIZE(common_sort_dimensions), &len); - add_sort_string(&sb, bstack_sort_dimensions, - ARRAY_SIZE(bstack_sort_dimensions), &len); - add_sort_string(&sb, memory_sort_dimensions, - ARRAY_SIZE(memory_sort_dimensions), &len); + if (mode == SORT_MODE__NORMAL || mode == SORT_MODE__BRANCH) + add_sort_string(&sb, bstack_sort_dimensions, + ARRAY_SIZE(bstack_sort_dimensions), &len); + if (mode == SORT_MODE__NORMAL || mode == SORT_MODE__MEMORY) + add_sort_string(&sb, memory_sort_dimensions, + ARRAY_SIZE(memory_sort_dimensions), &len); s = strbuf_detach(&sb, NULL); strbuf_release(&sb); 
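setup_sorting() now threads a struct perf_env through to the arch checks, so keys such as p_stage_cyc are accepted based on perf_env__arch() of the data being read rather than a weak per-arch override. A sketch of the updated call site, assuming a perf-tree translation unit; tool_setup_sorting is an illustrative name:

```c
#include "util/sort.h"		/* setup_sorting() */
#include "util/session.h"	/* perf_session__env() */

/* Illustrative wrapper: derive the environment from the session so the
 * arch-specific sort keys reflect the recorded environment. */
static int tool_setup_sorting(struct perf_session *session)
{
	return setup_sorting(session->evlist, perf_session__env(session));
}
```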
return s; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 0bd0ee3ae76b..d7787958e06b 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -6,6 +6,7 @@ #include "hist.h" struct option; +struct perf_env; extern regex_t parent_regex; extern const char *sort_order; @@ -71,6 +72,9 @@ enum sort_type { SORT_ANNOTATE_DATA_TYPE, SORT_ANNOTATE_DATA_TYPE_OFFSET, SORT_SYM_OFFSET, + SORT_ANNOTATE_DATA_TYPE_CACHELINE, + SORT_PARALLELISM, + SORT_TGID, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, @@ -87,6 +91,9 @@ enum sort_type { SORT_SYM_IPC, SORT_ADDR_FROM, SORT_ADDR_TO, + SORT_CALLCHAIN_BRANCH_PREDICTED, + SORT_CALLCHAIN_BRANCH_ABORT, + SORT_CALLCHAIN_BRANCH_CYCLES, /* memory mode specific sort keys */ __SORT_MEMORY_MODE, @@ -124,24 +131,24 @@ extern struct sort_entry sort_thread; struct evlist; struct tep_handle; -int setup_sorting(struct evlist *evlist); +int setup_sorting(struct evlist *evlist, struct perf_env *env); int setup_output_field(void); void reset_output_field(void); void sort__setup_elide(FILE *fp); void perf_hpp__set_elide(int idx, bool elide); -char *sort_help(const char *prefix); +char *sort_help(const char *prefix, enum sort_mode mode); int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset); bool is_strict_order(const char *order); -int hpp_dimension__add_output(unsigned col); +int hpp_dimension__add_output(unsigned col, bool implicit); void reset_dimensions(void); int sort_dimension__add(struct perf_hpp_list *list, const char *tok, - struct evlist *evlist, + struct evlist *evlist, struct perf_env *env, int level); -int output_field_add(struct perf_hpp_list *list, const char *tok); +int output_field_add(struct perf_hpp_list *list, const char *tok, int *level); int64_t sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right); int64_t diff --git a/tools/perf/util/spark.c b/tools/perf/util/spark.c index 70272a8b81a6..65ca253cc22e 100644 --- a/tools/perf/util/spark.c +++ b/tools/perf/util/spark.c @@ -1,9 +1,7 @@ -#include <stdio.h> -#include <limits.h> -#include <string.h> -#include <stdlib.h> +// SPDX-License-Identifier: GPL-2.0 #include "spark.h" -#include "stat.h" +#include <limits.h> +#include <linux/kernel.h> #define SPARK_SHIFT 8 diff --git a/tools/perf/util/spark.h b/tools/perf/util/spark.h index 25402d7d7a64..78597c38ef35 100644 --- a/tools/perf/util/spark.h +++ b/tools/perf/util/spark.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef SPARK_H #define SPARK_H 1 diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c index 476e99896d5e..0f4907843ac1 100644 --- a/tools/perf/util/srccode.c +++ b/tools/perf/util/srccode.c @@ -16,7 +16,7 @@ #include "srccode.h" #include "debug.h" #include <internal/lib.h> // page_size -#include "fncache.h" +#include "hashmap.h" #define MAXSRCCACHE (32*1024*1024) #define MAXSRCFILES 64 @@ -92,7 +92,7 @@ static struct srcfile *find_srcfile(char *fn) struct srcfile *h; int fd; unsigned long sz; - unsigned hval = shash((unsigned char *)fn) % SRC_HTAB_SZ; + size_t hval = str_hash(fn) % SRC_HTAB_SZ; hlist_for_each_entry (h, &srcfile_htab[hval], hash_nd) { if (!strcmp(fn, h->fn)) { diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 9d670d8c1c08..27c0966611ab 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -1,28 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 -#include <inttypes.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> - -#include 
<linux/kernel.h> -#include <linux/string.h> -#include <linux/zalloc.h> - -#include <api/io.h> - -#include "util/dso.h" -#include "util/debug.h" -#include "util/callchain.h" -#include "util/symbol_conf.h" #include "srcline.h" -#include "string2.h" +#include "addr2line.h" +#include "dso.h" +#include "callchain.h" +#include "libbfd.h" +#include "llvm.h" #include "symbol.h" -#include "subcmd/run-command.h" -/* If addr2line doesn't return data for 1 second then timeout. */ -int addr2line_timeout_ms = 1 * 1000; +#include <inttypes.h> +#include <string.h> + bool srcline_full_filename; char *srcline__unknown = (char *)"??:0"; @@ -39,14 +26,13 @@ static const char *srcline_dso_name(struct dso *dso) if (dso_name[0] == '[') return NULL; - if (!strncmp(dso_name, "/tmp/perf-", 10)) + if (is_perf_pid_map_name(dso_name)) return NULL; return dso_name; } -static int inline_list__append(struct symbol *symbol, char *srcline, - struct inline_node *node) +int inline_list__append(struct symbol *symbol, char *srcline, struct inline_node *node) { struct inline_list *ilist; @@ -73,7 +59,7 @@ static const char *gnu_basename(const char *path) return base ? base + 1 : path; } -static char *srcline_from_fileline(const char *file, unsigned int line) +char *srcline_from_fileline(const char *file, unsigned int line) { char *srcline; @@ -89,9 +75,9 @@ static char *srcline_from_fileline(const char *file, unsigned int line) return srcline; } -static struct symbol *new_inline_sym(struct dso *dso, - struct symbol *base_sym, - const char *funcname) +struct symbol *new_inline_sym(struct dso *dso, + struct symbol *base_sym, + const char *funcname) { struct symbol *inline_sym; char *demangled = NULL; @@ -128,669 +114,23 @@ static struct symbol *new_inline_sym(struct dso *dso, return inline_sym; } -#define MAX_INLINE_NEST 1024 - -#ifdef HAVE_LIBBFD_SUPPORT - -/* - * Implement addr2line using libbfd. 
- */ -#define PACKAGE "perf" -#include <bfd.h> - -struct a2l_data { - const char *input; - u64 addr; - - bool found; - const char *filename; - const char *funcname; - unsigned line; - - bfd *abfd; - asymbol **syms; -}; - -static int bfd_error(const char *string) -{ - const char *errmsg; - - errmsg = bfd_errmsg(bfd_get_error()); - fflush(stdout); - - if (string) - pr_debug("%s: %s\n", string, errmsg); - else - pr_debug("%s\n", errmsg); - - return -1; -} - -static int slurp_symtab(bfd *abfd, struct a2l_data *a2l) -{ - long storage; - long symcount; - asymbol **syms; - bfd_boolean dynamic = FALSE; - - if ((bfd_get_file_flags(abfd) & HAS_SYMS) == 0) - return bfd_error(bfd_get_filename(abfd)); - - storage = bfd_get_symtab_upper_bound(abfd); - if (storage == 0L) { - storage = bfd_get_dynamic_symtab_upper_bound(abfd); - dynamic = TRUE; - } - if (storage < 0L) - return bfd_error(bfd_get_filename(abfd)); - - syms = malloc(storage); - if (dynamic) - symcount = bfd_canonicalize_dynamic_symtab(abfd, syms); - else - symcount = bfd_canonicalize_symtab(abfd, syms); - - if (symcount < 0) { - free(syms); - return bfd_error(bfd_get_filename(abfd)); - } - - a2l->syms = syms; - return 0; -} - -static void find_address_in_section(bfd *abfd, asection *section, void *data) -{ - bfd_vma pc, vma; - bfd_size_type size; - struct a2l_data *a2l = data; - flagword flags; - - if (a2l->found) - return; - -#ifdef bfd_get_section_flags - flags = bfd_get_section_flags(abfd, section); -#else - flags = bfd_section_flags(section); -#endif - if ((flags & SEC_ALLOC) == 0) - return; - - pc = a2l->addr; -#ifdef bfd_get_section_vma - vma = bfd_get_section_vma(abfd, section); -#else - vma = bfd_section_vma(section); -#endif -#ifdef bfd_get_section_size - size = bfd_get_section_size(section); -#else - size = bfd_section_size(section); -#endif - - if (pc < vma || pc >= vma + size) - return; - - a2l->found = bfd_find_nearest_line(abfd, section, a2l->syms, pc - vma, - &a2l->filename, &a2l->funcname, - &a2l->line); - - if (a2l->filename && !strlen(a2l->filename)) - a2l->filename = NULL; -} - -static struct a2l_data *addr2line_init(const char *path) -{ - bfd *abfd; - struct a2l_data *a2l = NULL; - - abfd = bfd_openr(path, NULL); - if (abfd == NULL) - return NULL; - - if (!bfd_check_format(abfd, bfd_object)) - goto out; - - a2l = zalloc(sizeof(*a2l)); - if (a2l == NULL) - goto out; - - a2l->abfd = abfd; - a2l->input = strdup(path); - if (a2l->input == NULL) - goto out; - - if (slurp_symtab(abfd, a2l)) - goto out; - - return a2l; - -out: - if (a2l) { - zfree((char **)&a2l->input); - free(a2l); - } - bfd_close(abfd); - return NULL; -} - -static void addr2line_cleanup(struct a2l_data *a2l) -{ - if (a2l->abfd) - bfd_close(a2l->abfd); - zfree((char **)&a2l->input); - zfree(&a2l->syms); - free(a2l); -} - -static int inline_list__append_dso_a2l(struct dso *dso, - struct inline_node *node, - struct symbol *sym) -{ - struct a2l_data *a2l = dso->a2l; - struct symbol *inline_sym = new_inline_sym(dso, sym, a2l->funcname); - char *srcline = NULL; - - if (a2l->filename) - srcline = srcline_from_fileline(a2l->filename, a2l->line); - - return inline_list__append(inline_sym, srcline, node); -} - -static int addr2line(const char *dso_name, u64 addr, - char **file, unsigned int *line, struct dso *dso, - bool unwind_inlines, struct inline_node *node, +static int addr2line(const char *dso_name, u64 addr, char **file, unsigned int *line_nr, + struct dso *dso, bool unwind_inlines, struct inline_node *node, struct symbol *sym) { - int ret = 0; - struct a2l_data 
*a2l = dso->a2l; - - if (!a2l) { - dso->a2l = addr2line_init(dso_name); - a2l = dso->a2l; - } - - if (a2l == NULL) { - if (!symbol_conf.disable_add2line_warn) - pr_warning("addr2line_init failed for %s\n", dso_name); - return 0; - } - - a2l->addr = addr; - a2l->found = false; - - bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l); - - if (!a2l->found) - return 0; + int ret; - if (unwind_inlines) { - int cnt = 0; + ret = llvm__addr2line(dso_name, addr, file, line_nr, dso, unwind_inlines, node, sym); + if (ret > 0) + return ret; - if (node && inline_list__append_dso_a2l(dso, node, sym)) - return 0; + ret = libbfd__addr2line(dso_name, addr, file, line_nr, dso, unwind_inlines, node, sym); + if (ret > 0) + return ret; - while (bfd_find_inliner_info(a2l->abfd, &a2l->filename, - &a2l->funcname, &a2l->line) && - cnt++ < MAX_INLINE_NEST) { - - if (a2l->filename && !strlen(a2l->filename)) - a2l->filename = NULL; - - if (node != NULL) { - if (inline_list__append_dso_a2l(dso, node, sym)) - return 0; - // found at least one inline frame - ret = 1; - } - } - } - - if (file) { - *file = a2l->filename ? strdup(a2l->filename) : NULL; - ret = *file ? 1 : 0; - } - - if (line) - *line = a2l->line; - - return ret; + return cmd__addr2line(dso_name, addr, file, line_nr, dso, unwind_inlines, node, sym); } -void dso__free_a2l(struct dso *dso) -{ - struct a2l_data *a2l = dso->a2l; - - if (!a2l) - return; - - addr2line_cleanup(a2l); - - dso->a2l = NULL; -} - -#else /* HAVE_LIBBFD_SUPPORT */ - -static int filename_split(char *filename, unsigned int *line_nr) -{ - char *sep; - - sep = strchr(filename, '\n'); - if (sep) - *sep = '\0'; - - if (!strcmp(filename, "??:0")) - return 0; - - sep = strchr(filename, ':'); - if (sep) { - *sep++ = '\0'; - *line_nr = strtoul(sep, NULL, 0); - return 1; - } - pr_debug("addr2line missing ':' in filename split\n"); - return 0; -} - -static void addr2line_subprocess_cleanup(struct child_process *a2l) -{ - if (a2l->pid != -1) { - kill(a2l->pid, SIGKILL); - finish_command(a2l); /* ignore result, we don't care */ - a2l->pid = -1; - close(a2l->in); - close(a2l->out); - } - - free(a2l); -} - -static struct child_process *addr2line_subprocess_init(const char *addr2line_path, - const char *binary_path) -{ - const char *argv[] = { - addr2line_path ?: "addr2line", - "-e", binary_path, - "-a", "-i", "-f", NULL - }; - struct child_process *a2l = zalloc(sizeof(*a2l)); - int start_command_status = 0; - - if (a2l == NULL) { - pr_err("Failed to allocate memory for addr2line"); - return NULL; - } - - a2l->pid = -1; - a2l->in = -1; - a2l->out = -1; - a2l->no_stderr = 1; - - a2l->argv = argv; - start_command_status = start_command(a2l); - a2l->argv = NULL; /* it's not used after start_command; avoid dangling pointers */ - - if (start_command_status != 0) { - pr_warning("could not start addr2line (%s) for %s: start_command return code %d\n", - addr2line_path, binary_path, start_command_status); - addr2line_subprocess_cleanup(a2l); - return NULL; - } - - return a2l; -} - -enum a2l_style { - BROKEN, - GNU_BINUTILS, - LLVM, -}; - -static enum a2l_style addr2line_configure(struct child_process *a2l, const char *dso_name) -{ - static bool cached; - static enum a2l_style style; - - if (!cached) { - char buf[128]; - struct io io; - int ch; - int lines; - - if (write(a2l->in, ",\n", 2) != 2) - return BROKEN; - - io__init(&io, a2l->out, buf, sizeof(buf)); - ch = io__get_char(&io); - if (ch == ',') { - style = LLVM; - cached = true; - lines = 1; - pr_debug("Detected LLVM addr2line style\n"); - } else 
if (ch == '0') {
-			style = GNU_BINUTILS;
-			cached = true;
-			lines = 3;
-			pr_debug("Detected binutils addr2line style\n");
-		} else {
-			if (!symbol_conf.disable_add2line_warn) {
-				char *output = NULL;
-				size_t output_len;
-
-				io__getline(&io, &output, &output_len);
-				pr_warning("%s %s: addr2line configuration failed\n",
-					   __func__, dso_name);
-				pr_warning("\t%c%s", ch, output);
-			}
-			pr_debug("Unknown/broken addr2line style\n");
-			return BROKEN;
-		}
-		while (lines) {
-			ch = io__get_char(&io);
-			if (ch <= 0)
-				break;
-			if (ch == '\n')
-				lines--;
-		}
-		/* Ignore SIGPIPE in the event addr2line exits. */
-		signal(SIGPIPE, SIG_IGN);
-	}
-	return style;
-}
-
-static int read_addr2line_record(struct io *io,
-				 enum a2l_style style,
-				 const char *dso_name,
-				 u64 addr,
-				 bool first,
-				 char **function,
-				 char **filename,
-				 unsigned int *line_nr)
-{
-	/*
-	 * Returns:
-	 * -1 ==> error
-	 * 0 ==> sentinel (or other ill-formed) record read
-	 * 1 ==> a genuine record read
-	 */
-	char *line = NULL;
-	size_t line_len = 0;
-	unsigned int dummy_line_nr = 0;
-	int ret = -1;
-
-	if (function != NULL)
-		zfree(function);
-
-	if (filename != NULL)
-		zfree(filename);
-
-	if (line_nr != NULL)
-		*line_nr = 0;
-
-	/*
-	 * Read the first line. Without an error this will be:
-	 * - for the first line an address like 0x1234,
-	 * - the binutils sentinel 0x0000000000000000,
-	 * - the llvm-addr2line sentinel ',' character,
-	 * - the function name line for an inlined function.
-	 */
-	if (io__getline(io, &line, &line_len) < 0 || !line_len)
-		goto error;
-
-	pr_debug("%s %s: addr2line read address for sentinel: %s", __func__, dso_name, line);
-	if (style == LLVM && line_len == 2 && line[0] == ',') {
-		/* Found the llvm-addr2line sentinel character. */
-		zfree(&line);
-		return 0;
-	} else if (style == GNU_BINUTILS && (!first || addr != 0)) {
-		int zero_count = 0, non_zero_count = 0;
-		/*
-		 * Check for the binutils sentinel, ignoring it when the
-		 * requested address is 0.
-		 */
-
-		/* A given address should always start with 0x. */
-		if (line_len >= 2 && line[0] == '0' && line[1] == 'x') {
-			for (size_t i = 2; i < line_len; i++) {
-				if (line[i] == '0')
-					zero_count++;
-				else if (line[i] != '\n')
-					non_zero_count++;
-			}
-			if (!non_zero_count) {
-				int ch;
-
-				if (first && !zero_count) {
-					/* Line was erroneously just '0x'. */
-					goto error;
-				}
-				/*
-				 * Line was 0x0..0, the sentinel for binutils. Remove
-				 * the function and filename lines.
-				 */
-				zfree(&line);
-				do {
-					ch = io__get_char(io);
-				} while (ch > 0 && ch != '\n');
-				do {
-					ch = io__get_char(io);
-				} while (ch > 0 && ch != '\n');
-				return 0;
-			}
-		}
-	}
-	/* Read the second function name line (if inline data then this is the first line). */
-	if (first && (io__getline(io, &line, &line_len) < 0 || !line_len))
-		goto error;
-
-	pr_debug("%s %s: addr2line read line: %s", __func__, dso_name, line);
-	if (function != NULL)
-		*function = strdup(strim(line));
-
-	zfree(&line);
-	line_len = 0;
-
-	/* Read the third filename and line number line. */
-	if (io__getline(io, &line, &line_len) < 0 || !line_len)
-		goto error;
-
-	pr_debug("%s %s: addr2line filename:number : %s", __func__, dso_name, line);
-	if (filename_split(line, line_nr == NULL ? &dummy_line_nr : line_nr) == 0 &&
-	    style == GNU_BINUTILS) {
-		ret = 0;
-		goto error;
-	}
-
-	if (filename != NULL)
-		*filename = strdup(line);
-
-	zfree(&line);
-	line_len = 0;
-
-	return 1;
-
-error:
-	free(line);
-	if (function != NULL)
-		zfree(function);
-	if (filename != NULL)
-		zfree(filename);
-	return ret;
-}
-
-static int inline_list__append_record(struct dso *dso,
-				      struct inline_node *node,
-				      struct symbol *sym,
-				      const char *function,
-				      const char *filename,
-				      unsigned int line_nr)
-{
-	struct symbol *inline_sym = new_inline_sym(dso, sym, function);
-
-	return inline_list__append(inline_sym, srcline_from_fileline(filename, line_nr), node);
-}
-
-static int addr2line(const char *dso_name, u64 addr,
-		     char **file, unsigned int *line_nr,
-		     struct dso *dso,
-		     bool unwind_inlines,
-		     struct inline_node *node,
-		     struct symbol *sym __maybe_unused)
-{
-	struct child_process *a2l = dso__a2l(dso);
-	char *record_function = NULL;
-	char *record_filename = NULL;
-	unsigned int record_line_nr = 0;
-	int record_status = -1;
-	int ret = 0;
-	size_t inline_count = 0;
-	int len;
-	char buf[128];
-	ssize_t written;
-	struct io io = { .eof = false };
-	enum a2l_style a2l_style;
-
-	if (!a2l) {
-		if (!filename__has_section(dso_name, ".debug_line"))
-			goto out;
-
-		dso__set_a2l(dso,
-			     addr2line_subprocess_init(symbol_conf.addr2line_path, dso_name));
-		a2l = dso__a2l(dso);
-	}
-
-	if (a2l == NULL) {
-		if (!symbol_conf.disable_add2line_warn)
-			pr_warning("%s %s: addr2line_subprocess_init failed\n", __func__, dso_name);
-		goto out;
-	}
-	a2l_style = addr2line_configure(a2l, dso_name);
-	if (a2l_style == BROKEN)
-		goto out;
-
-	/*
-	 * Send our request and then *deliberately* send something that can't be
-	 * interpreted as a valid address to ask addr2line about (namely,
-	 * ","). This causes addr2line to first write out the answer to our
-	 * request, in an unbounded/unknown number of records, and then to write
-	 * out the lines "0x0...0", "??" and "??:0", for GNU binutils, or ","
-	 * for llvm-addr2line, so that we can detect when it has finished giving
-	 * us anything useful.
-	 */
-	len = snprintf(buf, sizeof(buf), "%016"PRIx64"\n,\n", addr);
-	written = len > 0 ? write(a2l->in, buf, len) : -1;
-	if (written != len) {
-		if (!symbol_conf.disable_add2line_warn)
-			pr_warning("%s %s: could not send request\n", __func__, dso_name);
-		goto out;
-	}
-	io__init(&io, a2l->out, buf, sizeof(buf));
-	io.timeout_ms = addr2line_timeout_ms;
-	switch (read_addr2line_record(&io, a2l_style, dso_name, addr, /*first=*/true,
-				      &record_function, &record_filename, &record_line_nr)) {
-	case -1:
-		if (!symbol_conf.disable_add2line_warn)
-			pr_warning("%s %s: could not read first record\n", __func__, dso_name);
-		goto out;
-	case 0:
-		/*
-		 * The first record was invalid, so return failure, but first
-		 * read another record, since we sent a sentinel ',' for the
-		 * sake of detecting the last inlined function. Treat this as
-		 * the first record, as the ',' generates a new start with GNU
-		 * binutils; also force a non-zero address as we're no longer
-		 * reading that record.
-		 */
-		switch (read_addr2line_record(&io, a2l_style, dso_name,
-					      /*addr=*/1, /*first=*/true,
-					      NULL, NULL, NULL)) {
-		case -1:
-			if (!symbol_conf.disable_add2line_warn)
-				pr_warning("%s %s: could not read sentinel record\n",
-					   __func__, dso_name);
-			break;
-		case 0:
-			/* The sentinel as expected. */
-			break;
-		default:
-			if (!symbol_conf.disable_add2line_warn)
-				pr_warning("%s %s: unexpected record instead of sentinel\n",
-					   __func__, dso_name);
-			break;
-		}
-		goto out;
-	default:
-		/* First record as expected. */
-		break;
-	}
-
-	if (file) {
-		*file = strdup(record_filename);
-		ret = 1;
-	}
-	if (line_nr)
-		*line_nr = record_line_nr;
-
-	if (unwind_inlines) {
-		if (node && inline_list__append_record(dso, node, sym,
-						       record_function,
-						       record_filename,
-						       record_line_nr)) {
-			ret = 0;
-			goto out;
-		}
-	}
-
-	/*
-	 * We have to read the records even if we don't care about the inline
-	 * info. This isn't the first record, so force the address to non-zero
-	 * as we're reading records beyond the first.
-	 */
-	while ((record_status = read_addr2line_record(&io,
-						      a2l_style,
-						      dso_name,
-						      /*addr=*/1,
-						      /*first=*/false,
-						      &record_function,
-						      &record_filename,
-						      &record_line_nr)) == 1) {
-		if (unwind_inlines && node && inline_count++ < MAX_INLINE_NEST) {
-			if (inline_list__append_record(dso, node, sym,
-						       record_function,
-						       record_filename,
-						       record_line_nr)) {
-				ret = 0;
-				goto out;
-			}
-			ret = 1; /* found at least one inline frame */
-		}
-	}
-
-out:
-	free(record_function);
-	free(record_filename);
-	if (io.eof) {
-		dso__set_a2l(dso, NULL);
-		addr2line_subprocess_cleanup(a2l);
-	}
-	return ret;
-}
-
-void dso__free_a2l(struct dso *dso)
-{
-	struct child_process *a2l = dso__a2l(dso);
-
-	if (!a2l)
-		return;
-
-	addr2line_subprocess_cleanup(a2l);
-
-	dso__set_a2l(dso, NULL);
-}
-
-#endif /* HAVE_LIBBFD_SUPPORT */
-
 static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
 					struct dso *dso, struct symbol *sym)
 {
@@ -805,7 +145,9 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
 	INIT_LIST_HEAD(&node->val);
 	node->addr = addr;
 
-	addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym);
+	addr2line(dso_name, addr, /*file=*/NULL, /*line_nr=*/NULL, dso,
+		  /*unwind_inlines=*/true, node, sym);
+
 	return node;
 }
 
@@ -832,7 +174,7 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
 		goto out_err;
 
 	if (!addr2line(dso_name, addr, &file, &line, dso,
-		       unwind_inlines, NULL, sym))
+		       unwind_inlines, /*node=*/NULL, sym))
 		goto out_err;
 
 	srcline = srcline_from_fileline(file, line);
@@ -878,7 +220,8 @@ char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line)
 	if (dso_name == NULL)
 		goto out_err;
 
-	if (!addr2line(dso_name, addr, &file, line, dso, true, NULL, NULL))
+	if (!addr2line(dso_name, addr, &file, line, dso, /*unwind_inlines=*/true,
+		       /*node=*/NULL, /*sym=*/NULL))
 		goto out_err;
 
 	dso__set_a2l_fails(dso, 0);
diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h
index 75010d39ea28..c36f573cd339 100644
--- a/tools/perf/util/srcline.h
+++ b/tools/perf/util/srcline.h
@@ -9,7 +9,6 @@
 struct dso;
 struct symbol;
 
-extern int addr2line_timeout_ms;
 extern bool srcline_full_filename;
 char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
 		  bool show_sym, bool show_addr, u64 ip);
@@ -29,6 +28,8 @@ void srcline__tree_delete(struct rb_root_cached *tree);
 extern char *srcline__unknown;
 #define SRCLINE_UNKNOWN srcline__unknown
 
+#define MAX_INLINE_NEST 1024
+
 struct inline_list {
 	struct symbol		*symbol;
 	char			*srcline;
@@ -55,4 +56,10 @@ struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr);
 /* delete all nodes within the tree of inline_node s */
 void inlines__tree_delete(struct rb_root_cached *tree);
 
+int inline_list__append(struct symbol *symbol, char *srcline, struct inline_node *node);
+char *srcline_from_fileline(const 
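The large deletion above is the pipe-driven addr2line protocol that srcline.c used before this split: write the address plus a deliberately unparseable "," line, then read records until the sentinel echoes back. A minimal standalone sketch of the same idea, using plain POSIX calls instead of perf's child_process/io helpers and assuming GNU binutils output style:

```c
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	int to_child[2], from_child[2];
	char buf[4096];
	FILE *out;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <binary> <hex-addr>\n", argv[0]);
		return 1;
	}
	if (pipe(to_child) || pipe(from_child))
		return 1;
	if (fork() == 0) {
		/* Child: stdin/stdout on the pipes, then exec addr2line. */
		dup2(to_child[0], 0);
		dup2(from_child[1], 1);
		close(to_child[1]);
		close(from_child[0]);
		execlp("addr2line", "addr2line", "-e", argv[1], "-a", "-i", "-f", NULL);
		_exit(127);
	}
	close(to_child[0]);
	close(from_child[1]);
	/* The request, followed by a ',' that cannot parse as an address. */
	dprintf(to_child[1], "%s\n,\n", argv[2]);
	out = fdopen(from_child[0], "r");
	while (fgets(buf, sizeof(buf), out)) {
		/* GNU binutils echoes the bogus ',' back as address 0. */
		if (!strncmp(buf, "0x0000000000000000", 18))
			break;
		fputs(buf, stdout);	/* address, function and file:line records */
	}
	return 0;
}
```

Error handling is minimal here; the perf code additionally keeps the child alive across queries, applies a read timeout, and restarts the subprocess once end-of-file is seen.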
char *file, unsigned int line); +struct symbol *new_inline_sym(struct dso *dso, + struct symbol *base_sym, + const char *funcname); + #endif /* PERF_SRCLINE_H */ diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 91d2f7f65df7..6d02f84c5691 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -7,6 +7,7 @@ #include <perf/cpumap.h> #include "color.h" #include "counts.h" +#include "debug.h" #include "evlist.h" #include "evsel.h" #include "stat.h" @@ -21,6 +22,7 @@ #include "iostat.h" #include "pmu.h" #include "pmus.h" +#include "tool_pmu.h" #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" @@ -38,6 +40,7 @@ static int aggr_header_lens[] = { [AGGR_CORE] = 18, [AGGR_CACHE] = 22, + [AGGR_CLUSTER] = 20, [AGGR_DIE] = 12, [AGGR_SOCKET] = 6, [AGGR_NODE] = 6, @@ -47,19 +50,21 @@ static int aggr_header_lens[] = { }; static const char *aggr_header_csv[] = { - [AGGR_CORE] = "core,cpus,", - [AGGR_CACHE] = "cache,cpus,", - [AGGR_DIE] = "die,cpus,", - [AGGR_SOCKET] = "socket,cpus,", - [AGGR_NONE] = "cpu,", - [AGGR_THREAD] = "comm-pid,", - [AGGR_NODE] = "node,", - [AGGR_GLOBAL] = "" + [AGGR_CORE] = "core,ctrs,", + [AGGR_CACHE] = "cache,ctrs,", + [AGGR_CLUSTER] = "cluster,ctrs,", + [AGGR_DIE] = "die,ctrs,", + [AGGR_SOCKET] = "socket,ctrs,", + [AGGR_NONE] = "cpu,", + [AGGR_THREAD] = "comm-pid,", + [AGGR_NODE] = "node,", + [AGGR_GLOBAL] = "" }; static const char *aggr_header_std[] = { [AGGR_CORE] = "core", [AGGR_CACHE] = "cache", + [AGGR_CLUSTER] = "cluster", [AGGR_DIE] = "die", [AGGR_SOCKET] = "socket", [AGGR_NONE] = "cpu", @@ -68,6 +73,32 @@ static const char *aggr_header_std[] = { [AGGR_GLOBAL] = "" }; +const char *metric_threshold_classify__color(enum metric_threshold_classify thresh) +{ + const char * const colors[] = { + "", /* unknown */ + PERF_COLOR_RED, /* bad */ + PERF_COLOR_MAGENTA, /* nearly bad */ + PERF_COLOR_YELLOW, /* less good */ + PERF_COLOR_GREEN, /* good */ + }; + static_assert(ARRAY_SIZE(colors) - 1 == METRIC_THRESHOLD_GOOD, "missing enum value"); + return colors[thresh]; +} + +static const char *metric_threshold_classify__str(enum metric_threshold_classify thresh) +{ + const char * const strs[] = { + "unknown", + "bad", + "nearly bad", + "less good", + "good", + }; + static_assert(ARRAY_SIZE(strs) - 1 == METRIC_THRESHOLD_GOOD, "missing enum value"); + return strs[thresh]; +} + static void print_running_std(struct perf_stat_config *config, u64 run, u64 ena) { if (run != ena) @@ -83,23 +114,59 @@ static void print_running_csv(struct perf_stat_config *config, u64 run, u64 ena) fprintf(config->output, "%s%" PRIu64 "%s%.2f", config->csv_sep, run, config->csv_sep, enabled_percent); } +struct outstate { + /* Std mode: insert a newline before the next metric */ + bool newline; + /* JSON mode: track need for comma for a previous field or not */ + bool first; + /* Num CSV separators remaining to pad out when not all fields are printed */ + int csv_col_pad; + + /* + * The following don't track state across fields, but are here as a shortcut to + * pass data to the print functions. The alternative would be to update the + * function signatures of the entire print stack to pass them through. + */ + /* Place to output to */ + FILE * const fh; + /* Lines are timestamped in --interval-print mode */ + char timestamp[64]; + /* Num items aggregated in current line. 
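The new metric_threshold_classify helpers earlier in this hunk keep an enum-indexed table in lock-step with the enum via static_assert. The same pattern in isolation; note that only UNKNOWN, BAD and GOOD appear verbatim in the diff, so the two middle enumerator names below are guesses:

```c
#include <assert.h>
#include <stdio.h>

enum metric_threshold_classify {
	METRIC_THRESHOLD_UNKNOWN,
	METRIC_THRESHOLD_BAD,
	METRIC_THRESHOLD_NEARLY_BAD,	/* assumed name */
	METRIC_THRESHOLD_LESS_GOOD,	/* assumed name */
	METRIC_THRESHOLD_GOOD,
};

static const char *thresh_str(enum metric_threshold_classify t)
{
	static const char * const strs[] = {
		"unknown", "bad", "nearly bad", "less good", "good",
	};
	/* Adding an enumerator without a matching string now breaks the build. */
	static_assert(sizeof(strs) / sizeof(strs[0]) - 1 == METRIC_THRESHOLD_GOOD,
		      "missing enum value");
	return strs[t];
}

int main(void)
{
	printf("%s\n", thresh_str(METRIC_THRESHOLD_BAD)); /* prints "bad" */
	return 0;
}
```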
See struct perf_stat_aggr.nr */ + int aggr_nr; + /* Core/socket/die etc ID for the current line */ + struct aggr_cpu_id id; + /* Event for current line */ + struct evsel *evsel; + /* Cgroup for current line */ + struct cgroup *cgrp; +}; + +static const char *json_sep(struct outstate *os) +{ + const char *sep = os->first ? "" : ", "; + + os->first = false; + return sep; +} -static void print_running_json(struct perf_stat_config *config, u64 run, u64 ena) +#define json_out(os, format, ...) fprintf((os)->fh, "%s" format, json_sep(os), ##__VA_ARGS__) + +static void print_running_json(struct outstate *os, u64 run, u64 ena) { double enabled_percent = 100; if (run != ena) enabled_percent = 100 * run / ena; - fprintf(config->output, "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f, ", - run, enabled_percent); + json_out(os, "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f", + run, enabled_percent); } -static void print_running(struct perf_stat_config *config, +static void print_running(struct perf_stat_config *config, struct outstate *os, u64 run, u64 ena, bool before_metric) { if (config->json_output) { if (before_metric) - print_running_json(config, run, ena); + print_running_json(os, run, ena); } else if (config->csv_output) { if (before_metric) print_running_csv(config, run, ena); @@ -122,20 +189,20 @@ static void print_noise_pct_csv(struct perf_stat_config *config, fprintf(config->output, "%s%.2f%%", config->csv_sep, pct); } -static void print_noise_pct_json(struct perf_stat_config *config, +static void print_noise_pct_json(struct outstate *os, double pct) { - fprintf(config->output, "\"variance\" : %.2f, ", pct); + json_out(os, "\"variance\" : %.2f", pct); } -static void print_noise_pct(struct perf_stat_config *config, +static void print_noise_pct(struct perf_stat_config *config, struct outstate *os, double total, double avg, bool before_metric) { double pct = rel_stddev_stats(total, avg); if (config->json_output) { if (before_metric) - print_noise_pct_json(config, pct); + print_noise_pct_json(os, pct); } else if (config->csv_output) { if (before_metric) print_noise_pct_csv(config, pct); @@ -145,7 +212,7 @@ static void print_noise_pct(struct perf_stat_config *config, } } -static void print_noise(struct perf_stat_config *config, +static void print_noise(struct perf_stat_config *config, struct outstate *os, struct evsel *evsel, double avg, bool before_metric) { struct perf_stat_evsel *ps; @@ -154,7 +221,7 @@ static void print_noise(struct perf_stat_config *config, return; ps = evsel->stats; - print_noise_pct(config, stddev_stats(&ps->res_stats), avg, before_metric); + print_noise_pct(config, os, stddev_stats(&ps->res_stats), avg, before_metric); } static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name) @@ -167,18 +234,19 @@ static void print_cgroup_csv(struct perf_stat_config *config, const char *cgrp_n fprintf(config->output, "%s%s", config->csv_sep, cgrp_name); } -static void print_cgroup_json(struct perf_stat_config *config, const char *cgrp_name) +static void print_cgroup_json(struct outstate *os, const char *cgrp_name) { - fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name); + json_out(os, "\"cgroup\" : \"%s\"", cgrp_name); } -static void print_cgroup(struct perf_stat_config *config, struct cgroup *cgrp) +static void print_cgroup(struct perf_stat_config *config, struct outstate *os, + struct cgroup *cgrp) { if (nr_cgroups || config->cgroup_list) { const char *cgrp_name = cgrp ? 
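json_sep()/json_out() above solve the trailing-comma problem for JSON that is assembled field by field: a per-line "first" flag becomes the separator, so any field can be skipped without corrupting the output. A standalone demonstration of the same macro:

```c
#include <stdbool.h>
#include <stdio.h>

struct outstate { FILE *fh; bool first; };

static const char *json_sep(struct outstate *os)
{
	const char *sep = os->first ? "" : ", ";

	os->first = false;
	return sep;
}

#define json_out(os, fmt, ...) fprintf((os)->fh, "%s" fmt, json_sep(os), ##__VA_ARGS__)

int main(void)
{
	struct outstate os = { .fh = stdout, .first = true };

	fputc('{', os.fh);
	json_out(&os, "\"event\" : \"cycles\"");
	json_out(&os, "\"counter-value\" : \"%f\"", 12345.0);
	/* => {"event" : "cycles", "counter-value" : "12345.000000"} */
	fputs("}\n", os.fh);
	return 0;
}
```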
cgrp->name : ""; if (config->json_output) - print_cgroup_json(config, cgrp_name); + print_cgroup_json(os, cgrp_name); else if (config->csv_output) print_cgroup_csv(config, cgrp_name); else @@ -236,7 +304,7 @@ static void print_aggr_id_std(struct perf_stat_config *config, return; } - fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, 4, aggr_nr); + fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, /*strlen("ctrs")*/ 4, aggr_nr); } static void print_aggr_id_csv(struct perf_stat_config *config, @@ -293,47 +361,45 @@ static void print_aggr_id_csv(struct perf_stat_config *config, } } -static void print_aggr_id_json(struct perf_stat_config *config, +static void print_aggr_id_json(struct perf_stat_config *config, struct outstate *os, struct evsel *evsel, struct aggr_cpu_id id, int aggr_nr) { - FILE *output = config->output; - switch (config->aggr_mode) { case AGGR_CORE: - fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", + json_out(os, "\"core\" : \"S%d-D%d-C%d\", \"counters\" : %d", id.socket, id.die, id.core, aggr_nr); break; case AGGR_CACHE: - fprintf(output, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d, ", + json_out(os, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"counters\" : %d", id.socket, id.die, id.cache_lvl, id.cache, aggr_nr); break; case AGGR_CLUSTER: - fprintf(output, "\"cluster\" : \"S%d-D%d-CLS%d\", \"aggregate-number\" : %d, ", + json_out(os, "\"cluster\" : \"S%d-D%d-CLS%d\", \"counters\" : %d", id.socket, id.die, id.cluster, aggr_nr); break; case AGGR_DIE: - fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ", + json_out(os, "\"die\" : \"S%d-D%d\", \"counters\" : %d", id.socket, id.die, aggr_nr); break; case AGGR_SOCKET: - fprintf(output, "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ", + json_out(os, "\"socket\" : \"S%d\", \"counters\" : %d", id.socket, aggr_nr); break; case AGGR_NODE: - fprintf(output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ", + json_out(os, "\"node\" : \"N%d\", \"counters\" : %d", id.node, aggr_nr); break; case AGGR_NONE: if (evsel->percore && !config->percore_show_thread) { - fprintf(output, "\"core\" : \"S%d-D%d-C%d\"", + json_out(os, "\"core\" : \"S%d-D%d-C%d\"", id.socket, id.die, id.core); } else if (id.cpu.cpu > -1) { - fprintf(output, "\"cpu\" : \"%d\", ", + json_out(os, "\"cpu\" : \"%d\"", id.cpu.cpu); } break; case AGGR_THREAD: - fprintf(output, "\"thread\" : \"%s-%d\", ", + json_out(os, "\"thread\" : \"%s-%d\"", perf_thread_map__comm(evsel->core.threads, id.thread_idx), perf_thread_map__pid(evsel->core.threads, id.thread_idx)); break; @@ -345,29 +411,17 @@ static void print_aggr_id_json(struct perf_stat_config *config, } } -static void aggr_printout(struct perf_stat_config *config, +static void aggr_printout(struct perf_stat_config *config, struct outstate *os, struct evsel *evsel, struct aggr_cpu_id id, int aggr_nr) { if (config->json_output) - print_aggr_id_json(config, evsel, id, aggr_nr); + print_aggr_id_json(config, os, evsel, id, aggr_nr); else if (config->csv_output) print_aggr_id_csv(config, evsel, id, aggr_nr); else print_aggr_id_std(config, evsel, id, aggr_nr); } -struct outstate { - FILE *fh; - bool newline; - bool first; - const char *prefix; - int nfields; - int aggr_nr; - struct aggr_cpu_id id; - struct evsel *evsel; - struct cgroup *cgrp; -}; - static void new_line_std(struct perf_stat_config *config __maybe_unused, void *ctx) { @@ -380,14 +434,14 @@ static inline void __new_line_std_csv(struct perf_stat_config *config, struct outstate *os) { fputc('\n', 
os->fh); - if (os->prefix) - fputs(os->prefix, os->fh); - aggr_printout(config, os->evsel, os->id, os->aggr_nr); + if (config->interval) + fputs(os->timestamp, os->fh); + aggr_printout(config, os, os->evsel, os->id, os->aggr_nr); } -static inline void __new_line_std(struct outstate *os) +static inline void __new_line_std(struct perf_stat_config *config, struct outstate *os) { - fprintf(os->fh, " "); + fprintf(os->fh, "%*s", COUNTS_LEN + EVNAME_LEN + config->unit_width + 2, ""); } static void do_new_line_std(struct perf_stat_config *config, @@ -396,17 +450,18 @@ static void do_new_line_std(struct perf_stat_config *config, __new_line_std_csv(config, os); if (config->aggr_mode == AGGR_NONE) fprintf(os->fh, " "); - __new_line_std(os); + __new_line_std(config, os); } static void print_metric_std(struct perf_stat_config *config, - void *ctx, const char *color, const char *fmt, - const char *unit, double val) + void *ctx, enum metric_threshold_classify thresh, + const char *fmt, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; int n; bool newline = os->newline; + const char *color = metric_threshold_classify__color(thresh); os->newline = false; @@ -432,13 +487,13 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx) int i; __new_line_std_csv(config, os); - for (i = 0; i < os->nfields; i++) + for (i = 0; i < os->csv_col_pad; i++) fputs(config->csv_sep, os->fh); } static void print_metric_csv(struct perf_stat_config *config __maybe_unused, void *ctx, - const char *color __maybe_unused, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt, const char *unit, double val) { struct outstate *os = ctx; @@ -459,15 +514,20 @@ static void print_metric_csv(struct perf_stat_config *config __maybe_unused, static void print_metric_json(struct perf_stat_config *config __maybe_unused, void *ctx, - const char *color __maybe_unused, + enum metric_threshold_classify thresh, const char *fmt __maybe_unused, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; - fprintf(out, "\"metric-value\" : \"%f\", ", val); - fprintf(out, "\"metric-unit\" : \"%s\"", unit); + if (unit) { + json_out(os, "\"metric-value\" : \"%f\", \"metric-unit\" : \"%s\"", val, unit); + if (thresh != METRIC_THRESHOLD_UNKNOWN) { + json_out(os, "\"metric-threshold\" : \"%s\"", + metric_threshold_classify__str(thresh)); + } + } if (!config->metric_only) fprintf(out, "}"); } @@ -477,9 +537,11 @@ static void new_line_json(struct perf_stat_config *config, void *ctx) struct outstate *os = ctx; fputs("\n{", os->fh); - if (os->prefix) - fprintf(os->fh, "%s", os->prefix); - aggr_printout(config, os->evsel, os->id, os->aggr_nr); + os->first = true; + if (config->interval) + json_out(os, "%s", os->timestamp); + + aggr_printout(config, os, os->evsel, os->id, os->aggr_nr); } static void print_metricgroup_header_json(struct perf_stat_config *config, @@ -489,7 +551,7 @@ static void print_metricgroup_header_json(struct perf_stat_config *config, if (!metricgroup_name) return; - fprintf(config->output, "\"metricgroup\" : \"%s\"}", metricgroup_name); + json_out((struct outstate *) ctx, "\"metricgroup\" : \"%s\"}", metricgroup_name); new_line_json(config, ctx); } @@ -502,12 +564,12 @@ static void print_metricgroup_header_csv(struct perf_stat_config *config, if (!metricgroup_name) { /* Leave space for running and enabling */ - for (i = 0; i < os->nfields - 2; i++) + for (i = 0; i < os->csv_col_pad - 2; i++) fputs(config->csv_sep, os->fh); return; } - for (i = 0; i < os->nfields; 
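new_line_csv() above pads the remainder of a row with os->csv_col_pad separators so rows that stop early (for example, with no metric) keep a fixed column count. The idea reduced to a few lines, with illustrative values:

```c
#include <stdio.h>

int main(void)
{
	const char *csv_sep = ",";
	int csv_col_pad = 4;	/* columns this row did not fill */

	printf("1000000%scycles", csv_sep);
	for (int i = 0; i < csv_col_pad; i++)
		fputs(csv_sep, stdout);
	putchar('\n');	/* => 1000000,cycles,,,, */
	return 0;
}
```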
i++) + for (i = 0; i < os->csv_col_pad; i++) fputs(config->csv_sep, os->fh); fprintf(config->output, "%s", metricgroup_name); new_line_csv(config, ctx); @@ -521,50 +583,27 @@ static void print_metricgroup_header_std(struct perf_stat_config *config, int n; if (!metricgroup_name) { - __new_line_std(os); + __new_line_std(config, os); return; } n = fprintf(config->output, " %*s", EVNAME_LEN, metricgroup_name); - fprintf(config->output, "%*s", MGROUP_LEN - n - 1, ""); -} - -/* Filter out some columns that don't work well in metrics only mode */ - -static bool valid_only_metric(const char *unit) -{ - if (!unit) - return false; - if (strstr(unit, "/sec") || - strstr(unit, "CPUs utilized")) - return false; - return true; -} - -static const char *fixunit(char *buf, struct evsel *evsel, - const char *unit) -{ - if (!strncmp(unit, "of all", 6)) { - snprintf(buf, 1024, "%s %s", evsel__name(evsel), - unit); - return buf; - } - return unit; + fprintf(config->output, "%*s", MGROUP_LEN + config->unit_width + 2 - n, ""); } static void print_metric_only(struct perf_stat_config *config, - void *ctx, const char *color, const char *fmt, - const char *unit, double val) + void *ctx, enum metric_threshold_classify thresh, + const char *fmt, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; - char buf[1024], str[1024]; + char str[1024]; unsigned mlen = config->metric_only_len; + const char *color = metric_threshold_classify__color(thresh); - if (!valid_only_metric(unit)) - return; - unit = fixunit(buf, os->evsel, unit); + if (!unit) + unit = ""; if (mlen < strlen(unit)) mlen = strlen(unit) + 1; @@ -577,18 +616,18 @@ static void print_metric_only(struct perf_stat_config *config, } static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused, - void *ctx, const char *color __maybe_unused, + void *ctx, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt, - const char *unit, double val) + const char *unit __maybe_unused, double val) { struct outstate *os = ctx; FILE *out = os->fh; char buf[64], *vals, *ends; - char tbuf[1024]; - if (!valid_only_metric(unit)) + if (!unit) return; - unit = fixunit(tbuf, os->evsel, unit); + snprintf(buf, sizeof(buf), fmt ?: "", val); ends = vals = skip_spaces(buf); while (isdigit(*ends) || *ends == '.') @@ -599,41 +638,34 @@ static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused } static void print_metric_only_json(struct perf_stat_config *config __maybe_unused, - void *ctx, const char *color __maybe_unused, + void *ctx, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt, const char *unit, double val) { struct outstate *os = ctx; - FILE *out = os->fh; - char buf[64], *vals, *ends; - char tbuf[1024]; + char buf[64], *ends; + const char *vals; - if (!valid_only_metric(unit)) + if (!unit || !unit[0]) return; - unit = fixunit(tbuf, os->evsel, unit); snprintf(buf, sizeof(buf), fmt ?: "", val); - ends = vals = skip_spaces(buf); + vals = ends = skip_spaces(buf); while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; - if (!unit[0] || !vals[0]) - return; - fprintf(out, "%s\"%s\" : \"%s\"", os->first ? 
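print_metric_only_csv() above formats the metric with its own format string, then clips the buffer down to the leading numeric token. A sketch of that clipping, using ctype directly in place of perf's skip_spaces() helper:

```c
#include <ctype.h>
#include <stdio.h>

int main(void)
{
	char buf[64], *vals, *ends;

	snprintf(buf, sizeof(buf), "%8.2f", 12.34);
	vals = buf;
	while (isspace((unsigned char)*vals))	/* skip_spaces() equivalent */
		vals++;
	ends = vals;
	while (isdigit((unsigned char)*ends) || *ends == '.')
		ends++;
	*ends = '\0';
	printf("%s\n", vals);	/* prints "12.34" */
	return 0;
}
```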
"" : ", ", unit, vals); - os->first = false; -} - -static void new_line_metric(struct perf_stat_config *config __maybe_unused, - void *ctx __maybe_unused) -{ + if (!vals[0]) + vals = "none"; + json_out(os, "\"%s\" : \"%s\"", unit, vals); } static void print_metric_header(struct perf_stat_config *config, - void *ctx, const char *color __maybe_unused, + void *ctx, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt __maybe_unused, const char *unit, double val __maybe_unused) { struct outstate *os = ctx; - char tbuf[1024]; /* In case of iostat, print metric header for first root port only */ if (config->iostat_run && @@ -643,9 +675,8 @@ static void print_metric_header(struct perf_stat_config *config, if (os->evsel->cgrp != os->cgrp) return; - if (!valid_only_metric(unit)) + if (!unit) return; - unit = fixunit(tbuf, os->evsel, unit); if (config->json_output) return; @@ -699,28 +730,27 @@ static void print_counter_value_csv(struct perf_stat_config *config, fprintf(output, "%s", evsel__name(evsel)); } -static void print_counter_value_json(struct perf_stat_config *config, +static void print_counter_value_json(struct outstate *os, struct evsel *evsel, double avg, bool ok) { - FILE *output = config->output; const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED; if (ok) - fprintf(output, "\"counter-value\" : \"%f\", ", avg); + json_out(os, "\"counter-value\" : \"%f\"", avg); else - fprintf(output, "\"counter-value\" : \"%s\", ", bad_count); + json_out(os, "\"counter-value\" : \"%s\"", bad_count); if (evsel->unit) - fprintf(output, "\"unit\" : \"%s\", ", evsel->unit); + json_out(os, "\"unit\" : \"%s\"", evsel->unit); - fprintf(output, "\"event\" : \"%s\", ", evsel__name(evsel)); + json_out(os, "\"event\" : \"%s\"", evsel__name(evsel)); } -static void print_counter_value(struct perf_stat_config *config, +static void print_counter_value(struct perf_stat_config *config, struct outstate *os, struct evsel *evsel, double avg, bool ok) { if (config->json_output) - print_counter_value_json(config, evsel, avg, ok); + print_counter_value_json(os, evsel, avg, ok); else if (config->csv_output) print_counter_value_csv(config, evsel, avg, ok); else @@ -728,48 +758,37 @@ static void print_counter_value(struct perf_stat_config *config, } static void abs_printout(struct perf_stat_config *config, + struct outstate *os, struct aggr_cpu_id id, int aggr_nr, struct evsel *evsel, double avg, bool ok) { - aggr_printout(config, evsel, id, aggr_nr); - print_counter_value(config, evsel, avg, ok); - print_cgroup(config, evsel->cgrp); -} - -static bool is_mixed_hw_group(struct evsel *counter) -{ - struct evlist *evlist = counter->evlist; - u32 pmu_type = counter->core.attr.type; - struct evsel *pos; - - if (counter->core.nr_members < 2) - return false; - - evlist__for_each_entry(evlist, pos) { - /* software events can be part of any hardware group */ - if (pos->core.attr.type == PERF_TYPE_SOFTWARE) - continue; - if (pmu_type == PERF_TYPE_SOFTWARE) { - pmu_type = pos->core.attr.type; - continue; - } - if (pmu_type != pos->core.attr.type) - return true; - } - - return false; + aggr_printout(config, os, evsel, id, aggr_nr); + print_counter_value(config, os, evsel, avg, ok); + print_cgroup(config, os, evsel->cgrp); } -static bool evlist__has_hybrid(struct evlist *evlist) +static bool evlist__has_hybrid_pmus(struct evlist *evlist) { struct evsel *evsel; + struct perf_pmu *last_core_pmu = NULL; if (perf_pmus__num_core_pmus() == 1) return false; evlist__for_each_entry(evlist, evsel) { - if 
(evsel->core.is_pmu_core) + if (evsel->core.is_pmu_core) { + struct perf_pmu *pmu = evsel__find_pmu(evsel); + + if (pmu == last_core_pmu) + continue; + + if (last_core_pmu == NULL) { + last_core_pmu = pmu; + continue; + } + /* A distinct core PMU. */ return true; + } } return false; @@ -787,32 +806,31 @@ static void printout(struct perf_stat_config *config, struct outstate *os, if (config->csv_output) { pm = config->metric_only ? print_metric_only_csv : print_metric_csv; - nl = config->metric_only ? new_line_metric : new_line_csv; + nl = config->metric_only ? NULL : new_line_csv; pmh = print_metricgroup_header_csv; - os->nfields = 4 + (counter->cgrp ? 1 : 0); + os->csv_col_pad = 4 + (counter->cgrp ? 1 : 0); } else if (config->json_output) { pm = config->metric_only ? print_metric_only_json : print_metric_json; - nl = config->metric_only ? new_line_metric : new_line_json; + nl = config->metric_only ? NULL : new_line_json; pmh = print_metricgroup_header_json; } else { pm = config->metric_only ? print_metric_only : print_metric_std; - nl = config->metric_only ? new_line_metric : new_line_std; + nl = config->metric_only ? NULL : new_line_std; pmh = print_metricgroup_header_std; } if (run == 0 || ena == 0 || counter->counts->scaled == -1) { if (config->metric_only) { - pm(config, os, NULL, "", "", 0); + pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, + /*unit=*/NULL, /*val=*/0); return; } ok = false; if (counter->supported) { - if (!evlist__has_hybrid(counter->evlist)) { + if (!evlist__has_hybrid_pmus(counter->evlist)) { config->print_free_counters_hint = 1; - if (is_mixed_hw_group(counter)) - config->print_mixed_hw_group_error = 1; } } } @@ -823,18 +841,18 @@ static void printout(struct perf_stat_config *config, struct outstate *os, out.ctx = os; out.force_header = false; - if (!config->metric_only && !counter->default_metricgroup) { - abs_printout(config, os->id, os->aggr_nr, counter, uval, ok); + if (!config->metric_only && (!counter->default_metricgroup || counter->default_show_events)) { + abs_printout(config, os, os->id, os->aggr_nr, counter, uval, ok); - print_noise(config, counter, noise, /*before_metric=*/true); - print_running(config, run, ena, /*before_metric=*/true); + print_noise(config, os, counter, noise, /*before_metric=*/true); + print_running(config, os, run, ena, /*before_metric=*/true); } if (ok) { - if (!config->metric_only && counter->default_metricgroup) { + if (!config->metric_only && counter->default_metricgroup && !counter->default_show_events) { void *from = NULL; - aggr_printout(config, os->evsel, os->id, os->aggr_nr); + aggr_printout(config, os, os->evsel, os->id, os->aggr_nr); /* Print out all the metricgroup with the same metric event. 
*/ do { int num = 0; @@ -847,70 +865,22 @@ static void printout(struct perf_stat_config *config, struct outstate *os, __new_line_std_csv(config, os); } - print_noise(config, counter, noise, /*before_metric=*/true); - print_running(config, run, ena, /*before_metric=*/true); + print_noise(config, os, counter, noise, /*before_metric=*/true); + print_running(config, os, run, ena, /*before_metric=*/true); from = perf_stat__print_shadow_stats_metricgroup(config, counter, aggr_idx, - &num, from, &out, - &config->metric_events); + &num, from, &out); } while (from != NULL); - } else - perf_stat__print_shadow_stats(config, counter, uval, aggr_idx, - &out, &config->metric_events); + } else { + perf_stat__print_shadow_stats(config, counter, aggr_idx, &out); + } } else { - pm(config, os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0); + pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, /*unit=*/NULL, /*val=*/0); } if (!config->metric_only) { - print_noise(config, counter, noise, /*before_metric=*/false); - print_running(config, run, ena, /*before_metric=*/false); - } -} - -static void uniquify_event_name(struct evsel *counter) -{ - char *new_name; - char *config; - int ret = 0; - - if (counter->uniquified_name || counter->use_config_name || - !counter->pmu_name || !strncmp(evsel__name(counter), counter->pmu_name, - strlen(counter->pmu_name))) - return; - - config = strchr(counter->name, '/'); - if (config) { - if (asprintf(&new_name, - "%s%s", counter->pmu_name, config) > 0) { - free(counter->name); - counter->name = new_name; - } - } else { - if (evsel__is_hybrid(counter)) { - ret = asprintf(&new_name, "%s/%s/", - counter->pmu_name, counter->name); - } else { - ret = asprintf(&new_name, "%s [%s]", - counter->name, counter->pmu_name); - } - - if (ret) { - free(counter->name); - counter->name = new_name; - } + print_noise(config, os, counter, noise, /*before_metric=*/false); + print_running(config, os, run, ena, /*before_metric=*/false); } - - counter->uniquified_name = true; -} - -static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config) -{ - return evsel__is_hybrid(evsel) && !config->hybrid_merge; -} - -static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter) -{ - if (config->aggr_mode == AGGR_NONE || hybrid_uniquify(counter, config)) - uniquify_event_name(counter); } /** @@ -937,24 +907,46 @@ static bool should_skip_zero_counter(struct perf_stat_config *config, int idx; /* + * Skip unsupported default events when not verbose. (default events + * are all marked 'skippable'). + */ + if (verbose == 0 && counter->skippable && !counter->supported) + return true; + + /* Metric only counts won't be displayed but the metric wants to be computed. */ + if (config->metric_only) + return false; + /* * Skip value 0 when enabling --per-thread globally, * otherwise it will have too many 0 output. */ if (config->aggr_mode == AGGR_THREAD && config->system_wide) return true; - /* Tool events have the software PMU but are only gathered on 1. */ - if (evsel__is_tool(counter)) - return true; - /* - * Skip value 0 when it's an uncore event and the given aggr id - * does not belong to the PMU cpumask. + * In per-thread mode the aggr_map and aggr_get_id functions may be + * NULL, assume all 0 values should be output in that case. 
*/ - if (!counter->pmu || !counter->pmu->is_uncore) + if (!config->aggr_map || !config->aggr_get_id) return false; - perf_cpu_map__for_each_cpu(cpu, idx, counter->pmu->cpus) { + /* + * Tool events may be gathered on all logical CPUs, for example + * system_time, but for many the first index is the only one used, for + * example num_cores. Don't skip for the first index. + */ + if (evsel__is_tool(counter)) { + struct aggr_cpu_id own_id = + config->aggr_get_id(config, (struct perf_cpu){ .cpu = 0 }); + + return !aggr_cpu_id__equal(id, &own_id); + } + /* + * Skip value 0 when the counter's cpumask doesn't match the given aggr + * id. + */ + + perf_cpu_map__for_each_cpu(cpu, idx, counter->core.cpus) { struct aggr_cpu_id own_id = config->aggr_get_id(config, cpu); if (aggr_cpu_id__equal(id, &own_id)) @@ -981,28 +973,38 @@ static void print_counter_aggrdata(struct perf_stat_config *config, os->evsel = counter; /* Skip already merged uncore/hybrid events */ - if (counter->merged_stat) - return; - - uniquify_counter(config, counter); + if (config->aggr_mode != AGGR_NONE) { + if (evsel__is_hybrid(counter)) { + if (config->hybrid_merge && counter->first_wildcard_match != NULL) + return; + } else { + if (counter->first_wildcard_match != NULL) + return; + } + } val = aggr->counts.val; ena = aggr->counts.ena; run = aggr->counts.run; - if (perf_stat__skip_metric_event(counter, &config->metric_events, ena, run)) + if (perf_stat__skip_metric_event(counter, ena, run)) return; if (val == 0 && should_skip_zero_counter(config, counter, &id)) return; if (!metric_only) { - if (config->json_output) + if (config->json_output) { + os->first = true; fputc('{', output); - if (os->prefix) - fprintf(output, "%s", os->prefix); - else if (config->summary && config->csv_output && - !config->no_csv_summary && !config->interval) + } + if (config->interval) { + if (config->json_output) + json_out(os, "%s", os->timestamp); + else + fprintf(output, "%s", os->timestamp); + } else if (config->summary && config->csv_output && + !config->no_csv_summary) fprintf(output, "%s%s", "summary", config->csv_sep); } @@ -1028,15 +1030,19 @@ static void print_metric_begin(struct perf_stat_config *config, if (config->json_output) fputc('{', config->output); - if (os->prefix) - fprintf(config->output, "%s", os->prefix); + if (config->interval) { + if (config->json_output) + json_out(os, "%s", os->timestamp); + else + fprintf(config->output, "%s", os->timestamp); + } evsel = evlist__first(evlist); id = config->aggr_map->map[aggr_idx]; aggr = &evsel->stats->aggr[aggr_idx]; - aggr_printout(config, evsel, id, aggr->nr); + aggr_printout(config, os, evsel, id, aggr->nr); - print_cgroup(config, os->cgrp ? : evsel->cgrp); + print_cgroup(config, os, os->cgrp ? 
: evsel->cgrp); } static void print_metric_end(struct perf_stat_config *config, struct outstate *os) @@ -1183,10 +1189,21 @@ static void print_metric_headers_std(struct perf_stat_config *config, static void print_metric_headers_csv(struct perf_stat_config *config, bool no_indent __maybe_unused) { + const char *p; + if (config->interval) - fputs("time,", config->output); - if (!config->iostat_run) - fputs(aggr_header_csv[config->aggr_mode], config->output); + fprintf(config->output, "time%s", config->csv_sep); + if (config->iostat_run) + return; + + p = aggr_header_csv[config->aggr_mode]; + while (*p) { + if (*p == ',') + fputs(config->csv_sep, config->output); + else + fputc(*p, config->output); + p++; + } } static void print_metric_headers_json(struct perf_stat_config *config __maybe_unused, @@ -1204,7 +1221,7 @@ static void print_metric_headers(struct perf_stat_config *config, struct perf_stat_output_ctx out = { .ctx = &os, .print_metric = print_metric_header, - .new_line = new_line_metric, + .new_line = NULL, .force_header = true, }; @@ -1223,35 +1240,33 @@ static void print_metric_headers(struct perf_stat_config *config, /* Print metrics headers only */ evlist__for_each_entry(evlist, counter) { - if (config->aggr_mode != AGGR_NONE && counter->metric_leader != counter) + if (!config->iostat_run && + config->aggr_mode != AGGR_NONE && counter->metric_leader != counter) continue; os.evsel = counter; - perf_stat__print_shadow_stats(config, counter, 0, - 0, - &out, - &config->metric_events); + perf_stat__print_shadow_stats(config, counter, /*aggr_idx=*/0, &out); } if (!config->json_output) fputc('\n', config->output); } -static void prepare_interval(struct perf_stat_config *config, - char *prefix, size_t len, struct timespec *ts) +static void prepare_timestamp(struct perf_stat_config *config, + struct outstate *os, struct timespec *ts) { if (config->iostat_run) return; if (config->json_output) - scnprintf(prefix, len, "\"interval\" : %lu.%09lu, ", + scnprintf(os->timestamp, sizeof(os->timestamp), "\"interval\" : %lu.%09lu", (unsigned long) ts->tv_sec, ts->tv_nsec); else if (config->csv_output) - scnprintf(prefix, len, "%lu.%09lu%s", + scnprintf(os->timestamp, sizeof(os->timestamp), "%lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); else - scnprintf(prefix, len, "%6lu.%09lu ", + scnprintf(os->timestamp, sizeof(os->timestamp), "%6lu.%09lu ", (unsigned long) ts->tv_sec, ts->tv_nsec); } @@ -1270,7 +1285,7 @@ static void print_header_interval_std(struct perf_stat_config *config, case AGGR_CLUSTER: case AGGR_CACHE: case AGGR_CORE: - fprintf(output, "#%*s %-*s cpus", + fprintf(output, "#%*s %-*s ctrs", INTERVAL_LEN - 1, "time", aggr_header_lens[config->aggr_mode], aggr_header_std[config->aggr_mode]); @@ -1459,7 +1474,7 @@ static void print_footer(struct perf_stat_config *config) fprintf(output, " %17.*f +- %.*f seconds time elapsed", precision, avg, precision, sd); - print_noise_pct(config, sd, avg, /*before_metric=*/false); + print_noise_pct(config, NULL, sd, avg, /*before_metric=*/false); } fprintf(output, "\n\n"); @@ -1469,11 +1484,6 @@ static void print_footer(struct perf_stat_config *config) " echo 0 > /proc/sys/kernel/nmi_watchdog\n" " perf stat ...\n" " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); - - if (config->print_mixed_hw_group_error) - fprintf(output, - "The events in group usually have to be from " - "the same PMU. 
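prepare_timestamp() above formats the same timespec three different ways for --interval-print. A sketch showing the exact strings produced for 5.123456789 seconds, assuming "," as csv_sep:

```c
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts = { .tv_sec = 5, .tv_nsec = 123456789 };
	char json[64], csv[64], std[64];

	snprintf(json, sizeof(json), "\"interval\" : %lu.%09lu",
		 (unsigned long)ts.tv_sec, (unsigned long)ts.tv_nsec);
	snprintf(csv, sizeof(csv), "%lu.%09lu%s",
		 (unsigned long)ts.tv_sec, (unsigned long)ts.tv_nsec, ",");
	snprintf(std, sizeof(std), "%6lu.%09lu ",
		 (unsigned long)ts.tv_sec, (unsigned long)ts.tv_nsec);
	printf("json: %s\ncsv : %s\nstd : [%s]\n", json, csv, std);
	return 0;
}
```

Note the JSON variant no longer carries a trailing ", "; with json_out() the separator is now prepended by whichever field comes next.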
Try reorganizing the group.\n"); } static void print_percore(struct perf_stat_config *config, @@ -1549,21 +1559,19 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf int argc, const char **argv) { bool metric_only = config->metric_only; - int interval = config->interval; struct evsel *counter; - char buf[64]; struct outstate os = { .fh = config->output, .first = true, }; + evlist__uniquify_evsel_names(evlist, config); + if (config->iostat_run) evlist->selected = evlist__first(evlist); - if (interval) { - os.prefix = buf; - prepare_interval(config, buf, sizeof(buf), ts); - } + if (config->interval) + prepare_timestamp(config, &os, ts); print_header(config, _target, evlist, argc, argv); @@ -1582,7 +1590,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf case AGGR_THREAD: case AGGR_GLOBAL: if (config->iostat_run) { - iostat_print_counters(evlist, config, ts, buf, + iostat_print_counters(evlist, config, ts, os.timestamp, (iostat_print_counter_t)print_counter, &os); } else if (config->cgroup_list) { print_cgroup_counter(config, evlist, &os); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 3466aa952442..9c83f7d96caa 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <errno.h> #include <math.h> #include <stdio.h> #include "evsel.h" @@ -15,347 +16,34 @@ #include <linux/zalloc.h> #include "iostat.h" #include "util/hashmap.h" +#include "tool_pmu.h" -struct stats walltime_nsecs_stats; -struct rusage_stats ru_stats; - -enum { - CTX_BIT_USER = 1 << 0, - CTX_BIT_KERNEL = 1 << 1, - CTX_BIT_HV = 1 << 2, - CTX_BIT_HOST = 1 << 3, - CTX_BIT_IDLE = 1 << 4, - CTX_BIT_MAX = 1 << 5, -}; - -enum stat_type { - STAT_NONE = 0, - STAT_NSECS, - STAT_CYCLES, - STAT_INSTRUCTIONS, - STAT_STALLED_CYCLES_FRONT, - STAT_STALLED_CYCLES_BACK, - STAT_BRANCHES, - STAT_BRANCH_MISS, - STAT_CACHE_REFS, - STAT_CACHE_MISSES, - STAT_L1_DCACHE, - STAT_L1_ICACHE, - STAT_LL_CACHE, - STAT_ITLB_CACHE, - STAT_DTLB_CACHE, - STAT_L1D_MISS, - STAT_L1I_MISS, - STAT_LL_MISS, - STAT_DTLB_MISS, - STAT_ITLB_MISS, - STAT_MAX -}; - -static int evsel_context(const struct evsel *evsel) +static bool tool_pmu__is_time_event(const struct perf_stat_config *config, + const struct evsel *evsel, int *tool_aggr_idx) { - int ctx = 0; - - if (evsel->core.attr.exclude_kernel) - ctx |= CTX_BIT_KERNEL; - if (evsel->core.attr.exclude_user) - ctx |= CTX_BIT_USER; - if (evsel->core.attr.exclude_hv) - ctx |= CTX_BIT_HV; - if (evsel->core.attr.exclude_host) - ctx |= CTX_BIT_HOST; - if (evsel->core.attr.exclude_idle) - ctx |= CTX_BIT_IDLE; - - return ctx; -} - -void perf_stat__reset_shadow_stats(void) -{ - memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); - memset(&ru_stats, 0, sizeof(ru_stats)); -} - -static enum stat_type evsel__stat_type(const struct evsel *evsel) -{ - /* Fake perf_hw_cache_op_id values for use with evsel__match. 
*/ - u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); - u64 PERF_COUNT_hw_cache_l1i_miss = PERF_COUNT_HW_CACHE_L1I | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); - u64 PERF_COUNT_hw_cache_ll_miss = PERF_COUNT_HW_CACHE_LL | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); - u64 PERF_COUNT_hw_cache_dtlb_miss = PERF_COUNT_HW_CACHE_DTLB | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); - u64 PERF_COUNT_hw_cache_itlb_miss = PERF_COUNT_HW_CACHE_ITLB | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); - - if (evsel__is_clock(evsel)) - return STAT_NSECS; - else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) - return STAT_CYCLES; - else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) - return STAT_INSTRUCTIONS; - else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) - return STAT_STALLED_CYCLES_FRONT; - else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) - return STAT_STALLED_CYCLES_BACK; - else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - return STAT_BRANCHES; - else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) - return STAT_BRANCH_MISS; - else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES)) - return STAT_CACHE_REFS; - else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) - return STAT_CACHE_MISSES; - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D)) - return STAT_L1_DCACHE; - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I)) - return STAT_L1_ICACHE; - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL)) - return STAT_LL_CACHE; - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB)) - return STAT_DTLB_CACHE; - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_ITLB)) - return STAT_ITLB_CACHE; - else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss)) - return STAT_L1D_MISS; - else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss)) - return STAT_L1I_MISS; - else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss)) - return STAT_LL_MISS; - else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss)) - return STAT_DTLB_MISS; - else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss)) - return STAT_ITLB_MISS; - return STAT_NONE; -} + enum tool_pmu_event event = evsel__tool_event(evsel); + int aggr_idx; -static const char *get_ratio_color(const double ratios[3], double val) -{ - const char *color = PERF_COLOR_NORMAL; - - if (val > ratios[0]) - color = PERF_COLOR_RED; - else if (val > ratios[1]) - color = PERF_COLOR_MAGENTA; - else if (val > ratios[2]) - color = PERF_COLOR_YELLOW; - - return color; -} - -static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type type) -{ - const struct evsel *cur; - int evsel_ctx = evsel_context(evsel); - - evlist__for_each_entry(evsel->evlist, cur) { - struct perf_stat_aggr *aggr; - - /* Ignore the evsel that is being searched from. */ - if (evsel == cur) - continue; - - /* Ignore evsels that are part of different groups. */ - if (evsel->core.leader->nr_members > 1 && - evsel->core.leader != cur->core.leader) - continue; - /* Ignore evsels with mismatched modifiers. */ - if (evsel_ctx != evsel_context(cur)) - continue; - /* Ignore if not the cgroup we're looking for. */ - if (evsel->cgrp != cur->cgrp) - continue; - /* Ignore if not the stat we're looking for. 
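The fake PERF_COUNT_hw_cache_*_miss values removed above follow the standard perf_event_open() cache-event encoding: config = (cache id) | (op << 8) | (result << 16), used together with attr.type = PERF_TYPE_HW_CACHE. For example (Linux-only header):

```c
#include <linux/perf_event.h>
#include <stdio.h>

int main(void)
{
	unsigned long long config = PERF_COUNT_HW_CACHE_L1D |
				    (PERF_COUNT_HW_CACHE_OP_READ << 8) |
				    (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);

	/* Usable as attr.config with attr.type = PERF_TYPE_HW_CACHE. */
	printf("l1d-read-miss config: %#llx\n", config);	/* 0x10000 */
	return 0;
}
```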
*/ - if (type != evsel__stat_type(cur)) - continue; - - aggr = &cur->stats->aggr[aggr_idx]; - if (type == STAT_NSECS) - return aggr->counts.val; - return aggr->counts.val * cur->scale; - } - return 0.0; -} - -static void print_ratio(struct perf_stat_config *config, - const struct evsel *evsel, int aggr_idx, - double numerator, struct perf_stat_output_ctx *out, - enum stat_type denominator_type, - const double color_ratios[3], const char *unit) -{ - double denominator = find_stat(evsel, aggr_idx, denominator_type); - - if (numerator && denominator) { - double ratio = numerator / denominator * 100.0; - const char *color = get_ratio_color(color_ratios, ratio); - - out->print_metric(config, out->ctx, color, "%7.2f%%", unit, ratio); - } else - out->print_metric(config, out->ctx, NULL, NULL, unit, 0); -} - -static void print_stalled_cycles_front(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double stalled, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {50.0, 30.0, 10.0}; - - print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios, - "frontend cycles idle"); -} - -static void print_stalled_cycles_back(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double stalled, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {75.0, 50.0, 20.0}; - - print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios, - "backend cycles idle"); -} - -static void print_branch_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, color_ratios, - "of all branches"); -} - -static void print_l1d_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, color_ratios, - "of all L1-dcache accesses"); -} - -static void print_l1i_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, color_ratios, - "of all L1-icache accesses"); -} - -static void print_ll_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios, - "of all LL-cache accesses"); -} - -static void print_dtlb_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, color_ratios, - "of all dTLB cache accesses"); -} - -static void print_itlb_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, color_ratios, - "of all iTLB cache accesses"); -} - -static void 
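The removed print_ratio()/get_ratio_color() pair colored a percentage against three descending cut-offs, for example {50.0, 30.0, 10.0} for front-end stall cycles; the metric-threshold machinery replaces this. The old logic reduced to its core:

```c
#include <stdio.h>

static const char *ratio_color(const double r[3], double val)
{
	if (val > r[0]) return "red";
	if (val > r[1]) return "magenta";
	if (val > r[2]) return "yellow";
	return "normal";
}

int main(void)
{
	static const double front_stall[3] = { 50.0, 30.0, 10.0 };
	double stalled = 42e6, cycles = 100e6;
	double pct = stalled / cycles * 100.0;

	printf("%7.2f%% frontend cycles idle (%s)\n",
	       pct, ratio_color(front_stall, pct));	/* 42.00% ... (magenta) */
	return 0;
}
```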
print_cache_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - static const double color_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, color_ratios, - "of all cache refs"); -} + if (event != TOOL_PMU__EVENT_DURATION_TIME && + event != TOOL_PMU__EVENT_USER_TIME && + event != TOOL_PMU__EVENT_SYSTEM_TIME) + return false; -static void print_instructions(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double instructions, - struct perf_stat_output_ctx *out) -{ - print_metric_t print_metric = out->print_metric; - void *ctxp = out->ctx; - double cycles = find_stat(evsel, aggr_idx, STAT_CYCLES); - double max_stalled = max(find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_FRONT), - find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_BACK)); - - if (cycles) { - print_metric(config, ctxp, NULL, "%7.2f ", "insn per cycle", - instructions / cycles); - } else - print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); - - if (max_stalled && instructions) { - out->new_line(config, ctxp); - print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn", - max_stalled / instructions); + if (config) { + cpu_aggr_map__for_each_idx(aggr_idx, config->aggr_map) { + if (config->aggr_map->map[aggr_idx].cpu.cpu == 0) { + *tool_aggr_idx = aggr_idx; + return true; + } + } + pr_debug("Unexpected CPU0 missing in aggregation for tool event.\n"); } + *tool_aggr_idx = 0; /* Assume the first aggregation index works. */ + return true; } -static void print_cycles(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double cycles, - struct perf_stat_output_ctx *out) -{ - double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS); - - if (cycles && nsecs) { - double ratio = cycles / nsecs; - - out->print_metric(config, out->ctx, NULL, "%8.3f", "GHz", ratio); - } else - out->print_metric(config, out->ctx, NULL, NULL, "GHz", 0); -} - -static void print_nsecs(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx __maybe_unused, double nsecs, - struct perf_stat_output_ctx *out) -{ - print_metric_t print_metric = out->print_metric; - void *ctxp = out->ctx; - double wall_time = avg_stats(&walltime_nsecs_stats); - - if (wall_time) { - print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", - nsecs / (wall_time * evsel->scale)); - } else - print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); -} - -static int prepare_metric(const struct metric_expr *mexp, +static int prepare_metric(struct perf_stat_config *config, + const struct metric_expr *mexp, const struct evsel *evsel, struct expr_parse_ctx *pctx, int aggr_idx) @@ -365,82 +53,51 @@ static int prepare_metric(const struct metric_expr *mexp, int i; for (i = 0; metric_events[i]; i++) { + int source_count = 0, tool_aggr_idx; + bool is_tool_time = + tool_pmu__is_time_event(config, metric_events[i], &tool_aggr_idx); + struct perf_stat_evsel *ps = metric_events[i]->stats; + struct perf_stat_aggr *aggr; char *n; double val; - int source_count = 0; - - if (evsel__is_tool(metric_events[i])) { - struct stats *stats; - double scale; - switch (metric_events[i]->tool_event) { - case PERF_TOOL_DURATION_TIME: - stats = &walltime_nsecs_stats; - scale = 1e-9; - break; - case PERF_TOOL_USER_TIME: - stats = &ru_stats.ru_utime_usec_stat; - scale = 1e-6; + /* + * If there are multiple uncore PMUs and we're not reading the + * leader's stats, determine the stats for the 
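tool_pmu__is_time_event() above maps time events, which are only counted once, onto whichever aggregation bucket contains CPU0. A toy version with simplified stand-in types (not perf's cpu_aggr_map; the helper name cpu0_aggr_idx is hypothetical):

```c
#include <stdio.h>

struct bucket { int cpu; };	/* stand-in for an aggregation map entry */

static int cpu0_aggr_idx(const struct bucket *map, int nr)
{
	for (int i = 0; i < nr; i++) {
		if (map[i].cpu == 0)
			return i;
	}
	return 0;	/* Assume the first aggregation index works. */
}

int main(void)
{
	struct bucket map[] = { { .cpu = 4 }, { .cpu = 0 }, { .cpu = 8 } };

	printf("tool events aggregate into bucket %d\n",
	       cpu0_aggr_idx(map, 3));	/* prints 1 */
	return 0;
}
```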
appropriate + * uncore PMU. + */ + if (evsel && evsel->metric_leader && + evsel->pmu != evsel->metric_leader->pmu && + mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) { + struct evsel *pos; + + evlist__for_each_entry(evsel->evlist, pos) { + if (pos->pmu != evsel->pmu) + continue; + if (pos->metric_leader != mexp->metric_events[i]) + continue; + ps = pos->stats; + source_count = 1; break; - case PERF_TOOL_SYSTEM_TIME: - stats = &ru_stats.ru_stime_usec_stat; - scale = 1e-6; - break; - case PERF_TOOL_NONE: - pr_err("Invalid tool event 'none'"); - abort(); - case PERF_TOOL_MAX: - pr_err("Invalid tool event 'max'"); - abort(); - default: - pr_err("Unknown tool event '%s'", evsel__name(metric_events[i])); - abort(); } - val = avg_stats(stats) * scale; - source_count = 1; - } else { - struct perf_stat_evsel *ps = metric_events[i]->stats; - struct perf_stat_aggr *aggr; - + } + /* Time events are always on CPU0, the first aggregation index. */ + aggr = &ps->aggr[is_tool_time ? tool_aggr_idx : aggr_idx]; + if (!aggr || !metric_events[i]->supported) { /* - * If there are multiple uncore PMUs and we're not - * reading the leader's stats, determine the stats for - * the appropriate uncore PMU. + * Not supported events will have a count of 0, which + * can be confusing in a metric. Explicitly set the + * value to NAN. Not counted events (enable time of 0) + * are read as 0. */ - if (evsel && evsel->metric_leader && - evsel->pmu != evsel->metric_leader->pmu && - mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) { - struct evsel *pos; - - evlist__for_each_entry(evsel->evlist, pos) { - if (pos->pmu != evsel->pmu) - continue; - if (pos->metric_leader != mexp->metric_events[i]) - continue; - ps = pos->stats; - source_count = 1; - break; - } - } - aggr = &ps->aggr[aggr_idx]; - if (!aggr) - break; - - if (!metric_events[i]->supported) { - /* - * Not supported events will have a count of 0, - * which can be confusing in a - * metric. Explicitly set the value to NAN. Not - * counted events (enable time of 0) are read as - * 0. - */ - val = NAN; - source_count = 0; - } else { - val = aggr->counts.val; - if (!source_count) - source_count = evsel__source_count(metric_events[i]); - } + val = NAN; + source_count = 0; + } else { + val = aggr->counts.val; + if (is_tool_time) + val *= 1e-9; /* Convert time event nanoseconds to seconds. */ + if (!source_count) + source_count = evsel__source_count(metric_events[i]); } n = strdup(evsel__metric_id(metric_events[i])); if (!n) @@ -476,7 +133,7 @@ static void generic_metric(struct perf_stat_config *config, double ratio, scale, threshold; int i; void *ctxp = out->ctx; - const char *color = NULL; + enum metric_threshold_classify thresh = METRIC_THRESHOLD_UNKNOWN; pctx = expr__ctx_new(); if (!pctx) @@ -486,7 +143,7 @@ static void generic_metric(struct perf_stat_config *config, pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list); pctx->sctx.runtime = runtime; pctx->sctx.system_wide = config->system_wide; - i = prepare_metric(mexp, evsel, pctx, aggr_idx); + i = prepare_metric(config, mexp, evsel, pctx, aggr_idx); if (i < 0) { expr__ctx_free(pctx); return; @@ -494,13 +151,13 @@ static void generic_metric(struct perf_stat_config *config, if (!metric_events[i]) { if (expr__parse(&ratio, pctx, metric_expr) == 0) { char *unit; - char metric_bf[64]; + char metric_bf[128]; if (metric_threshold && expr__parse(&threshold, pctx, metric_threshold) == 0 && !isnan(threshold)) { - color = fpclassify(threshold) == FP_ZERO - ? 
PERF_COLOR_GREEN : PERF_COLOR_RED; + thresh = fpclassify(threshold) == FP_ZERO + ? METRIC_THRESHOLD_GOOD : METRIC_THRESHOLD_BAD; } if (metric_unit && metric_name) { @@ -515,22 +172,22 @@ static void generic_metric(struct perf_stat_config *config, scnprintf(metric_bf, sizeof(metric_bf), "%s %s", unit, metric_name); - print_metric(config, ctxp, color, "%8.1f", + print_metric(config, ctxp, thresh, "%8.1f", metric_bf, ratio); } else { - print_metric(config, ctxp, color, "%8.2f", + print_metric(config, ctxp, thresh, "%8.2f", metric_name ? metric_name : out->force_header ? evsel->name : "", ratio); } } else { - print_metric(config, ctxp, color, /*unit=*/NULL, + print_metric(config, ctxp, thresh, /*fmt=*/NULL, out->force_header ? (metric_name ?: evsel->name) : "", 0); } } else { - print_metric(config, ctxp, color, /*unit=*/NULL, + print_metric(config, ctxp, thresh, /*fmt=*/NULL, out->force_header ? (metric_name ?: evsel->name) : "", 0); } @@ -547,7 +204,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx) if (!pctx) return NAN; - if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0) + if (prepare_metric(/*config=*/NULL, mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0) goto out; if (expr__parse(&ratio, pctx, mexp->metric_expr)) @@ -566,7 +223,7 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config, { bool need_full_name = perf_pmus__num_core_pmus() > 1; static const char *last_name; - static const char *last_pmu; + static const struct perf_pmu *last_pmu; char full_name[64]; /* @@ -576,22 +233,20 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config, * event. Only align with other metrics from * different metric events. */ - if (last_name && !strcmp(last_name, name)) { - if (!need_full_name || !strcmp(last_pmu, evsel->pmu_name)) { - out->print_metricgroup_header(config, ctxp, NULL); - return; - } + if (last_name && !strcmp(last_name, name) && last_pmu == evsel->pmu) { + out->print_metricgroup_header(config, ctxp, NULL); + return; } - if (need_full_name) - scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu_name); + if (need_full_name && evsel->pmu) + scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu->name); else scnprintf(full_name, sizeof(full_name), "%s", name); out->print_metricgroup_header(config, ctxp, full_name); last_name = name; - last_pmu = evsel->pmu_name; + last_pmu = evsel->pmu; } /** @@ -610,14 +265,14 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, int aggr_idx, int *num, void *from, - struct perf_stat_output_ctx *out, - struct rblist *metric_events) + struct perf_stat_output_ctx *out) { struct metric_event *me; struct metric_expr *mexp = from; void *ctxp = out->ctx; bool header_printed = false; const char *name = NULL; + struct rblist *metric_events = &evsel->evlist->metric_events; me = metricgroup__lookup(metric_events, evsel, false); if (me == NULL) @@ -640,14 +295,14 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, if (strcmp(name, mexp->default_metricgroup_name)) return (void *)mexp; /* Only print the name of the metricgroup once */ - if (!header_printed) { + if (!header_printed && !evsel->default_show_events) { header_printed = true; perf_stat__print_metricgroup_header(config, evsel, ctxp, name, out); } } - if ((*num)++ > 0) + if ((*num)++ > 0 && out->new_line) out->new_line(config, ctxp); generic_metric(config, mexp, evsel, aggr_idx, out); } @@ -657,61 +312,23 @@ void 
*perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, - double avg, int aggr_idx, - struct perf_stat_output_ctx *out, - struct rblist *metric_events) + int aggr_idx, + struct perf_stat_output_ctx *out) { - typedef void (*stat_print_function_t)(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out); - static const stat_print_function_t stat_print_function[STAT_MAX] = { - [STAT_INSTRUCTIONS] = print_instructions, - [STAT_BRANCH_MISS] = print_branch_miss, - [STAT_L1D_MISS] = print_l1d_miss, - [STAT_L1I_MISS] = print_l1i_miss, - [STAT_DTLB_MISS] = print_dtlb_miss, - [STAT_ITLB_MISS] = print_itlb_miss, - [STAT_LL_MISS] = print_ll_miss, - [STAT_CACHE_MISSES] = print_cache_miss, - [STAT_STALLED_CYCLES_FRONT] = print_stalled_cycles_front, - [STAT_STALLED_CYCLES_BACK] = print_stalled_cycles_back, - [STAT_CYCLES] = print_cycles, - [STAT_NSECS] = print_nsecs, - }; print_metric_t print_metric = out->print_metric; void *ctxp = out->ctx; - int num = 1; + int num = 0; - if (config->iostat_run) { + if (config->iostat_run) iostat_print_metric(config, evsel, out); - } else { - stat_print_function_t fn = stat_print_function[evsel__stat_type(evsel)]; - - if (fn) - fn(config, evsel, aggr_idx, avg, out); - else { - double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS); - - if (nsecs) { - char unit = ' '; - char unit_buf[10] = "/sec"; - double ratio = convert_unit_double(1000000000.0 * avg / nsecs, - &unit); - - if (unit != ' ') - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); - print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); - } else - num = 0; - } - } perf_stat__print_shadow_stats_metricgroup(config, evsel, aggr_idx, - &num, NULL, out, metric_events); + &num, NULL, out); - if (num == 0) - print_metric(config, ctxp, NULL, NULL, NULL, 0); + if (num == 0) { + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, + /*fmt=*/NULL, /*unit=*/NULL, 0); + } } /** @@ -719,7 +336,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, * if it's not running or not the metric event. 
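 *
 * A usage sketch (illustrative, not part of the patch): with the rblist
 * argument gone, a display-path caller passes only the enabled/running
 * times, and the metric_events list is taken from the evsel's evlist:
 *
 *	if (perf_stat__skip_metric_event(counter, ena, run))
 *		return;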
*/ bool perf_stat__skip_metric_event(struct evsel *evsel, - struct rblist *metric_events, u64 ena, u64 run) { if (!evsel->default_metricgroup) @@ -728,5 +344,5 @@ bool perf_stat__skip_metric_event(struct evsel *evsel, if (!ena || !run) return true; - return !metricgroup__lookup(metric_events, evsel, false); + return !metricgroup__lookup(&evsel->evlist->metric_events, evsel, false); } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 0bd5467389e4..976a06e63252 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -526,7 +526,7 @@ static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias) struct perf_counts_values *aggr_counts_a = &ps_a->aggr[i].counts; struct perf_counts_values *aggr_counts_b = &ps_b->aggr[i].counts; - /* NB: don't increase aggr.nr for aliases */ + ps_a->aggr[i].nr += ps_b->aggr[i].nr; aggr_counts_a->val += aggr_counts_b->val; aggr_counts_a->ena += aggr_counts_b->ena; @@ -535,26 +535,6 @@ static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias) return 0; } -/* events should have the same name, scale, unit, cgroup but on different PMUs */ -static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b) -{ - if (strcmp(evsel__name(evsel_a), evsel__name(evsel_b))) - return false; - - if (evsel_a->scale != evsel_b->scale) - return false; - - if (evsel_a->cgrp != evsel_b->cgrp) - return false; - - if (strcmp(evsel_a->unit, evsel_b->unit)) - return false; - - if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b)) - return false; - - return !!strcmp(evsel_a->pmu_name, evsel_b->pmu_name); -} static void evsel__merge_aliases(struct evsel *evsel) { @@ -563,10 +543,9 @@ static void evsel__merge_aliases(struct evsel *evsel) alias = list_prepare_entry(evsel, &(evlist->core.entries), core.node); list_for_each_entry_continue(alias, &evlist->core.entries, core.node) { - /* Merge the same events on different PMUs. */ - if (evsel__is_alias(evsel, alias)) { + if (alias->first_wildcard_match == evsel) { + /* Merge the same events on different PMUs. */ evsel__merge_aggr_counters(evsel, alias); - alias->merged_stat = true; } } } @@ -579,11 +558,7 @@ static bool evsel__should_merge_hybrid(const struct evsel *evsel, static void evsel__merge_stats(struct evsel *evsel, struct perf_stat_config *config) { - /* this evsel is already merged */ - if (evsel->merged_stat) - return; - - if (evsel->auto_merge_stats || evsel__should_merge_hybrid(evsel, config)) + if (!evsel->pmu || !evsel->pmu->is_core || evsel__should_merge_hybrid(evsel, config)) evsel__merge_aliases(evsel); } @@ -670,7 +645,8 @@ void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *e evsel__process_percore(evsel); } -int perf_event__process_stat_event(struct perf_session *session, +int perf_event__process_stat_event(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { struct perf_counts_values count, *ptr; @@ -741,61 +717,3 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp) return ret; } - -int create_perf_stat_counter(struct evsel *evsel, - struct perf_stat_config *config, - struct target *target, - int cpu_map_idx) -{ - struct perf_event_attr *attr = &evsel->core.attr; - struct evsel *leader = evsel__leader(evsel); - - attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | - PERF_FORMAT_TOTAL_TIME_RUNNING; - - /* - * The event is part of non trivial group, let's enable - * the group read (for leader) and ID retrieval for all - * members. 
- */ - if (leader->core.nr_members > 1) - attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; - - attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list); - - /* - * Some events get initialized with sample_(period/type) set, - * like tracepoints. Clear it up for counting. - */ - attr->sample_period = 0; - - if (config->identifier) - attr->sample_type = PERF_SAMPLE_IDENTIFIER; - - if (config->all_user) { - attr->exclude_kernel = 1; - attr->exclude_user = 0; - } - - if (config->all_kernel) { - attr->exclude_kernel = 0; - attr->exclude_user = 1; - } - - /* - * Disabling all counters initially, they will be enabled - * either manually by us or by kernel via enable_on_exec - * set later. - */ - if (evsel__is_group_leader(evsel)) { - attr->disabled = 1; - - if (target__enable_on_exec(target)) - attr->enable_on_exec = 1; - } - - if (target__has_cpu(target) && !target__has_per_thread(target)) - return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu_map_idx); - - return evsel__open_per_thread(evsel, evsel->core.threads); -} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index fd7a187551bd..f986911c9296 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -7,7 +7,6 @@ #include <sys/types.h> #include <sys/resource.h> #include "cpumap.h" -#include "rblist.h" #include "counts.h" struct perf_cpu_map; @@ -57,11 +56,6 @@ enum aggr_mode { AGGR_MAX }; -struct rusage_stats { - struct stats ru_utime_usec_stat; - struct stats ru_stime_usec_stat; -}; - typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu); struct perf_stat_config { @@ -100,16 +94,13 @@ struct perf_stat_config { int times; int run_count; int print_free_counters_hint; - int print_mixed_hw_group_error; const char *csv_sep; struct stats *walltime_nsecs_stats; struct rusage ru_data; - struct rusage_stats *ru_stats; struct cpu_aggr_map *aggr_map; aggr_get_id_t aggr_get_id; struct cpu_aggr_map *cpus_aggr_map; u64 *walltime_run; - struct rblist metric_events; int ctl_fd; int ctl_fd_ack; bool ctl_fd_close; @@ -117,8 +108,9 @@ struct perf_stat_config { unsigned int topdown_level; }; +extern struct perf_stat_config stat_config; + void perf_stat__set_big_num(int set); -void perf_stat__set_no_csv_summary(int set); void update_stats(struct stats *stats, u64 val); double avg_stats(struct stats *stats); @@ -134,29 +126,24 @@ static inline void init_stats(struct stats *stats) stats->max = 0; } -static inline void init_rusage_stats(struct rusage_stats *ru_stats) { - init_stats(&ru_stats->ru_utime_usec_stat); - init_stats(&ru_stats->ru_stime_usec_stat); -} - -static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rusage* rusage) { - const u64 us_to_ns = 1000; - const u64 s_to_ns = 1000000000; - update_stats(&ru_stats->ru_utime_usec_stat, - (rusage->ru_utime.tv_usec * us_to_ns + rusage->ru_utime.tv_sec * s_to_ns)); - update_stats(&ru_stats->ru_stime_usec_stat, - (rusage->ru_stime.tv_usec * us_to_ns + rusage->ru_stime.tv_sec * s_to_ns)); -} - struct evsel; struct evlist; -extern struct stats walltime_nsecs_stats; -extern struct rusage_stats ru_stats; +enum metric_threshold_classify { + METRIC_THRESHOLD_UNKNOWN, + METRIC_THRESHOLD_BAD, + METRIC_THRESHOLD_NEARLY_BAD, + METRIC_THRESHOLD_LESS_GOOD, + METRIC_THRESHOLD_GOOD, +}; +const char *metric_threshold_classify__color(enum metric_threshold_classify thresh); typedef void (*print_metric_t)(struct perf_stat_config *config, - void *ctx, const char *color, const char *unit, - const char *fmt, double 
val); + void *ctx, + enum metric_threshold_classify thresh, + const char *fmt, + const char *unit, + double val); typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx); /* Used to print the display name of the Default metricgroup for now. */ @@ -174,19 +161,15 @@ struct perf_stat_output_ctx { void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, - double avg, int aggr_idx, - struct perf_stat_output_ctx *out, - struct rblist *metric_events); -bool perf_stat__skip_metric_event(struct evsel *evsel, - struct rblist *metric_events, - u64 ena, u64 run); + int aggr_idx, + struct perf_stat_output_ctx *out); +bool perf_stat__skip_metric_event(struct evsel *evsel, u64 ena, u64 run); void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, struct evsel *evsel, int aggr_idx, int *num, void *from, - struct perf_stat_output_ctx *out, - struct rblist *metric_events); + struct perf_stat_output_ctx *out); int evlist__alloc_stats(struct perf_stat_config *config, struct evlist *evlist, bool alloc_raw); @@ -210,17 +193,14 @@ union perf_event; struct perf_session; struct target; -int perf_event__process_stat_event(struct perf_session *session, +int perf_event__process_stat_event(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp); size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp); size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp); -int create_perf_stat_counter(struct evsel *evsel, - struct perf_stat_config *config, - struct target *target, - int cpu_map_idx); void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, struct target *_target, struct timespec *ts, int argc, const char **argv); diff --git a/tools/perf/util/stream.c b/tools/perf/util/stream.c index 545e44981a27..3de4a6130853 100644 --- a/tools/perf/util/stream.c +++ b/tools/perf/util/stream.c @@ -52,7 +52,6 @@ static struct evlist_streams *evlist_streams__new(int nr_evsel, goto err; s->nr_streams_max = nr_streams_max; - s->evsel_idx = -1; } els->ev_streams = es; @@ -139,7 +138,7 @@ static int evlist__init_callchain_streams(struct evlist *evlist, hists__output_resort(hists, NULL); init_hot_callchain(hists, &es[i]); - es[i].evsel_idx = pos->core.idx; + es[i].evsel = pos; i++; } @@ -166,12 +165,12 @@ struct evlist_streams *evlist__create_streams(struct evlist *evlist, } struct evsel_streams *evsel_streams__entry(struct evlist_streams *els, - int evsel_idx) + const struct evsel *evsel) { struct evsel_streams *es = els->ev_streams; for (int i = 0; i < els->nr_evsel; i++) { - if (es[i].evsel_idx == evsel_idx) + if (es[i].evsel == evsel) return &es[i]; } diff --git a/tools/perf/util/stream.h b/tools/perf/util/stream.h index bee768874fea..50f7e6e04982 100644 --- a/tools/perf/util/stream.h +++ b/tools/perf/util/stream.h @@ -2,7 +2,9 @@ #ifndef __PERF_STREAM_H #define __PERF_STREAM_H -#include "callchain.h" +struct callchain_node; +struct evlist; +struct evsel; struct stream { struct callchain_node *cnode; @@ -11,9 +13,9 @@ struct stream { struct evsel_streams { struct stream *streams; + const struct evsel *evsel; int nr_streams_max; int nr_streams; - int evsel_idx; u64 streams_hits; }; @@ -22,15 +24,13 @@ struct evlist_streams { int nr_evsel; }; -struct evlist; - void evlist_streams__delete(struct evlist_streams *els); struct evlist_streams *evlist__create_streams(struct evlist *evlist, int nr_streams_max); 
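A usage sketch for the reworked lookup declared below (hypothetical caller; evlist and els are assumed to be in scope, not part of the patch): entries are now keyed by the evsel pointer itself rather than by core.idx, so an iterating caller passes the evsel straight through:

	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		struct evsel_streams *es = evsel_streams__entry(els, pos);

		if (es)
			pr_debug("%s: %d hot streams\n", evsel__name(pos), es->nr_streams);
	}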
struct evsel_streams *evsel_streams__entry(struct evlist_streams *els, - int evsel_idx); + const struct evsel *evsel); void evsel_streams__match(struct evsel_streams *es_base, struct evsel_streams *es_pair); diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 116a642ad99d..c0e927bbadf6 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -254,12 +254,49 @@ char *strpbrk_esc(char *str, const char *stopset) do { ptr = strpbrk(str, stopset); - if (ptr == str || - (ptr == str + 1 && *(ptr - 1) != '\\')) + if (!ptr) { + /* stopset not in str. */ + break; + } + if (ptr == str) { + /* stopset character is first in str. */ + break; + } + if (ptr == str + 1 && str[0] != '\\') { + /* stopset character is second and wasn't preceded by a '\'. */ + break; + } + str = ptr + 1; + } while (ptr[-1] == '\\' && ptr[-2] != '\\'); + + return ptr; +} + +/* Like strpbrk_esc(), but does not stop at characters quoted with single/double quotes */ +char *strpbrk_esq(char *str, const char *stopset) +{ + char *_stopset = NULL; + char *ptr; + const char *squote = "'"; + const char *dquote = "\""; + + if (asprintf(&_stopset, "%s%c%c", stopset, *squote, *dquote) < 0) + return NULL; + + do { + ptr = strpbrk_esc(str, _stopset); + if (!ptr) + break; + if (*ptr == *squote) + ptr = strpbrk_esc(ptr + 1, squote); + else if (*ptr == *dquote) + ptr = strpbrk_esc(ptr + 1, dquote); + else break; str = ptr + 1; - } while (ptr && *(ptr - 1) == '\\' && *(ptr - 2) != '\\'); + } while (ptr); + free(_stopset); return ptr; } @@ -293,6 +330,78 @@ char *strdup_esc(const char *str) return ret; } +/* Remove backslashes right before quotes and return the closing quote's address. */ +static char *remove_consumed_esc(char *str, int len, int quote) +{ + char *ptr = str, *end = str + len; + + while (*ptr != quote && ptr < end) { + if (*ptr == '\\' && *(ptr + 1) == quote) { + memmove(ptr, ptr + 1, end - (ptr + 1)); + /* now *ptr is `quote`. */ + end--; + } + ptr++; + } + + return *ptr == quote ? ptr : NULL; +} + +/* + * Like strdup_esc(), but keep quoted strings as they are (only the single + * backslash before a quote is removed). If there is no closing quote, return NULL. + */ +char *strdup_esq(const char *str) +{ + char *d, *ret; + + /* If there is no quote, return normal strdup_esc() */ + d = strpbrk_esc((char *)str, "\"'"); + if (!d) + return strdup_esc(str); + + ret = strdup(str); + if (!ret) + return NULL; + + d = ret; + do { + d = strpbrk(d, "\\\"\'"); + if (!d) + break; + + if (*d == '"' || *d == '\'') { + /* This is a non-escaped quote */ + int quote = *d; + int len = strlen(d + 1) + 1; + + /* + * Remove the start quote and remove consumed escape (backslash + * before quote) and remove the end quote. If there is no end + * quote, it is an input error. + */ + memmove(d, d + 1, len); + d = remove_consumed_esc(d, len, quote); + if (!d) + goto error; + memmove(d, d + 1, strlen(d + 1) + 1); + } + if (*d == '\\') { + memmove(d, d + 1, strlen(d + 1) + 1); + if (*d == '\\') { + /* double backslash -- keep the second one. 
*/ + d++; + } + } + } while (*d != '\0'); + + return ret; + +error: + free(ret); + return NULL; +} + unsigned int hex(char c) { if (c >= '0' && c <= '9') diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index 52cb8ba057c7..4c8bff47cfd3 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -37,6 +37,8 @@ char *asprintf__tp_filter_pids(size_t npids, pid_t *pids); char *strpbrk_esc(char *str, const char *stopset); char *strdup_esc(const char *str); +char *strpbrk_esq(char *str, const char *stopset); +char *strdup_esq(const char *str); unsigned int hex(char c); char *strreplace_chars(char needle, const char *haystack, const char *replace); diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 2b04f47f4db0..b1d259f590e9 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -21,6 +21,7 @@ #include <perf/cpumap.h> #include "env.h" +#include "perf.h" #include "svghelper.h" static u64 first_time, last_time; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index e398abfd13a0..957143fbf8a0 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -7,15 +7,13 @@ #include <unistd.h> #include <inttypes.h> +#include "compress.h" #include "dso.h" +#include "libbfd.h" #include "map.h" #include "maps.h" #include "symbol.h" #include "symsrc.h" -#include "demangle-cxx.h" -#include "demangle-ocaml.h" -#include "demangle-java.h" -#include "demangle-rust.h" #include "machine.h" #include "vdso.h" #include "debug.h" @@ -27,18 +25,6 @@ #include <symbol/kallsyms.h> #include <internal/lib.h> -#ifdef HAVE_LIBBFD_SUPPORT -#define PACKAGE 'perf' -#include <bfd.h> -#endif - -#if defined(HAVE_LIBBFD_SUPPORT) || defined(HAVE_CPLUS_DEMANGLE_SUPPORT) -#ifndef DMGL_PARAMS -#define DMGL_PARAMS (1 << 0) /* Include function args */ -#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ -#endif -#endif - #ifndef EM_AARCH64 #define EM_AARCH64 183 /* ARM 64 bit */ #endif @@ -278,60 +264,6 @@ static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr) return -1; } -static bool want_demangle(bool is_kernel_sym) -{ - return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; -} - -/* - * Demangle C++ function signature, typically replaced by demangle-cxx.cpp - * version. - */ -__weak char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused, - bool modifiers __maybe_unused) -{ -#ifdef HAVE_LIBBFD_SUPPORT - int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); - - return bfd_demangle(NULL, str, flags); -#elif defined(HAVE_CPLUS_DEMANGLE_SUPPORT) - int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); - - return cplus_demangle(str, flags); -#else - return NULL; -#endif -} - -static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) -{ - char *demangled = NULL; - - /* - * We need to figure out if the object was created from C++ sources - * DWARF DW_compile_unit has this, but we don't always have access - * to it... - */ - if (!want_demangle(dso__kernel(dso) || kmodule)) - return demangled; - - demangled = cxx_demangle_sym(elf_name, verbose > 0, verbose > 0); - if (demangled == NULL) { - demangled = ocaml_demangle_sym(elf_name); - if (demangled == NULL) { - demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); - } - } - else if (rust_is_mangled(demangled)) - /* - * Input to Rust demangling is the BFD-demangled - * name which it Rust-demangles in place. 
- */ - rust_demangle_sym(demangled); - - return demangled; -} - struct rel_info { u32 nr_entries; u32 *sorted; @@ -617,7 +549,7 @@ static bool get_plt_got_name(GElf_Shdr *shdr, size_t i, /* Get the associated symbol */ gelf_getsym(di->dynsym_data, vr->sym_idx, &sym); sym_name = elf_sym__name(&sym, di->dynstr_data); - demangled = demangle_sym(di->dso, 0, sym_name); + demangled = dso__demangle_sym(di->dso, /*kmodule=*/0, sym_name); if (demangled != NULL) sym_name = demangled; @@ -815,7 +747,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) gelf_getsym(syms, get_rel_symidx(&ri, idx), &sym); elf_name = elf_sym__name(&sym, symstrs); - demangled = demangle_sym(dso, 0, elf_name); + demangled = dso__demangle_sym(dso, /*kmodule=*/0, elf_name); if (demangled) elf_name = demangled; if (*elf_name) @@ -844,11 +776,6 @@ out_elf_end: return 0; } -char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name) -{ - return demangle_sym(dso, kmodule, elf_name); -} - /* * Align offset to 4 bytes as needed for note name and descriptor data. */ @@ -933,43 +860,16 @@ out: return err; } -#ifdef HAVE_LIBBFD_BUILDID_SUPPORT - static int read_build_id(const char *filename, struct build_id *bid) { size_t size = sizeof(bid->data); - int err = -1; - bfd *abfd; - - abfd = bfd_openr(filename, NULL); - if (!abfd) - return -1; - - if (!bfd_check_format(abfd, bfd_object)) { - pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename); - goto out_close; - } - - if (!abfd->build_id || abfd->build_id->size > size) - goto out_close; - - memcpy(bid->data, abfd->build_id->data, abfd->build_id->size); - memset(bid->data + abfd->build_id->size, 0, size - abfd->build_id->size); - err = bid->size = abfd->build_id->size; - -out_close: - bfd_close(abfd); - return err; -} - -#else // HAVE_LIBBFD_BUILDID_SUPPORT - -static int read_build_id(const char *filename, struct build_id *bid) -{ - size_t size = sizeof(bid->data); - int fd, err = -1; + int fd, err; Elf *elf; + err = libbfd__read_build_id(filename, bid); + if (err >= 0) + goto out; + if (size < BUILD_ID_SIZE) goto out; @@ -994,8 +894,6 @@ out: return err; } -#endif // HAVE_LIBBFD_BUILDID_SUPPORT - int filename__read_build_id(const char *filename, struct build_id *bid) { struct kmod_path m = { .name = NULL, }; @@ -1004,6 +902,8 @@ int filename__read_build_id(const char *filename, struct build_id *bid) if (!filename) return -EFAULT; + if (!is_regular_file(filename)) + return -EWOULDBLOCK; err = kmod_path__parse(&m, filename); if (err) @@ -1079,44 +979,6 @@ out: return err; } -#ifdef HAVE_LIBBFD_SUPPORT - -int filename__read_debuglink(const char *filename, char *debuglink, - size_t size) -{ - int err = -1; - asection *section; - bfd *abfd; - - abfd = bfd_openr(filename, NULL); - if (!abfd) - return -1; - - if (!bfd_check_format(abfd, bfd_object)) { - pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename); - goto out_close; - } - - section = bfd_get_section_by_name(abfd, ".gnu_debuglink"); - if (!section) - goto out_close; - - if (section->size > size) - goto out_close; - - if (!bfd_get_section_contents(abfd, section, debuglink, 0, - section->size)) - goto out_close; - - err = 0; - -out_close: - bfd_close(abfd); - return err; -} - -#else - int filename__read_debuglink(const char *filename, char *debuglink, size_t size) { @@ -1128,6 +990,10 @@ int filename__read_debuglink(const char *filename, char *debuglink, Elf_Scn *sec; Elf_Kind ek; + err = libbfd_filename__read_debuglink(filename, debuglink, size); + if (err >= 0) + goto out; + fd = 
open(filename, O_RDONLY); if (fd < 0) goto out; @@ -1169,35 +1035,6 @@ out: return err; } -#endif - -static int dso__swap_init(struct dso *dso, unsigned char eidata) -{ - static unsigned int const endian = 1; - - dso__set_needs_swap(dso, DSO_SWAP__NO); - - switch (eidata) { - case ELFDATA2LSB: - /* We are big endian, DSO is little endian. */ - if (*(unsigned char const *)&endian != 1) - dso__set_needs_swap(dso, DSO_SWAP__YES); - break; - - case ELFDATA2MSB: - /* We are little endian, DSO is big endian. */ - if (*(unsigned char const *)&endian != 0) - dso__set_needs_swap(dso, DSO_SWAP__YES); - break; - - default: - pr_err("unrecognized DSO data encoding %d\n", eidata); - return -EINVAL; - } - - return 0; -} - bool symsrc__possibly_runtime(struct symsrc *ss) { return ss->dynsym || ss->opdsec; @@ -1226,6 +1063,81 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) ehdr.e_type == ET_DYN; } +static Elf *read_gnu_debugdata(struct dso *dso, Elf *elf, const char *name, int *fd_ret) +{ + Elf *elf_embedded; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Scn *scn; + Elf_Data *scn_data; + FILE *wrapped; + size_t shndx; + char temp_filename[] = "/tmp/perf.gnu_debugdata.elf.XXXXXX"; + int ret, temp_fd; + + if (gelf_getehdr(elf, &ehdr) == NULL) { + pr_debug("%s: cannot read %s ELF file.\n", __func__, name); + *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; + return NULL; + } + + scn = elf_section_by_name(elf, &ehdr, &shdr, ".gnu_debugdata", &shndx); + if (!scn) { + *dso__load_errno(dso) = -ENOENT; + return NULL; + } + + if (shdr.sh_type == SHT_NOBITS) { + pr_debug("%s: .gnu_debugdata of ELF file %s has no data.\n", __func__, name); + *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; + return NULL; + } + + scn_data = elf_rawdata(scn, NULL); + if (!scn_data) { + pr_debug("%s: error reading .gnu_debugdata of %s: %s\n", __func__, + name, elf_errmsg(-1)); + *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; + return NULL; + } + + wrapped = fmemopen(scn_data->d_buf, scn_data->d_size, "r"); + if (!wrapped) { + pr_debug("%s: fmemopen: %s\n", __func__, strerror(errno)); + *dso__load_errno(dso) = -errno; + return NULL; + } + + temp_fd = mkstemp(temp_filename); + if (temp_fd < 0) { + pr_debug("%s: mkstemp: %s\n", __func__, strerror(errno)); + *dso__load_errno(dso) = -errno; + fclose(wrapped); + return NULL; + } + unlink(temp_filename); + + ret = lzma_decompress_stream_to_file(wrapped, temp_fd); + fclose(wrapped); + if (ret < 0) { + *dso__load_errno(dso) = -errno; + close(temp_fd); + return NULL; + } + + elf_embedded = elf_begin(temp_fd, PERF_ELF_C_READ_MMAP, NULL); + if (!elf_embedded) { + pr_debug("%s: error reading .gnu_debugdata of %s: %s\n", __func__, + name, elf_errmsg(-1)); + *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; + close(temp_fd); + return NULL; + } + pr_debug("%s: using .gnu_debugdata of %s\n", __func__, name); + *fd_ret = temp_fd; + return elf_embedded; +} + int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, enum dso_binary_type type) { @@ -1254,6 +1166,19 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, goto out_close; } + if (type == DSO_BINARY_TYPE__GNU_DEBUGDATA) { + int new_fd; + Elf *embedded = read_gnu_debugdata(dso, elf, name, &new_fd); + + if (!embedded) + goto out_close; + + elf_end(elf); + close(fd); + fd = new_fd; + elf = embedded; + } + if (gelf_getehdr(elf, &ehdr) == NULL) { *dso__load_errno(dso) = DSO_LOAD_ERRNO__INVALID_ELF; pr_debug("%s: cannot get elf header.\n", __func__); @@ -1522,8 +1447,11 @@ static int 
dso__process_kernel_symbol(struct dso *dso, struct map *map, map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY); } dso__set_symtab_type(curr_dso, dso__symtab_type(dso)); - if (maps__insert(kmaps, curr_map)) + if (maps__insert(kmaps, curr_map)) { + dso__put(curr_dso); + map__put(curr_map); return -1; + } dsos__add(&maps__machine(kmaps)->dsos, curr_dso); dso__set_loaded(curr_dso); dso__put(*curr_dsop); @@ -1669,6 +1597,12 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, continue; } + /* Reject RISCV ELF "mapping symbols" */ + if (ehdr.e_machine == EM_RISCV) { + if (elf_name[0] == '$' && strchr("dx", elf_name[1])) + continue; + } + if (runtime_ss->opdsec && sym.st_shndx == runtime_ss->opdidx) { u32 offset = sym.st_value - syms_ss->opdshdr.sh_addr; u64 *opd = opddata->d_buf + offset; @@ -1776,7 +1710,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, } } - demangled = demangle_sym(dso, kmodule, elf_name); + demangled = dso__demangle_sym(dso, kmodule, elf_name); if (demangled != NULL) elf_name = demangled; @@ -1852,10 +1786,23 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, kmodule, 1); if (err < 0) return err; - err += nr; + nr += err; } - return err; + /* + * The .gnu_debugdata is a special situation: it contains a symbol + * table, but the runtime file may also contain dynsym entries which are + * not present there. We need to load both. + */ + if (syms_ss->type == DSO_BINARY_TYPE__GNU_DEBUGDATA && runtime_ss->dynsym) { + err = dso__load_sym_internal(dso, map, runtime_ss, runtime_ss, + kmodule, 1); + if (err < 0) + return err; + nr += err; + } + + return nr; } static int elf_read_maps(Elf *elf, bool exe, mapfn_t mapfn, void *data) diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index c6f369b5d893..c6b17c14a2e9 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -4,7 +4,6 @@ #include <errno.h> #include <unistd.h> -#include <stdio.h> #include <fcntl.h> #include <string.h> #include <stdlib.h> @@ -43,7 +42,7 @@ static int read_build_id(void *note_data, size_t note_len, struct build_id *bid, void *ptr; ptr = note_data; - while (ptr < (note_data + note_len)) { + while ((ptr + sizeof(*nhdr)) < (note_data + note_len)) { const char *name; size_t namesz, descsz; @@ -88,139 +87,118 @@ int filename__read_debuglink(const char *filename __maybe_unused, */ int filename__read_build_id(const char *filename, struct build_id *bid) { - FILE *fp; - int ret = -1; - bool need_swap = false; - u8 e_ident[EI_NIDENT]; - size_t buf_size; - void *buf; - int i; + int fd, ret = -1; + bool need_swap = false, elf32; + union { + struct { + Elf32_Ehdr ehdr32; + Elf32_Phdr *phdr32; + }; + struct { + Elf64_Ehdr ehdr64; + Elf64_Phdr *phdr64; + }; + } hdrs; + void *phdr, *buf = NULL; + ssize_t phdr_size, ehdr_size, buf_size = 0; + + if (!filename) + return -EFAULT; + if (!is_regular_file(filename)) + return -EWOULDBLOCK; - fp = fopen(filename, "r"); - if (fp == NULL) + fd = open(filename, O_RDONLY); + if (fd < 0) return -1; - if (fread(e_ident, sizeof(e_ident), 1, fp) != 1) + if (read(fd, hdrs.ehdr32.e_ident, EI_NIDENT) != EI_NIDENT) goto out; - if (memcmp(e_ident, ELFMAG, SELFMAG) || - e_ident[EI_VERSION] != EV_CURRENT) + if (memcmp(hdrs.ehdr32.e_ident, ELFMAG, SELFMAG) || + hdrs.ehdr32.e_ident[EI_VERSION] != EV_CURRENT) goto out; - need_swap = check_need_swap(e_ident[EI_DATA]); - - /* for simplicity */ - fseek(fp, 0, SEEK_SET); + need_swap = 
check_need_swap(hdrs.ehdr32.e_ident[EI_DATA]); + elf32 = hdrs.ehdr32.e_ident[EI_CLASS] == ELFCLASS32; + ehdr_size = (elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64)) - EI_NIDENT; - if (e_ident[EI_CLASS] == ELFCLASS32) { - Elf32_Ehdr ehdr; - Elf32_Phdr *phdr; - - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) - goto out; + if (read(fd, + (elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64) + EI_NIDENT, + ehdr_size) != ehdr_size) + goto out; - if (need_swap) { - ehdr.e_phoff = bswap_32(ehdr.e_phoff); - ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); - ehdr.e_phnum = bswap_16(ehdr.e_phnum); + if (need_swap) { + if (elf32) { + hdrs.ehdr32.e_phoff = bswap_32(hdrs.ehdr32.e_phoff); + hdrs.ehdr32.e_phentsize = bswap_16(hdrs.ehdr32.e_phentsize); + hdrs.ehdr32.e_phnum = bswap_16(hdrs.ehdr32.e_phnum); + } else { + hdrs.ehdr64.e_phoff = bswap_64(hdrs.ehdr64.e_phoff); + hdrs.ehdr64.e_phentsize = bswap_16(hdrs.ehdr64.e_phentsize); + hdrs.ehdr64.e_phnum = bswap_16(hdrs.ehdr64.e_phnum); } + } + if ((elf32 && hdrs.ehdr32.e_phentsize != sizeof(Elf32_Phdr)) || + (!elf32 && hdrs.ehdr64.e_phentsize != sizeof(Elf64_Phdr))) + goto out; - buf_size = ehdr.e_phentsize * ehdr.e_phnum; - buf = malloc(buf_size); - if (buf == NULL) - goto out; - - fseek(fp, ehdr.e_phoff, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; - - for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { - void *tmp; - long offset; - - if (need_swap) { - phdr->p_type = bswap_32(phdr->p_type); - phdr->p_offset = bswap_32(phdr->p_offset); - phdr->p_filesz = bswap_32(phdr->p_filesz); - } - - if (phdr->p_type != PT_NOTE) - continue; - - buf_size = phdr->p_filesz; - offset = phdr->p_offset; - tmp = realloc(buf, buf_size); - if (tmp == NULL) - goto out_free; + phdr_size = elf32 ? sizeof(Elf32_Phdr) * hdrs.ehdr32.e_phnum + : sizeof(Elf64_Phdr) * hdrs.ehdr64.e_phnum; + phdr = malloc(phdr_size); + if (phdr == NULL) + goto out; - buf = tmp; - fseek(fp, offset, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; + lseek(fd, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET); + if (read(fd, phdr, phdr_size) != phdr_size) + goto out_free; - ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) { - ret = bid->size; - break; - } - } - } else { - Elf64_Ehdr ehdr; - Elf64_Phdr *phdr; + if (elf32) + hdrs.phdr32 = phdr; + else + hdrs.phdr64 = phdr; - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) - goto out; + for (int i = 0; i < (elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum); i++) { + ssize_t p_filesz; if (need_swap) { - ehdr.e_phoff = bswap_64(ehdr.e_phoff); - ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); - ehdr.e_phnum = bswap_16(ehdr.e_phnum); + if (elf32) { + hdrs.phdr32[i].p_type = bswap_32(hdrs.phdr32[i].p_type); + hdrs.phdr32[i].p_offset = bswap_32(hdrs.phdr32[i].p_offset); + hdrs.phdr32[i].p_filesz = bswap_32(hdrs.phdr32[i].p_filesz); + } else { + hdrs.phdr64[i].p_type = bswap_32(hdrs.phdr64[i].p_type); + hdrs.phdr64[i].p_offset = bswap_64(hdrs.phdr64[i].p_offset); + hdrs.phdr64[i].p_filesz = bswap_64(hdrs.phdr64[i].p_filesz); + } } + if ((elf32 ? hdrs.phdr32[i].p_type : hdrs.phdr64[i].p_type) != PT_NOTE) + continue; - buf_size = ehdr.e_phentsize * ehdr.e_phnum; - buf = malloc(buf_size); - if (buf == NULL) - goto out; - - fseek(fp, ehdr.e_phoff, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; - - for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { + p_filesz = elf32 ? 
hdrs.phdr32[i].p_filesz : hdrs.phdr64[i].p_filesz; + if (p_filesz > buf_size) { void *tmp; - long offset; - if (need_swap) { - phdr->p_type = bswap_32(phdr->p_type); - phdr->p_offset = bswap_64(phdr->p_offset); - phdr->p_filesz = bswap_64(phdr->p_filesz); - } - - if (phdr->p_type != PT_NOTE) - continue; - - buf_size = phdr->p_filesz; - offset = phdr->p_offset; + buf_size = p_filesz; tmp = realloc(buf, buf_size); if (tmp == NULL) goto out_free; - buf = tmp; - fseek(fp, offset, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; + } + lseek(fd, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET); + if (read(fd, buf, p_filesz) != p_filesz) + goto out_free; - ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) { - ret = bid->size; - break; - } + ret = read_build_id(buf, p_filesz, bid, need_swap); + if (ret == 0) { + ret = bid->size; + break; } } out_free: free(buf); + free(phdr); out: - fclose(fp); + close(fd); return ret; } @@ -343,7 +321,7 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, struct symsrc *runtime_ss __maybe_unused, int kmodule __maybe_unused) { - struct build_id bid; + struct build_id bid = { .size = 0, }; int ret; ret = fd__is_64_bit(ss->fd); @@ -381,13 +359,6 @@ void symbol__elf_init(void) { } -char *dso__demangle_sym(struct dso *dso __maybe_unused, - int kmodule __maybe_unused, - const char *elf_name __maybe_unused) -{ - return NULL; -} - bool filename__has_section(const char *filename __maybe_unused, const char *sec __maybe_unused) { return false; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 9e5940b5bc59..814f960fa8f8 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -18,6 +18,12 @@ #include "annotate.h" #include "build-id.h" #include "cap.h" +#include "cpumap.h" +#include "debug.h" +#include "demangle-cxx.h" +#include "demangle-java.h" +#include "demangle-ocaml.h" +#include "demangle-rust-v0.h" #include "dso.h" #include "util.h" // lsdir() #include "debug.h" @@ -35,6 +41,7 @@ #include "header.h" #include "path.h" #include <linux/ctype.h> +#include <linux/log2.h> #include <linux/zalloc.h> #include <elf.h> @@ -84,6 +91,7 @@ static enum dso_binary_type binary_type_symtab[] = { DSO_BINARY_TYPE__FEDORA_DEBUGINFO, DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + DSO_BINARY_TYPE__GNU_DEBUGDATA, DSO_BINARY_TYPE__SYSTEM_PATH_DSO, DSO_BINARY_TYPE__GUEST_KMODULE, DSO_BINARY_TYPE__GUEST_KMODULE_COMP, @@ -96,10 +104,21 @@ static enum dso_binary_type binary_type_symtab[] = { #define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab) -static bool symbol_type__filter(char symbol_type) +static bool symbol_type__filter(char __symbol_type) { - symbol_type = toupper(symbol_type); - return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B'; + // Since 'U' == undefined and 'u' == unique global symbol, we can't use toupper there + // 'N' is for debugging symbols, 'n' is a non-data, non-code, non-debug read-only section. + // According to 'man nm'. 
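+ // For example (illustrative, not from the patch): symbol_type__filter('t') and
+ // symbol_type__filter('u') both return true, while 'U' (an undefined symbol)
+ // matches neither the uppercased T/W/D/B set nor the raw 'u'/'l'/'N'/'1' set
+ // and stays rejected.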
+ // 'N' first seen in: + // ffffffff9b35d130 N __pfx__RNCINvNtNtNtCsbDUBuN8AbD4_4core4iter8adapters3map12map_try_foldjNtCs6vVzKs5jPr6_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ + // a seemingly Rust mangled name + // Ditto for '1': + // root@x1:~# grep ' 1 ' /proc/kallsyms + // ffffffffb098bc00 1 __pfx__RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ + // ffffffffb098bc10 1 _RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ + char symbol_type = toupper(__symbol_type); + return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B' || + __symbol_type == 'u' || __symbol_type == 'l' || __symbol_type == 'N' || __symbol_type == '1'; } static int prefix_underscores_count(const char *str) @@ -154,6 +173,13 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb) else if ((a == 0) && (b > 0)) return SYMBOL_B; + if (syma->type != symb->type) { + if (syma->type == STT_NOTYPE) + return SYMBOL_B; + if (symb->type == STT_NOTYPE) + return SYMBOL_A; + } + /* Prefer a non weak symbol over a weak one */ a = syma->binding == STB_WEAK; b = symb->binding == STB_WEAK; @@ -614,7 +640,7 @@ void dso__sort_by_name(struct dso *dso) { mutex_lock(dso__lock(dso)); if (!dso__sorted_by_name(dso)) { - size_t len; + size_t len = 0; dso__set_symbol_names(dso, symbols__sort_by_name(dso__symbols(dso), &len)); if (dso__symbol_names(dso)) { @@ -929,7 +955,8 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, pos->end -= delta; } - if (count == 0) { + if (map__start(initial_map) <= (pos->start + delta) && + (pos->start + delta) < map__end(initial_map)) { map__zput(curr_map); curr_map = map__get(initial_map); goto add_symbol; @@ -938,11 +965,11 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, if (dso__kernel(dso) == DSO_SPACE__KERNEL_GUEST) snprintf(dso_name, sizeof(dso_name), "[guest.kernel].%d", - kernel_range++); + kernel_range); else snprintf(dso_name, sizeof(dso_name), "[kernel].%d", - kernel_range++); + kernel_range); ndso = dso__new(dso_name); map__zput(curr_map); @@ -950,6 +977,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, return -1; dso__set_kernel(ndso, dso__kernel(dso)); + dso__set_loaded(ndso); curr_map = map__new2(pos->start, ndso); if (curr_map == NULL) { @@ -963,6 +991,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, dso__put(ndso); return -1; } + dso__put(ndso); ++kernel_range; } else if (delta) { /* Kernel was relocated at boot time */ @@ -1405,6 +1434,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, goto out_err; } } + map__zput(new_node->map); free(new_node); } @@ -1566,136 +1596,6 @@ out_failure: return -1; } -#ifdef HAVE_LIBBFD_SUPPORT -#define PACKAGE 'perf' -#include <bfd.h> - -static int bfd_symbols__cmpvalue(const void *a, const void *b) -{ - const asymbol *as = *(const asymbol **)a, *bs = *(const asymbol **)b; - - if (bfd_asymbol_value(as) != bfd_asymbol_value(bs)) - return 
bfd_asymbol_value(as) - bfd_asymbol_value(bs); - - return bfd_asymbol_name(as)[0] - bfd_asymbol_name(bs)[0]; -} - -static int bfd2elf_binding(asymbol *symbol) -{ - if (symbol->flags & BSF_WEAK) - return STB_WEAK; - if (symbol->flags & BSF_GLOBAL) - return STB_GLOBAL; - if (symbol->flags & BSF_LOCAL) - return STB_LOCAL; - return -1; -} - -int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) -{ - int err = -1; - long symbols_size, symbols_count, i; - asection *section; - asymbol **symbols, *sym; - struct symbol *symbol; - bfd *abfd; - u64 start, len; - - abfd = bfd_openr(debugfile, NULL); - if (!abfd) - return -1; - - if (!bfd_check_format(abfd, bfd_object)) { - pr_debug2("%s: cannot read %s bfd file.\n", __func__, - dso->long_name); - goto out_close; - } - - if (bfd_get_flavour(abfd) == bfd_target_elf_flavour) - goto out_close; - - symbols_size = bfd_get_symtab_upper_bound(abfd); - if (symbols_size == 0) { - bfd_close(abfd); - return 0; - } - - if (symbols_size < 0) - goto out_close; - - symbols = malloc(symbols_size); - if (!symbols) - goto out_close; - - symbols_count = bfd_canonicalize_symtab(abfd, symbols); - if (symbols_count < 0) - goto out_free; - - section = bfd_get_section_by_name(abfd, ".text"); - if (section) { - for (i = 0; i < symbols_count; ++i) { - if (!strcmp(bfd_asymbol_name(symbols[i]), "__ImageBase") || - !strcmp(bfd_asymbol_name(symbols[i]), "__image_base__")) - break; - } - if (i < symbols_count) { - /* PE symbols can only have 4 bytes, so use .text high bits */ - dso->text_offset = section->vma - (u32)section->vma; - dso->text_offset += (u32)bfd_asymbol_value(symbols[i]); - dso->text_end = (section->vma - dso->text_offset) + section->size; - } else { - dso->text_offset = section->vma - section->filepos; - dso->text_end = section->filepos + section->size; - } - } - - qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue); - -#ifdef bfd_get_section -#define bfd_asymbol_section bfd_get_section -#endif - for (i = 0; i < symbols_count; ++i) { - sym = symbols[i]; - section = bfd_asymbol_section(sym); - if (bfd2elf_binding(sym) < 0) - continue; - - while (i + 1 < symbols_count && - bfd_asymbol_section(symbols[i + 1]) == section && - bfd2elf_binding(symbols[i + 1]) < 0) - i++; - - if (i + 1 < symbols_count && - bfd_asymbol_section(symbols[i + 1]) == section) - len = symbols[i + 1]->value - sym->value; - else - len = section->size - sym->value; - - start = bfd_asymbol_value(sym) - dso->text_offset; - symbol = symbol__new(start, len, bfd2elf_binding(sym), STT_FUNC, - bfd_asymbol_name(sym)); - if (!symbol) - goto out_free; - - symbols__insert(dso__symbols(dso), symbol); - } -#ifdef bfd_get_section -#undef bfd_asymbol_section -#endif - - symbols__fixup_end(dso__symbols(dso), false); - symbols__fixup_duplicate(dso__symbols(dso)); - dso__set_adjust_symbols(dso, true); - - err = 0; -out_free: - free(symbols); -out_close: - bfd_close(abfd); - return err; -} -#endif - static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, enum dso_binary_type type) { @@ -1708,6 +1608,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO: case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: + case DSO_BINARY_TYPE__GNU_DEBUGDATA: return !kmod && dso__kernel(dso) == DSO_SPACE__USER; case DSO_BINARY_TYPE__KALLSYMS: @@ -1793,13 +1694,13 @@ int dso__load(struct dso *dso, struct map *map) struct symsrc *syms_ss = NULL, *runtime_ss = NULL; bool kmod; bool perfmap; 
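A hedged aside on the build-id pattern this patch applies throughout (the path below is illustrative only): the struct is zero-initialized at declaration, a positive return indicates success, and formatting goes through the bounded build_id__snprintf():

	struct build_id bid = { .size = 0, };
	char sbuild_id[SBUILD_ID_SIZE];

	if (filename__read_build_id("/usr/bin/ls", &bid) > 0) {
		build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id));
		pr_debug("build id: %s\n", sbuild_id);
	}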
- struct build_id bid; struct nscookie nsc; char newmapname[PATH_MAX]; const char *map_path = dso__long_name(dso); mutex_lock(dso__lock(dso)); - perfmap = strncmp(dso__name(dso), "/tmp/perf-", 10) == 0; + perfmap = is_perf_pid_map_name(map_path); + if (perfmap) { if (dso__nsinfo(dso) && (dso__find_perf_map(newmapname, sizeof(newmapname), @@ -1816,10 +1717,7 @@ int dso__load(struct dso *dso, struct map *map) goto out; } - kmod = dso__symtab_type(dso) == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || - dso__symtab_type(dso) == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP || - dso__symtab_type(dso) == DSO_BINARY_TYPE__GUEST_KMODULE || - dso__symtab_type(dso) == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; + kmod = dso__is_kmod(dso); if (dso__kernel(dso) && !kmod) { if (dso__kernel(dso) == DSO_SPACE__KERNEL) @@ -1852,10 +1750,11 @@ int dso__load(struct dso *dso, struct map *map) /* * Read the build id if possible. This is required for - * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work + * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work. */ - if (!dso__has_build_id(dso) && - is_regular_file(dso__long_name(dso))) { + if (!dso__has_build_id(dso)) { + struct build_id bid = { .size = 0, }; + __symbol__join_symfs(name, PATH_MAX, dso__long_name(dso)); if (filename__read_build_id(name, &bid) > 0) dso__set_build_id(dso, &bid); @@ -1932,6 +1831,9 @@ int dso__load(struct dso *dso, struct map *map) if (next_slot) { ss_pos++; + if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) + dso__set_binary_type(dso, symtab_type); + if (syms_ss && runtime_ss) break; } else { @@ -2101,10 +2003,11 @@ static bool filename__readable(const char *file) static char *dso__find_kallsyms(struct dso *dso, struct map *map) { - struct build_id bid; + struct build_id bid = { .size = 0, }; char sbuild_id[SBUILD_ID_SIZE]; bool is_host = false; char path[PATH_MAX]; + struct maps *kmaps = map__kmaps(map); if (!dso__has_build_id(dso)) { /* @@ -2131,7 +2034,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) goto proc_kallsyms; } - build_id__sprintf(dso__bid(dso), sbuild_id); + build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id)); /* Find kallsyms in build-id cache with kcore */ scnprintf(path, sizeof(path), "%s/%s/%s", @@ -2141,8 +2044,13 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) return strdup(path); /* Use current /proc/kallsyms if possible */ - if (is_host) { proc_kallsyms: + if (kmaps) { + struct machine *machine = maps__machine(kmaps); + + scnprintf(path, sizeof(path), "%s/proc/kallsyms", machine->root_dir); + return strdup(path); + } else if (is_host) { return strdup("/proc/kallsyms"); } @@ -2426,14 +2334,14 @@ static bool symbol__read_kptr_restrict(void) { bool value = false; FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); + bool used_root; + bool cap_syslog = perf_cap__capable(CAP_SYSLOG, &used_root); if (fp != NULL) { char line[8]; if (fgets(line, sizeof(line), fp) != NULL) - value = perf_cap__capable(CAP_SYSLOG) ? - (atoi(line) >= 2) : - (atoi(line) != 0); + value = cap_syslog ? 
(atoi(line) >= 2) : (atoi(line) != 0); fclose(fp); } @@ -2441,7 +2349,7 @@ static bool symbol__read_kptr_restrict(void) /* Per kernel/kallsyms.c: * we also restrict when perf_event_paranoid > 1 w/o CAP_SYSLOG */ - if (perf_event_paranoid() > 1 && !perf_cap__capable(CAP_SYSLOG)) + if (perf_event_paranoid() > 1 && !cap_syslog) value = true; return value; @@ -2462,6 +2370,36 @@ int symbol__annotation_init(void) return 0; } +static int setup_parallelism_bitmap(void) +{ + struct perf_cpu_map *map; + struct perf_cpu cpu; + int i, err = -1; + + if (symbol_conf.parallelism_list_str == NULL) + return 0; + + map = perf_cpu_map__new(symbol_conf.parallelism_list_str); + if (map == NULL) { + pr_err("failed to parse parallelism filter list\n"); + return -1; + } + + bitmap_fill(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1); + perf_cpu_map__for_each_cpu(cpu, i, map) { + if (cpu.cpu <= 0 || cpu.cpu > MAX_NR_CPUS) { + pr_err("Requested parallelism level %d is invalid.\n", cpu.cpu); + goto out_delete_map; + } + __clear_bit(cpu.cpu, symbol_conf.parallelism_filter); + } + + err = 0; +out_delete_map: + perf_cpu_map__put(map); + return err; +} + int symbol__init(struct perf_env *env) { const char *symfs; @@ -2481,6 +2419,9 @@ int symbol__init(struct perf_env *env) return -1; } + if (setup_parallelism_bitmap()) + return -1; + if (setup_list(&symbol_conf.dso_list, symbol_conf.dso_list_str, "dso") < 0) return -1; @@ -2601,3 +2542,79 @@ int symbol__validate_sym_arguments(void) } return 0; } + +static bool want_demangle(bool is_kernel_sym) +{ + return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; +} + +/* + * Demangle C++ function signature, typically replaced by demangle-cxx.cpp + * version. + */ +#ifndef HAVE_CXA_DEMANGLE_SUPPORT +char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused, + bool modifiers __maybe_unused) +{ +#ifdef HAVE_LIBBFD_SUPPORT + int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); + + return bfd_demangle(NULL, str, flags); +#elif defined(HAVE_CPLUS_DEMANGLE_SUPPORT) + int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); + + return cplus_demangle(str, flags); +#else + return NULL; +#endif +} +#endif /* !HAVE_CXA_DEMANGLE_SUPPORT */ + +char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name) +{ + struct demangle rust_demangle = { + .style = DemangleStyleUnknown, + }; + char *demangled = NULL; + + /* + * We need to figure out if the object was created from C++ sources + * DWARF DW_compile_unit has this, but we don't always have access + * to it... + */ + if (!want_demangle((dso && dso__kernel(dso)) || kmodule)) + return demangled; + + rust_demangle_demangle(elf_name, &rust_demangle); + if (rust_demangle_is_known(&rust_demangle)) { + /* A rust mangled name. */ + if (rust_demangle.mangled_len == 0) + return demangled; + + for (size_t buf_len = roundup_pow_of_two(rust_demangle.mangled_len * 2); + buf_len < 1024 * 1024; buf_len += 32) { + char *tmp = realloc(demangled, buf_len); + + if (!tmp) { + /* Failure to grow output buffer, return what is there. */ + return demangled; + } + demangled = tmp; + if (rust_demangle_display_demangle(&rust_demangle, demangled, buf_len, + /*alternate=*/true) == OverflowOk) + return demangled; + } + /* Buffer exceeded sensible bounds, return what is there. 
*/ + return demangled; + } + + demangled = cxx_demangle_sym(elf_name, verbose > 0, verbose > 0); + if (demangled) + return demangled; + + demangled = ocaml_demangle_sym(elf_name); + if (demangled) + return demangled; + + return java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); +} diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index c114bbceef40..7a80d2c14d9b 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -3,6 +3,8 @@ #define __PERF_SYMBOL_CONF 1 #include <stdbool.h> +#include <linux/bitmap.h> +#include "perf.h" struct strlist; struct intlist; @@ -41,12 +43,15 @@ struct symbol_conf { report_individual_block, inline_name, disable_add2line_warn, - buildid_mmap2, + no_buildid_mmap2, guest_code, lazy_load_kernel_maps, keep_exited_threads, annotate_data_member, - annotate_data_sample; + annotate_data_sample, + skip_empty, + enable_latency, + prefer_latency; const char *vmlinux_name, *kallsyms_name, *source_prefix, @@ -61,9 +66,10 @@ struct symbol_conf { *pid_list_str, *tid_list_str, *sym_list_str, + *parallelism_list_str, *col_width_list_str, *bt_stop_list_str; - char *addr2line_path; + const char *addr2line_path; unsigned long time_quantum; struct strlist *dso_list, *comm_list, @@ -81,6 +87,7 @@ struct symbol_conf { int pad_output_len_dso; int group_sort_idx; int addr_range; + DECLARE_BITMAP(parallelism_filter, MAX_NR_CPUS + 1); }; extern struct symbol_conf symbol_conf; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 5498048f56ea..2ba9fa25e00a 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-only +// SPDX-License-Identifier: GPL-2.0-only #include "util/cgroup.h" #include "util/data.h" @@ -38,6 +38,7 @@ #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ #include <api/fs/fs.h> #include <api/io.h> +#include <api/io_dir.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> @@ -47,7 +48,7 @@ unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT; -int perf_tool__process_synth_event(struct perf_tool *tool, +int perf_tool__process_synth_event(const struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process) @@ -187,7 +188,7 @@ static int perf_event__prepare_comm(union perf_event *event, pid_t pid, pid_t ti return 0; } -pid_t perf_event__synthesize_comm(struct perf_tool *tool, +pid_t perf_event__synthesize_comm(const struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine) @@ -218,7 +219,7 @@ static void perf_event__get_ns_link_info(pid_t pid, const char *ns, } } -int perf_event__synthesize_namespaces(struct perf_tool *tool, +int perf_event__synthesize_namespaces(const struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, @@ -257,7 +258,7 @@ int perf_event__synthesize_namespaces(struct perf_tool *tool, return 0; } -static int perf_event__synthesize_fork(struct perf_tool *tool, +static int perf_event__synthesize_fork(const struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, pid_t ppid, perf_event__handler_t process, @@ -367,11 +368,11 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, struct machine *machine, bool is_kernel) { - struct build_id bid; + struct build_id bid = { .size = 0, }; struct nsinfo *nsi; struct nscookie nc; struct dso *dso = 
NULL; - struct dso_id id; + struct dso_id dso_id = dso_id_empty; int rc; if (is_kernel) { @@ -379,12 +380,18 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, goto out; } - id.maj = event->maj; - id.min = event->min; - id.ino = event->ino; - id.ino_generation = event->ino_generation; + if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) { + build_id__init(&dso_id.build_id, event->build_id, event->build_id_size); + } else { + dso_id.maj = event->maj; + dso_id.min = event->min; + dso_id.ino = event->ino; + dso_id.ino_generation = event->ino_generation; + dso_id.mmap2_valid = true; + dso_id.mmap2_ino_generation_valid = true; + } - dso = dsos__findnew_id(&machine->dsos, event->filename, &id); + dso = dsos__findnew_id(&machine->dsos, event->filename, &dso_id); if (dso && dso__has_build_id(dso)) { bid = *dso__bid(dso); rc = 0; @@ -418,7 +425,7 @@ out: dso__put(dso); } -int perf_event__synthesize_mmap_events(struct perf_tool *tool, +int perf_event__synthesize_mmap_events(const struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, @@ -525,7 +532,7 @@ out: event->mmap2.pid = tgid; event->mmap2.tid = pid; - if (symbol_conf.buildid_mmap2) + if (!symbol_conf.no_buildid_mmap2) perf_record_mmap2__read_build_id(&event->mmap2, machine, false); if (perf_tool__process_synth_event(tool, event, machine, process) != 0) { @@ -542,7 +549,7 @@ out: } #ifdef HAVE_FILE_HANDLE -static int perf_event__synthesize_cgroup(struct perf_tool *tool, +static int perf_event__synthesize_cgroup(const struct perf_tool *tool, union perf_event *event, char *path, size_t mount_len, perf_event__handler_t process, @@ -582,7 +589,7 @@ static int perf_event__synthesize_cgroup(struct perf_tool *tool, return 0; } -static int perf_event__walk_cgroup_tree(struct perf_tool *tool, +static int perf_event__walk_cgroup_tree(const struct perf_tool *tool, union perf_event *event, char *path, size_t mount_len, perf_event__handler_t process, @@ -630,7 +637,7 @@ static int perf_event__walk_cgroup_tree(struct perf_tool *tool, return ret; } -int perf_event__synthesize_cgroups(struct perf_tool *tool, +int perf_event__synthesize_cgroups(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { @@ -657,7 +664,7 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool, return 0; } #else -int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused, +int perf_event__synthesize_cgroups(const struct perf_tool *tool __maybe_unused, perf_event__handler_t process __maybe_unused, struct machine *machine __maybe_unused) { @@ -666,7 +673,7 @@ int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused, #endif struct perf_event__synthesize_modules_maps_cb_args { - struct perf_tool *tool; + const struct perf_tool *tool; perf_event__handler_t process; struct machine *machine; union perf_event *event; @@ -683,7 +690,7 @@ static int perf_event__synthesize_modules_maps_cb(struct map *map, void *data) return 0; dso = map__dso(map); - if (symbol_conf.buildid_mmap2) { + if (!symbol_conf.no_buildid_mmap2) { size = PERF_ALIGN(dso__long_name_len(dso) + 1, sizeof(u64)); event->mmap2.header.type = PERF_RECORD_MMAP2; event->mmap2.header.size = (sizeof(event->mmap2) - @@ -717,7 +724,7 @@ static int perf_event__synthesize_modules_maps_cb(struct map *map, void *data) return 0; } -int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, +int perf_event__synthesize_modules(const struct perf_tool *tool, 
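The rewritten perf_record_mmap2__read_build_id() keys the DSO lookup on whichever identity the event actually carries: a kernel-supplied build ID when PERF_RECORD_MISC_MMAP_BUILD_ID is set in header.misc, or the classic device/inode tuple otherwise. A standalone sketch of that decision (the misc-bit value matches the uapi header; the struct below is a stand-in, not perf's struct dso_id):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PERF_RECORD_MISC_MMAP_BUILD_ID (1 << 14)	/* uapi/linux/perf_event.h */

struct id_sketch {
	uint8_t  build_id[20];
	size_t   build_id_size;
	uint64_t ino, ino_generation;
	uint32_t maj, min;
	bool     mmap2_valid;
};

static void fill_id(struct id_sketch *id, uint16_t misc,
		    const uint8_t *bid, size_t bid_size)
{
	memset(id, 0, sizeof(*id));
	if (misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
		/* event carries a build ID; dev/inode fields stay invalid */
		id->build_id_size = bid_size < sizeof(id->build_id)
				  ? bid_size : sizeof(id->build_id);
		memcpy(id->build_id, bid, id->build_id_size);
	} else {
		/* classic mmap2: identify the file by device + inode */
		id->mmap2_valid = true;
	}
}

int main(void)
{
	struct id_sketch id;
	uint8_t bid[20] = { 0xde, 0xad };

	fill_id(&id, PERF_RECORD_MISC_MMAP_BUILD_ID, bid, sizeof(bid));
	printf("uses build id: %s\n", id.mmap2_valid ? "no" : "yes");
	return 0;
}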
perf_event__handler_t process, struct machine *machine) { int rc; @@ -727,9 +734,9 @@ int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t .process = process, .machine = machine, }; - size_t size = symbol_conf.buildid_mmap2 - ? sizeof(args.event->mmap2) - : sizeof(args.event->mmap); + size_t size = symbol_conf.no_buildid_mmap2 + ? sizeof(args.event->mmap) + : sizeof(args.event->mmap2); args.event = zalloc(size + machine->id_hdr_size); if (args.event == NULL) { @@ -763,14 +770,14 @@ static int __event__synthesize_thread(union perf_event *comm_event, union perf_event *fork_event, union perf_event *namespaces_event, pid_t pid, int full, perf_event__handler_t process, - struct perf_tool *tool, struct machine *machine, + const struct perf_tool *tool, struct machine *machine, bool needs_mmap, bool mmap_data) { char filename[PATH_MAX]; - struct dirent **dirent; + struct io_dir iod; + struct io_dirent64 *dent; pid_t tgid, ppid; int rc = 0; - int i, n; /* special case: only send one comm event using passed in pid */ if (!full) { @@ -802,16 +809,19 @@ static int __event__synthesize_thread(union perf_event *comm_event, snprintf(filename, sizeof(filename), "%s/proc/%d/task", machine->root_dir, pid); - n = scandir(filename, &dirent, filter_task, NULL); - if (n < 0) - return n; + io_dir__init(&iod, open(filename, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (iod.dirfd < 0) + return -1; - for (i = 0; i < n; i++) { + while ((dent = io_dir__readdir(&iod)) != NULL) { char *end; pid_t _pid; bool kernel_thread = false; - _pid = strtol(dirent[i]->d_name, &end, 10); + if (!isdigit(dent->d_name[0])) + continue; + + _pid = strtol(dent->d_name, &end, 10); if (*end) continue; @@ -845,14 +855,12 @@ static int __event__synthesize_thread(union perf_event *comm_event, } } - for (i = 0; i < n; i++) - zfree(&dirent[i]); - free(dirent); + close(iod.dirfd); return rc; } -int perf_event__synthesize_thread_map(struct perf_tool *tool, +int perf_event__synthesize_thread_map(const struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, @@ -929,7 +937,7 @@ out: return err; } -static int __perf_event__synthesize_threads(struct perf_tool *tool, +static int __perf_event__synthesize_threads(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, @@ -993,7 +1001,7 @@ out: } struct synthesize_threads_arg { - struct perf_tool *tool; + const struct perf_tool *tool; perf_event__handler_t process; struct machine *machine; bool needs_mmap; @@ -1015,7 +1023,7 @@ static void *synthesize_threads_worker(void *arg) return NULL; } -int perf_event__synthesize_threads(struct perf_tool *tool, +int perf_event__synthesize_threads(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, @@ -1104,20 +1112,20 @@ free_dirent: return err; } -int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused, +int __weak perf_event__synthesize_extra_kmaps(const struct perf_tool *tool __maybe_unused, perf_event__handler_t process __maybe_unused, struct machine *machine __maybe_unused) { return 0; } -static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, +static int __perf_event__synthesize_kernel_mmap(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { union perf_event *event; - size_t size = symbol_conf.buildid_mmap2 ? 
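The io_dir loop above replaces scandir(), which allocated and sorted a dirent array for every synthesized process; the filtering (leading digit, fully numeric name) now happens inline. The same walk with portable readdir(), as a standalone sketch rather than the io_dir API:

#include <ctype.h>
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	char path[64];
	struct dirent *dent;
	DIR *dir;

	snprintf(path, sizeof(path), "/proc/%d/task", getpid());
	dir = opendir(path);
	if (!dir)
		return 1;

	while ((dent = readdir(dir)) != NULL) {
		char *end;
		long tid;

		if (!isdigit((unsigned char)dent->d_name[0]))
			continue;	/* also skips "." and ".." */
		tid = strtol(dent->d_name, &end, 10);
		if (*end)
			continue;	/* not a pure number */
		printf("tid %ld\n", tid);
	}
	closedir(dir);
	return 0;
}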
- sizeof(event->mmap2) : sizeof(event->mmap); + size_t size = symbol_conf.no_buildid_mmap2 ? + sizeof(event->mmap) : sizeof(event->mmap2); struct map *map = machine__kernel_map(machine); struct kmap *kmap; int err; @@ -1151,7 +1159,7 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; } - if (symbol_conf.buildid_mmap2) { + if (!symbol_conf.no_buildid_mmap2) { size = snprintf(event->mmap2.filename, sizeof(event->mmap2.filename), "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1; size = PERF_ALIGN(size, sizeof(u64)); @@ -1183,7 +1191,7 @@ static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, return err; } -int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, +int perf_event__synthesize_kernel_mmap(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { @@ -1196,7 +1204,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, return perf_event__synthesize_extra_kmaps(tool, process, machine); } -int perf_event__synthesize_thread_map2(struct perf_tool *tool, +int perf_event__synthesize_thread_map2(const struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine) @@ -1346,7 +1354,7 @@ static struct perf_record_cpu_map *cpu_map_event__new(const struct perf_cpu_map } -int perf_event__synthesize_cpu_map(struct perf_tool *tool, +int perf_event__synthesize_cpu_map(const struct perf_tool *tool, const struct perf_cpu_map *map, perf_event__handler_t process, struct machine *machine) @@ -1364,7 +1372,7 @@ int perf_event__synthesize_cpu_map(struct perf_tool *tool, return err; } -int perf_event__synthesize_stat_config(struct perf_tool *tool, +int perf_event__synthesize_stat_config(const struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine) @@ -1403,7 +1411,7 @@ int perf_event__synthesize_stat_config(struct perf_tool *tool, return err; } -int perf_event__synthesize_stat(struct perf_tool *tool, +int perf_event__synthesize_stat(const struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, @@ -1425,7 +1433,7 @@ int perf_event__synthesize_stat(struct perf_tool *tool, return process(tool, (union perf_event *) &event, NULL, machine); } -int perf_event__synthesize_stat_round(struct perf_tool *tool, +int perf_event__synthesize_stat_round(const struct perf_tool *tool, u64 evtime, u64 type, perf_event__handler_t process, struct machine *machine) @@ -1508,9 +1516,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, } if (type & PERF_SAMPLE_REGS_USER) { - if (sample->user_regs.abi) { + if (sample->user_regs && sample->user_regs->abi) { result += sizeof(u64); - sz = hweight64(sample->user_regs.mask) * sizeof(u64); + sz = hweight64(sample->user_regs->mask) * sizeof(u64); result += sz; } else { result += sizeof(u64); @@ -1536,9 +1544,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, result += sizeof(u64); if (type & PERF_SAMPLE_REGS_INTR) { - if (sample->intr_regs.abi) { + if (sample->intr_regs && sample->intr_regs->abi) { result += sizeof(u64); - sz = hweight64(sample->intr_regs.mask) * sizeof(u64); + sz = hweight64(sample->intr_regs->mask) * sizeof(u64); result += sz; } else { result += sizeof(u64); @@ -1565,10 +1573,16 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, return result; 
} -void __weak arch_perf_synthesize_sample_weight(const struct perf_sample *data, +static void perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type __maybe_unused) { *array = data->weight; + + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { + *array &= 0xffffffff; + *array |= ((u64)data->ins_lat << 32); + *array |= ((u64)data->weight3 << 48); + } } static __u64 *copy_read_group_values(__u64 *array, __u64 read_format, @@ -1686,12 +1700,16 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo } if (type & PERF_SAMPLE_RAW) { - u.val32[0] = sample->raw_size; - *array = u.val64; - array = (void *)array + sizeof(u32); + u32 *array32 = (void *)array; + + *array32 = sample->raw_size; + array32++; - memcpy(array, sample->raw_data, sample->raw_size); - array = (void *)array + sample->raw_size; + memcpy(array32, sample->raw_data, sample->raw_size); + array = (void *)(array32 + (sample->raw_size / sizeof(u32))); + + /* make sure the array is 64-bit aligned */ + BUG_ON(((long)array) % sizeof(u64)); } if (type & PERF_SAMPLE_BRANCH_STACK) { @@ -1703,10 +1721,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo } if (type & PERF_SAMPLE_REGS_USER) { - if (sample->user_regs.abi) { - *array++ = sample->user_regs.abi; - sz = hweight64(sample->user_regs.mask) * sizeof(u64); - memcpy(array, sample->user_regs.regs, sz); + if (sample->user_regs && sample->user_regs->abi) { + *array++ = sample->user_regs->abi; + sz = hweight64(sample->user_regs->mask) * sizeof(u64); + memcpy(array, sample->user_regs->regs, sz); array = (void *)array + sz; } else { *array++ = 0; @@ -1724,7 +1742,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo } if (type & PERF_SAMPLE_WEIGHT_TYPE) { - arch_perf_synthesize_sample_weight(sample, array, type); + perf_synthesize_sample_weight(sample, array, type); array++; } @@ -1739,10 +1757,10 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo } if (type & PERF_SAMPLE_REGS_INTR) { - if (sample->intr_regs.abi) { - *array++ = sample->intr_regs.abi; - sz = hweight64(sample->intr_regs.mask) * sizeof(u64); - memcpy(array, sample->intr_regs.regs, sz); + if (sample->intr_regs && sample->intr_regs->abi) { + *array++ = sample->intr_regs->abi; + sz = hweight64(sample->intr_regs->mask) * sizeof(u64); + memcpy(array, sample->intr_regs->regs, sz); array = (void *)array + sz; } else { *array++ = 0; @@ -1826,7 +1844,7 @@ int perf_event__synthesize_id_sample(__u64 *array, u64 type, const struct perf_s return (void *)array - (void *)start; } -int __perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, +int __perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine, size_t from) { union perf_event *ev; @@ -1918,13 +1936,13 @@ out_err: return err; } -int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, +int perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine) { return __perf_event__synthesize_id_index(tool, process, evlist, machine, 0); } -int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, +int __machine__synthesize_threads(struct machine *machine, const struct perf_tool *tool, struct target *target, struct perf_thread_map *threads, perf_event__handler_t process, bool needs_mmap, bool data_mmap, 
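perf_synthesize_sample_weight() (no longer an arch-specific __weak hook) packs the PERF_SAMPLE_WEIGHT_STRUCT layout into one u64: low 32 bits weight, bits 32-47 ins_lat, bits 48-63 weight3. A self-contained round-trip of that packing:

#include <inttypes.h>
#include <stdio.h>

static uint64_t pack_weight(uint32_t weight, uint16_t ins_lat, uint16_t weight3)
{
	uint64_t v = weight;		/* low 32 bits */

	v |= (uint64_t)ins_lat << 32;	/* bits 32-47 */
	v |= (uint64_t)weight3 << 48;	/* bits 48-63 */
	return v;
}

int main(void)
{
	uint64_t v = pack_weight(100, 7, 3);

	printf("weight=%" PRIu64 " ins_lat=%" PRIu64 " weight3=%" PRIu64 "\n",
	       v & 0xffffffffULL, (v >> 32) & 0xffff, (v >> 48) & 0xffff);
	return 0;
}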
unsigned int nr_threads_synthesize) @@ -1985,7 +2003,7 @@ static struct perf_record_event_update *event_update_event__new(size_t size, u64 return ev; } -int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel, +int perf_event__synthesize_event_update_unit(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process) { size_t size = strlen(evsel->unit); @@ -2002,7 +2020,7 @@ int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evse return err; } -int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel, +int perf_event__synthesize_event_update_scale(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process) { struct perf_record_event_update *ev; @@ -2019,7 +2037,7 @@ int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evs return err; } -int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel, +int perf_event__synthesize_event_update_name(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process) { struct perf_record_event_update *ev; @@ -2036,10 +2054,10 @@ int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evse return err; } -int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, +int perf_event__synthesize_event_update_cpus(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process) { - struct synthesize_cpu_map_data syn_data = { .map = evsel->core.own_cpus }; + struct synthesize_cpu_map_data syn_data = { .map = evsel->core.pmu_cpus }; struct perf_record_event_update *ev; int err; @@ -2059,7 +2077,7 @@ int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evse return err; } -int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, +int perf_event__synthesize_attrs(const struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process) { struct evsel *evsel; @@ -2087,7 +2105,7 @@ static bool has_scale(struct evsel *evsel) return evsel->scale != 1; } -int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list, +int perf_event__synthesize_extra_attr(const struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe) { struct evsel *evsel; @@ -2120,7 +2138,7 @@ int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evs } } - if (evsel->core.own_cpus) { + if (evsel->core.pmu_cpus) { err = perf_event__synthesize_event_update_cpus(tool, evsel, process); if (err < 0) { pr_err("Couldn't synthesize evsel cpus.\n"); @@ -2143,7 +2161,7 @@ int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evs return 0; } -int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, +int perf_event__synthesize_attr(const struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process) { union perf_event *ev; @@ -2177,7 +2195,7 @@ int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr * } #ifdef HAVE_LIBTRACEEVENT -int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, +int perf_event__synthesize_tracing_data(const struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process) { union perf_event ev; @@ -2200,7 +2218,7 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct e if (!tdata) 
return -1; - memset(&ev, 0, sizeof(ev)); + memset(&ev, 0, sizeof(ev.tracing_data)); ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA; size = tdata->size; @@ -2225,31 +2243,112 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct e } #endif -int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, - perf_event__handler_t process, struct machine *machine) +int perf_event__synthesize_build_id(const struct perf_tool *tool, + struct perf_sample *sample, + struct machine *machine, + perf_event__handler_t process, + const struct evsel *evsel, + __u16 misc, + const struct build_id *bid, + const char *filename) { union perf_event ev; size_t len; - if (!dso__hit(pos)) - return 0; + len = sizeof(ev.build_id) + strlen(filename) + 1; + len = PERF_ALIGN(len, sizeof(u64)); - memset(&ev, 0, sizeof(ev)); + memset(&ev, 0, len); - len = dso__long_name_len(pos) + 1; - len = PERF_ALIGN(len, NAME_ALIGN); - ev.build_id.size = min(dso__bid(pos)->size, sizeof(dso__bid(pos)->data)); - memcpy(&ev.build_id.build_id, dso__bid(pos)->data, ev.build_id.size); + ev.build_id.size = bid->size; + if (ev.build_id.size > sizeof(ev.build_id.build_id)) + ev.build_id.size = sizeof(ev.build_id.build_id); + memcpy(ev.build_id.build_id, bid->data, ev.build_id.size); ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; ev.build_id.header.misc = misc | PERF_RECORD_MISC_BUILD_ID_SIZE; ev.build_id.pid = machine->pid; - ev.build_id.header.size = sizeof(ev.build_id) + len; - memcpy(&ev.build_id.filename, dso__long_name(pos), dso__long_name_len(pos)); + ev.build_id.header.size = len; + strcpy(ev.build_id.filename, filename); + + if (evsel) { + void *array = &ev; + int ret; + + array += ev.header.size; + ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample); + if (ret < 0) + return ret; + + if (ret & 7) { + pr_err("Bad id sample size %d\n", ret); + return -EINVAL; + } + + ev.header.size += ret; + } + + return process(tool, &ev, sample, machine); +} + +int perf_event__synthesize_mmap2_build_id(const struct perf_tool *tool, + struct perf_sample *sample, + struct machine *machine, + perf_event__handler_t process, + const struct evsel *evsel, + __u16 misc, + __u32 pid, __u32 tid, + __u64 start, __u64 len, __u64 pgoff, + const struct build_id *bid, + __u32 prot, __u32 flags, + const char *filename) +{ + union perf_event ev; + size_t ev_len; + void *array; + int ret; + + ev_len = sizeof(ev.mmap2) - sizeof(ev.mmap2.filename) + strlen(filename) + 1; + ev_len = PERF_ALIGN(ev_len, sizeof(u64)); + + memset(&ev, 0, ev_len); + + ev.mmap2.header.type = PERF_RECORD_MMAP2; + ev.mmap2.header.misc = misc | PERF_RECORD_MISC_MMAP_BUILD_ID; + ev.mmap2.header.size = ev_len; + + ev.mmap2.pid = pid; + ev.mmap2.tid = tid; + ev.mmap2.start = start; + ev.mmap2.len = len; + ev.mmap2.pgoff = pgoff; + + ev.mmap2.build_id_size = bid->size; + if (ev.mmap2.build_id_size > sizeof(ev.mmap2.build_id)) + ev.mmap2.build_id_size = sizeof(ev.mmap2.build_id); + memcpy(ev.mmap2.build_id, bid->data, ev.mmap2.build_id_size); + + ev.mmap2.prot = prot; + ev.mmap2.flags = flags; + + memcpy(ev.mmap2.filename, filename, min(strlen(filename), sizeof(ev.mmap2.filename))); + + array = &ev; + array += ev.header.size; + ret = perf_event__synthesize_id_sample(array, evsel->core.attr.sample_type, sample); + if (ret < 0) + return ret; + + if (ret & 7) { + pr_err("Bad id sample size %d\n", ret); + return -EINVAL; + } + + ev.header.size += ret; - return process(tool, &ev, NULL, machine); + return
process(tool, &ev, sample, machine); } -int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, +int perf_event__synthesize_stat_events(struct perf_stat_config *config, const struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs) { int err; @@ -2286,7 +2385,7 @@ int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct p extern const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE]; -int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, +int perf_event__synthesize_features(const struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process) { struct perf_header *header = &session->header; @@ -2349,7 +2448,7 @@ int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session return ret; } -int perf_event__synthesize_for_pipe(struct perf_tool *tool, +int perf_event__synthesize_for_pipe(const struct perf_tool *tool, struct perf_session *session, struct perf_data *data, perf_event__handler_t process) diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index 53737d1619a4..f8588b6cf11a 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -9,6 +9,7 @@ #include <perf/cpumap.h> struct auxtrace_record; +struct build_id; struct dso; struct evlist; struct evsel; @@ -40,45 +41,65 @@ enum perf_record_synth { int parse_synth_opt(char *str); -typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event, +typedef int (*perf_event__handler_t)(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -int perf_event__synthesize_attrs(struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process); -int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); -int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_cpu_map(struct perf_tool *tool, const struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); -int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); -int perf_event__synthesize_event_update_scale(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); -int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); -int perf_event__synthesize_extra_attr(struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe); -int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process); -int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine); -int __perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine, size_t from); +int perf_event__synthesize_attrs(const struct perf_tool 
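Both build-id synthesizers size the record as a fixed part plus strlen(filename) + 1, rounded up to a u64 multiple, and the optional trailing id sample must itself be 8-byte aligned (the `ret & 7` check). The alignment arithmetic, isolated (the fixed size below is hypothetical, not perf's real struct size):

#include <stdio.h>
#include <string.h>

#define ALIGN8(x) (((x) + 7) & ~(size_t)7)	/* PERF_ALIGN(x, sizeof(u64)) */

int main(void)
{
	const char *filename = "/usr/lib/libc.so.6";
	size_t fixed = 72;	/* pretend sizeof(ev.mmap2) - sizeof(ev.mmap2.filename) */
	size_t payload = fixed + strlen(filename) + 1;

	printf("%zu payload bytes -> %zu byte record\n", payload, ALIGN8(payload));
	return 0;
}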
*tool, struct evlist *evlist, perf_event__handler_t process); +int perf_event__synthesize_attr(const struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); +int perf_event__synthesize_build_id(const struct perf_tool *tool, + struct perf_sample *sample, + struct machine *machine, + perf_event__handler_t process, + const struct evsel *evsel, + __u16 misc, + const struct build_id *bid, + const char *filename); +int perf_event__synthesize_mmap2_build_id(const struct perf_tool *tool, + struct perf_sample *sample, + struct machine *machine, + perf_event__handler_t process, + const struct evsel *evsel, + __u16 misc, + __u32 pid, __u32 tid, + __u64 start, __u64 len, __u64 pgoff, + const struct build_id *bid, + __u32 prot, __u32 flags, + const char *filename); +int perf_event__synthesize_cpu_map(const struct perf_tool *tool, const struct perf_cpu_map *cpus, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_event_update_cpus(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int perf_event__synthesize_event_update_name(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int perf_event__synthesize_event_update_scale(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int perf_event__synthesize_event_update_unit(const struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process); +int perf_event__synthesize_extra_attr(const struct perf_tool *tool, struct evlist *evsel_list, perf_event__handler_t process, bool is_pipe); +int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_features(const struct perf_tool *tool, struct perf_session *session, struct evlist *evlist, perf_event__handler_t process); +int perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine); +int __perf_event__synthesize_id_index(const struct perf_tool *tool, perf_event__handler_t process, struct evlist *evlist, struct machine *machine, size_t from); int perf_event__synthesize_id_sample(__u64 *array, u64 type, const struct perf_sample *sample); -int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data); -int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_cgroups(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_kernel_mmap(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_mmap_events(const struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data); +int perf_event__synthesize_modules(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_namespaces(const struct perf_tool *tool, union perf_event *event, 
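Every synthesizer in this header now takes a const struct perf_tool *, and perf_event__handler_t changed to match, so any out-of-tree callback needs the same qualifier or the assignment no longer type-checks. A minimal conforming handler — a sketch that assumes compilation inside the perf tree:

#include <linux/compiler.h>
#include "util/synthetic-events.h"

/* signature matches the new const-qualified perf_event__handler_t */
static int drop_event(const struct perf_tool *tool __maybe_unused,
		      union perf_event *event __maybe_unused,
		      struct perf_sample *sample __maybe_unused,
		      struct machine *machine __maybe_unused)
{
	return 0;	/* consume and discard */
}

It would be passed wherever a perf_event__handler_t is expected, for example as the process argument of perf_event__synthesize_threads().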
pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_cgroups(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample); -int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs); -int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_stat(struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data); -int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize); -int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process); -int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); -pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine); - -int perf_tool__process_synth_event(struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process); +int perf_event__synthesize_stat_config(const struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_stat_events(struct perf_stat_config *config, const struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs); +int perf_event__synthesize_stat_round(const struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_stat(const struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_thread_map2(const struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_thread_map(const struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data); +int perf_event__synthesize_threads(const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize); +int perf_event__synthesize_tracing_data(const struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process); +int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, const struct perf_tool *tool, perf_event__handler_t 
process, struct machine *machine); +pid_t perf_event__synthesize_comm(const struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine); +void perf_event__synthesize_final_bpf_metadata(struct perf_session *session, + perf_event__handler_t process); + +int perf_tool__process_synth_event(const struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process); size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format); -int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, +int __machine__synthesize_threads(struct machine *machine, const struct perf_tool *tool, struct target *target, struct perf_thread_map *threads, perf_event__handler_t process, bool needs_mmap, bool data_mmap, unsigned int nr_threads_synthesize); @@ -86,24 +107,9 @@ int machine__synthesize_threads(struct machine *machine, struct target *target, struct perf_thread_map *threads, bool needs_mmap, bool data_mmap, unsigned int nr_threads_synthesize); -#ifdef HAVE_AUXTRACE_SUPPORT -int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct perf_tool *tool, +int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, const struct perf_tool *tool, struct perf_session *session, perf_event__handler_t process); -#else // HAVE_AUXTRACE_SUPPORT - -#include <errno.h> - -static inline int -perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused, - struct perf_tool *tool __maybe_unused, - struct perf_session *session __maybe_unused, - perf_event__handler_t process __maybe_unused) -{ - return -EINVAL; -} -#endif // HAVE_AUXTRACE_SUPPORT - #ifdef HAVE_LIBBPF_SUPPORT int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process, struct machine *machine, struct record_opts *opts); @@ -117,7 +123,7 @@ static inline int perf_event__synthesize_bpf_events(struct perf_session *session } #endif // HAVE_LIBBPF_SUPPORT -int perf_event__synthesize_for_pipe(struct perf_tool *tool, +int perf_event__synthesize_for_pipe(const struct perf_tool *tool, struct perf_session *session, struct perf_data *data, perf_event__handler_t process); diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 63be7b58761d..67a8ec10e9e4 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -7,178 +7,127 @@ #include "syscalltbl.h" #include <stdlib.h> +#include <asm/bitsperlong.h> #include <linux/compiler.h> +#include <linux/kernel.h> #include <linux/zalloc.h> -#ifdef HAVE_SYSCALL_TABLE_SUPPORT #include <string.h> #include "string2.h" -#if defined(__x86_64__) -#include <asm/syscalls_64.c> -const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID; -static const char *const *syscalltbl_native = syscalltbl_x86_64; -#elif defined(__s390x__) -#include <asm/syscalls_64.c> -const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID; -static const char *const *syscalltbl_native = syscalltbl_s390_64; -#elif defined(__powerpc64__) -#include <asm/syscalls_64.c> -const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID; -static const char *const *syscalltbl_native = syscalltbl_powerpc_64; -#elif defined(__powerpc__) -#include <asm/syscalls_32.c> -const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID; -static const char *const *syscalltbl_native = syscalltbl_powerpc_32; -#elif defined(__aarch64__) -#include <asm/syscalls.c> -const int syscalltbl_native_max_id = 
SYSCALLTBL_ARM64_MAX_ID; -static const char *const *syscalltbl_native = syscalltbl_arm64; -#elif defined(__mips__) -#include <asm/syscalls_n64.c> -const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID; -static const char *const *syscalltbl_native = syscalltbl_mips_n64; -#elif defined(__loongarch__) -#include <asm/syscalls.c> -const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID; -static const char *const *syscalltbl_native = syscalltbl_loongarch; -#endif - -struct syscall { - int id; - const char *name; -}; +#include "trace/beauty/generated/syscalltbl.c" -static int syscallcmpname(const void *vkey, const void *ventry) +static const struct syscalltbl *find_table(int e_machine) { - const char *key = vkey; - const struct syscall *entry = ventry; + static const struct syscalltbl *last_table; + static int last_table_machine = EM_NONE; - return strcmp(key, entry->name); -} + /* Tables only exist for EM_SPARC. */ + if (e_machine == EM_SPARCV9) + e_machine = EM_SPARC; -static int syscallcmp(const void *va, const void *vb) -{ - const struct syscall *a = va, *b = vb; + if (last_table_machine == e_machine && last_table != NULL) + return last_table; - return strcmp(a->name, b->name); -} + for (size_t i = 0; i < ARRAY_SIZE(syscalltbls); i++) { + const struct syscalltbl *entry = &syscalltbls[i]; -static int syscalltbl__init_native(struct syscalltbl *tbl) -{ - int nr_entries = 0, i, j; - struct syscall *entries; - - for (i = 0; i <= syscalltbl_native_max_id; ++i) - if (syscalltbl_native[i]) - ++nr_entries; + if (entry->e_machine != e_machine && entry->e_machine != EM_NONE) + continue; - entries = tbl->syscalls.entries = malloc(sizeof(struct syscall) * nr_entries); - if (tbl->syscalls.entries == NULL) - return -1; - - for (i = 0, j = 0; i <= syscalltbl_native_max_id; ++i) { - if (syscalltbl_native[i]) { - entries[j].name = syscalltbl_native[i]; - entries[j].id = i; - ++j; - } + last_table = entry; + last_table_machine = e_machine; + return entry; } - - qsort(tbl->syscalls.entries, nr_entries, sizeof(struct syscall), syscallcmp); - tbl->syscalls.nr_entries = nr_entries; - tbl->syscalls.max_id = syscalltbl_native_max_id; - return 0; + return NULL; } -struct syscalltbl *syscalltbl__new(void) +const char *syscalltbl__name(int e_machine, int id) { - struct syscalltbl *tbl = malloc(sizeof(*tbl)); - if (tbl) { - if (syscalltbl__init_native(tbl)) { - free(tbl); - return NULL; - } + const struct syscalltbl *table = find_table(e_machine); + + if (e_machine == EM_MIPS && id > 1000) { + /* + * MIPS may encode the N32/64/O32 type in the high part of + * syscall number. Mask this off if present. See the values of + * __NR_N32_Linux, __NR_64_Linux, __NR_O32_Linux and __NR_Linux. + */ + id = id % 1000; } - return tbl; + if (table && id >= 0 && id < table->num_to_name_len) + return table->num_to_name[id]; + return NULL; } -void syscalltbl__delete(struct syscalltbl *tbl) -{ - zfree(&tbl->syscalls.entries); - free(tbl); -} +struct syscall_cmp_key { + const char *name; + const char *const *tbl; +}; -const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id) +static int syscallcmpname(const void *vkey, const void *ventry) { - return id <= syscalltbl_native_max_id ? 
syscalltbl_native[id]: NULL; + const struct syscall_cmp_key *key = vkey; + const uint16_t *entry = ventry; + + return strcmp(key->name, key->tbl[*entry]); } -int syscalltbl__id(struct syscalltbl *tbl, const char *name) +int syscalltbl__id(int e_machine, const char *name) { - struct syscall *sc = bsearch(name, tbl->syscalls.entries, - tbl->syscalls.nr_entries, sizeof(*sc), - syscallcmpname); + const struct syscalltbl *table = find_table(e_machine); + struct syscall_cmp_key key; + const uint16_t *id; + + if (!table) + return -1; - return sc ? sc->id : -1; + key.name = name; + key.tbl = table->num_to_name; + id = bsearch(&key, table->sorted_names, table->sorted_names_len, + sizeof(table->sorted_names[0]), syscallcmpname); + + return id ? *id : -1; } -int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx) +int syscalltbl__num_idx(int e_machine) { - int i; - struct syscall *syscalls = tbl->syscalls.entries; + const struct syscalltbl *table = find_table(e_machine); - for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) { - if (strglobmatch(syscalls[i].name, syscall_glob)) { - *idx = i; - return syscalls[i].id; - } - } + if (!table) + return 0; - return -1; + return table->sorted_names_len; } -int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx) +int syscalltbl__id_at_idx(int e_machine, int idx) { - *idx = -1; - return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); -} - -#else /* HAVE_SYSCALL_TABLE_SUPPORT */ + const struct syscalltbl *table = find_table(e_machine); -#include <libaudit.h> + if (!table) + return -1; -struct syscalltbl *syscalltbl__new(void) -{ - struct syscalltbl *tbl = zalloc(sizeof(*tbl)); - if (tbl) - tbl->audit_machine = audit_detect_machine(); - return tbl; + assert(idx >= 0 && idx < table->sorted_names_len); + return table->sorted_names[idx]; } -void syscalltbl__delete(struct syscalltbl *tbl) +int syscalltbl__strglobmatch_next(int e_machine, const char *syscall_glob, int *idx) { - free(tbl); -} + const struct syscalltbl *table = find_table(e_machine); -const char *syscalltbl__name(const struct syscalltbl *tbl, int id) -{ - return audit_syscall_to_name(id, tbl->audit_machine); -} + for (int i = *idx + 1; table && i < table->sorted_names_len; ++i) { + const char *name = table->num_to_name[table->sorted_names[i]]; -int syscalltbl__id(struct syscalltbl *tbl, const char *name) -{ - return audit_name_to_syscall(name, tbl->audit_machine); -} + if (strglobmatch(name, syscall_glob)) { + *idx = i; + return table->sorted_names[i]; + } + } -int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused, - const char *syscall_glob __maybe_unused, int *idx __maybe_unused) -{ return -1; } -int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx) +int syscalltbl__strglobmatch_first(int e_machine, const char *syscall_glob, int *idx) { - return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); + *idx = -1; + return syscalltbl__strglobmatch_next(e_machine, syscall_glob, idx); } -#endif /* HAVE_SYSCALL_TABLE_SUPPORT */ diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h index a41d2ca9e4ae..2bb628eff367 100644 --- a/tools/perf/util/syscalltbl.h +++ b/tools/perf/util/syscalltbl.h @@ -2,22 +2,12 @@ #ifndef __PERF_SYSCALLTBL_H #define __PERF_SYSCALLTBL_H -struct syscalltbl { - int audit_machine; - struct { - int max_id; - int nr_entries; - void *entries; - } syscalls; -}; +const char *syscalltbl__name(int e_machine, int id); +int 
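With the generated tables the API is keyed by ELF machine instead of a heap-allocated struct syscalltbl, so there is nothing to create or delete; callers pass an e_machine (within perf, typically obtained via thread__e_machine()) on every call. A hedged usage sketch using only functions from the new API:

#include <elf.h>
#include <stdio.h>
#include "util/syscalltbl.h"

int main(void)
{
	int id = syscalltbl__id(EM_X86_64, "openat");
	const char *name = syscalltbl__name(EM_X86_64, id);

	printf("openat: id %d -> %s\n", id, name ? name : "(unknown)");
	return 0;
}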
syscalltbl__id(int e_machine, const char *name); +int syscalltbl__num_idx(int e_machine); +int syscalltbl__id_at_idx(int e_machine, int idx); -struct syscalltbl *syscalltbl__new(void); -void syscalltbl__delete(struct syscalltbl *tbl); - -const char *syscalltbl__name(const struct syscalltbl *tbl, int id); -int syscalltbl__id(struct syscalltbl *tbl, const char *name); - -int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx); -int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx); +int syscalltbl__strglobmatch_first(int e_machine, const char *syscall_glob, int *idx); +int syscalltbl__strglobmatch_next(int e_machine, const char *syscall_glob, int *idx); #endif /* __PERF_SYSCALLTBL_H */ diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index 0f383418e3df..8cf71bea295a 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -28,20 +28,6 @@ enum target_errno target__validate(struct target *target) ret = TARGET_ERRNO__PID_OVERRIDE_CPU; } - /* UID and PID are mutually exclusive */ - if (target->tid && target->uid_str) { - target->uid_str = NULL; - if (ret == TARGET_ERRNO__SUCCESS) - ret = TARGET_ERRNO__PID_OVERRIDE_UID; - } - - /* UID and CPU are mutually exclusive */ - if (target->uid_str && target->cpu_list) { - target->cpu_list = NULL; - if (ret == TARGET_ERRNO__SUCCESS) - ret = TARGET_ERRNO__UID_OVERRIDE_CPU; - } - /* PID and SYSTEM are mutually exclusive */ if (target->tid && target->system_wide) { target->system_wide = false; @@ -49,13 +35,6 @@ enum target_errno target__validate(struct target *target) ret = TARGET_ERRNO__PID_OVERRIDE_SYSTEM; } - /* UID and SYSTEM are mutually exclusive */ - if (target->uid_str && target->system_wide) { - target->system_wide = false; - if (ret == TARGET_ERRNO__SUCCESS) - ret = TARGET_ERRNO__UID_OVERRIDE_SYSTEM; - } - /* BPF and CPU are mutually exclusive */ if (target->bpf_str && target->cpu_list) { target->cpu_list = NULL; @@ -70,13 +49,6 @@ enum target_errno target__validate(struct target *target) ret = TARGET_ERRNO__BPF_OVERRIDE_PID; } - /* BPF and UID are mutually exclusive */ - if (target->bpf_str && target->uid_str) { - target->uid_str = NULL; - if (ret == TARGET_ERRNO__SUCCESS) - ret = TARGET_ERRNO__BPF_OVERRIDE_UID; - } - /* BPF and THREADS are mutually exclusive */ if (target->bpf_str && target->per_thread) { target->per_thread = false; @@ -94,15 +66,13 @@ enum target_errno target__validate(struct target *target) return ret; } -enum target_errno target__parse_uid(struct target *target) +uid_t parse_uid(const char *str) { struct passwd pwd, *result; char buf[1024]; - const char *str = target->uid_str; - target->uid = UINT_MAX; if (str == NULL) - return TARGET_ERRNO__SUCCESS; + return UINT_MAX; /* Try user name first */ getpwnam_r(str, &pwd, buf, sizeof(buf), &result); @@ -115,16 +85,15 @@ enum target_errno target__parse_uid(struct target *target) int uid = strtol(str, &endptr, 10); if (*endptr != '\0') - return TARGET_ERRNO__INVALID_UID; + return UINT_MAX; getpwuid_r(uid, &pwd, buf, sizeof(buf), &result); if (result == NULL) - return TARGET_ERRNO__USER_NOT_FOUND; + return UINT_MAX; } - target->uid = result->pw_uid; - return TARGET_ERRNO__SUCCESS; + return result->pw_uid; } /* @@ -132,20 +101,14 @@ enum target_errno target__parse_uid(struct target *target) */ static const char *target__error_str[] = { "PID/TID switch overriding CPU", - "PID/TID switch overriding UID", - "UID switch overriding CPU", "PID/TID switch overriding SYSTEM", - "UID 
switch overriding SYSTEM", "SYSTEM/CPU switch overriding PER-THREAD", "BPF switch overriding CPU", "BPF switch overriding PID/TID", - "BPF switch overriding UID", "BPF switch overriding THREAD", - "Invalid User: %s", - "Problems obtaining information for user %s", }; -int target__strerror(struct target *target, int errnum, +int target__strerror(struct target *target __maybe_unused, int errnum, char *buf, size_t buflen) { int idx; @@ -170,11 +133,6 @@ int target__strerror(struct target *target, int errnum, snprintf(buf, buflen, "%s", msg); break; - case TARGET_ERRNO__INVALID_UID: - case TARGET_ERRNO__USER_NOT_FOUND: - snprintf(buf, buflen, msg, target->uid_str); - break; - default: /* cannot reach here */ break; diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index d582cae8e105..84ebb9c940c6 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -9,14 +9,13 @@ struct target { const char *pid; const char *tid; const char *cpu_list; - const char *uid_str; const char *bpf_str; - uid_t uid; bool system_wide; bool uses_mmap; bool default_per_cpu; bool per_thread; bool use_bpf; + bool inherit; int initial_delay; const char *attr_map; }; @@ -35,31 +34,24 @@ enum target_errno { /* for target__validate() */ TARGET_ERRNO__PID_OVERRIDE_CPU = __TARGET_ERRNO__START, - TARGET_ERRNO__PID_OVERRIDE_UID, - TARGET_ERRNO__UID_OVERRIDE_CPU, TARGET_ERRNO__PID_OVERRIDE_SYSTEM, - TARGET_ERRNO__UID_OVERRIDE_SYSTEM, TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD, TARGET_ERRNO__BPF_OVERRIDE_CPU, TARGET_ERRNO__BPF_OVERRIDE_PID, - TARGET_ERRNO__BPF_OVERRIDE_UID, TARGET_ERRNO__BPF_OVERRIDE_THREAD, - /* for target__parse_uid() */ - TARGET_ERRNO__INVALID_UID, - TARGET_ERRNO__USER_NOT_FOUND, - __TARGET_ERRNO__END, }; enum target_errno target__validate(struct target *target); -enum target_errno target__parse_uid(struct target *target); + +uid_t parse_uid(const char *str); int target__strerror(struct target *target, int errnum, char *buf, size_t buflen); static inline bool target__has_task(struct target *target) { - return target->tid || target->pid || target->uid_str; + return target->tid || target->pid; } static inline bool target__has_cpu(struct target *target) diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 87c59aa9fe38..aa9c58bbf9d3 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#include <elf.h> #include <errno.h> +#include <fcntl.h> #include <stdlib.h> #include <stdio.h> #include <string.h> @@ -16,6 +18,7 @@ #include "symbol.h" #include "unwind.h" #include "callchain.h" +#include "dwarf-regs.h" #include <api/fs/fs.h> @@ -38,6 +41,7 @@ int thread__init_maps(struct thread *thread, struct machine *machine) } struct thread *thread__new(pid_t pid, pid_t tid) + NO_THREAD_SAFETY_ANALYSIS /* Allocation/creation is inherently single threaded. 
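target__parse_uid() is gone; its lookup order survives in the new parse_uid(): try the string as a user name first, then as a numeric uid, and report failure as UINT_MAX rather than a target_errno. A standalone restatement of that logic:

#include <limits.h>
#include <pwd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

static uid_t parse_uid_sketch(const char *str)
{
	struct passwd pwd, *result;
	char buf[1024];

	getpwnam_r(str, &pwd, buf, sizeof(buf), &result);	/* name first */
	if (result == NULL) {
		char *end;
		long uid = strtol(str, &end, 10);		/* then number */

		if (*end != '\0')
			return UINT_MAX;
		getpwuid_r((uid_t)uid, &pwd, buf, sizeof(buf), &result);
		if (result == NULL)
			return UINT_MAX;
	}
	return result->pw_uid;
}

int main(int argc, char **argv)
{
	if (argc > 1)
		printf("uid(%s) = %u\n", argv[1], (unsigned)parse_uid_sketch(argv[1]));
	return 0;
}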
*/ { RC_STRUCT(thread) *_thread = zalloc(sizeof(*_thread)); struct thread *thread; @@ -51,6 +55,7 @@ struct thread *thread__new(pid_t pid, pid_t tid) thread__set_ppid(thread, -1); thread__set_cpu(thread, -1); thread__set_guest_cpu(thread, -1); + thread__set_e_machine(thread, EM_NONE); thread__set_lbr_stitch_enable(thread, false); INIT_LIST_HEAD(thread__namespaces_list(thread)); INIT_LIST_HEAD(thread__comm_list(thread)); @@ -196,7 +201,8 @@ int thread__set_namespaces(struct thread *thread, u64 timestamp, return ret; } -struct comm *thread__comm(struct thread *thread) +static struct comm *__thread__comm(struct thread *thread) + SHARED_LOCKS_REQUIRED(thread__comm_lock(thread)) { if (list_empty(thread__comm_list(thread))) return NULL; @@ -204,16 +210,30 @@ struct comm *thread__comm(struct thread *thread) return list_first_entry(thread__comm_list(thread), struct comm, list); } +struct comm *thread__comm(struct thread *thread) +{ + struct comm *res = NULL; + + down_read(thread__comm_lock(thread)); + res = __thread__comm(thread); + up_read(thread__comm_lock(thread)); + return res; +} + struct comm *thread__exec_comm(struct thread *thread) { struct comm *comm, *last = NULL, *second_last = NULL; + down_read(thread__comm_lock(thread)); list_for_each_entry(comm, thread__comm_list(thread), list) { - if (comm->exec) + if (comm->exec) { + up_read(thread__comm_lock(thread)); return comm; + } second_last = last; last = comm; } + up_read(thread__comm_lock(thread)); /* * 'last' with no start time might be the parent's comm of a synthesized @@ -229,8 +249,9 @@ struct comm *thread__exec_comm(struct thread *thread) static int ____thread__set_comm(struct thread *thread, const char *str, u64 timestamp, bool exec) + EXCLUSIVE_LOCKS_REQUIRED(thread__comm_lock(thread)) { - struct comm *new, *curr = thread__comm(thread); + struct comm *new, *curr = __thread__comm(thread); /* Override the default :tid entry */ if (!thread__comm_set(thread)) { @@ -281,8 +302,9 @@ int thread__set_comm_from_proc(struct thread *thread) } static const char *__thread__comm_str(struct thread *thread) + SHARED_LOCKS_REQUIRED(thread__comm_lock(thread)) { - const struct comm *comm = thread__comm(thread); + const struct comm *comm = __thread__comm(thread); if (!comm) return NULL; @@ -406,7 +428,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bo } void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, - struct addr_location *al) + bool symbols, struct addr_location *al) { size_t i; const u8 cpumodes[] = { @@ -417,12 +439,93 @@ void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, }; for (i = 0; i < ARRAY_SIZE(cpumodes); i++) { - thread__find_symbol(thread, cpumodes[i], addr, al); + if (symbols) + thread__find_symbol(thread, cpumodes[i], addr, al); + else + thread__find_map(thread, cpumodes[i], addr, al); + if (al->map) break; } } +static uint16_t read_proc_e_machine_for_pid(pid_t pid) +{ + char path[6 /* "/proc/" */ + 11 /* max length of pid */ + 5 /* "/exe\0" */]; + int fd; + uint16_t e_machine = EM_NONE; + + snprintf(path, sizeof(path), "/proc/%d/exe", pid); + fd = open(path, O_RDONLY); + if (fd >= 0) { + _Static_assert(offsetof(Elf32_Ehdr, e_machine) == 18, "Unexpected offset"); + _Static_assert(offsetof(Elf64_Ehdr, e_machine) == 18, "Unexpected offset"); + if (pread(fd, &e_machine, sizeof(e_machine), 18) != sizeof(e_machine)) + e_machine = EM_NONE; + close(fd); + } + return e_machine; +} + +static int thread__e_machine_callback(struct map *map, void *machine) +{ + struct 
dso *dso = map__dso(map); + + _Static_assert(0 == EM_NONE, "Unexpected EM_NONE"); + if (!dso) + return EM_NONE; + + return dso__e_machine(dso, machine); +} + +uint16_t thread__e_machine(struct thread *thread, struct machine *machine) +{ + pid_t tid, pid; + uint16_t e_machine = RC_CHK_ACCESS(thread)->e_machine; + + if (e_machine != EM_NONE) + return e_machine; + + tid = thread__tid(thread); + pid = thread__pid(thread); + if (pid != tid) { + struct thread *parent = machine__findnew_thread(machine, pid, pid); + + if (parent) { + e_machine = thread__e_machine(parent, machine); + thread__put(parent); + thread__set_e_machine(thread, e_machine); + return e_machine; + } + /* Something went wrong, fallback. */ + } + /* Reading on the PID thread. First try to find from the maps. */ + e_machine = maps__for_each_map(thread__maps(thread), + thread__e_machine_callback, + machine); + if (e_machine == EM_NONE) { + /* Maps failed, perhaps we're live with map events disabled. */ + bool is_live = machine->machines == NULL; + + if (!is_live) { + /* Check if the session has a data file. */ + struct perf_session *session = container_of(machine->machines, + struct perf_session, + machines); + + is_live = !!session->data; + } + /* Read from /proc/pid/exe if live. */ + if (is_live) + e_machine = read_proc_e_machine_for_pid(pid); + } + if (e_machine != EM_NONE) + thread__set_e_machine(thread, e_machine); + else + e_machine = EM_HOST; + return e_machine; +} + struct thread *thread__main_thread(struct machine *machine, struct thread *thread) { if (thread__pid(thread) == thread__tid(thread)) @@ -476,6 +579,7 @@ void thread__free_stitch_list(struct thread *thread) return; list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) { + map_symbol__exit(&pos->cursor.ms); list_del_init(&pos->node); free(pos); } @@ -485,6 +589,9 @@ void thread__free_stitch_list(struct thread *thread) free(pos); } + for (unsigned int i = 0 ; i < lbr_stitch->prev_lbr_cursor_size; i++) + map_symbol__exit(&lbr_stitch->prev_lbr_cursor[i].ms); + zfree(&lbr_stitch->prev_lbr_cursor); free(thread__lbr_stitch(thread)); thread__set_lbr_stitch(thread, NULL); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 8b4a3c69bad1..310eaea344bb 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -26,6 +26,7 @@ struct lbr_stitch { struct list_head free_lists; struct perf_sample prev_sample; struct callchain_cursor_node *prev_lbr_cursor; + unsigned int prev_lbr_cursor_size; }; DECLARE_RC_STRUCT(thread) { @@ -59,7 +60,11 @@ DECLARE_RC_STRUCT(thread) { struct srccode_state srccode_state; bool filter; int filter_entry_depth; - + /** + * @e_machine: The ELF EM_* associated with the thread. EM_NONE if not + * computed. 
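When the maps give no answer and the session looks live, thread__e_machine() falls back to reading two bytes straight out of /proc/<pid>/exe: e_machine sits at byte 18 in both Elf32_Ehdr and Elf64_Ehdr, which is exactly what the _Static_asserts pin down. The same read against the current process, as a runnable sketch:

#include <elf.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint16_t e_machine = EM_NONE;
	int fd = open("/proc/self/exe", O_RDONLY);

	if (fd < 0)
		return 1;
	/* layout: 16-byte e_ident, 2-byte e_type, then e_machine at offset 18 */
	if (pread(fd, &e_machine, sizeof(e_machine), 18) != sizeof(e_machine))
		e_machine = EM_NONE;
	close(fd);
	printf("e_machine = %u (EM_X86_64 = %d)\n", (unsigned)e_machine, EM_X86_64);
	return 0;
}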
+ */ + uint16_t e_machine; /* LBR call stack stitch */ bool lbr_stitch_enable; struct lbr_stitch *lbr_stitch; @@ -121,7 +126,7 @@ struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al); void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, - struct addr_location *al); + bool symbols, struct addr_location *al); int thread__memcpy(struct thread *thread, struct machine *machine, void *buf, u64 ip, int len, bool *is64bit); @@ -231,14 +236,15 @@ static inline struct rw_semaphore *thread__namespaces_lock(struct thread *thread return &RC_CHK_ACCESS(thread)->namespaces_lock; } -static inline struct list_head *thread__comm_list(struct thread *thread) +static inline struct rw_semaphore *thread__comm_lock(struct thread *thread) { - return &RC_CHK_ACCESS(thread)->comm_list; + return &RC_CHK_ACCESS(thread)->comm_lock; } -static inline struct rw_semaphore *thread__comm_lock(struct thread *thread) +static inline struct list_head *thread__comm_list(struct thread *thread) + SHARED_LOCKS_REQUIRED(thread__comm_lock(thread)) { - return &RC_CHK_ACCESS(thread)->comm_lock; + return &RC_CHK_ACCESS(thread)->comm_list; } static inline u64 thread__db_id(const struct thread *thread) @@ -301,6 +307,14 @@ static inline void thread__set_filter_entry_depth(struct thread *thread, int dep RC_CHK_ACCESS(thread)->filter_entry_depth = depth; } +uint16_t thread__e_machine(struct thread *thread, struct machine *machine); + +static inline void thread__set_e_machine(struct thread *thread, uint16_t e_machine) +{ + RC_CHK_ACCESS(thread)->e_machine = e_machine; +} + + static inline bool thread__lbr_stitch_enable(const struct thread *thread) { return RC_CHK_ACCESS(thread)->lbr_stitch_enable; diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index b5f12390c355..ca193c1374ed 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -72,7 +72,7 @@ struct perf_thread_map *thread_map__new_by_tid(pid_t tid) return threads; } -static struct perf_thread_map *__thread_map__new_all_cpus(uid_t uid) +static struct perf_thread_map *thread_map__new_all_cpus(void) { DIR *proc; int max_threads = 32, items, i; @@ -98,15 +98,6 @@ static struct perf_thread_map *__thread_map__new_all_cpus(uid_t uid) if (*end) /* only interested in proper numerical dirents */ continue; - snprintf(path, sizeof(path), "/proc/%s", dirent->d_name); - - if (uid != UINT_MAX) { - struct stat st; - - if (stat(path, &st) != 0 || st.st_uid != uid) - continue; - } - snprintf(path, sizeof(path), "/proc/%d/task", pid); items = scandir(path, &namelist, filter, NULL); if (items <= 0) { @@ -157,24 +148,11 @@ out_free_namelist: goto out_closedir; } -struct perf_thread_map *thread_map__new_all_cpus(void) -{ - return __thread_map__new_all_cpus(UINT_MAX); -} - -struct perf_thread_map *thread_map__new_by_uid(uid_t uid) -{ - return __thread_map__new_all_cpus(uid); -} - -struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) +struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid) { if (pid != -1) return thread_map__new_by_pid(pid); - if (tid == -1 && uid != UINT_MAX) - return thread_map__new_by_uid(uid); - return thread_map__new_by_tid(tid); } @@ -289,15 +267,11 @@ out_free_threads: goto out; } -struct perf_thread_map *thread_map__new_str(const char *pid, const char *tid, - uid_t uid, bool all_threads) +struct perf_thread_map *thread_map__new_str(const char *pid, const char *tid, bool all_threads) { if (pid) return thread_map__new_by_pid_str(pid); - if 
(!tid && uid != UINT_MAX) - return thread_map__new_by_uid(uid); - if (all_threads) return thread_map__new_all_cpus(); diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 00ec05fc1656..fc16d87f32fb 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -11,13 +11,11 @@ struct perf_record_thread_map; struct perf_thread_map *thread_map__new_dummy(void); struct perf_thread_map *thread_map__new_by_pid(pid_t pid); struct perf_thread_map *thread_map__new_by_tid(pid_t tid); -struct perf_thread_map *thread_map__new_by_uid(uid_t uid); -struct perf_thread_map *thread_map__new_all_cpus(void); -struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); +struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid); struct perf_thread_map *thread_map__new_event(struct perf_record_thread_map *event); struct perf_thread_map *thread_map__new_str(const char *pid, - const char *tid, uid_t uid, bool all_threads); + const char *tid, bool all_threads); struct perf_thread_map *thread_map__new_by_tid_str(const char *tid_str); diff --git a/tools/perf/util/threads.c b/tools/perf/util/threads.c index ff2b169e0085..6ca0b178fb6c 100644 --- a/tools/perf/util/threads.c +++ b/tools/perf/util/threads.c @@ -141,7 +141,7 @@ void threads__remove_all_threads(struct threads *threads) down_write(&table->lock); __threads_table_entry__set_last_match(table, NULL); - hashmap__for_each_entry_safe((&table->shard), cur, tmp, bkt) { + hashmap__for_each_entry_safe(&table->shard, cur, tmp, bkt) { struct thread *old_value; hashmap__delete(&table->shard, cur->key, /*old_key=*/NULL, &old_value); @@ -175,7 +175,7 @@ int threads__for_each_thread(struct threads *threads, size_t bkt; down_read(&table->lock); - hashmap__for_each_entry((&table->shard), cur, bkt) { + hashmap__for_each_entry(&table->shard, cur, bkt) { int rc = fn((struct thread *)cur->pvalue, data); if (rc != 0) { diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 302443921681..1b91ccd4d523 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -20,7 +20,7 @@ int parse_nsec_time(const char *str, u64 *ptime) u64 time_sec, time_nsec; char *end; - time_sec = strtoul(str, &end, 10); + time_sec = strtoull(str, &end, 10); if (*end != '.' 
&& *end != '\0') return -1; @@ -38,7 +38,7 @@ int parse_nsec_time(const char *str, u64 *ptime) for (i = strlen(nsec_buf); i < 9; i++) nsec_buf[i] = '0'; - time_nsec = strtoul(nsec_buf, &end, 10); + time_nsec = strtoull(nsec_buf, &end, 10); if (*end != '\0') return -1; } else diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c new file mode 100644 index 000000000000..27ba5849c74a --- /dev/null +++ b/tools/perf/util/tool.c @@ -0,0 +1,499 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "data.h" +#include "debug.h" +#include "event.h" +#include "header.h" +#include "session.h" +#include "stat.h" +#include "tool.h" +#include "tsc.h" +#include <linux/compiler.h> +#include <sys/mman.h> +#include <stddef.h> +#include <unistd.h> + +#ifdef HAVE_ZSTD_SUPPORT +static int perf_session__process_compressed_event(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event, u64 file_offset, + const char *file_path) +{ + void *src; + size_t decomp_size, src_size; + u64 decomp_last_rem = 0; + size_t mmap_len, decomp_len = perf_session__env(session)->comp_mmap_len; + struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last; + + if (decomp_last) { + decomp_last_rem = decomp_last->size - decomp_last->head; + decomp_len += decomp_last_rem; + } + + mmap_len = sizeof(struct decomp) + decomp_len; + decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (decomp == MAP_FAILED) { + pr_err("Couldn't allocate memory for decompression\n"); + return -1; + } + + decomp->file_pos = file_offset; + decomp->file_path = file_path; + decomp->mmap_len = mmap_len; + decomp->head = 0; + + if (decomp_last_rem) { + memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem); + decomp->size = decomp_last_rem; + } + + if (event->header.type == PERF_RECORD_COMPRESSED) { + src = (void *)event + sizeof(struct perf_record_compressed); + src_size = event->pack.header.size - sizeof(struct perf_record_compressed); + } else if (event->header.type == PERF_RECORD_COMPRESSED2) { + src = (void *)event + sizeof(struct perf_record_compressed2); + src_size = event->pack2.data_size; + } else { + return -1; + } + + decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size, + &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem); + if (!decomp_size) { + munmap(decomp, mmap_len); + pr_err("Couldn't decompress data\n"); + return -1; + } + + decomp->size += decomp_size; + + if (session->active_decomp->decomp == NULL) + session->active_decomp->decomp = decomp; + else + session->active_decomp->decomp_last->next = decomp; + + session->active_decomp->decomp_last = decomp; + + pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size); + + return 0; +} +#endif + +static int process_event_synth_tracing_data_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_event_synth_attr_stub(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct evlist **pevlist __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_event_synth_event_update_stub(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct evlist **pevlist __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_event_update(event, stdout); + + dump_printf(": unhandled!\n"); 
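
The parse_nsec_time() change above swaps strtoul() for strtoull(): unsigned long is 32 bits on 32-bit builds, so large second values were clamped to ULONG_MAX before ever reaching the u64 result. A standalone illustration:

/* Prints two different values on an ILP32 userland, identical on LP64. */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *str = "18446744073.709551";
	char *end;
	unsigned long narrow = strtoul(str, &end, 10);		/* clamps if long is 32-bit */
	unsigned long long wide = strtoull(str, &end, 10);	/* always 64-bit */

	printf("strtoul : %lu\nstrtoull: %llu\n", narrow, wide);
	return 0;
}
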
+ return 0; +} + +int process_event_sample_stub(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct evsel *evsel __maybe_unused, + struct machine *machine __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_event_stub(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_finished_round_stub(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct ordered_events *oe __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min(n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 process_event_auxtrace_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event) +{ + dump_printf(": unhandled!\n"); + if (perf_data__is_pipe(session->data)) + skipn(perf_data__fd(session->data), event->auxtrace.size); + return event->auxtrace.size; +} + +static int process_event_op2_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + + +static +int process_event_thread_map_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_thread_map(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static +int process_event_cpu_map_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_cpu_map(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static +int process_event_stat_config_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_stat_config(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_stat_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *perf_session __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_stat(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_stat_round_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *perf_session __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_stat_round(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_event_time_conv_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *perf_session __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_time_conv(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static int perf_session__process_compressed_event_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused, + u64 file_offset __maybe_unused, + const char *file_path __maybe_unused) 
+{ + dump_printf(": unhandled!\n"); + return 0; +} + +static int perf_event__process_bpf_metadata_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *perf_session __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_bpf_metadata(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +void perf_tool__init(struct perf_tool *tool, bool ordered_events) +{ + tool->ordered_events = ordered_events; + tool->ordering_requires_timestamps = false; + tool->namespace_events = false; + tool->cgroup_events = false; + tool->no_warn = false; + tool->show_feat_hdr = SHOW_FEAT_NO_HEADER; + tool->merge_deferred_callchains = true; + + tool->sample = process_event_sample_stub; + tool->mmap = process_event_stub; + tool->mmap2 = process_event_stub; + tool->comm = process_event_stub; + tool->namespaces = process_event_stub; + tool->cgroup = process_event_stub; + tool->fork = process_event_stub; + tool->exit = process_event_stub; + tool->lost = perf_event__process_lost; + tool->lost_samples = perf_event__process_lost_samples; + tool->aux = perf_event__process_aux; + tool->itrace_start = perf_event__process_itrace_start; + tool->context_switch = perf_event__process_switch; + tool->ksymbol = perf_event__process_ksymbol; + tool->bpf = perf_event__process_bpf; + tool->text_poke = perf_event__process_text_poke; + tool->aux_output_hw_id = perf_event__process_aux_output_hw_id; + tool->read = process_event_sample_stub; + tool->throttle = process_event_stub; + tool->unthrottle = process_event_stub; + tool->callchain_deferred = process_event_sample_stub; + tool->attr = process_event_synth_attr_stub; + tool->event_update = process_event_synth_event_update_stub; + tool->tracing_data = process_event_synth_tracing_data_stub; + tool->build_id = process_event_op2_stub; + + if (ordered_events) + tool->finished_round = perf_event__process_finished_round; + else + tool->finished_round = process_finished_round_stub; + + tool->id_index = process_event_op2_stub; + tool->auxtrace_info = process_event_op2_stub; + tool->auxtrace = process_event_auxtrace_stub; + tool->auxtrace_error = process_event_op2_stub; + tool->thread_map = process_event_thread_map_stub; + tool->cpu_map = process_event_cpu_map_stub; + tool->stat_config = process_event_stat_config_stub; + tool->stat = process_stat_stub; + tool->stat_round = process_stat_round_stub; + tool->time_conv = process_event_time_conv_stub; + tool->feature = process_event_op2_stub; +#ifdef HAVE_ZSTD_SUPPORT + tool->compressed = perf_session__process_compressed_event; +#else + tool->compressed = perf_session__process_compressed_event_stub; +#endif + tool->finished_init = process_event_op2_stub; + tool->bpf_metadata = perf_event__process_bpf_metadata_stub; +} + +bool perf_tool__compressed_is_stub(const struct perf_tool *tool) +{ + return tool->compressed == perf_session__process_compressed_event_stub; +} + +#define CREATE_DELEGATE_SAMPLE(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + union perf_event *event, \ + struct perf_sample *sample, \ + struct evsel *evsel, \ + struct machine *machine) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, event, sample, evsel, machine); \ + } +CREATE_DELEGATE_SAMPLE(read); +CREATE_DELEGATE_SAMPLE(sample); +CREATE_DELEGATE_SAMPLE(callchain_deferred); + +#define CREATE_DELEGATE_ATTR(name) \ + static int delegate_ ## name(const struct perf_tool 
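
perf_tool__init() above gives every callback a safe "unhandled" stub so a tool overrides only what it consumes. A minimal sketch of the intended pattern (my_sample is a hypothetical handler; assumes perf's internal headers):

#include <inttypes.h>
#include <stdio.h>
#include "util/tool.h"
#include "util/sample.h"

static int my_sample(const struct perf_tool *tool __maybe_unused,
		     union perf_event *event __maybe_unused,
		     struct perf_sample *sample,
		     struct evsel *evsel __maybe_unused,
		     struct machine *machine __maybe_unused)
{
	printf("sample: ip=%#" PRIx64 "\n", sample->ip);
	return 0;
}

static void setup(struct perf_tool *tool)
{
	perf_tool__init(tool, /*ordered_events=*/true);
	tool->sample = my_sample;	/* everything else stays a stub */
}
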
*tool, \ + union perf_event *event, \ + struct evlist **pevlist) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, event, pevlist); \ + } +CREATE_DELEGATE_ATTR(attr); +CREATE_DELEGATE_ATTR(event_update); + +#define CREATE_DELEGATE_OE(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + union perf_event *event, \ + struct ordered_events *oe) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, event, oe); \ + } +CREATE_DELEGATE_OE(finished_round); + +#define CREATE_DELEGATE_OP(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + union perf_event *event, \ + struct perf_sample *sample, \ + struct machine *machine) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, event, sample, machine); \ + } +CREATE_DELEGATE_OP(aux); +CREATE_DELEGATE_OP(aux_output_hw_id); +CREATE_DELEGATE_OP(bpf); +CREATE_DELEGATE_OP(cgroup); +CREATE_DELEGATE_OP(comm); +CREATE_DELEGATE_OP(context_switch); +CREATE_DELEGATE_OP(exit); +CREATE_DELEGATE_OP(fork); +CREATE_DELEGATE_OP(itrace_start); +CREATE_DELEGATE_OP(ksymbol); +CREATE_DELEGATE_OP(lost); +CREATE_DELEGATE_OP(lost_samples); +CREATE_DELEGATE_OP(mmap); +CREATE_DELEGATE_OP(mmap2); +CREATE_DELEGATE_OP(namespaces); +CREATE_DELEGATE_OP(text_poke); +CREATE_DELEGATE_OP(throttle); +CREATE_DELEGATE_OP(unthrottle); + +#define CREATE_DELEGATE_OP2(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + struct perf_session *session, \ + union perf_event *event) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, session, event); \ + } +CREATE_DELEGATE_OP2(auxtrace_error); +CREATE_DELEGATE_OP2(auxtrace_info); +CREATE_DELEGATE_OP2(bpf_metadata); +CREATE_DELEGATE_OP2(build_id); +CREATE_DELEGATE_OP2(cpu_map); +CREATE_DELEGATE_OP2(feature); +CREATE_DELEGATE_OP2(finished_init); +CREATE_DELEGATE_OP2(id_index); +CREATE_DELEGATE_OP2(stat); +CREATE_DELEGATE_OP2(stat_config); +CREATE_DELEGATE_OP2(stat_round); +CREATE_DELEGATE_OP2(thread_map); +CREATE_DELEGATE_OP2(time_conv); +CREATE_DELEGATE_OP2(tracing_data); + +#define CREATE_DELEGATE_OP3(name) \ + static s64 delegate_ ## name(const struct perf_tool *tool, \ + struct perf_session *session, \ + union perf_event *event) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, session, event); \ + } +CREATE_DELEGATE_OP3(auxtrace); + +#define CREATE_DELEGATE_OP4(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + struct perf_session *session, \ + union perf_event *event, \ + u64 data, \ + const char *str) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, session, event, data, str); \ + } +CREATE_DELEGATE_OP4(compressed); + +void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate) +{ + tool->delegate = delegate; + + tool->tool.ordered_events = delegate->ordered_events; + 
tool->tool.ordering_requires_timestamps = delegate->ordering_requires_timestamps; + tool->tool.namespace_events = delegate->namespace_events; + tool->tool.cgroup_events = delegate->cgroup_events; + tool->tool.no_warn = delegate->no_warn; + tool->tool.show_feat_hdr = delegate->show_feat_hdr; + tool->tool.merge_deferred_callchains = delegate->merge_deferred_callchains; + + tool->tool.sample = delegate_sample; + tool->tool.read = delegate_read; + + tool->tool.mmap = delegate_mmap; + tool->tool.mmap2 = delegate_mmap2; + tool->tool.comm = delegate_comm; + tool->tool.namespaces = delegate_namespaces; + tool->tool.cgroup = delegate_cgroup; + tool->tool.fork = delegate_fork; + tool->tool.exit = delegate_exit; + tool->tool.lost = delegate_lost; + tool->tool.lost_samples = delegate_lost_samples; + tool->tool.aux = delegate_aux; + tool->tool.itrace_start = delegate_itrace_start; + tool->tool.aux_output_hw_id = delegate_aux_output_hw_id; + tool->tool.context_switch = delegate_context_switch; + tool->tool.throttle = delegate_throttle; + tool->tool.unthrottle = delegate_unthrottle; + tool->tool.ksymbol = delegate_ksymbol; + tool->tool.bpf = delegate_bpf; + tool->tool.text_poke = delegate_text_poke; + tool->tool.callchain_deferred = delegate_callchain_deferred; + + tool->tool.attr = delegate_attr; + tool->tool.event_update = delegate_event_update; + + tool->tool.tracing_data = delegate_tracing_data; + + tool->tool.finished_round = delegate_finished_round; + + tool->tool.build_id = delegate_build_id; + tool->tool.id_index = delegate_id_index; + tool->tool.auxtrace_info = delegate_auxtrace_info; + tool->tool.auxtrace_error = delegate_auxtrace_error; + tool->tool.time_conv = delegate_time_conv; + tool->tool.thread_map = delegate_thread_map; + tool->tool.cpu_map = delegate_cpu_map; + tool->tool.stat_config = delegate_stat_config; + tool->tool.stat = delegate_stat; + tool->tool.stat_round = delegate_stat_round; + tool->tool.feature = delegate_feature; + tool->tool.finished_init = delegate_finished_init; + tool->tool.bpf_metadata = delegate_bpf_metadata; + tool->tool.compressed = delegate_compressed; + tool->tool.auxtrace = delegate_auxtrace; +} diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index c957fb849ac6..e96b69d25a5b 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -15,23 +15,25 @@ struct perf_tool; struct machine; struct ordered_events; -typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event, +typedef int (*event_sample)(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct evsel *evsel, struct machine *machine); -typedef int (*event_op)(struct perf_tool *tool, union perf_event *event, +typedef int (*event_op)(const struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -typedef int (*event_attr_op)(struct perf_tool *tool, +typedef int (*event_attr_op)(const struct perf_tool *tool, union perf_event *event, struct evlist **pevlist); -typedef int (*event_op2)(struct perf_session *session, union perf_event *event); -typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event); -typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data, - const char *str); +typedef int (*event_op2)(const struct perf_tool *tool, struct perf_session *session, + union perf_event *event); +typedef s64 (*event_op3)(const struct perf_tool *tool, struct perf_session *session, + union perf_event *event); +typedef int (*event_op4)(const struct 
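
Each CREATE_DELEGATE_* thunk recovers the wrapper with container_of() and forwards to the wrapped tool, so a caller can interpose on selected events without modifying the original. Sketch of the intended use (the override comment is an assumption from the structure, not shown in the patch):

#include "util/tool.h"

static struct delegate_tool wrapper;

static void interpose(struct perf_tool *inner)
{
	delegate_tool__init(&wrapper, inner);
	/* Hand &wrapper.tool to session processing: every callback
	 * forwards to 'inner'. Individual entries on wrapper.tool
	 * can first be replaced to observe or filter events. */
}
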
perf_tool *tool, struct perf_session *session, + union perf_event *event, u64 data, const char *str); -typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event, +typedef int (*event_oe)(const struct perf_tool *tool, union perf_event *event, struct ordered_events *oe); enum show_feature_header { @@ -42,7 +44,8 @@ enum show_feature_header { struct perf_tool { event_sample sample, - read; + read, + callchain_deferred; event_op mmap, mmap2, comm, @@ -77,7 +80,8 @@ struct perf_tool { stat, stat_round, feature, - finished_init; + finished_init, + bpf_metadata; event_op4 compressed; event_op3 auxtrace; bool ordered_events; @@ -85,7 +89,28 @@ struct perf_tool { bool namespace_events; bool cgroup_events; bool no_warn; + bool dont_split_sample_group; + bool merge_deferred_callchains; enum show_feature_header show_feat_hdr; }; +void perf_tool__init(struct perf_tool *tool, bool ordered_events); + +bool perf_tool__compressed_is_stub(const struct perf_tool *tool); + +int process_event_sample_stub(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel, + struct machine *machine); + +struct delegate_tool { + /** @tool: The actual tool that calls the delegate. */ + struct perf_tool tool; + /** @delegate: The tool that is delegated to. */ + struct perf_tool *delegate; +}; + +void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate); + #endif /* __PERF_TOOL_H */ diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c new file mode 100644 index 000000000000..37c4eae0bef1 --- /dev/null +++ b/tools/perf/util/tool_pmu.c @@ -0,0 +1,598 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "cgroup.h" +#include "counts.h" +#include "cputopo.h" +#include "debug.h" +#include "evsel.h" +#include "pmu.h" +#include "print-events.h" +#include "smt.h" +#include "stat.h" +#include "time-utils.h" +#include "tool_pmu.h" +#include "tsc.h" +#include <api/fs/fs.h> +#include <api/io.h> +#include <internal/threadmap.h> +#include <perf/cpumap.h> +#include <perf/threadmap.h> +#include <fcntl.h> +#include <strings.h> + +static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = { + NULL, + "duration_time", + "user_time", + "system_time", + "has_pmem", + "num_cores", + "num_cpus", + "num_cpus_online", + "num_dies", + "num_packages", + "slots", + "smt_on", + "system_tsc_freq", + "core_wide", + "target_cpu", +}; + +bool tool_pmu__skip_event(const char *name __maybe_unused) +{ +#if !defined(__aarch64__) + /* The slots event should only appear on arm64. */ + if (strcasecmp(name, "slots") == 0) + return true; +#endif +#if !defined(__i386__) && !defined(__x86_64__) + /* The system_tsc_freq event should only appear on x86. 
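
tool_pmu__event_to_str() returns NULL both for out-of-range values and for events rejected by tool_pmu__skip_event(), so events impossible on the build architecture ("slots" off arm64, "system_tsc_freq" off x86) never surface. A sketch using the iteration macro added in tool_pmu.h later in this patch:

#include <stdio.h>
#include "util/tool_pmu.h"

static void list_tool_events(void)
{
	int ev;

	tool_pmu__for_each_event(ev) {
		const char *name = tool_pmu__event_to_str(ev);

		if (name)	/* NULL when skipped on this arch */
			printf("tool/%s/\n", name);
	}
}
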
*/ + if (strcasecmp(name, "system_tsc_freq") == 0) + return true; +#endif + return false; +} + +int tool_pmu__num_skip_events(void) +{ + int num = 0; + +#if !defined(__aarch64__) + num++; +#endif +#if !defined(__i386__) && !defined(__x86_64__) + num++; +#endif + return num; +} + +const char *tool_pmu__event_to_str(enum tool_pmu_event ev) +{ + if ((ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) && + !tool_pmu__skip_event(tool_pmu__event_names[ev])) + return tool_pmu__event_names[ev]; + + return NULL; +} + +enum tool_pmu_event tool_pmu__str_to_event(const char *str) +{ + int i; + + if (tool_pmu__skip_event(str)) + return TOOL_PMU__EVENT_NONE; + + tool_pmu__for_each_event(i) { + if (!strcasecmp(str, tool_pmu__event_names[i])) + return i; + } + return TOOL_PMU__EVENT_NONE; +} + +bool perf_pmu__is_tool(const struct perf_pmu *pmu) +{ + return pmu && pmu->type == PERF_PMU_TYPE_TOOL; +} + +bool evsel__is_tool(const struct evsel *evsel) +{ + return perf_pmu__is_tool(evsel->pmu); +} + +enum tool_pmu_event evsel__tool_event(const struct evsel *evsel) +{ + if (!evsel__is_tool(evsel)) + return TOOL_PMU__EVENT_NONE; + + return (enum tool_pmu_event)evsel->core.attr.config; +} + +const char *evsel__tool_pmu_event_name(const struct evsel *evsel) +{ + return tool_pmu__event_to_str(evsel->core.attr.config); +} + +struct perf_cpu_map *tool_pmu__cpus(struct perf_event_attr *attr) +{ + static struct perf_cpu_map *cpu0_map; + enum tool_pmu_event event = (enum tool_pmu_event)attr->config; + + if (event <= TOOL_PMU__EVENT_NONE || event >= TOOL_PMU__EVENT_MAX) { + pr_err("Invalid tool PMU event config %llx\n", attr->config); + return NULL; + } + if (event == TOOL_PMU__EVENT_USER_TIME || event == TOOL_PMU__EVENT_SYSTEM_TIME) + return cpu_map__online(); + + if (!cpu0_map) + cpu0_map = perf_cpu_map__new_int(0); + return perf_cpu_map__get(cpu0_map); +} + +static bool read_until_char(struct io *io, char e) +{ + int c; + + do { + c = io__get_char(io); + if (c == -1) + return false; + } while (c != e); + return true; +} + +static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val) +{ + char buf[256]; + struct io io; + int i; + + io__init(&io, fd, buf, sizeof(buf)); + + /* Skip lines to relevant CPU. */ + for (i = -1; i < cpu.cpu; i++) { + if (!read_until_char(&io, '\n')) + return -EINVAL; + } + /* Skip to "cpu". */ + if (io__get_char(&io) != 'c') return -EINVAL; + if (io__get_char(&io) != 'p') return -EINVAL; + if (io__get_char(&io) != 'u') return -EINVAL; + + /* Skip N of cpuN. */ + if (!read_until_char(&io, ' ')) + return -EINVAL; + + i = 1; + while (true) { + if (io__get_dec(&io, val) != ' ') + break; + if (field == i) + return 0; + i++; + } + return -EINVAL; +} + +static int read_pid_stat_field(int fd, int field, __u64 *val) +{ + char buf[256]; + struct io io; + int c, i; + + io__init(&io, fd, buf, sizeof(buf)); + if (io__get_dec(&io, val) != ' ') + return -EINVAL; + if (field == 1) + return 0; + + /* Skip comm. */ + if (io__get_char(&io) != '(' || !read_until_char(&io, ')')) + return -EINVAL; + if (field == 2) + return -EINVAL; /* String can't be returned. */ + + /* Skip state */ + if (io__get_char(&io) != ' ' || io__get_char(&io) == -1) + return -EINVAL; + if (field == 3) + return -EINVAL; /* String can't be returned. 
*/ + + /* Loop over numeric fields*/ + if (io__get_char(&io) != ' ') + return -EINVAL; + + i = 4; + while (true) { + c = io__get_dec(&io, val); + if (c == -1) + return -EINVAL; + if (c == -2) { + /* Assume a -ve was read */ + c = io__get_dec(&io, val); + *val *= -1; + } + if (c != ' ') + return -EINVAL; + if (field == i) + return 0; + i++; + } + return -EINVAL; +} + +int evsel__tool_pmu_prepare_open(struct evsel *evsel, + struct perf_cpu_map *cpus, + int nthreads) +{ + if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME || + evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) && + !evsel->start_times) { + evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus), + nthreads, + sizeof(__u64)); + if (!evsel->start_times) + return -ENOMEM; + } + return 0; +} + +#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) + +int evsel__tool_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx) +{ + enum tool_pmu_event ev = evsel__tool_event(evsel); + int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno; + + if (ev == TOOL_PMU__EVENT_NUM_CPUS) + return 0; + + if (ev == TOOL_PMU__EVENT_DURATION_TIME) { + if (evsel->core.attr.sample_period) /* no sampling */ + return -EINVAL; + evsel->start_time = rdclock(); + return 0; + } + + if (evsel->cgrp) + pid = evsel->cgrp->fd; + + nthreads = perf_thread_map__nr(threads); + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { + for (thread = 0; thread < nthreads; thread++) { + if (!evsel->cgrp && !evsel->core.system_wide) + pid = perf_thread_map__pid(threads, thread); + + if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) { + bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME; + __u64 *start_time = NULL; + int fd; + + if (evsel->core.attr.sample_period) { + /* no sampling */ + err = -EINVAL; + goto out_close; + } + if (pid > -1) { + char buf[64]; + + snprintf(buf, sizeof(buf), "/proc/%d/stat", pid); + fd = open(buf, O_RDONLY); + evsel->pid_stat = true; + } else { + fd = open("/proc/stat", O_RDONLY); + } + FD(evsel, idx, thread) = fd; + if (fd < 0) { + err = -errno; + goto out_close; + } + start_time = xyarray__entry(evsel->start_times, idx, thread); + if (pid > -1) { + err = read_pid_stat_field(fd, system ? 15 : 14, + start_time); + } else { + struct perf_cpu cpu; + + cpu = perf_cpu_map__cpu(evsel->core.cpus, idx); + err = read_stat_field(fd, cpu, system ? 
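
read_pid_stat_field() targets fields 14 and 15 of /proc/<pid>/stat, which proc(5) defines as utime and stime (again in clock ticks). The comm field is parenthesized and may contain spaces, hence the skip-to-')' parsing rather than naive whitespace splitting. A standalone rendition:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[1024];
	char *p;
	unsigned long long utime, stime;
	FILE *f = fopen("/proc/self/stat", "r");

	if (!f)
		return 1;
	if (!fgets(buf, sizeof(buf), f)) {
		fclose(f);
		return 1;
	}
	fclose(f);

	/* Resume after the last ')' so spaces in comm cannot desync. */
	p = strrchr(buf, ')');
	/* state(3) ppid pgrp session tty tpgid flags minflt cminflt
	 * majflt cmajflt, then utime(14) stime(15). */
	if (p && sscanf(p + 2, "%*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %llu %llu",
			&utime, &stime) == 2)
		printf("utime=%llu stime=%llu (ticks)\n", utime, stime);
	return 0;
}
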
3 : 1, + start_time); + } + if (err) + goto out_close; + } + + } + } + return 0; +out_close: + if (err) + threads->err_thread = thread; + + old_errno = errno; + do { + while (--thread >= 0) { + if (FD(evsel, idx, thread) >= 0) + close(FD(evsel, idx, thread)); + FD(evsel, idx, thread) = -1; + } + thread = nthreads; + } while (--idx >= 0); + errno = old_errno; + return err; +} + +#if !defined(__i386__) && !defined(__x86_64__) +u64 arch_get_tsc_freq(void) +{ + return 0; +} +#endif + +#if !defined(__aarch64__) +u64 tool_pmu__cpu_slots_per_cycle(void) +{ + return 0; +} +#endif + +static bool has_pmem(void) +{ + static bool has_pmem, cached; + const char *sysfs = sysfs__mountpoint(); + char path[PATH_MAX]; + + if (!cached) { + snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs); + has_pmem = access(path, F_OK) == 0; + cached = true; + } + return has_pmem; +} + +bool tool_pmu__read_event(enum tool_pmu_event ev, + struct evsel *evsel, + bool system_wide, + const char *user_requested_cpu_list, + u64 *result) +{ + const struct cpu_topology *topology; + + switch (ev) { + case TOOL_PMU__EVENT_HAS_PMEM: + *result = has_pmem() ? 1 : 0; + return true; + + case TOOL_PMU__EVENT_NUM_CORES: + topology = online_topology(); + *result = topology->core_cpus_lists; + return true; + + case TOOL_PMU__EVENT_NUM_CPUS: + if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) { + /* No evsel to be specific to. */ + *result = cpu__max_present_cpu().cpu; + } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) { + /* Evsel just has specific CPUs. */ + *result = perf_cpu_map__nr(evsel->core.cpus); + } else { + /* + * "Any CPU" event that can be scheduled on any CPU in + * the PMU's cpumask. The PMU cpumask should be saved in + * pmu_cpus. If not present fall back to max. + */ + if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) + *result = perf_cpu_map__nr(evsel->core.pmu_cpus); + else + *result = cpu__max_present_cpu().cpu; + } + return true; + + case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: { + struct perf_cpu_map *online = cpu_map__online(); + + if (!online) + return false; + + if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) { + /* No evsel to be specific to. */ + *result = perf_cpu_map__nr(online); + } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) { + /* Evsel just has specific CPUs. */ + struct perf_cpu_map *tmp = + perf_cpu_map__intersect(online, evsel->core.cpus); + + *result = perf_cpu_map__nr(tmp); + perf_cpu_map__put(tmp); + } else { + /* + * "Any CPU" event that can be scheduled on any CPU in + * the PMU's cpumask. The PMU cpumask should be saved in + * pmu_cpus, if not present then just the online cpu + * mask. + */ + if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) { + struct perf_cpu_map *tmp = + perf_cpu_map__intersect(online, evsel->core.pmu_cpus); + + *result = perf_cpu_map__nr(tmp); + perf_cpu_map__put(tmp); + } else { + *result = perf_cpu_map__nr(online); + } + } + perf_cpu_map__put(online); + return true; + } + case TOOL_PMU__EVENT_NUM_DIES: + topology = online_topology(); + *result = topology->die_cpus_lists; + return true; + + case TOOL_PMU__EVENT_NUM_PACKAGES: + topology = online_topology(); + *result = topology->package_cpus_lists; + return true; + + case TOOL_PMU__EVENT_SLOTS: + *result = tool_pmu__cpu_slots_per_cycle(); + return *result ? true : false; + + case TOOL_PMU__EVENT_SMT_ON: + *result = smt_on() ? 
1 : 0; + return true; + + case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: + *result = arch_get_tsc_freq(); + return true; + + case TOOL_PMU__EVENT_CORE_WIDE: + *result = core_wide(system_wide, user_requested_cpu_list) ? 1 : 0; + return true; + + case TOOL_PMU__EVENT_TARGET_CPU: + *result = system_wide || (user_requested_cpu_list != NULL) ? 1 : 0; + return true; + + case TOOL_PMU__EVENT_NONE: + case TOOL_PMU__EVENT_DURATION_TIME: + case TOOL_PMU__EVENT_USER_TIME: + case TOOL_PMU__EVENT_SYSTEM_TIME: + case TOOL_PMU__EVENT_MAX: + default: + return false; + } +} + +static void perf_counts__update(struct perf_counts_values *count, + const struct perf_counts_values *old_count, + bool raw, u64 val) +{ + /* + * The values of enabled and running must make a ratio of 100%. The + * exact values don't matter as long as they are non-zero to avoid + * issues with evsel__count_has_error. + */ + if (old_count) { + count->val = raw ? val : old_count->val + val; + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + count->lost = old_count->lost; + } else { + count->val = val; + count->run++; + count->ena++; + count->lost = 0; + } +} + +int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) +{ + __u64 *start_time, cur_time, delta_start; + int err = 0; + struct perf_counts_values *count, *old_count = NULL; + bool adjust = false; + enum tool_pmu_event ev = evsel__tool_event(evsel); + + count = perf_counts(evsel->counts, cpu_map_idx, thread); + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + + switch (ev) { + case TOOL_PMU__EVENT_HAS_PMEM: + case TOOL_PMU__EVENT_NUM_CORES: + case TOOL_PMU__EVENT_NUM_CPUS: + case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: + case TOOL_PMU__EVENT_NUM_DIES: + case TOOL_PMU__EVENT_NUM_PACKAGES: + case TOOL_PMU__EVENT_SLOTS: + case TOOL_PMU__EVENT_SMT_ON: + case TOOL_PMU__EVENT_CORE_WIDE: + case TOOL_PMU__EVENT_TARGET_CPU: + case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: { + u64 val = 0; + + if (cpu_map_idx == 0 && thread == 0) { + if (!tool_pmu__read_event(ev, evsel, + stat_config.system_wide, + stat_config.user_requested_cpu_list, + &val)) { + count->lost++; + val = 0; + } + } + perf_counts__update(count, old_count, /*raw=*/false, val); + return 0; + } + case TOOL_PMU__EVENT_DURATION_TIME: + /* + * Pretend duration_time is only on the first CPU and thread, or + * else aggregation will scale duration_time by the number of + * CPUs/threads. + */ + start_time = &evsel->start_time; + if (cpu_map_idx == 0 && thread == 0) + cur_time = rdclock(); + else + cur_time = *start_time; + break; + case TOOL_PMU__EVENT_USER_TIME: + case TOOL_PMU__EVENT_SYSTEM_TIME: { + bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME; + int fd = FD(evsel, cpu_map_idx, thread); + + start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread); + lseek(fd, SEEK_SET, 0); + if (evsel->pid_stat) { + /* The event exists solely on 1 CPU. */ + if (cpu_map_idx == 0) + err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time); + else + cur_time = 0; + } else { + /* The event is for all threads. */ + if (thread == 0) { + struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, + cpu_map_idx); + + err = read_stat_field(fd, cpu, system ? 
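
As the comment in perf_counts__update() notes, only the ena/run ratio matters downstream: keeping the two equal and non-zero makes the usual val * ena / run scaling an identity, and evsel__count_has_error() never flags the synthetic counts as multiplexed. A toy demonstration of the invariant:

#include <stdio.h>

struct counts { unsigned long long val, ena, run; };

static void update(struct counts *c, unsigned long long val)
{
	c->val = val;
	c->ena++;	/* advance in lockstep: always "100% enabled" */
	c->run++;
}

int main(void)
{
	struct counts c = { 0, 0, 0 };

	update(&c, 42);
	printf("scaled = %llu\n", c.val * c.ena / c.run);	/* still 42 */
	return 0;
}
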
3 : 1, &cur_time); + } else { + cur_time = 0; + } + } + adjust = true; + break; + } + case TOOL_PMU__EVENT_NONE: + case TOOL_PMU__EVENT_MAX: + default: + err = -EINVAL; + } + if (err) + return err; + + delta_start = cur_time - *start_time; + if (adjust) { + __u64 ticks_per_sec = sysconf(_SC_CLK_TCK); + + delta_start *= 1e9 / ticks_per_sec; + } + perf_counts__update(count, old_count, /*raw=*/true, delta_start); + return 0; +} + +struct perf_pmu *tool_pmu__new(void) +{ + struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu)); + + if (!tool) + return NULL; + + if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) { + perf_pmu__delete(tool); + return NULL; + } + tool->events_table = find_core_events_table("common", "common"); + return tool; +} diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h new file mode 100644 index 000000000000..ea343d1983d3 --- /dev/null +++ b/tools/perf/util/tool_pmu.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TOOL_PMU_H +#define __TOOL_PMU_H + +#include "pmu.h" + +struct evsel; +struct perf_thread_map; +struct print_callbacks; + +enum tool_pmu_event { + TOOL_PMU__EVENT_NONE = 0, + TOOL_PMU__EVENT_DURATION_TIME, + TOOL_PMU__EVENT_USER_TIME, + TOOL_PMU__EVENT_SYSTEM_TIME, + TOOL_PMU__EVENT_HAS_PMEM, + TOOL_PMU__EVENT_NUM_CORES, + TOOL_PMU__EVENT_NUM_CPUS, + TOOL_PMU__EVENT_NUM_CPUS_ONLINE, + TOOL_PMU__EVENT_NUM_DIES, + TOOL_PMU__EVENT_NUM_PACKAGES, + TOOL_PMU__EVENT_SLOTS, + TOOL_PMU__EVENT_SMT_ON, + TOOL_PMU__EVENT_SYSTEM_TSC_FREQ, + TOOL_PMU__EVENT_CORE_WIDE, + TOOL_PMU__EVENT_TARGET_CPU, + + TOOL_PMU__EVENT_MAX, +}; + +#define tool_pmu__for_each_event(ev) \ + for ((ev) = TOOL_PMU__EVENT_DURATION_TIME; (ev) < TOOL_PMU__EVENT_MAX; ev++) + +const char *tool_pmu__event_to_str(enum tool_pmu_event ev); +enum tool_pmu_event tool_pmu__str_to_event(const char *str); +bool tool_pmu__skip_event(const char *name); +int tool_pmu__num_skip_events(void); + +bool tool_pmu__read_event(enum tool_pmu_event ev, + struct evsel *evsel, + bool system_wide, + const char *user_requested_cpu_list, + u64 *result); + + +u64 tool_pmu__cpu_slots_per_cycle(void); + +bool perf_pmu__is_tool(const struct perf_pmu *pmu); +struct perf_cpu_map *tool_pmu__cpus(struct perf_event_attr *attr); + +bool evsel__is_tool(const struct evsel *evsel); +enum tool_pmu_event evsel__tool_event(const struct evsel *evsel); +const char *evsel__tool_pmu_event_name(const struct evsel *evsel); +int evsel__tool_pmu_prepare_open(struct evsel *evsel, + struct perf_cpu_map *cpus, + int nthreads); +int evsel__tool_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx); +int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread); + +struct perf_pmu *tool_pmu__new(void); + +#endif /* __TOOL_PMU_H */ diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 4db3d1bd686c..b06e10a116bb 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -88,9 +88,9 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) else if (target->tid) ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s", target->tid); - else if (target->uid_str != NULL) + else if (top->uid_str != NULL) ret += SNPRINTF(bf + ret, size - ret, " (uid: %s", - target->uid_str); + top->uid_str); else ret += SNPRINTF(bf + ret, size - ret, " (all"); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 4c5588dbb131..04ff926846be 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -48,6 +48,7 @@ 
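
The adjust path above converts the procfs tick deltas to nanoseconds with 1e9 / _SC_CLK_TCK before storing them. A standalone rendition (integer math here, where the patch multiplies by a double intermediate):

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned long long ticks = 250;	/* e.g. a utime delta */
	unsigned long long ticks_per_sec = sysconf(_SC_CLK_TCK);
	unsigned long long ns = ticks * (1000000000ULL / ticks_per_sec);

	printf("%llu ticks = %llu ns (CLK_TCK=%llu)\n", ticks, ns, ticks_per_sec);
	return 0;
}
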
struct perf_top { const char *sym_filter; float min_percent; unsigned int nr_threads_synthesize; + const char *uid_str; struct { struct ordered_events *in; diff --git a/tools/perf/util/tp_pmu.c b/tools/perf/util/tp_pmu.c new file mode 100644 index 000000000000..eddb9807131a --- /dev/null +++ b/tools/perf/util/tp_pmu.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "tp_pmu.h" +#include "pmus.h" +#include <api/fs/fs.h> +#include <api/fs/tracing_path.h> +#include <api/io_dir.h> +#include <linux/kernel.h> +#include <errno.h> +#include <string.h> + +int tp_pmu__id(const char *sys, const char *name) +{ + char *tp_dir = get_events_file(sys); + char path[PATH_MAX]; + int id, err; + + if (!tp_dir) + return -1; + + scnprintf(path, PATH_MAX, "%s/%s/id", tp_dir, name); + put_events_file(tp_dir); + err = filename__read_int(path, &id); + if (err) + return err; + + return id; +} + + +int tp_pmu__for_each_tp_event(const char *sys, void *state, tp_event_callback cb) +{ + char *evt_path; + struct io_dirent64 *evt_ent; + struct io_dir evt_dir; + int ret = 0; + + evt_path = get_events_file(sys); + if (!evt_path) + return -errno; + + io_dir__init(&evt_dir, open(evt_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (evt_dir.dirfd < 0) { + ret = -errno; + put_events_file(evt_path); + return ret; + } + put_events_file(evt_path); + + while (!ret && (evt_ent = io_dir__readdir(&evt_dir))) { + if (!strcmp(evt_ent->d_name, ".") + || !strcmp(evt_ent->d_name, "..") + || !strcmp(evt_ent->d_name, "enable") + || !strcmp(evt_ent->d_name, "filter")) + continue; + + ret = cb(state, sys, evt_ent->d_name); + if (ret) + break; + } + close(evt_dir.dirfd); + return ret; +} + +int tp_pmu__for_each_tp_sys(void *state, tp_sys_callback cb) +{ + struct io_dirent64 *events_ent; + struct io_dir events_dir; + int ret = 0; + char *events_dir_path = get_tracing_file("events"); + + if (!events_dir_path) + return -errno; + + io_dir__init(&events_dir, open(events_dir_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY)); + if (events_dir.dirfd < 0) { + ret = -errno; + put_events_file(events_dir_path); + return ret; + } + put_events_file(events_dir_path); + + while (!ret && (events_ent = io_dir__readdir(&events_dir))) { + if (!strcmp(events_ent->d_name, ".") || + !strcmp(events_ent->d_name, "..") || + !strcmp(events_ent->d_name, "enable") || + !strcmp(events_ent->d_name, "header_event") || + !strcmp(events_ent->d_name, "header_page")) + continue; + + ret = cb(state, events_ent->d_name); + } + close(events_dir.dirfd); + return ret; +} + +bool perf_pmu__is_tracepoint(const struct perf_pmu *pmu) +{ + return pmu->type == PERF_TYPE_TRACEPOINT; +} + +struct for_each_event_args { + void *state; + pmu_event_callback cb; + const struct perf_pmu *pmu; +}; + +static int for_each_event_cb(void *state, const char *sys_name, const char *evt_name) +{ + struct for_each_event_args *args = state; + char name[2 * FILENAME_MAX + 2]; + /* 16 possible hex digits and 22 other characters and \0. 
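
tp_pmu__id() above reads tracefs events/<sys>/<event>/id, the value a PERF_TYPE_TRACEPOINT event carries in perf_event_attr.config. Sketch of a lookup (tracepoint name chosen for illustration):

#include <stdio.h>
#include "util/tp_pmu.h"

static void show_id(void)
{
	int id = tp_pmu__id("sched", "sched_switch");

	if (id >= 0)	/* negative when tracefs or the event is missing */
		printf("sched:sched_switch -> config=%d\n", id);
}
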
*/ + char encoding[16 + 22]; + char *format = NULL; + size_t format_size; + struct pmu_event_info info = { + .pmu = args->pmu, + .pmu_name = args->pmu->name, + .event_type_desc = "Tracepoint event", + }; + char *tp_dir = get_events_file(sys_name); + char path[PATH_MAX]; + int id, err; + + if (!tp_dir) + return -1; + + scnprintf(path, sizeof(path), "%s/%s/id", tp_dir, evt_name); + err = filename__read_int(path, &id); + if (err == 0) { + snprintf(encoding, sizeof(encoding), "tracepoint/config=0x%x/", id); + info.encoding_desc = encoding; + } + + scnprintf(path, sizeof(path), "%s/%s/format", tp_dir, evt_name); + put_events_file(tp_dir); + err = filename__read_str(path, &format, &format_size); + if (err == 0) { + info.long_desc = format; + for (size_t i = 0 ; i < format_size; i++) { + /* Swap tabs to spaces due to some rendering issues. */ + if (format[i] == '\t') + format[i] = ' '; + } + } + snprintf(name, sizeof(name), "%s:%s", sys_name, evt_name); + info.name = name; + err = args->cb(args->state, &info); + free(format); + return err; +} + +static int for_each_event_sys_cb(void *state, const char *sys_name) +{ + return tp_pmu__for_each_tp_event(sys_name, state, for_each_event_cb); +} + +int tp_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb) +{ + struct for_each_event_args args = { + .state = state, + .cb = cb, + .pmu = pmu, + }; + + return tp_pmu__for_each_tp_sys(&args, for_each_event_sys_cb); +} + +static int num_events_cb(void *state, const char *sys_name __maybe_unused, + const char *evt_name __maybe_unused) +{ + size_t *count = state; + + (*count)++; + return 0; +} + +static int num_events_sys_cb(void *state, const char *sys_name) +{ + return tp_pmu__for_each_tp_event(sys_name, state, num_events_cb); +} + +size_t tp_pmu__num_events(struct perf_pmu *pmu __maybe_unused) +{ + size_t count = 0; + + tp_pmu__for_each_tp_sys(&count, num_events_sys_cb); + return count; +} + +bool tp_pmu__have_event(struct perf_pmu *pmu __maybe_unused, const char *name) +{ + char *dup_name, *colon; + int id; + + colon = strchr(name, ':'); + if (colon == NULL) + return false; + + dup_name = strdup(name); + if (!dup_name) + return false; + + colon = dup_name + (colon - name); + *colon = '\0'; + id = tp_pmu__id(dup_name, colon + 1); + free(dup_name); + return id >= 0; +} diff --git a/tools/perf/util/tp_pmu.h b/tools/perf/util/tp_pmu.h new file mode 100644 index 000000000000..30456bd6943d --- /dev/null +++ b/tools/perf/util/tp_pmu.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __TP_PMU_H +#define __TP_PMU_H + +#include "pmu.h" + +typedef int (*tp_sys_callback)(void *state, const char *sys_name); +typedef int (*tp_event_callback)(void *state, const char *sys_name, const char *evt_name); + +int tp_pmu__id(const char *sys, const char *name); +int tp_pmu__for_each_tp_event(const char *sys, void *state, tp_event_callback cb); +int tp_pmu__for_each_tp_sys(void *state, tp_sys_callback cb); + +bool perf_pmu__is_tracepoint(const struct perf_pmu *pmu); +int tp_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb); +size_t tp_pmu__num_events(struct perf_pmu *pmu); +bool tp_pmu__have_event(struct perf_pmu *pmu, const char *name); + +#endif /* __TP_PMU_H */ diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index f0332bd3a501..9c015fc2bcfb 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -12,7 +12,7 @@ #include <linux/ctype.h> #include <linux/kernel.h> 
-#include <traceevent/event-parse.h> +#include <event-parse.h> static int get_common_field(struct scripting_context *context, int *offset, int *size, const char *type) @@ -99,7 +99,7 @@ unsigned long long read_size(struct tep_event *event, void *ptr, int size) return tep_read_number(event->tep, ptr, size); } -void event_format__fprintf(struct tep_event *event, +void event_format__fprintf(const struct tep_event *event, int cpu, void *data, int size, FILE *fp) { struct tep_record record; @@ -116,12 +116,6 @@ void event_format__fprintf(struct tep_event *event, trace_seq_destroy(&s); } -void event_format__print(struct tep_event *event, - int cpu, void *data, int size) -{ - return event_format__fprintf(event, cpu, data, size, stdout); -} - /* * prev_state is of size long, which is 32 bits on 32 bit architectures. * As it needs to have the same bits for both 32 bit and 64 bit architectures diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 1162c49b8082..ecbbb93f0185 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -11,7 +11,7 @@ #include <sys/stat.h> #include <sys/wait.h> #include <sys/mman.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <fcntl.h> #include <unistd.h> #include <errno.h> diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index bd0000300c77..72abb28b7b5a 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -10,17 +10,97 @@ #include <string.h> #include <errno.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif +#include "archinsn.h" #include "debug.h" +#include "event.h" #include "trace-event.h" #include "evsel.h" +#include <linux/perf_event.h> #include <linux/zalloc.h> #include "util/sample.h" +unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; + struct scripting_context *scripting_context; +struct script_spec { + struct list_head node; + struct scripting_ops *ops; + char spec[]; +}; + +static LIST_HEAD(script_specs); + +static struct script_spec *script_spec__new(const char *spec, + struct scripting_ops *ops) +{ + struct script_spec *s = malloc(sizeof(*s) + strlen(spec) + 1); + + if (s != NULL) { + strcpy(s->spec, spec); + s->ops = ops; + } + + return s; +} + +static void script_spec__add(struct script_spec *s) +{ + list_add_tail(&s->node, &script_specs); +} + +static struct script_spec *script_spec__find(const char *spec) +{ + struct script_spec *s; + + list_for_each_entry(s, &script_specs, node) + if (strcasecmp(s->spec, spec) == 0) + return s; + return NULL; +} + +static int script_spec_register(const char *spec, struct scripting_ops *ops) +{ + struct script_spec *s; + + s = script_spec__find(spec); + if (s) + return -1; + + s = script_spec__new(spec, ops); + if (!s) + return -1; + + script_spec__add(s); + return 0; +} + +struct scripting_ops *script_spec__lookup(const char *spec) +{ + struct script_spec *s = script_spec__find(spec); + + if (!s) + return NULL; + + return s->ops; +} + +int script_spec__for_each(int (*cb)(struct scripting_ops *ops, const char *spec)) +{ + struct script_spec *s; + int ret = 0; + + list_for_each_entry(s, &script_specs, node) { + ret = cb(s->ops, s->spec); + if (ret) + break; + } + return ret; +} + void scripting_context__update(struct scripting_context *c, union perf_event *event, struct perf_sample *sample, @@ -28,12 +108,14 @@ void scripting_context__update(struct scripting_context *c, struct 
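
The script_spec registry is now implemented here, exporting lookup and iteration while registration stays internal to the scripting backends via script_spec_register(). A sketch of enumerating registered backends (assumes scripting_ops::name as declared in trace-event.h):

#include <stdio.h>
#include "util/trace-event.h"

static int show_spec(struct scripting_ops *ops, const char *spec)
{
	printf("%-8s -> %s\n", spec, ops->name);
	return 0;	/* non-zero would stop script_spec__for_each() */
}

static void list_backends(void)
{
	script_spec__for_each(show_spec);
}
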
addr_location *al, struct addr_location *addr_al) { - c->event_data = sample->raw_data; - c->pevent = NULL; #ifdef HAVE_LIBTRACEEVENT - if (evsel->tp_format) - c->pevent = evsel->tp_format->tep; + const struct tep_event *tp_format = evsel__tp_format(evsel); + + c->pevent = tp_format ? tp_format->tep : NULL; +#else + c->pevent = NULL; #endif + c->event_data = sample->raw_data; c->event = event; c->sample = sample; c->evsel = evsel; @@ -191,3 +273,154 @@ void setup_perl_scripting(void) } #endif #endif + +#if !defined(__i386__) && !defined(__x86_64__) +void arch_fetch_insn(struct perf_sample *sample __maybe_unused, + struct thread *thread __maybe_unused, + struct machine *machine __maybe_unused) +{ +} +#endif + +void script_fetch_insn(struct perf_sample *sample, struct thread *thread, + struct machine *machine, bool native_arch) +{ + if (sample->insn_len == 0 && native_arch) + arch_fetch_insn(sample, thread, machine); +} + +static const struct { + u32 flags; + const char *name; +} sample_flags[] = { + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "jcc"}, + {PERF_IP_FLAG_BRANCH, "jmp"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT, "int"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT, "iret"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET, "syscall"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET, "sysret"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, "async"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_INTERRUPT, + "hw int"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "tx abrt"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "tr strt"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vmentry"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vmexit"}, + {0, NULL} +}; + +static const struct { + u32 flags; + const char *name; +} branch_events[] = { + {PERF_IP_FLAG_BRANCH_MISS, "miss"}, + {PERF_IP_FLAG_NOT_TAKEN, "not_taken"}, + {0, NULL} +}; + +static int sample_flags_to_name(u32 flags, char *str, size_t size) +{ + int i; + const char *prefix; + int pos = 0, ret, ev_idx = 0; + u32 xf = flags & PERF_ADDITIONAL_STATE_MASK; + u32 types, events; + char xs[16] = { 0 }; + + /* Clear additional state bits */ + flags &= ~PERF_ADDITIONAL_STATE_MASK; + + if (flags & PERF_IP_FLAG_TRACE_BEGIN) + prefix = "tr strt "; + else if (flags & PERF_IP_FLAG_TRACE_END) + prefix = "tr end "; + else + prefix = ""; + + ret = snprintf(str + pos, size - pos, "%s", prefix); + if (ret < 0) + return ret; + pos += ret; + + flags &= ~(PERF_IP_FLAG_TRACE_BEGIN | PERF_IP_FLAG_TRACE_END); + + types = flags & ~PERF_IP_FLAG_BRANCH_EVENT_MASK; + for (i = 0; sample_flags[i].name; i++) { + if (sample_flags[i].flags != types) + continue; + + ret = snprintf(str + pos, size - pos, "%s", sample_flags[i].name); + if (ret < 0) + return ret; + pos += ret; + break; + } + + events = flags & PERF_IP_FLAG_BRANCH_EVENT_MASK; + for (i = 0; branch_events[i].name; i++) { + if (!(branch_events[i].flags & events)) + continue; + + ret = snprintf(str + pos, size - pos, !ev_idx ? 
"/%s" : ",%s", + branch_events[i].name); + if (ret < 0) + return ret; + pos += ret; + ev_idx++; + } + + /* Add an end character '/' for events */ + if (ev_idx) { + ret = snprintf(str + pos, size - pos, "/"); + if (ret < 0) + return ret; + pos += ret; + } + + if (!xf) + return pos; + + snprintf(xs, sizeof(xs), "(%s%s%s)", + flags & PERF_IP_FLAG_IN_TX ? "x" : "", + flags & PERF_IP_FLAG_INTR_DISABLE ? "D" : "", + flags & PERF_IP_FLAG_INTR_TOGGLE ? "t" : ""); + + /* Right align the string if its length is less than the limit */ + if ((pos + strlen(xs)) < SAMPLE_FLAGS_STR_ALIGNED_SIZE) + ret = snprintf(str + pos, size - pos, "%*s", + (int)(SAMPLE_FLAGS_STR_ALIGNED_SIZE - ret), xs); + else + ret = snprintf(str + pos, size - pos, " %s", xs); + if (ret < 0) + return ret; + + return pos + ret; +} + +int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz) +{ + const char *chars = PERF_IP_FLAG_CHARS; + const size_t n = strlen(PERF_IP_FLAG_CHARS); + size_t i, pos = 0; + int ret; + + ret = sample_flags_to_name(flags, str, sz); + if (ret > 0) + return ret; + + for (i = 0; i < n; i++, flags >>= 1) { + if ((flags & 1) && pos < sz) + str[pos++] = chars[i]; + } + for (; i < 32; i++, flags >>= 1) { + if ((flags & 1) && pos < sz) + str[pos++] = '?'; + } + if (pos < sz) + str[pos] = 0; + + return pos; +} diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 8ad75b31e09b..6a8c66c64b70 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -8,7 +8,7 @@ #include <fcntl.h> #include <linux/kernel.h> #include <linux/err.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <api/fs/tracing_path.h> #include <api/fs/fs.h> #include "trace-event.h" diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index bbf8b26bc8da..71e680bc3d4b 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -39,12 +39,9 @@ trace_event__tp_format(const char *sys, const char *name); struct tep_event *trace_event__tp_format_id(int id); -void event_format__fprintf(struct tep_event *event, +void event_format__fprintf(const struct tep_event *event, int cpu, void *data, int size, FILE *fp); -void event_format__print(struct tep_event *event, - int cpu, void *data, int size); - int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size); int parse_event_file(struct tep_handle *pevent, char *buf, unsigned long size, char *sys); @@ -116,10 +113,11 @@ struct scripting_ops { extern unsigned int scripting_max_stack; -int script_spec_register(const char *spec, struct scripting_ops *ops); +struct scripting_ops *script_spec__lookup(const char *spec); +int script_spec__for_each(int (*cb)(struct scripting_ops *ops, const char *spec)); void script_fetch_insn(struct perf_sample *sample, struct thread *thread, - struct machine *machine); + struct machine *machine, bool native_arch); void setup_perl_scripting(void); void setup_python_scripting(void); @@ -147,10 +145,12 @@ int common_flags(struct scripting_context *context); int common_lock_depth(struct scripting_context *context); #define SAMPLE_FLAGS_BUF_SIZE 64 +#define SAMPLE_FLAGS_STR_ALIGNED_SIZE 21 + int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz); #if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) -#include <traceevent/event-parse.h> +#include <event-parse.h> static inline bool tep_field_is_relative(unsigned long flags) { diff --git a/tools/perf/util/trace.h b/tools/perf/util/trace.h new file 
mode 100644 index 000000000000..fbbcfe6f44fe --- /dev/null +++ b/tools/perf/util/trace.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef UTIL_TRACE_H +#define UTIL_TRACE_H + +#include <stdio.h> /* for FILE */ + +enum trace_summary_mode { + SUMMARY__NONE = 0, + SUMMARY__BY_TOTAL, + SUMMARY__BY_THREAD, + SUMMARY__BY_CGROUP, +}; + +#ifdef HAVE_BPF_SKEL + +int trace_prepare_bpf_summary(enum trace_summary_mode mode); +void trace_start_bpf_summary(void); +void trace_end_bpf_summary(void); +int trace_print_bpf_summary(FILE *fp, int max_summary); +void trace_cleanup_bpf_summary(void); + +#else /* !HAVE_BPF_SKEL */ + +static inline int trace_prepare_bpf_summary(enum trace_summary_mode mode __maybe_unused) +{ + return -1; +} +static inline void trace_start_bpf_summary(void) {} +static inline void trace_end_bpf_summary(void) {} +static inline int trace_print_bpf_summary(FILE *fp __maybe_unused, int max_summary __maybe_unused) +{ + return 0; +} +static inline void trace_cleanup_bpf_summary(void) {} + +#endif /* HAVE_BPF_SKEL */ + +#endif /* UTIL_TRACE_H */ diff --git a/tools/perf/util/trace_augment.h b/tools/perf/util/trace_augment.h new file mode 100644 index 000000000000..4f729bc67753 --- /dev/null +++ b/tools/perf/util/trace_augment.h @@ -0,0 +1,66 @@ +#ifndef TRACE_AUGMENT_H +#define TRACE_AUGMENT_H + +#include <linux/compiler.h> + +struct bpf_program; +struct evlist; + +#ifdef HAVE_BPF_SKEL + +int augmented_syscalls__prepare(void); +int augmented_syscalls__create_bpf_output(struct evlist *evlist); +void augmented_syscalls__setup_bpf_output(void); +int augmented_syscalls__set_filter_pids(unsigned int nr, pid_t *pids); +int augmented_syscalls__get_map_fds(int *enter_fd, int *exit_fd, int *beauty_fd); +struct bpf_program *augmented_syscalls__find_by_title(const char *name); +struct bpf_program *augmented_syscalls__unaugmented(void); +void augmented_syscalls__cleanup(void); + +#else /* !HAVE_BPF_SKEL */ + +static inline int augmented_syscalls__prepare(void) +{ + return -1; +} + +static inline int augmented_syscalls__create_bpf_output(struct evlist *evlist __maybe_unused) +{ + return -1; +} + +static inline void augmented_syscalls__setup_bpf_output(void) +{ +} + +static inline int augmented_syscalls__set_filter_pids(unsigned int nr __maybe_unused, + pid_t *pids __maybe_unused) +{ + return 0; +} + +static inline int augmented_syscalls__get_map_fds(int *enter_fd __maybe_unused, + int *exit_fd __maybe_unused, + int *beauty_fd __maybe_unused) +{ + return -1; +} + +static inline struct bpf_program * +augmented_syscalls__find_by_title(const char *name __maybe_unused) +{ + return NULL; +} + +static inline struct bpf_program *augmented_syscalls__unaugmented(void) +{ + return NULL; +} + +static inline void augmented_syscalls__cleanup(void) +{ +} + +#endif /* HAVE_BPF_SKEL */ + +#endif diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index f19791d46e99..511a517ce613 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -72,7 +72,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, } int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, - struct perf_tool *tool, + const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine) { @@ -119,7 +119,7 @@ size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp) size_t ret; ret = fprintf(fp, "\n... Time Shift %" PRI_lu64 "\n", tc->time_shift); - ret += fprintf(fp, "... Time Muliplier %" PRI_lu64 "\n", tc->time_mult); + ret += fprintf(fp, "... 
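
trace.h declares the BPF-backed syscall summary used by perf trace, with no-op fallbacks when HAVE_BPF_SKEL is unset. The call order below is an assumption from the names (prepare, start, end, print, cleanup):

#include <stdio.h>
#include "util/trace.h"

static void summary_run(FILE *fp)
{
	if (trace_prepare_bpf_summary(SUMMARY__BY_THREAD) < 0)
		return;		/* the stub returns -1 without BPF skeletons */

	trace_start_bpf_summary();
	/* ... workload runs; BPF programs aggregate per thread ... */
	trace_end_bpf_summary();
	trace_print_bpf_summary(fp, /*max_summary=*/0);
	trace_cleanup_bpf_summary();
}
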
Time Multiplier %" PRI_lu64 "\n", tc->time_mult); ret += fprintf(fp, "... Time Zero %" PRI_lu64 "\n", tc->time_zero); /* diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index 88fd1c4c1cb8..57ce8449647f 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -25,7 +25,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc); u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc); u64 rdtsc(void); -double arch_get_tsc_freq(void); +u64 arch_get_tsc_freq(void); size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c index 32c39cfe209b..4c6a86e1cb54 100644 --- a/tools/perf/util/units.c +++ b/tools/perf/util/units.c @@ -64,7 +64,7 @@ unsigned long convert_unit(unsigned long value, char *unit) int unit_number__scnprintf(char *buf, size_t size, u64 n) { - char unit[4] = "BKMG"; + char unit[] = "BKMG"; int i = 0; while (((n / 1024) > 1) && (i < 3)) { diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index b38d322734b4..ae70fb56a057 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -29,8 +29,8 @@ static int __find_debuginfo(Dwfl_Module *mod __maybe_unused, void **userdata, const struct dso *dso = *userdata; assert(dso); - if (dso->symsrc_filename && strcmp (file_name, dso->symsrc_filename)) - *debuginfo_file_name = strdup(dso->symsrc_filename); + if (dso__symsrc_filename(dso) && strcmp(file_name, dso__symsrc_filename(dso))) + *debuginfo_file_name = strdup(dso__symsrc_filename(dso)); return -1; } @@ -66,7 +66,7 @@ static int __report_module(struct addr_location *al, u64 ip, * a different code in another DSO. So just use the map->start * directly to pick the correct one. */ - if (!strncmp(dso->long_name, "/tmp/jitted-", 12)) + if (!strncmp(dso__long_name(dso), "/tmp/jitted-", 12)) base = map__start(al->map); else base = map__start(al->map) - map__pgoff(al->map); @@ -83,15 +83,18 @@ static int __report_module(struct addr_location *al, u64 ip, if (!mod) { char filename[PATH_MAX]; - __symbol__join_symfs(filename, sizeof(filename), dso->long_name); - mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1, - base, false); + __symbol__join_symfs(filename, sizeof(filename), dso__long_name(dso)); + /* Don't hang up on device files like /dev/dri/renderD128. 
diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c
index f19791d46e99..511a517ce613 100644
--- a/tools/perf/util/tsc.c
+++ b/tools/perf/util/tsc.c
@@ -72,7 +72,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
 }
 
 int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
-				struct perf_tool *tool,
+				const struct perf_tool *tool,
 				perf_event__handler_t process,
 				struct machine *machine)
 {
@@ -119,7 +119,7 @@ size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp)
 	size_t ret;
 
 	ret = fprintf(fp, "\n... Time Shift %" PRI_lu64 "\n", tc->time_shift);
-	ret += fprintf(fp, "... Time Muliplier %" PRI_lu64 "\n", tc->time_mult);
+	ret += fprintf(fp, "... Time Multiplier %" PRI_lu64 "\n", tc->time_mult);
 	ret += fprintf(fp, "... Time Zero %" PRI_lu64 "\n", tc->time_zero);
 
 	/*
diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h
index 88fd1c4c1cb8..57ce8449647f 100644
--- a/tools/perf/util/tsc.h
+++ b/tools/perf/util/tsc.h
@@ -25,7 +25,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
 u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
 u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
 u64 rdtsc(void);
-double arch_get_tsc_freq(void);
+u64 arch_get_tsc_freq(void);
 
 size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp);
diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
index 32c39cfe209b..4c6a86e1cb54 100644
--- a/tools/perf/util/units.c
+++ b/tools/perf/util/units.c
@@ -64,7 +64,7 @@ unsigned long convert_unit(unsigned long value, char *unit)
 
 int unit_number__scnprintf(char *buf, size_t size, u64 n)
 {
-	char unit[4] = "BKMG";
+	char unit[] = "BKMG";
 	int i = 0;
 
 	while (((n / 1024) > 1) && (i < 3)) {
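One detail worth noting in the units.c hunk: "BKMG" occupies five bytes with its terminating NUL, so char unit[4] declared an array with no terminator; the unsized form sidesteps that. The expected outputs below are illustrative of the scaling loop above (divide by 1024 up to three times, then print the value with the matching suffix):

    #include "util/units.h"

    char buf[16];

    unit_number__scnprintf(buf, sizeof(buf), 512);		/* -> "512B" */
    unit_number__scnprintf(buf, sizeof(buf), 4096);		/* -> "4K"   */
    unit_number__scnprintf(buf, sizeof(buf), 3 << 20);	/* -> "3M"   */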
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index b38d322734b4..ae70fb56a057 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -29,8 +29,8 @@ static int __find_debuginfo(Dwfl_Module *mod __maybe_unused, void **userdata,
 	const struct dso *dso = *userdata;
 
 	assert(dso);
-	if (dso->symsrc_filename && strcmp (file_name, dso->symsrc_filename))
-		*debuginfo_file_name = strdup(dso->symsrc_filename);
+	if (dso__symsrc_filename(dso) && strcmp(file_name, dso__symsrc_filename(dso)))
+		*debuginfo_file_name = strdup(dso__symsrc_filename(dso));
 	return -1;
 }
 
@@ -66,7 +66,7 @@ static int __report_module(struct addr_location *al, u64 ip,
 	 * a different code in another DSO. So just use the map->start
 	 * directly to pick the correct one.
 	 */
-	if (!strncmp(dso->long_name, "/tmp/jitted-", 12))
+	if (!strncmp(dso__long_name(dso), "/tmp/jitted-", 12))
 		base = map__start(al->map);
 	else
 		base = map__start(al->map) - map__pgoff(al->map);
@@ -83,15 +83,18 @@ static int __report_module(struct addr_location *al, u64 ip,
 	if (!mod) {
 		char filename[PATH_MAX];
 
-		__symbol__join_symfs(filename, sizeof(filename), dso->long_name);
-		mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1,
-				      base, false);
+		__symbol__join_symfs(filename, sizeof(filename), dso__long_name(dso));
+		/* Don't hang up on device files like /dev/dri/renderD128. */
+		if (is_regular_file(filename)) {
+			mod = dwfl_report_elf(ui->dwfl, dso__short_name(dso), filename, -1,
+					      base, false);
+		}
 	}
 
 	if (!mod) {
 		char filename[PATH_MAX];
 
 		if (dso__build_id_filename(dso, filename, sizeof(filename), false))
-			mod = dwfl_report_elf(ui->dwfl, dso->short_name, filename, -1,
+			mod = dwfl_report_elf(ui->dwfl, dso__short_name(dso), filename, -1,
 					      base, false);
 	}
 
@@ -190,7 +193,10 @@ static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *
 	int offset;
 	int ret;
 
-	ret = perf_reg_value(&start, &ui->sample->user_regs,
+	if (!ui->sample->user_regs)
+		return false;
+
+	ret = perf_reg_value(&start, ui->sample->user_regs,
 			     perf_arch_reg_sp(arch));
 	if (ret)
 		return false;
@@ -273,7 +279,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 	Dwarf_Word ip;
 	int err = -EINVAL, i;
 
-	if (!data->user_regs.regs)
+	if (!data->user_regs || !data->user_regs->regs)
 		return -EINVAL;
 
 	ui = zalloc(sizeof(ui_buf) + sizeof(ui_buf.entries[0]) * max_stack);
@@ -286,7 +292,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 	if (!ui->dwfl)
 		goto out;
 
-	err = perf_reg_value(&ip, &data->user_regs, perf_arch_reg_ip(arch));
+	err = perf_reg_value(&ip, data->user_regs, perf_arch_reg_ip(arch));
 	if (err)
 		goto out;
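The libdw hunks above track perf_sample::user_regs becoming a nullable pointer rather than an embedded struct, so every reader now guards against a missing register set. A guarded read in the same style; a sketch under that assumption, with the helper name sample_user_sp() being illustrative:

    #include <stdbool.h>
    #include "util/sample.h"	/* struct perf_sample */
    #include "util/perf_regs.h"	/* perf_reg_value(), perf_arch_reg_sp() */

    /* Sketch: read the sampled user stack pointer, if one was captured. */
    static bool sample_user_sp(struct perf_sample *sample, const char *arch, u64 *sp)
    {
    	if (!sample->user_regs || !sample->user_regs->regs)
    		return false;	/* no user register set in this sample */

    	return perf_reg_value(sp, sample->user_regs, perf_arch_reg_sp(arch)) == 0;
    }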
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index cde267ea3e99..0b037e7389a0 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -330,8 +330,7 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct unwind_info *ui,
 	int ret, fd;
 
 	if (dso__data(dso)->eh_frame_hdr_offset == 0) {
-		fd = dso__data_get_fd(dso, ui->machine);
-		if (fd < 0)
+		if (!dso__data_get_fd(dso, ui->machine, &fd))
 			return -EINVAL;
 
 		/* Check the .eh_frame section for unwinding info */
@@ -363,7 +362,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
 					struct machine *machine, u64 *offset)
 {
 	int fd;
-	u64 ofs = dso->data.debug_frame_offset;
+	u64 ofs = dso__data(dso)->debug_frame_offset;
 
 	/* debug_frame can reside in:
 	 *  - dso
@@ -372,14 +371,13 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
 	 * has to be pointed by symsrc_filename
 	 */
 	if (ofs == 0) {
-		fd = dso__data_get_fd(dso, machine);
-		if (fd >= 0) {
+		if (dso__data_get_fd(dso, machine, &fd)) {
 			ofs = elf_section_offset(fd, ".debug_frame");
 			dso__data_put_fd(dso);
 		}
 
 		if (ofs <= 0) {
-			fd = open(dso->symsrc_filename, O_RDONLY);
+			fd = open(dso__symsrc_filename(dso), O_RDONLY);
 			if (fd >= 0) {
 				ofs = elf_section_offset(fd, ".debug_frame");
 				close(fd);
@@ -390,6 +388,11 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
 			char *debuglink = malloc(PATH_MAX);
 			int ret = 0;
 
+			if (debuglink == NULL) {
+				pr_err("unwind: Can't read unwind spec debug frame.\n");
+				return -ENOMEM;
+			}
+
 			ret = dso__read_binary_type_filename(
 				dso, DSO_BINARY_TYPE__DEBUGLINK,
 				machine->root_dir, debuglink, PATH_MAX);
@@ -402,21 +405,21 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
 			}
 		}
 		if (ofs > 0) {
-			if (dso->symsrc_filename != NULL) {
+			if (dso__symsrc_filename(dso) != NULL) {
 				pr_warning(
 					"%s: overwrite symsrc(%s,%s)\n",
 					__func__,
-					dso->symsrc_filename,
+					dso__symsrc_filename(dso),
 					debuglink);
-				zfree(&dso->symsrc_filename);
+				dso__free_symsrc_filename(dso);
 			}
-			dso->symsrc_filename = debuglink;
+			dso__set_symsrc_filename(dso, debuglink);
 		} else {
 			free(debuglink);
 		}
 
-		dso->data.debug_frame_offset = ofs;
+		dso__data(dso)->debug_frame_offset = ofs;
 	}
 
 	*offset = ofs;
@@ -480,16 +483,18 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
 	/* Check the .debug_frame section for unwinding info */
 	if (ret < 0 &&
 	    !read_unwind_spec_debug_frame(dso, ui->machine, &segbase)) {
-		int fd = dso__data_get_fd(dso, ui->machine);
-		int is_exec = elf_is_exec(fd, dso->name);
+		int fd;
 		u64 start = map__start(map);
-		unw_word_t base = is_exec ? 0 : start;
+		unw_word_t base = start;
 		const char *symfile;
 
-		if (fd >= 0)
+		if (dso__data_get_fd(dso, ui->machine, &fd)) {
+			if (elf_is_exec(fd, dso__name(dso)))
+				base = 0;
 			dso__data_put_fd(dso);
+		}
 
-		symfile = dso->symsrc_filename ?: dso->name;
+		symfile = dso__symsrc_filename(dso) ?: dso__name(dso);
 
 		memset(&di, 0, sizeof(di));
 		if (dwarf_find_debug_frame(0, &di, ip, base, symfile,
					   start, map__end(map)))
@@ -574,12 +579,12 @@ static int access_mem(unw_addr_space_t __maybe_unused as,
 	int ret;
 
 	/* Don't support write, probably not needed. */
-	if (__write || !stack || !ui->sample->user_regs.regs) {
+	if (__write || !stack || !ui->sample->user_regs || !ui->sample->user_regs->regs) {
 		*valp = 0;
 		return 0;
 	}
 
-	ret = perf_reg_value(&start, &ui->sample->user_regs,
+	ret = perf_reg_value(&start, perf_sample__user_regs(ui->sample),
 			     perf_arch_reg_sp(arch));
 	if (ret)
 		return ret;
@@ -623,7 +628,7 @@ static int access_reg(unw_addr_space_t __maybe_unused as,
 		return 0;
 	}
 
-	if (!ui->sample->user_regs.regs) {
+	if (!ui->sample->user_regs || !ui->sample->user_regs->regs) {
 		*valp = 0;
 		return 0;
 	}
@@ -632,7 +637,7 @@ static int access_reg(unw_addr_space_t __maybe_unused as,
 	if (id < 0)
 		return -EINVAL;
 
-	ret = perf_reg_value(&val, &ui->sample->user_regs, id);
+	ret = perf_reg_value(&val, perf_sample__user_regs(ui->sample), id);
 	if (ret) {
 		if (!ui->best_effort)
 			pr_err("unwind: can't read reg %d\n", regnum);
@@ -736,7 +741,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
 	unw_cursor_t c;
 	int ret, i = 0;
 
-	ret = perf_reg_value(&val, &ui->sample->user_regs,
+	ret = perf_reg_value(&val, perf_sample__user_regs(ui->sample),
 			     perf_arch_reg_ip(arch));
 	if (ret)
 		return ret;
@@ -803,7 +808,7 @@ static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
 		.best_effort = best_effort
 	};
 
-	if (!data->user_regs.regs)
+	if (!data->user_regs || !data->user_regs->regs)
 		return -EINVAL;
 
 	if (max_stack <= 0)
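The recurring pattern in this file is the new dso__data_get_fd() contract: it returns a success boolean and hands the descriptor back through an out-parameter, and every successful get must still be paired with dso__data_put_fd(). A condensed sketch of that usage, assuming the helpers named in the hunks above (section_offset() itself is hypothetical):

    #include "util/dso.h"
    #include "util/symbol.h"	/* assumed home of elf_section_offset() */

    /* Sketch: look up a section offset under the get/put fd protocol. */
    static u64 section_offset(struct dso *dso, struct machine *machine,
    			  const char *name)
    {
    	u64 ofs = 0;
    	int fd;

    	if (dso__data_get_fd(dso, machine, &fd)) {
    		ofs = elf_section_offset(fd, name);
    		dso__data_put_fd(dso);
    	}
    	return ofs;
    }

Returning the fd only on success removes the old mixed convention where callers had to remember whether a negative value still needed a put.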
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 4f561e5e4162..0f031eb80b4c 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -78,17 +78,23 @@ bool sysctl__nmi_watchdog_enabled(void)
 
 bool test_attr__enabled;
 
+bool exclude_GH_default;
+
 bool perf_host  = true;
 bool perf_guest = false;
 
 void event_attr_init(struct perf_event_attr *attr)
 {
+	/* to capture ABI version */
+	attr->size = sizeof(*attr);
+
+	if (!exclude_GH_default)
+		return;
+
 	if (!perf_host)
 		attr->exclude_host  = 1;
 	if (!perf_guest)
 		attr->exclude_guest = 1;
-	/* to capture ABI version */
-	attr->size = sizeof(*attr);
 }
 
 int mkdir_p(char *path, mode_t mode)
@@ -325,94 +331,15 @@ int perf_event_paranoid(void)
 
 bool perf_event_paranoid_check(int max_level)
 {
-	return perf_cap__capable(CAP_SYS_ADMIN) ||
-		perf_cap__capable(CAP_PERFMON) ||
-		perf_event_paranoid() <= max_level;
-}
-
-static int
-fetch_ubuntu_kernel_version(unsigned int *puint)
-{
-	ssize_t len;
-	size_t line_len = 0;
-	char *ptr, *line = NULL;
-	int version, patchlevel, sublevel, err;
-	FILE *vsig;
-
-	if (!puint)
-		return 0;
-
-	vsig = fopen("/proc/version_signature", "r");
-	if (!vsig) {
-		pr_debug("Open /proc/version_signature failed: %s\n",
-			 strerror(errno));
-		return -1;
-	}
-
-	len = getline(&line, &line_len, vsig);
-	fclose(vsig);
-	err = -1;
-	if (len <= 0) {
-		pr_debug("Reading from /proc/version_signature failed: %s\n",
-			 strerror(errno));
-		goto errout;
-	}
-
-	ptr = strrchr(line, ' ');
-	if (!ptr) {
-		pr_debug("Parsing /proc/version_signature failed: %s\n", line);
-		goto errout;
-	}
-
-	err = sscanf(ptr + 1, "%d.%d.%d",
-		     &version, &patchlevel, &sublevel);
-	if (err != 3) {
-		pr_debug("Unable to get kernel version from /proc/version_signature '%s'\n",
-			 line);
-		goto errout;
-	}
-
-	*puint = (version << 16) + (patchlevel << 8) + sublevel;
-	err = 0;
-errout:
-	free(line);
-	return err;
-}
-
-int
-fetch_kernel_version(unsigned int *puint, char *str,
-		     size_t str_size)
-{
-	struct utsname utsname;
-	int version, patchlevel, sublevel, err;
-	bool int_ver_ready = false;
-
-	if (access("/proc/version_signature", R_OK) == 0)
-		if (!fetch_ubuntu_kernel_version(puint))
-			int_ver_ready = true;
-
-	if (uname(&utsname))
-		return -1;
+	bool used_root;
 
-	if (str && str_size) {
-		strncpy(str, utsname.release, str_size);
-		str[str_size - 1] = '\0';
-	}
-
-	if (!puint || int_ver_ready)
-		return 0;
-
-	err = sscanf(utsname.release, "%d.%d.%d",
-		     &version, &patchlevel, &sublevel);
+	if (perf_cap__capable(CAP_SYS_ADMIN, &used_root))
+		return true;
 
-	if (err != 3) {
-		pr_debug("Unable to get kernel version from uname '%s'\n",
-			 utsname.release);
-		return -1;
-	}
+	if (!used_root && perf_cap__capable(CAP_PERFMON, &used_root))
+		return true;
 
-	*puint = (version << 16) + (patchlevel << 8) + sublevel;
-	return 0;
+	return perf_event_paranoid() <= max_level;
 }
 
 int perf_tip(char **strp, const char *dirpath)
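The util.c change inverts the old default: attr.size is now always stamped, while exclude_host/exclude_guest are only set once a tool opts in via exclude_GH_default. A sketch of the resulting behaviour; the attribute values are illustrative, and the declaration header for event_attr_init() is an assumption:

    #include <linux/perf_event.h>
    #include "util/util.h"	/* assumed home of event_attr_init() */

    static void init_cycles_attr(struct perf_event_attr *attr)
    {
    	*attr = (struct perf_event_attr){
    		.type	= PERF_TYPE_HARDWARE,
    		.config	= PERF_COUNT_HW_CPU_CYCLES,
    	};

    	/* Always stamps attr->size (the ABI version); only touches
    	 * exclude_host/exclude_guest when exclude_GH_default is set. */
    	event_attr_init(attr);
    }

The reworked perf_event_paranoid_check() also changes shape: the used_root flag lets it skip the CAP_PERFMON probe when the CAP_SYS_ADMIN check already fell back to a plain root test, avoiding a redundant capability query.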
pr_debug("failed to enlarge read_values ->values array"); - goto out_free_name; + goto out_free_counters; } - for (j = values->counters_max; j < counters_max; j++) + for (int j = values->counters_max; j < counters_max; j++) value[j] = 0; values->value[i] = value; } values->counters_max = counters_max; - values->counterrawid = counterrawid; - values->countername = countername; + values->counters = new_counters; return 0; -out_free_name: - free(countername); -out_free_rawid: - free(counterrawid); +out_free_counters: + free(new_counters); out_enomem: return -ENOMEM; } static int perf_read_values__findnew_counter(struct perf_read_values *values, - u64 rawid, const char *name) + struct evsel *evsel) { int i; - for (i = 0; i < values->counters; i++) - if (values->counterrawid[i] == rawid) + for (i = 0; i < values->num_counters; i++) + if (values->counters[i] == evsel) return i; - if (values->counters == values->counters_max) { - i = perf_read_values__enlarge_counters(values); - if (i) - return i; + if (values->num_counters == values->counters_max) { + int err = perf_read_values__enlarge_counters(values); + + if (err) + return err; } - i = values->counters++; - values->counterrawid[i] = rawid; - values->countername[i] = strdup(name); + i = values->num_counters++; + values->counters[i] = evsel; return i; } int perf_read_values_add_value(struct perf_read_values *values, u32 pid, u32 tid, - u64 rawid, const char *name, u64 value) + struct evsel *evsel, u64 value) { int tindex, cindex; tindex = perf_read_values__findnew_thread(values, pid, tid); if (tindex < 0) return tindex; - cindex = perf_read_values__findnew_counter(values, rawid, name); + cindex = perf_read_values__findnew_counter(values, evsel); if (cindex < 0) return cindex; @@ -205,15 +189,15 @@ static void perf_read_values__display_pretty(FILE *fp, int pidwidth, tidwidth; int *counterwidth; - counterwidth = malloc(values->counters * sizeof(*counterwidth)); + counterwidth = malloc(values->num_counters * sizeof(*counterwidth)); if (!counterwidth) { fprintf(fp, "INTERNAL ERROR: Failed to allocate counterwidth array\n"); return; } tidwidth = 3; pidwidth = 3; - for (j = 0; j < values->counters; j++) - counterwidth[j] = strlen(values->countername[j]); + for (j = 0; j < values->num_counters; j++) + counterwidth[j] = strlen(evsel__name(values->counters[j])); for (i = 0; i < values->threads; i++) { int width; @@ -223,7 +207,7 @@ static void perf_read_values__display_pretty(FILE *fp, width = snprintf(NULL, 0, "%d", values->tid[i]); if (width > tidwidth) tidwidth = width; - for (j = 0; j < values->counters; j++) { + for (j = 0; j < values->num_counters; j++) { width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]); if (width > counterwidth[j]) counterwidth[j] = width; @@ -231,14 +215,14 @@ static void perf_read_values__display_pretty(FILE *fp, } fprintf(fp, "# %*s %*s", pidwidth, "PID", tidwidth, "TID"); - for (j = 0; j < values->counters; j++) - fprintf(fp, " %*s", counterwidth[j], values->countername[j]); + for (j = 0; j < values->num_counters; j++) + fprintf(fp, " %*s", counterwidth[j], evsel__name(values->counters[j])); fprintf(fp, "\n"); for (i = 0; i < values->threads; i++) { fprintf(fp, " %*d %*d", pidwidth, values->pid[i], tidwidth, values->tid[i]); - for (j = 0; j < values->counters; j++) + for (j = 0; j < values->num_counters; j++) fprintf(fp, " %*" PRIu64, counterwidth[j], values->value[i][j]); fprintf(fp, "\n"); @@ -266,16 +250,16 @@ static void perf_read_values__display_raw(FILE *fp, if (width > tidwidth) tidwidth = width; } - for (j = 
diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h
index 791c1ad606c2..bbca33daca19 100644
--- a/tools/perf/util/values.h
+++ b/tools/perf/util/values.h
@@ -5,14 +5,15 @@
 #include <stdio.h>
 #include <linux/types.h>
 
+struct evsel;
+
 struct perf_read_values {
 	int threads;
 	int threads_max;
 	u32 *pid, *tid;
-	int counters;
+	int num_counters;
 	int counters_max;
-	u64 *counterrawid;
-	char **countername;
+	struct evsel **counters;
 	u64 **value;
 };
 
@@ -21,7 +22,7 @@ void perf_read_values_destroy(struct perf_read_values *values);
 
 int perf_read_values_add_value(struct perf_read_values *values,
 			       u32 pid, u32 tid,
-			       u64 rawid, const char *name, u64 value);
+			       struct evsel *evsel, u64 value);
 
 void perf_read_values_display(FILE *fp, struct perf_read_values *values,
 			      int raw);
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 1b6f8f6db7aa..c12f5d8c4bf6 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -308,8 +308,10 @@ static struct dso *machine__find_vdso(struct machine *machine,
 		if (!dso) {
 			dso = dsos__find(&machine->dsos, DSO__NAME_VDSO,
 					 true);
-			if (dso && dso_type != dso__type(dso, machine))
+			if (dso && dso_type != dso__type(dso, machine)) {
+				dso__put(dso);
 				dso = NULL;
+			}
 		}
 		break;
 	case DSO__TYPE_X32BIT:
diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c
index 78d2297c1b67..1f7c06523059 100644
--- a/tools/perf/util/zlib.c
+++ b/tools/perf/util/zlib.c
@@ -88,7 +88,7 @@ bool gzip_is_compressed(const char *input)
 	ssize_t rc;
 
 	if (fd < 0)
-		return -1;
+		return false;
 
 	rc = read(fd, buf, sizeof(buf));
 	close(fd);
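The zlib.c fix matters because the function returns bool: the old return -1 on a failed open converted to true, misreporting unreadable files as compressed. A sketch of the corrected helper's shape; the two-byte gzip magic check (0x1f 0x8b) stands in for the parts of the body elided from the hunk:

    #include <fcntl.h>
    #include <stdbool.h>
    #include <unistd.h>

    bool gzip_is_compressed(const char *input)
    {
    	int fd = open(input, O_RDONLY);
    	unsigned char buf[2];
    	ssize_t rc;

    	if (fd < 0)
    		return false;	/* unreadable means "not compressed", never -1 */

    	rc = read(fd, buf, sizeof(buf));
    	close(fd);
    	return rc == sizeof(buf) && buf[0] == 0x1f && buf[1] == 0x8b;
    }

The vdso.c hunk above is a similar one-liner with real consequences: dropping the dso reference before clearing the pointer keeps the refcount balanced when the found DSO is of the wrong type.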
