summaryrefslogtreecommitdiff
path: root/tools/perf/util
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/Build17
-rw-r--r--tools/perf/util/affinity.c18
-rw-r--r--tools/perf/util/affinity.h2
-rw-r--r--tools/perf/util/amd-sample-raw.c79
-rw-r--r--tools/perf/util/annotate.c3
-rw-r--r--tools/perf/util/annotate.h2
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.h17
-rw-r--r--tools/perf/util/arm-spe.c98
-rw-r--r--tools/perf/util/auxtrace.c13
-rw-r--r--tools/perf/util/auxtrace.h6
-rw-r--r--tools/perf/util/bpf-event.c380
-rw-r--r--tools/perf/util/bpf-event.h13
-rw-r--r--tools/perf/util/bpf-filter.c30
-rw-r--r--tools/perf/util/bpf-filter.h3
-rw-r--r--tools/perf/util/bpf-trace-summary.c458
-rw-r--r--tools/perf/util/bpf_ftrace.c75
-rw-r--r--tools/perf/util/bpf_lock_contention.c116
-rw-r--r--tools/perf/util/bpf_off_cpu.c119
-rw-r--r--tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c3
-rw-r--r--tools/perf/util/bpf_skel/func_latency.bpf.c148
-rw-r--r--tools/perf/util/bpf_skel/lock_contention.bpf.c107
-rw-r--r--tools/perf/util/bpf_skel/lock_data.h1
-rw-r--r--tools/perf/util/bpf_skel/off_cpu.bpf.c98
-rw-r--r--tools/perf/util/bpf_skel/perf_version.h17
-rw-r--r--tools/perf/util/bpf_skel/syscall_summary.bpf.c153
-rw-r--r--tools/perf/util/bpf_skel/syscall_summary.h27
-rw-r--r--tools/perf/util/bpf_skel/vmlinux/vmlinux.h9
-rw-r--r--tools/perf/util/bpf_trace_augment.c143
-rw-r--r--tools/perf/util/branch.c2
-rw-r--r--tools/perf/util/build-id.c65
-rw-r--r--tools/perf/util/build-id.h8
-rw-r--r--tools/perf/util/cap.c1
-rw-r--r--tools/perf/util/cap.h5
-rw-r--r--tools/perf/util/cgroup.c23
-rw-r--r--tools/perf/util/cgroup.h3
-rw-r--r--tools/perf/util/comm.c2
-rw-r--r--tools/perf/util/data-convert-bt.c16
-rw-r--r--tools/perf/util/data-convert-json.c36
-rw-r--r--tools/perf/util/db-export.c11
-rw-r--r--tools/perf/util/debug.c75
-rw-r--r--tools/perf/util/debug.h1
-rw-r--r--tools/perf/util/debuginfo.c2
-rw-r--r--tools/perf/util/demangle-cxx.h2
-rw-r--r--tools/perf/util/demangle-rust-v0.c2042
-rw-r--r--tools/perf/util/demangle-rust-v0.h88
-rw-r--r--tools/perf/util/demangle-rust.c269
-rw-r--r--tools/perf/util/demangle-rust.h8
-rw-r--r--tools/perf/util/disasm.c9
-rw-r--r--tools/perf/util/dlfilter.c2
-rw-r--r--tools/perf/util/drm_pmu.c686
-rw-r--r--tools/perf/util/drm_pmu.h39
-rw-r--r--tools/perf/util/dso.c160
-rw-r--r--tools/perf/util/dso.h75
-rw-r--r--tools/perf/util/dsos.c23
-rw-r--r--tools/perf/util/env.c132
-rw-r--r--tools/perf/util/env.h9
-rw-r--r--tools/perf/util/event.c29
-rw-r--r--tools/perf/util/event.h6
-rw-r--r--tools/perf/util/evlist.c120
-rw-r--r--tools/perf/util/evlist.h15
-rw-r--r--tools/perf/util/evsel.c313
-rw-r--r--tools/perf/util/evsel.h27
-rw-r--r--tools/perf/util/evsel_config.h1
-rw-r--r--tools/perf/util/expr.c8
-rw-r--r--tools/perf/util/fncache.c69
-rw-r--r--tools/perf/util/fncache.h1
-rw-r--r--tools/perf/util/ftrace.h5
-rw-r--r--tools/perf/util/genelf.c87
-rw-r--r--tools/perf/util/header.c256
-rw-r--r--tools/perf/util/header.h1
-rw-r--r--tools/perf/util/hist.c82
-rw-r--r--tools/perf/util/hist.h30
-rw-r--r--tools/perf/util/hwmon_pmu.c81
-rw-r--r--tools/perf/util/hwmon_pmu.h4
-rw-r--r--tools/perf/util/include/linux/linkage.h4
-rw-r--r--tools/perf/util/intel-pt.c205
-rw-r--r--tools/perf/util/intel-tpebs.c731
-rw-r--r--tools/perf/util/intel-tpebs.h40
-rw-r--r--tools/perf/util/jitdump.c21
-rw-r--r--tools/perf/util/lock-contention.h9
-rw-r--r--tools/perf/util/machine.c91
-rw-r--r--tools/perf/util/machine.h5
-rw-r--r--tools/perf/util/map.c15
-rw-r--r--tools/perf/util/map.h5
-rw-r--r--tools/perf/util/maps.c9
-rw-r--r--tools/perf/util/mem-events.c188
-rw-r--r--tools/perf/util/mem-events.h57
-rw-r--r--tools/perf/util/metricgroup.c374
-rw-r--r--tools/perf/util/metricgroup.h12
-rw-r--r--tools/perf/util/mutex.h11
-rw-r--r--tools/perf/util/off_cpu.h3
-rw-r--r--tools/perf/util/parse-events.c609
-rw-r--r--tools/perf/util/parse-events.h8
-rw-r--r--tools/perf/util/parse-events.l39
-rw-r--r--tools/perf/util/parse-events.y29
-rw-r--r--tools/perf/util/pfm.c6
-rw-r--r--tools/perf/util/pmu.c293
-rw-r--r--tools/perf/util/pmu.h14
-rw-r--r--tools/perf/util/pmus.c145
-rw-r--r--tools/perf/util/pmus.h7
-rw-r--r--tools/perf/util/print-events.c234
-rw-r--r--tools/perf/util/print-events.h7
-rw-r--r--tools/perf/util/probe-event.c12
-rw-r--r--tools/perf/util/probe-file.c4
-rw-r--r--tools/perf/util/probe-finder.c5
-rw-r--r--tools/perf/util/python.c315
-rw-r--r--tools/perf/util/record.h2
-rw-r--r--tools/perf/util/rwsem.c4
-rw-r--r--tools/perf/util/rwsem.h10
-rw-r--r--tools/perf/util/s390-cpumsf.c2
-rw-r--r--tools/perf/util/sample-raw.c7
-rw-r--r--tools/perf/util/sample-raw.h2
-rw-r--r--tools/perf/util/sample.h6
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c9
-rw-r--r--tools/perf/util/session.c41
-rw-r--r--tools/perf/util/session.h7
-rw-r--r--tools/perf/util/sha1.c97
-rw-r--r--tools/perf/util/sha1.h6
-rw-r--r--tools/perf/util/sort.c211
-rw-r--r--tools/perf/util/sort.h8
-rw-r--r--tools/perf/util/spark.c8
-rw-r--r--tools/perf/util/spark.h1
-rw-r--r--tools/perf/util/srccode.c4
-rw-r--r--tools/perf/util/srcline.c10
-rw-r--r--tools/perf/util/stat-display.c280
-rw-r--r--tools/perf/util/stat-shadow.c12
-rw-r--r--tools/perf/util/stat.c48
-rw-r--r--tools/perf/util/stat.h13
-rw-r--r--tools/perf/util/symbol-elf.c77
-rw-r--r--tools/perf/util/symbol-minimal.c169
-rw-r--r--tools/perf/util/symbol.c100
-rw-r--r--tools/perf/util/symbol_conf.h2
-rw-r--r--tools/perf/util/synthetic-events.c58
-rw-r--r--tools/perf/util/synthetic-events.h2
-rw-r--r--tools/perf/util/target.c54
-rw-r--r--tools/perf/util/target.h15
-rw-r--r--tools/perf/util/thread.c35
-rw-r--r--tools/perf/util/thread.h11
-rw-r--r--tools/perf/util/thread_map.c32
-rw-r--r--tools/perf/util/thread_map.h6
-rw-r--r--tools/perf/util/tool.c27
-rw-r--r--tools/perf/util/tool.h3
-rw-r--r--tools/perf/util/tool_pmu.c81
-rw-r--r--tools/perf/util/tool_pmu.h2
-rw-r--r--tools/perf/util/top.c4
-rw-r--r--tools/perf/util/top.h1
-rw-r--r--tools/perf/util/tp_pmu.c210
-rw-r--r--tools/perf/util/tp_pmu.h19
-rw-r--r--tools/perf/util/trace.h38
-rw-r--r--tools/perf/util/trace_augment.h62
-rw-r--r--tools/perf/util/unwind-libdw.c7
151 files changed, 9532 insertions, 2862 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 946bce6628f3..4959e7a990e4 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -41,6 +41,7 @@ perf-util-y += rbtree.o
perf-util-y += libstring.o
perf-util-y += bitmap.o
perf-util-y += hweight.o
+perf-util-y += sha1.o
perf-util-y += smt.o
perf-util-y += strbuf.o
perf-util-y += string.o
@@ -84,8 +85,10 @@ perf-util-y += pmu.o
perf-util-y += pmus.o
perf-util-y += pmu-flex.o
perf-util-y += pmu-bison.o
+perf-util-y += drm_pmu.o
perf-util-y += hwmon_pmu.o
perf-util-y += tool_pmu.o
+perf-util-y += tp_pmu.o
perf-util-y += svghelper.o
perf-util-y += trace-event-info.o
perf-util-y += trace-event-scripting.o
@@ -161,7 +164,7 @@ perf-util-y += clockid.o
perf-util-y += list_sort.o
perf-util-y += mutex.o
perf-util-y += sharded_mutex.o
-perf-util-$(CONFIG_X86_64) += intel-tpebs.o
+perf-util-y += intel-tpebs.o
perf-util-$(CONFIG_LIBBPF) += bpf_map.o
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
@@ -173,6 +176,11 @@ perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-flex.o
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-bison.o
perf-util-$(CONFIG_PERF_BPF_SKEL) += btf.o
+ifeq ($(CONFIG_TRACE),y)
+ perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-trace-summary.o
+ perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_trace_augment.o
+endif
+
ifeq ($(CONFIG_LIBTRACEEVENT),y)
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o
endif
@@ -237,9 +245,12 @@ perf-util-y += cap.o
perf-util-$(CONFIG_CXX_DEMANGLE) += demangle-cxx.o
perf-util-y += demangle-ocaml.o
perf-util-y += demangle-java.o
-perf-util-y += demangle-rust.o
+perf-util-y += demangle-rust-v0.o
perf-util-$(CONFIG_LIBLLVM) += llvm-c-helpers.o
+CFLAGS_demangle-rust-v0.o += -Wno-shadow -Wno-declaration-after-statement \
+ -Wno-switch-default -Wno-switch-enum -Wno-missing-field-initializers
+
ifdef CONFIG_JITDUMP
perf-util-$(CONFIG_LIBELF) += jitdump.o
perf-util-$(CONFIG_LIBELF) += genelf.o
@@ -414,7 +425,7 @@ endif
$(OUTPUT)%.shellcheck_log: %
$(call rule_mkdir)
- $(Q)$(call echo-cmd,test)shellcheck -a -S warning "$<" > $@ || (cat $@ && rm $@ && false)
+ $(Q)$(call echo-cmd,test)$(SHELLCHECK) "$<" > $@ || (cat $@ && rm $@ && false)
perf-util-y += $(SHELL_TEST_LOGS)
diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c
index 38dc4524b7e8..4fe851334296 100644
--- a/tools/perf/util/affinity.c
+++ b/tools/perf/util/affinity.c
@@ -5,6 +5,7 @@
#include <stdlib.h>
#include <linux/bitmap.h>
#include <linux/zalloc.h>
+#include <perf/cpumap.h>
#include "perf.h"
#include "cpumap.h"
#include "affinity.h"
@@ -83,3 +84,20 @@ void affinity__cleanup(struct affinity *a)
if (a != NULL)
__affinity__cleanup(a);
}
+
+void cpu_map__set_affinity(const struct perf_cpu_map *cpumap)
+{
+ int cpu_set_size = get_cpu_set_size();
+ unsigned long *cpuset = bitmap_zalloc(cpu_set_size * 8);
+ struct perf_cpu cpu;
+ int idx;
+
+ if (!cpuset)
+ return;
+
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, cpumap)
+ __set_bit(cpu.cpu, cpuset);
+
+ sched_setaffinity(0, cpu_set_size, (cpu_set_t *)cpuset);
+ zfree(&cpuset);
+}
diff --git a/tools/perf/util/affinity.h b/tools/perf/util/affinity.h
index 0ad6a18ef20c..7341194b2298 100644
--- a/tools/perf/util/affinity.h
+++ b/tools/perf/util/affinity.h
@@ -4,6 +4,7 @@
#include <stdbool.h>
+struct perf_cpu_map;
struct affinity {
unsigned long *orig_cpus;
unsigned long *sched_cpus;
@@ -13,5 +14,6 @@ struct affinity {
void affinity__cleanup(struct affinity *a);
void affinity__set(struct affinity *a, int cpu);
int affinity__setup(struct affinity *a);
+void cpu_map__set_affinity(const struct perf_cpu_map *cpumap);
#endif // PERF_AFFINITY_H
diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c
index 456ce64ad822..b084dee76b1a 100644
--- a/tools/perf/util/amd-sample-raw.c
+++ b/tools/perf/util/amd-sample-raw.c
@@ -19,6 +19,8 @@
static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
static bool zen4_ibs_extensions;
+static bool ldlat_cap;
+static bool dtlb_pgsize_cap;
static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg)
{
@@ -78,14 +80,20 @@ static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg)
static void pr_ibs_op_ctl(union ibs_op_ctl reg)
{
char l3_miss_only[sizeof(" L3MissOnly _")] = "";
+ char ldlat[sizeof(" LdLatThrsh __ LdLatEn _")] = "";
if (zen4_ibs_extensions)
snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only);
- printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d\n",
+ if (ldlat_cap) {
+ snprintf(ldlat, sizeof(ldlat), " LdLatThrsh %2d LdLatEn %d",
+ reg.ldlat_thrsh, reg.ldlat_en);
+ }
+
+ printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d%s\n",
reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only,
reg.op_en, reg.op_val, reg.cnt_ctl,
- reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt);
+ reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt, ldlat);
}
static void pr_ibs_op_data(union ibs_op_data reg)
@@ -154,9 +162,20 @@ static void pr_ibs_op_data2(union ibs_op_data2 reg)
static void pr_ibs_op_data3(union ibs_op_data3 reg)
{
- char l2_miss_str[sizeof(" L2Miss _")] = "";
- char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = "";
+ static const char * const dc_page_sizes[] = {
+ " 4K",
+ " 2M",
+ " 1G",
+ " ??",
+ };
char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = "";
+ char dc_l1_l2tlb_miss_str[sizeof(" DcL1TlbMiss _ DcL2TlbMiss _")] = "";
+ char dc_l1tlb_hit_str[sizeof(" DcL1TlbHit2M _ DcL1TlbHit1G _")] = "";
+ char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = "";
+ char dc_l2tlb_hit_2m_str[sizeof(" DcL2TlbHit2M _")] = "";
+ char dc_l2tlb_hit_1g_str[sizeof(" DcL2TlbHit1G _")] = "";
+ char dc_page_size_str[sizeof(" DcPageSize ____")] = "";
+ char l2_miss_str[sizeof(" L2Miss _")] = "";
/*
* Erratum #1293
@@ -172,16 +191,40 @@ static void pr_ibs_op_data3(union ibs_op_data3 reg)
snprintf(op_mem_width_str, sizeof(op_mem_width_str),
" OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1));
- printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d "
- "DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d "
- "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d "
- "DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n",
- reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss,
- reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss,
- reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op,
- reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid,
- reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str,
- op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat);
+ if (dtlb_pgsize_cap) {
+ if (reg.dc_phy_addr_valid) {
+ int idx = (reg.dc_l1tlb_hit_1g << 1) | reg.dc_l1tlb_hit_2m;
+
+ snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str),
+ " DcL1TlbMiss %d DcL2TlbMiss %d",
+ reg.dc_l1tlb_miss, reg.dc_l2tlb_miss);
+ snprintf(dc_page_size_str, sizeof(dc_page_size_str),
+ " DcPageSize %4s", dc_page_sizes[idx]);
+ }
+ } else {
+ snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str),
+ " DcL1TlbMiss %d DcL2TlbMiss %d",
+ reg.dc_l1tlb_miss, reg.dc_l2tlb_miss);
+ snprintf(dc_l1tlb_hit_str, sizeof(dc_l1tlb_hit_str),
+ " DcL1TlbHit2M %d DcL1TlbHit1G %d",
+ reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g);
+ snprintf(dc_l2tlb_hit_2m_str, sizeof(dc_l2tlb_hit_2m_str),
+ " DcL2TlbHit2M %d", reg.dc_l2tlb_hit_2m);
+ snprintf(dc_l2tlb_hit_1g_str, sizeof(dc_l2tlb_hit_1g_str),
+ " DcL2TlbHit1G %d", reg.dc_l2_tlb_hit_1g);
+ }
+
+ printf("ibs_op_data3:\t%016llx LdOp %d StOp %d%s%s%s DcMiss %d DcMisAcc %d "
+ "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d "
+ "DcLinAddrValid %d DcPhyAddrValid %d%s%s SwPf %d%s%s "
+ "DcMissLat %5d TlbRefillLat %5d\n",
+ reg.val, reg.ld_op, reg.st_op, dc_l1_l2tlb_miss_str,
+ dtlb_pgsize_cap ? dc_page_size_str : dc_l1tlb_hit_str,
+ dc_l2tlb_hit_2m_str, reg.dc_miss, reg.dc_mis_acc, reg.dc_wc_mem_acc,
+ reg.dc_uc_mem_acc, reg.dc_locked_op, reg.dc_miss_no_mab_alloc,
+ reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, dc_l2tlb_hit_1g_str,
+ l2_miss_str, reg.sw_pf, op_mem_width_str, op_dc_miss_open_mem_reqs_str,
+ reg.dc_miss_lat, reg.tlb_refill_lat);
}
/*
@@ -311,7 +354,7 @@ static void parse_cpuid(struct perf_env *env)
*/
bool evlist__has_amd_ibs(struct evlist *evlist)
{
- struct perf_env *env = evlist->env;
+ struct perf_env *env = perf_session__env(evlist->session);
int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env);
const char *pmu_mapping = perf_env__pmu_mappings(env);
char name[sizeof("ibs_fetch")];
@@ -331,6 +374,12 @@ bool evlist__has_amd_ibs(struct evlist *evlist)
if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions"))
zen4_ibs_extensions = 1;
+ if (perf_env__find_pmu_cap(env, "ibs_op", "ldlat"))
+ ldlat_cap = 1;
+
+ if (perf_env__find_pmu_cap(env, "ibs_op", "dtlb_pgsize"))
+ dtlb_pgsize_cap = 1;
+
if (ibs_fetch_type || ibs_op_type) {
if (!cpu_family)
parse_cpuid(env);
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 1e59b9e5339d..0dd475a744b6 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1451,6 +1451,7 @@ void annotated_source__purge(struct annotated_source *as)
list_del_init(&al->node);
disasm_line__free(disasm_line(al));
}
+ as->tried_source = false;
}
static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp)
@@ -2280,6 +2281,8 @@ void annotation_options__init(void)
opt->annotate_src = true;
opt->offset_level = ANNOTATION__OFFSET_JUMP_TARGETS;
opt->percent_type = PERCENT_PERIOD_LOCAL;
+ opt->hide_src_code = true;
+ opt->hide_src_code_on_title = true;
}
void annotation_options__exit(void)
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 0e6e3f60a897..8b5131d257b0 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -44,6 +44,7 @@ enum perf_disassembler {
struct annotation_options {
bool hide_src_code,
+ hide_src_code_on_title,
use_offset,
jump_arrows,
print_lines,
@@ -293,6 +294,7 @@ struct annotated_source {
int nr_entries;
int nr_asm_entries;
int max_jump_sources;
+ bool tried_source;
u64 start;
struct {
u8 addr;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 5d232188643b..881d9f29c138 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source {
ARM_SPE_AMPEREONE_L2D = 0x9,
};
+enum arm_spe_hisi_hip_data_source {
+ ARM_SPE_HISI_HIP_PEER_CPU = 0,
+ ARM_SPE_HISI_HIP_PEER_CPU_HITM = 1,
+ ARM_SPE_HISI_HIP_L3 = 2,
+ ARM_SPE_HISI_HIP_L3_HITM = 3,
+ ARM_SPE_HISI_HIP_PEER_CLUSTER = 4,
+ ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM = 5,
+ ARM_SPE_HISI_HIP_REMOTE_SOCKET = 6,
+ ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM = 7,
+ ARM_SPE_HISI_HIP_LOCAL_MEM = 8,
+ ARM_SPE_HISI_HIP_REMOTE_MEM = 9,
+ ARM_SPE_HISI_HIP_NC_DEV = 13,
+ ARM_SPE_HISI_HIP_L2 = 16,
+ ARM_SPE_HISI_HIP_L2_HITM = 17,
+ ARM_SPE_HISI_HIP_L1 = 18,
+};
+
struct arm_spe_record {
enum arm_spe_sample_type type;
int err;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 2a9775649cc2..8942fa598a84 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = {
{},
};
+static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
+ {},
+};
+
static void arm_spe__sample_flags(struct arm_spe_queue *speq)
{
const struct arm_spe_record *record = &speq->decoder->record;
@@ -718,9 +723,100 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re
arm_spe__synth_data_source_common(&common_record, data_src);
}
+static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
+{
+ /* Use common synthesis method to handle store operations */
+ if (record->op & ARM_SPE_OP_ST) {
+ arm_spe__synth_data_source_common(record, data_src);
+ return;
+ }
+
+ switch (record->source) {
+ case ARM_SPE_HISI_HIP_PEER_CPU:
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_HISI_HIP_L3:
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
+ break;
+ case ARM_SPE_HISI_HIP_L3_HITM:
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ break;
+ case ARM_SPE_HISI_HIP_PEER_CLUSTER:
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM:
+ data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+ break;
+ case ARM_SPE_HISI_HIP_LOCAL_MEM:
+ data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ case ARM_SPE_HISI_HIP_REMOTE_MEM:
+ data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ break;
+ case ARM_SPE_HISI_HIP_NC_DEV:
+ data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ case ARM_SPE_HISI_HIP_L2:
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ case ARM_SPE_HISI_HIP_L2_HITM:
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ break;
+ case ARM_SPE_HISI_HIP_L1:
+ data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ break;
+ default:
+ break;
+ }
+}
+
static const struct data_source_handle data_source_handles[] = {
DS(common_ds_encoding_cpus, data_source_common),
DS(ampereone_ds_encoding_cpus, data_source_ampereone),
+ DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
};
static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
@@ -760,7 +856,7 @@ static bool arm_spe__synth_ds(struct arm_spe_queue *speq,
const char *cpuid;
pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
- cpuid = perf_env__cpuid(spe->session->evlist->env);
+ cpuid = perf_env__cpuid(perf_session__env(spe->session));
midr = strtol(cpuid, NULL, 16);
} else {
/* CPU ID is -1 for per-thread mode */
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 03211c2623de..ebd32f1b8f12 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1890,7 +1890,7 @@ int __weak compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
}
static int __auxtrace_mmap__read(struct mmap *map,
- struct auxtrace_record *itr,
+ struct auxtrace_record *itr, struct perf_env *env,
const struct perf_tool *tool, process_auxtrace_t fn,
bool snapshot, size_t snapshot_size)
{
@@ -1900,7 +1900,7 @@ static int __auxtrace_mmap__read(struct mmap *map,
size_t size, head_off, old_off, len1, len2, padding;
union perf_event ev;
void *data1, *data2;
- int kernel_is_64_bit = perf_env__kernel_is_64_bit(evsel__env(NULL));
+ int kernel_is_64_bit = perf_env__kernel_is_64_bit(env);
head = auxtrace_mmap__read_head(mm, kernel_is_64_bit);
@@ -2002,17 +2002,18 @@ static int __auxtrace_mmap__read(struct mmap *map,
}
int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr,
- const struct perf_tool *tool, process_auxtrace_t fn)
+ struct perf_env *env, const struct perf_tool *tool,
+ process_auxtrace_t fn)
{
- return __auxtrace_mmap__read(map, itr, tool, fn, false, 0);
+ return __auxtrace_mmap__read(map, itr, env, tool, fn, false, 0);
}
int auxtrace_mmap__read_snapshot(struct mmap *map,
- struct auxtrace_record *itr,
+ struct auxtrace_record *itr, struct perf_env *env,
const struct perf_tool *tool, process_auxtrace_t fn,
size_t snapshot_size)
{
- return __auxtrace_mmap__read(map, itr, tool, fn, true, snapshot_size);
+ return __auxtrace_mmap__read(map, itr, env, tool, fn, true, snapshot_size);
}
/**
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index b0db84d27b25..f001cbb68f8e 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -23,6 +23,7 @@ union perf_event;
struct perf_session;
struct evlist;
struct evsel;
+struct perf_env;
struct perf_tool;
struct mmap;
struct perf_sample;
@@ -512,10 +513,11 @@ typedef int (*process_auxtrace_t)(const struct perf_tool *tool,
size_t len1, void *data2, size_t len2);
int auxtrace_mmap__read(struct mmap *map, struct auxtrace_record *itr,
- const struct perf_tool *tool, process_auxtrace_t fn);
+ struct perf_env *env, const struct perf_tool *tool,
+ process_auxtrace_t fn);
int auxtrace_mmap__read_snapshot(struct mmap *map,
- struct auxtrace_record *itr,
+ struct auxtrace_record *itr, struct perf_env *env,
const struct perf_tool *tool, process_auxtrace_t fn,
size_t snapshot_size);
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index c81444059ad0..5b6d3e899e11 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -1,13 +1,21 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>
+#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
+#include <linux/perf_event.h>
#include <linux/string.h>
+#include <linux/zalloc.h>
#include <internal/lib.h>
+#include <perf/event.h>
#include <symbol/kallsyms.h>
#include "bpf-event.h"
#include "bpf-utils.h"
@@ -151,6 +159,362 @@ static int synthesize_bpf_prog_name(char *buf, int size,
return name_len;
}
+#ifdef HAVE_LIBBPF_STRINGS_SUPPORT
+
+#define BPF_METADATA_PREFIX "bpf_metadata_"
+#define BPF_METADATA_PREFIX_LEN (sizeof(BPF_METADATA_PREFIX) - 1)
+
+static bool name_has_bpf_metadata_prefix(const char **s)
+{
+ if (strncmp(*s, BPF_METADATA_PREFIX, BPF_METADATA_PREFIX_LEN) != 0)
+ return false;
+ *s += BPF_METADATA_PREFIX_LEN;
+ return true;
+}
+
+struct bpf_metadata_map {
+ struct btf *btf;
+ const struct btf_type *datasec;
+ void *rodata;
+ size_t rodata_size;
+ unsigned int num_vars;
+};
+
+static int bpf_metadata_read_map_data(__u32 map_id, struct bpf_metadata_map *map)
+{
+ int map_fd;
+ struct bpf_map_info map_info;
+ __u32 map_info_len;
+ int key;
+ struct btf *btf;
+ const struct btf_type *datasec;
+ struct btf_var_secinfo *vsi;
+ unsigned int vlen, vars;
+ void *rodata;
+
+ map_fd = bpf_map_get_fd_by_id(map_id);
+ if (map_fd < 0)
+ return -1;
+
+ memset(&map_info, 0, sizeof(map_info));
+ map_info_len = sizeof(map_info);
+ if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len) < 0)
+ goto out_close;
+
+ /* If it's not an .rodata map, don't bother. */
+ if (map_info.type != BPF_MAP_TYPE_ARRAY ||
+ map_info.key_size != sizeof(int) ||
+ map_info.max_entries != 1 ||
+ !map_info.btf_value_type_id ||
+ !strstr(map_info.name, ".rodata")) {
+ goto out_close;
+ }
+
+ btf = btf__load_from_kernel_by_id(map_info.btf_id);
+ if (!btf)
+ goto out_close;
+ datasec = btf__type_by_id(btf, map_info.btf_value_type_id);
+ if (!btf_is_datasec(datasec))
+ goto out_free_btf;
+
+ /*
+ * If there aren't any variables with the "bpf_metadata_" prefix,
+ * don't bother.
+ */
+ vlen = btf_vlen(datasec);
+ vsi = btf_var_secinfos(datasec);
+ vars = 0;
+ for (unsigned int i = 0; i < vlen; i++, vsi++) {
+ const struct btf_type *t_var = btf__type_by_id(btf, vsi->type);
+ const char *name = btf__name_by_offset(btf, t_var->name_off);
+
+ if (name_has_bpf_metadata_prefix(&name))
+ vars++;
+ }
+ if (vars == 0)
+ goto out_free_btf;
+
+ rodata = zalloc(map_info.value_size);
+ if (!rodata)
+ goto out_free_btf;
+ key = 0;
+ if (bpf_map_lookup_elem(map_fd, &key, rodata)) {
+ free(rodata);
+ goto out_free_btf;
+ }
+ close(map_fd);
+
+ map->btf = btf;
+ map->datasec = datasec;
+ map->rodata = rodata;
+ map->rodata_size = map_info.value_size;
+ map->num_vars = vars;
+ return 0;
+
+out_free_btf:
+ btf__free(btf);
+out_close:
+ close(map_fd);
+ return -1;
+}
+
+struct format_btf_ctx {
+ char *buf;
+ size_t buf_size;
+ size_t buf_idx;
+};
+
+static void format_btf_cb(void *arg, const char *fmt, va_list ap)
+{
+ int n;
+ struct format_btf_ctx *ctx = (struct format_btf_ctx *)arg;
+
+ n = vsnprintf(ctx->buf + ctx->buf_idx, ctx->buf_size - ctx->buf_idx,
+ fmt, ap);
+ ctx->buf_idx += n;
+ if (ctx->buf_idx >= ctx->buf_size)
+ ctx->buf_idx = ctx->buf_size;
+}
+
+static void format_btf_variable(struct btf *btf, char *buf, size_t buf_size,
+ const struct btf_type *t, const void *btf_data)
+{
+ struct format_btf_ctx ctx = {
+ .buf = buf,
+ .buf_idx = 0,
+ .buf_size = buf_size,
+ };
+ const struct btf_dump_type_data_opts opts = {
+ .sz = sizeof(struct btf_dump_type_data_opts),
+ .skip_names = 1,
+ .compact = 1,
+ .emit_strings = 1,
+ };
+ struct btf_dump *d;
+ size_t btf_size;
+
+ d = btf_dump__new(btf, format_btf_cb, &ctx, NULL);
+ btf_size = btf__resolve_size(btf, t->type);
+ btf_dump__dump_type_data(d, t->type, btf_data, btf_size, &opts);
+ btf_dump__free(d);
+}
+
+static void bpf_metadata_fill_event(struct bpf_metadata_map *map,
+ struct perf_record_bpf_metadata *bpf_metadata_event)
+{
+ struct btf_var_secinfo *vsi;
+ unsigned int i, vlen;
+
+ memset(bpf_metadata_event->prog_name, 0, BPF_PROG_NAME_LEN);
+ vlen = btf_vlen(map->datasec);
+ vsi = btf_var_secinfos(map->datasec);
+
+ for (i = 0; i < vlen; i++, vsi++) {
+ const struct btf_type *t_var = btf__type_by_id(map->btf,
+ vsi->type);
+ const char *name = btf__name_by_offset(map->btf,
+ t_var->name_off);
+ const __u64 nr_entries = bpf_metadata_event->nr_entries;
+ struct perf_record_bpf_metadata_entry *entry;
+
+ if (!name_has_bpf_metadata_prefix(&name))
+ continue;
+
+ if (nr_entries >= (__u64)map->num_vars)
+ break;
+
+ entry = &bpf_metadata_event->entries[nr_entries];
+ memset(entry, 0, sizeof(*entry));
+ snprintf(entry->key, BPF_METADATA_KEY_LEN, "%s", name);
+ format_btf_variable(map->btf, entry->value,
+ BPF_METADATA_VALUE_LEN, t_var,
+ map->rodata + vsi->offset);
+ bpf_metadata_event->nr_entries++;
+ }
+}
+
+static void bpf_metadata_free_map_data(struct bpf_metadata_map *map)
+{
+ btf__free(map->btf);
+ free(map->rodata);
+}
+
+static struct bpf_metadata *bpf_metadata_alloc(__u32 nr_prog_tags,
+ __u32 nr_variables)
+{
+ struct bpf_metadata *metadata;
+ size_t event_size;
+
+ metadata = zalloc(sizeof(struct bpf_metadata));
+ if (!metadata)
+ return NULL;
+
+ metadata->prog_names = zalloc(nr_prog_tags * sizeof(char *));
+ if (!metadata->prog_names) {
+ bpf_metadata_free(metadata);
+ return NULL;
+ }
+ for (__u32 prog_index = 0; prog_index < nr_prog_tags; prog_index++) {
+ metadata->prog_names[prog_index] = zalloc(BPF_PROG_NAME_LEN);
+ if (!metadata->prog_names[prog_index]) {
+ bpf_metadata_free(metadata);
+ return NULL;
+ }
+ metadata->nr_prog_names++;
+ }
+
+ event_size = sizeof(metadata->event->bpf_metadata) +
+ nr_variables * sizeof(metadata->event->bpf_metadata.entries[0]);
+ metadata->event = zalloc(event_size);
+ if (!metadata->event) {
+ bpf_metadata_free(metadata);
+ return NULL;
+ }
+ metadata->event->bpf_metadata = (struct perf_record_bpf_metadata) {
+ .header = {
+ .type = PERF_RECORD_BPF_METADATA,
+ .size = event_size,
+ },
+ .nr_entries = 0,
+ };
+
+ return metadata;
+}
+
+static struct bpf_metadata *bpf_metadata_create(struct bpf_prog_info *info)
+{
+ struct bpf_metadata *metadata;
+ const __u32 *map_ids = (__u32 *)(uintptr_t)info->map_ids;
+
+ for (__u32 map_index = 0; map_index < info->nr_map_ids; map_index++) {
+ struct bpf_metadata_map map;
+
+ if (bpf_metadata_read_map_data(map_ids[map_index], &map) != 0)
+ continue;
+
+ metadata = bpf_metadata_alloc(info->nr_prog_tags, map.num_vars);
+ if (!metadata)
+ continue;
+
+ bpf_metadata_fill_event(&map, &metadata->event->bpf_metadata);
+
+ for (__u32 index = 0; index < info->nr_prog_tags; index++) {
+ synthesize_bpf_prog_name(metadata->prog_names[index],
+ BPF_PROG_NAME_LEN, info,
+ map.btf, index);
+ }
+
+ bpf_metadata_free_map_data(&map);
+
+ return metadata;
+ }
+
+ return NULL;
+}
+
+static int synthesize_perf_record_bpf_metadata(const struct bpf_metadata *metadata,
+ const struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ const size_t event_size = metadata->event->header.size;
+ union perf_event *event;
+ int err = 0;
+
+ event = zalloc(event_size + machine->id_hdr_size);
+ if (!event)
+ return -1;
+ memcpy(event, metadata->event, event_size);
+ memset((void *)event + event->header.size, 0, machine->id_hdr_size);
+ event->header.size += machine->id_hdr_size;
+ for (__u32 index = 0; index < metadata->nr_prog_names; index++) {
+ memcpy(event->bpf_metadata.prog_name,
+ metadata->prog_names[index], BPF_PROG_NAME_LEN);
+ err = perf_tool__process_synth_event(tool, event, machine,
+ process);
+ if (err != 0)
+ break;
+ }
+
+ free(event);
+ return err;
+}
+
+void bpf_metadata_free(struct bpf_metadata *metadata)
+{
+ if (metadata == NULL)
+ return;
+ for (__u32 index = 0; index < metadata->nr_prog_names; index++)
+ free(metadata->prog_names[index]);
+ free(metadata->prog_names);
+ free(metadata->event);
+ free(metadata);
+}
+
+#else /* HAVE_LIBBPF_STRINGS_SUPPORT */
+
+static struct bpf_metadata *bpf_metadata_create(struct bpf_prog_info *info __maybe_unused)
+{
+ return NULL;
+}
+
+static int synthesize_perf_record_bpf_metadata(const struct bpf_metadata *metadata __maybe_unused,
+ const struct perf_tool *tool __maybe_unused,
+ perf_event__handler_t process __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return 0;
+}
+
+void bpf_metadata_free(struct bpf_metadata *metadata __maybe_unused)
+{
+}
+
+#endif /* HAVE_LIBBPF_STRINGS_SUPPORT */
+
+struct bpf_metadata_final_ctx {
+ const struct perf_tool *tool;
+ perf_event__handler_t process;
+ struct machine *machine;
+};
+
+static void synthesize_final_bpf_metadata_cb(struct bpf_prog_info_node *node,
+ void *data)
+{
+ struct bpf_metadata_final_ctx *ctx = (struct bpf_metadata_final_ctx *)data;
+ struct bpf_metadata *metadata = node->metadata;
+ int err;
+
+ if (metadata == NULL)
+ return;
+ err = synthesize_perf_record_bpf_metadata(metadata, ctx->tool,
+ ctx->process, ctx->machine);
+ if (err != 0) {
+ const char *prog_name = metadata->prog_names[0];
+
+ if (prog_name != NULL)
+ pr_warning("Couldn't synthesize final BPF metadata for %s.\n", prog_name);
+ else
+ pr_warning("Couldn't synthesize final BPF metadata.\n");
+ }
+ bpf_metadata_free(metadata);
+ node->metadata = NULL;
+}
+
+void perf_event__synthesize_final_bpf_metadata(struct perf_session *session,
+ perf_event__handler_t process)
+{
+ struct perf_env *env = &session->header.env;
+ struct bpf_metadata_final_ctx ctx = {
+ .tool = session->tool,
+ .process = process,
+ .machine = &session->machines.host,
+ };
+
+ perf_env__iterate_bpf_prog_info(env, synthesize_final_bpf_metadata_cb,
+ &ctx);
+}
+
/*
* Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf
* program. One PERF_RECORD_BPF_EVENT is generated for the program. And
@@ -173,6 +537,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
const struct perf_tool *tool = session->tool;
struct bpf_prog_info_node *info_node;
struct perf_bpil *info_linear;
+ struct bpf_metadata *metadata;
struct bpf_prog_info *info;
struct btf *btf = NULL;
struct perf_env *env;
@@ -184,7 +549,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
* for perf-record and perf-report use header.env;
* otherwise, use global perf_env.
*/
- env = session->data ? &session->header.env : &perf_env;
+ env = perf_session__env(session);
arrays = 1UL << PERF_BPIL_JITED_KSYMS;
arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS;
@@ -193,6 +558,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
arrays |= 1UL << PERF_BPIL_JITED_INSNS;
arrays |= 1UL << PERF_BPIL_LINE_INFO;
arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO;
+ arrays |= 1UL << PERF_BPIL_MAP_IDS;
info_linear = get_bpf_prog_info_linear(fd, arrays);
if (IS_ERR_OR_NULL(info_linear)) {
@@ -289,6 +655,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
}
info_node->info_linear = info_linear;
+ info_node->metadata = NULL;
if (!perf_env__insert_bpf_prog_info(env, info_node)) {
free(info_linear);
free(info_node);
@@ -301,6 +668,15 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
*/
err = perf_tool__process_synth_event(tool, event,
machine, process);
+
+ /* Synthesize PERF_RECORD_BPF_METADATA */
+ metadata = bpf_metadata_create(info);
+ if (metadata != NULL) {
+ err = synthesize_perf_record_bpf_metadata(metadata,
+ tool, process,
+ machine);
+ bpf_metadata_free(metadata);
+ }
}
out:
@@ -471,6 +847,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
arrays |= 1UL << PERF_BPIL_JITED_INSNS;
arrays |= 1UL << PERF_BPIL_LINE_INFO;
arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO;
+ arrays |= 1UL << PERF_BPIL_MAP_IDS;
info_linear = get_bpf_prog_info_linear(fd, arrays);
if (IS_ERR_OR_NULL(info_linear)) {
@@ -483,6 +860,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
info_node = malloc(sizeof(struct bpf_prog_info_node));
if (info_node) {
info_node->info_linear = info_linear;
+ info_node->metadata = bpf_metadata_create(&info_linear->info);
if (!perf_env__insert_bpf_prog_info(env, info_node)) {
free(info_linear);
free(info_node);
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index e2f0420905f5..60d2c6637af5 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -17,8 +17,15 @@ struct record_opts;
struct evlist;
struct target;
+struct bpf_metadata {
+ union perf_event *event;
+ char **prog_names;
+ __u64 nr_prog_names;
+};
+
struct bpf_prog_info_node {
struct perf_bpil *info_linear;
+ struct bpf_metadata *metadata;
struct rb_node rb_node;
};
@@ -36,6 +43,7 @@ int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env);
void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
struct perf_env *env,
FILE *fp);
+void bpf_metadata_free(struct bpf_metadata *metadata);
#else
static inline int machine__process_bpf(struct machine *machine __maybe_unused,
union perf_event *event __maybe_unused,
@@ -56,5 +64,10 @@ static inline void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info _
{
}
+
+static inline void bpf_metadata_free(struct bpf_metadata *metadata __maybe_unused)
+{
+
+}
#endif // HAVE_LIBBPF_SUPPORT
#endif
diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c
index a4fdf6911ec1..d0e013eeb0f7 100644
--- a/tools/perf/util/bpf-filter.c
+++ b/tools/perf/util/bpf-filter.c
@@ -52,6 +52,7 @@
#include <internal/xyarray.h>
#include <perf/threadmap.h>
+#include "util/cap.h"
#include "util/debug.h"
#include "util/evsel.h"
#include "util/target.h"
@@ -449,7 +450,7 @@ int perf_bpf_filter__prepare(struct evsel *evsel, struct target *target)
struct bpf_program *prog;
struct bpf_link *link;
struct perf_bpf_filter_entry *entry;
- bool needs_idx_hash = !target__has_cpu(target) && !target->uid_str;
+ bool needs_idx_hash = !target__has_cpu(target);
entry = calloc(MAX_FILTERS, sizeof(*entry));
if (entry == NULL)
@@ -618,11 +619,38 @@ struct perf_bpf_filter_expr *perf_bpf_filter_expr__new(enum perf_bpf_filter_term
return expr;
}
+static bool check_bpf_filter_capable(void)
+{
+ bool used_root;
+
+ if (perf_cap__capable(CAP_BPF, &used_root))
+ return true;
+
+ if (!used_root) {
+ /* Check if root already pinned the filter programs and maps */
+ int fd = get_pinned_fd("filters");
+
+ if (fd >= 0) {
+ close(fd);
+ return true;
+ }
+ }
+
+ pr_err("Error: BPF filter only works for %s!\n"
+ "\tPlease run 'perf record --setup-filter pin' as root first.\n",
+ used_root ? "root" : "users with the CAP_BPF capability");
+
+ return false;
+}
+
int perf_bpf_filter__parse(struct list_head *expr_head, const char *str)
{
YY_BUFFER_STATE buffer;
int ret;
+ if (!check_bpf_filter_capable())
+ return -EPERM;
+
buffer = perf_bpf_filter__scan_string(str);
ret = perf_bpf_filter_parse(expr_head);
diff --git a/tools/perf/util/bpf-filter.h b/tools/perf/util/bpf-filter.h
index 916ed7770b73..122477f2de44 100644
--- a/tools/perf/util/bpf-filter.h
+++ b/tools/perf/util/bpf-filter.h
@@ -5,6 +5,7 @@
#include <linux/list.h>
#include "bpf_skel/sample-filter.h"
+#include "util/debug.h"
struct perf_bpf_filter_expr {
struct list_head list;
@@ -38,6 +39,8 @@ int perf_bpf_filter__unpin(void);
static inline int perf_bpf_filter__parse(struct list_head *expr_head __maybe_unused,
const char *str __maybe_unused)
{
+ pr_err("Error: BPF filter is requested but perf is not built with BPF.\n"
+ "\tPlease make sure to build with libbpf and BPF skeleton.\n");
return -EOPNOTSUPP;
}
static inline int perf_bpf_filter__prepare(struct evsel *evsel __maybe_unused,
diff --git a/tools/perf/util/bpf-trace-summary.c b/tools/perf/util/bpf-trace-summary.c
new file mode 100644
index 000000000000..69fb165da206
--- /dev/null
+++ b/tools/perf/util/bpf-trace-summary.c
@@ -0,0 +1,458 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <inttypes.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "dwarf-regs.h" /* for EM_HOST */
+#include "syscalltbl.h"
+#include "util/cgroup.h"
+#include "util/hashmap.h"
+#include "util/trace.h"
+#include "util/util.h"
+#include <bpf/bpf.h>
+#include <linux/rbtree.h>
+#include <linux/time64.h>
+#include <tools/libc_compat.h> /* reallocarray */
+
+#include "bpf_skel/syscall_summary.h"
+#include "bpf_skel/syscall_summary.skel.h"
+
+
+static struct syscall_summary_bpf *skel;
+static struct rb_root cgroups = RB_ROOT;
+
+int trace_prepare_bpf_summary(enum trace_summary_mode mode)
+{
+ skel = syscall_summary_bpf__open();
+ if (skel == NULL) {
+ fprintf(stderr, "failed to open syscall summary bpf skeleton\n");
+ return -1;
+ }
+
+ if (mode == SUMMARY__BY_THREAD)
+ skel->rodata->aggr_mode = SYSCALL_AGGR_THREAD;
+ else if (mode == SUMMARY__BY_CGROUP)
+ skel->rodata->aggr_mode = SYSCALL_AGGR_CGROUP;
+ else
+ skel->rodata->aggr_mode = SYSCALL_AGGR_CPU;
+
+ if (cgroup_is_v2("perf_event") > 0)
+ skel->rodata->use_cgroup_v2 = 1;
+
+ if (syscall_summary_bpf__load(skel) < 0) {
+ fprintf(stderr, "failed to load syscall summary bpf skeleton\n");
+ return -1;
+ }
+
+ if (syscall_summary_bpf__attach(skel) < 0) {
+ fprintf(stderr, "failed to attach syscall summary bpf skeleton\n");
+ return -1;
+ }
+
+ if (mode == SUMMARY__BY_CGROUP)
+ read_all_cgroups(&cgroups);
+
+ return 0;
+}
+
+void trace_start_bpf_summary(void)
+{
+ skel->bss->enabled = 1;
+}
+
+void trace_end_bpf_summary(void)
+{
+ skel->bss->enabled = 0;
+}
+
+struct syscall_node {
+ int syscall_nr;
+ struct syscall_stats stats;
+};
+
+static double rel_stddev(struct syscall_stats *stat)
+{
+ double variance, average;
+
+ if (stat->count < 2)
+ return 0;
+
+ average = (double)stat->total_time / stat->count;
+
+ variance = stat->squared_sum;
+ variance -= (stat->total_time * stat->total_time) / stat->count;
+ variance /= stat->count - 1;
+
+ return 100 * sqrt(variance / stat->count) / average;
+}
+
+/*
+ * The syscall_data is to maintain syscall stats ordered by total time.
+ * It supports different summary modes like per-thread or global.
+ *
+ * For per-thread stats, it uses two-level data strurcture -
+ * syscall_data is keyed by TID and has an array of nodes which
+ * represents each syscall for the thread.
+ *
+ * For global stats, it's still two-level technically but we don't need
+ * per-cpu analysis so it's keyed by the syscall number to combine stats
+ * from different CPUs. And syscall_data always has a syscall_node so
+ * it can effectively work as flat hierarchy.
+ *
+ * For per-cgroup stats, it uses two-level data structure like thread
+ * syscall_data is keyed by CGROUP and has an array of node which
+ * represents each syscall for the cgroup.
+ */
+struct syscall_data {
+ u64 key; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU, cgroup if AGGR_CGROUP */
+ int nr_events;
+ int nr_nodes;
+ u64 total_time;
+ struct syscall_node *nodes;
+};
+
+static int datacmp(const void *a, const void *b)
+{
+ const struct syscall_data * const *sa = a;
+ const struct syscall_data * const *sb = b;
+
+ return (*sa)->total_time > (*sb)->total_time ? -1 : 1;
+}
+
+static int nodecmp(const void *a, const void *b)
+{
+ const struct syscall_node *na = a;
+ const struct syscall_node *nb = b;
+
+ return na->stats.total_time > nb->stats.total_time ? -1 : 1;
+}
+
+static size_t sc_node_hash(long key, void *ctx __maybe_unused)
+{
+ return key;
+}
+
+static bool sc_node_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+ return key1 == key2;
+}
+
+static int print_common_stats(struct syscall_data *data, FILE *fp)
+{
+ int printed = 0;
+
+ for (int i = 0; i < data->nr_nodes; i++) {
+ struct syscall_node *node = &data->nodes[i];
+ struct syscall_stats *stat = &node->stats;
+ double total = (double)(stat->total_time) / NSEC_PER_MSEC;
+ double min = (double)(stat->min_time) / NSEC_PER_MSEC;
+ double max = (double)(stat->max_time) / NSEC_PER_MSEC;
+ double avg = total / stat->count;
+ const char *name;
+
+ /* TODO: support other ABIs */
+ name = syscalltbl__name(EM_HOST, node->syscall_nr);
+ if (name)
+ printed += fprintf(fp, " %-15s", name);
+ else
+ printed += fprintf(fp, " syscall:%-7d", node->syscall_nr);
+
+ printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n",
+ stat->count, stat->error, total, min, avg, max,
+ rel_stddev(stat));
+ }
+ return printed;
+}
+
+static int update_thread_stats(struct hashmap *hash, struct syscall_key *map_key,
+ struct syscall_stats *map_data)
+{
+ struct syscall_data *data;
+ struct syscall_node *nodes;
+
+ if (!hashmap__find(hash, map_key->cpu_or_tid, &data)) {
+ data = zalloc(sizeof(*data));
+ if (data == NULL)
+ return -ENOMEM;
+
+ data->key = map_key->cpu_or_tid;
+ if (hashmap__add(hash, data->key, data) < 0) {
+ free(data);
+ return -ENOMEM;
+ }
+ }
+
+ /* update thread total stats */
+ data->nr_events += map_data->count;
+ data->total_time += map_data->total_time;
+
+ nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes));
+ if (nodes == NULL)
+ return -ENOMEM;
+
+ data->nodes = nodes;
+ nodes = &data->nodes[data->nr_nodes++];
+ nodes->syscall_nr = map_key->nr;
+
+ /* each thread has an entry for each syscall, just use the stat */
+ memcpy(&nodes->stats, map_data, sizeof(*map_data));
+ return 0;
+}
+
+static int print_thread_stat(struct syscall_data *data, FILE *fp)
+{
+ int printed = 0;
+
+ qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp);
+
+ printed += fprintf(fp, " thread (%d), ", (int)data->key);
+ printed += fprintf(fp, "%d events\n\n", data->nr_events);
+
+ printed += fprintf(fp, " syscall calls errors total min avg max stddev\n");
+ printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
+ printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
+
+ printed += print_common_stats(data, fp);
+ printed += fprintf(fp, "\n\n");
+
+ return printed;
+}
+
+static int print_thread_stats(struct syscall_data **data, int nr_data, FILE *fp)
+{
+ int printed = 0;
+
+ for (int i = 0; i < nr_data; i++)
+ printed += print_thread_stat(data[i], fp);
+
+ return printed;
+}
+
+static int update_total_stats(struct hashmap *hash, struct syscall_key *map_key,
+ struct syscall_stats *map_data)
+{
+ struct syscall_data *data;
+ struct syscall_stats *stat;
+
+ if (!hashmap__find(hash, map_key->nr, &data)) {
+ data = zalloc(sizeof(*data));
+ if (data == NULL)
+ return -ENOMEM;
+
+ data->nodes = zalloc(sizeof(*data->nodes));
+ if (data->nodes == NULL) {
+ free(data);
+ return -ENOMEM;
+ }
+
+ data->nr_nodes = 1;
+ data->key = map_key->nr;
+ data->nodes->syscall_nr = data->key;
+
+ if (hashmap__add(hash, data->key, data) < 0) {
+ free(data->nodes);
+ free(data);
+ return -ENOMEM;
+ }
+ }
+
+ /* update total stats for this syscall */
+ data->nr_events += map_data->count;
+ data->total_time += map_data->total_time;
+
+ /* This is sum of the same syscall from different CPUs */
+ stat = &data->nodes->stats;
+
+ stat->total_time += map_data->total_time;
+ stat->squared_sum += map_data->squared_sum;
+ stat->count += map_data->count;
+ stat->error += map_data->error;
+
+ if (stat->max_time < map_data->max_time)
+ stat->max_time = map_data->max_time;
+ if (stat->min_time > map_data->min_time || stat->min_time == 0)
+ stat->min_time = map_data->min_time;
+
+ return 0;
+}
+
+static int print_total_stats(struct syscall_data **data, int nr_data, FILE *fp)
+{
+ int printed = 0;
+ int nr_events = 0;
+
+ for (int i = 0; i < nr_data; i++)
+ nr_events += data[i]->nr_events;
+
+ printed += fprintf(fp, " total, %d events\n\n", nr_events);
+
+ printed += fprintf(fp, " syscall calls errors total min avg max stddev\n");
+ printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
+ printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
+
+ for (int i = 0; i < nr_data; i++)
+ printed += print_common_stats(data[i], fp);
+
+ printed += fprintf(fp, "\n\n");
+ return printed;
+}
+
+static int update_cgroup_stats(struct hashmap *hash, struct syscall_key *map_key,
+ struct syscall_stats *map_data)
+{
+ struct syscall_data *data;
+ struct syscall_node *nodes;
+
+ if (!hashmap__find(hash, map_key->cgroup, &data)) {
+ data = zalloc(sizeof(*data));
+ if (data == NULL)
+ return -ENOMEM;
+
+ data->key = map_key->cgroup;
+ if (hashmap__add(hash, data->key, data) < 0) {
+ free(data);
+ return -ENOMEM;
+ }
+ }
+
+ /* update thread total stats */
+ data->nr_events += map_data->count;
+ data->total_time += map_data->total_time;
+
+ nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes));
+ if (nodes == NULL)
+ return -ENOMEM;
+
+ data->nodes = nodes;
+ nodes = &data->nodes[data->nr_nodes++];
+ nodes->syscall_nr = map_key->nr;
+
+ /* each thread has an entry for each syscall, just use the stat */
+ memcpy(&nodes->stats, map_data, sizeof(*map_data));
+ return 0;
+}
+
+static int print_cgroup_stat(struct syscall_data *data, FILE *fp)
+{
+ int printed = 0;
+ struct cgroup *cgrp = __cgroup__find(&cgroups, data->key);
+
+ qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp);
+
+ if (cgrp)
+ printed += fprintf(fp, " cgroup %s,", cgrp->name);
+ else
+ printed += fprintf(fp, " cgroup id:%lu,", (unsigned long)data->key);
+
+ printed += fprintf(fp, " %d events\n\n", data->nr_events);
+
+ printed += fprintf(fp, " syscall calls errors total min avg max stddev\n");
+ printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
+ printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n");
+
+ printed += print_common_stats(data, fp);
+ printed += fprintf(fp, "\n\n");
+
+ return printed;
+}
+
+static int print_cgroup_stats(struct syscall_data **data, int nr_data, FILE *fp)
+{
+ int printed = 0;
+
+ for (int i = 0; i < nr_data; i++)
+ printed += print_cgroup_stat(data[i], fp);
+
+ return printed;
+}
+
+int trace_print_bpf_summary(FILE *fp)
+{
+ struct bpf_map *map = skel->maps.syscall_stats_map;
+ struct syscall_key *prev_key, key;
+ struct syscall_data **data = NULL;
+ struct hashmap schash;
+ struct hashmap_entry *entry;
+ int nr_data = 0;
+ int printed = 0;
+ int i;
+ size_t bkt;
+
+ hashmap__init(&schash, sc_node_hash, sc_node_equal, /*ctx=*/NULL);
+
+ printed = fprintf(fp, "\n Summary of events:\n\n");
+
+ /* get stats from the bpf map */
+ prev_key = NULL;
+ while (!bpf_map__get_next_key(map, prev_key, &key, sizeof(key))) {
+ struct syscall_stats stat;
+
+ if (!bpf_map__lookup_elem(map, &key, sizeof(key), &stat, sizeof(stat), 0)) {
+ switch (skel->rodata->aggr_mode) {
+ case SYSCALL_AGGR_THREAD:
+ update_thread_stats(&schash, &key, &stat);
+ break;
+ case SYSCALL_AGGR_CPU:
+ update_total_stats(&schash, &key, &stat);
+ break;
+ case SYSCALL_AGGR_CGROUP:
+ update_cgroup_stats(&schash, &key, &stat);
+ break;
+ default:
+ break;
+ }
+ }
+
+ prev_key = &key;
+ }
+
+ nr_data = hashmap__size(&schash);
+ data = calloc(nr_data, sizeof(*data));
+ if (data == NULL)
+ goto out;
+
+ i = 0;
+ hashmap__for_each_entry(&schash, entry, bkt)
+ data[i++] = entry->pvalue;
+
+ qsort(data, nr_data, sizeof(*data), datacmp);
+
+ switch (skel->rodata->aggr_mode) {
+ case SYSCALL_AGGR_THREAD:
+ printed += print_thread_stats(data, nr_data, fp);
+ break;
+ case SYSCALL_AGGR_CPU:
+ printed += print_total_stats(data, nr_data, fp);
+ break;
+ case SYSCALL_AGGR_CGROUP:
+ printed += print_cgroup_stats(data, nr_data, fp);
+ break;
+ default:
+ break;
+ }
+
+ for (i = 0; i < nr_data && data; i++) {
+ free(data[i]->nodes);
+ free(data[i]);
+ }
+ free(data);
+
+out:
+ hashmap__clear(&schash);
+ return printed;
+}
+
+void trace_cleanup_bpf_summary(void)
+{
+ if (!RB_EMPTY_ROOT(&cgroups)) {
+ struct cgroup *cgrp, *tmp;
+
+ rbtree_postorder_for_each_entry_safe(cgrp, tmp, &cgroups, node)
+ cgroup__put(cgrp);
+
+ cgroups = RB_ROOT;
+ }
+
+ syscall_summary_bpf__destroy(skel);
+}
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 7324668cc83e..0cb02412043c 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -21,15 +21,26 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
{
int fd, err;
int i, ncpus = 1, ntasks = 1;
- struct filter_entry *func;
+ struct filter_entry *func = NULL;
- if (!list_is_singular(&ftrace->filters)) {
- pr_err("ERROR: %s target function(s).\n",
- list_empty(&ftrace->filters) ? "No" : "Too many");
- return -1;
- }
+ if (!list_empty(&ftrace->filters)) {
+ if (!list_is_singular(&ftrace->filters)) {
+ pr_err("ERROR: Too many target functions.\n");
+ return -1;
+ }
+ func = list_first_entry(&ftrace->filters, struct filter_entry, list);
+ } else {
+ int count = 0;
+ struct list_head *pos;
- func = list_first_entry(&ftrace->filters, struct filter_entry, list);
+ list_for_each(pos, &ftrace->event_pair)
+ count++;
+
+ if (count != 2) {
+ pr_err("ERROR: Needs two target events.\n");
+ return -1;
+ }
+ }
skel = func_latency_bpf__open();
if (!skel) {
@@ -93,20 +104,44 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
skel->bss->min = INT64_MAX;
- skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
- false, func->name);
- if (IS_ERR(skel->links.func_begin)) {
- pr_err("Failed to attach fentry program\n");
- err = PTR_ERR(skel->links.func_begin);
- goto out;
- }
+ if (func) {
+ skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
+ false, func->name);
+ if (IS_ERR(skel->links.func_begin)) {
+ pr_err("Failed to attach fentry program\n");
+ err = PTR_ERR(skel->links.func_begin);
+ goto out;
+ }
- skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
- true, func->name);
- if (IS_ERR(skel->links.func_end)) {
- pr_err("Failed to attach fexit program\n");
- err = PTR_ERR(skel->links.func_end);
- goto out;
+ skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
+ true, func->name);
+ if (IS_ERR(skel->links.func_end)) {
+ pr_err("Failed to attach fexit program\n");
+ err = PTR_ERR(skel->links.func_end);
+ goto out;
+ }
+ } else {
+ struct filter_entry *event;
+
+ event = list_first_entry(&ftrace->event_pair, struct filter_entry, list);
+
+ skel->links.event_begin = bpf_program__attach_raw_tracepoint(skel->progs.event_begin,
+ event->name);
+ if (IS_ERR(skel->links.event_begin)) {
+ pr_err("Failed to attach first tracepoint program\n");
+ err = PTR_ERR(skel->links.event_begin);
+ goto out;
+ }
+
+ event = list_next_entry(event, list);
+
+ skel->links.event_end = bpf_program__attach_raw_tracepoint(skel->progs.event_end,
+ event->name);
+ if (IS_ERR(skel->links.event_end)) {
+ pr_err("Failed to attach second tracepoint program\n");
+ err = PTR_ERR(skel->links.event_end);
+ goto out;
+ }
}
/* XXX: we don't actually use this fd - just for poll() */
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index 5af8f6d1bc95..60b81d586323 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -12,6 +12,7 @@
#include "util/lock-contention.h"
#include <linux/zalloc.h>
#include <linux/string.h>
+#include <api/fs/fs.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include <inttypes.h>
@@ -35,28 +36,26 @@ static bool slab_cache_equal(long key1, long key2, void *ctx __maybe_unused)
static void check_slab_cache_iter(struct lock_contention *con)
{
- struct btf *btf = btf__load_vmlinux_btf();
s32 ret;
hashmap__init(&slab_hash, slab_cache_hash, slab_cache_equal, /*ctx=*/NULL);
- if (btf == NULL) {
+ con->btf = btf__load_vmlinux_btf();
+ if (con->btf == NULL) {
pr_debug("BTF loading failed: %s\n", strerror(errno));
return;
}
- ret = btf__find_by_name_kind(btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT);
+ ret = btf__find_by_name_kind(con->btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT);
if (ret < 0) {
bpf_program__set_autoload(skel->progs.slab_cache_iter, false);
pr_debug("slab cache iterator is not available: %d\n", ret);
- goto out;
+ return;
}
has_slab_iter = true;
bpf_map__set_max_entries(skel->maps.slab_caches, con->map_nr_entries);
-out:
- btf__free(btf);
}
static void run_slab_cache_iter(void)
@@ -109,6 +108,75 @@ static void exit_slab_cache_iter(void)
hashmap__clear(&slab_hash);
}
+static void init_numa_data(struct lock_contention *con)
+{
+ struct symbol *sym;
+ struct map *kmap;
+ char *buf = NULL, *p;
+ size_t len;
+ long last = -1;
+ int ret;
+
+ /*
+ * 'struct zone' is embedded in 'struct pglist_data' as an array.
+ * As we may not have full information of the struct zone in the
+ * (fake) vmlinux.h, let's get the actual size from BTF.
+ */
+ ret = btf__find_by_name_kind(con->btf, "zone", BTF_KIND_STRUCT);
+ if (ret < 0) {
+ pr_debug("cannot get type of struct zone: %d\n", ret);
+ return;
+ }
+
+ ret = btf__resolve_size(con->btf, ret);
+ if (ret < 0) {
+ pr_debug("cannot get size of struct zone: %d\n", ret);
+ return;
+ }
+ skel->rodata->sizeof_zone = ret;
+
+ /* UMA system doesn't have 'node_data[]' - just use contig_page_data. */
+ sym = machine__find_kernel_symbol_by_name(con->machine,
+ "contig_page_data",
+ &kmap);
+ if (sym) {
+ skel->rodata->contig_page_data_addr = map__unmap_ip(kmap, sym->start);
+ map__put(kmap);
+ return;
+ }
+
+ /*
+ * The 'node_data' is an array of pointers to struct pglist_data.
+ * It needs to follow the pointer for each node in BPF to get the
+ * address of struct pglist_data and its zones.
+ */
+ sym = machine__find_kernel_symbol_by_name(con->machine,
+ "node_data",
+ &kmap);
+ if (sym == NULL)
+ return;
+
+ skel->rodata->node_data_addr = map__unmap_ip(kmap, sym->start);
+ map__put(kmap);
+
+ /* get the number of online nodes using the last node number + 1 */
+ ret = sysfs__read_str("devices/system/node/online", &buf, &len);
+ if (ret < 0) {
+ pr_debug("failed to read online node: %d\n", ret);
+ return;
+ }
+
+ p = buf;
+ while (p && *p) {
+ last = strtol(p, &p, 0);
+
+ if (p && (*p == ',' || *p == '-' || *p == '\n'))
+ p++;
+ }
+ skel->rodata->nr_nodes = last + 1;
+ free(buf);
+}
+
int lock_contention_prepare(struct lock_contention *con)
{
int i, fd;
@@ -193,6 +261,27 @@ int lock_contention_prepare(struct lock_contention *con)
skel->rodata->has_addr = 1;
}
+ /* resolve lock name in delays */
+ if (con->nr_delays) {
+ struct symbol *sym;
+ struct map *kmap;
+
+ for (i = 0; i < con->nr_delays; i++) {
+ sym = machine__find_kernel_symbol_by_name(con->machine,
+ con->delays[i].sym,
+ &kmap);
+ if (sym == NULL) {
+ pr_warning("ignore unknown symbol: %s\n",
+ con->delays[i].sym);
+ continue;
+ }
+
+ con->delays[i].addr = map__unmap_ip(kmap, sym->start);
+ }
+ skel->rodata->lock_delay = 1;
+ bpf_map__set_max_entries(skel->maps.lock_delays, con->nr_delays);
+ }
+
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
bpf_map__set_max_entries(skel->maps.task_filter, ntasks);
bpf_map__set_max_entries(skel->maps.type_filter, ntypes);
@@ -218,6 +307,8 @@ int lock_contention_prepare(struct lock_contention *con)
bpf_map__set_max_entries(skel->maps.slab_filter, nslabs);
+ init_numa_data(con);
+
if (lock_contention_bpf__load(skel) < 0) {
pr_err("Failed to load lock-contention BPF skeleton\n");
return -1;
@@ -282,6 +373,13 @@ int lock_contention_prepare(struct lock_contention *con)
bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY);
}
+ if (con->nr_delays) {
+ fd = bpf_map__fd(skel->maps.lock_delays);
+
+ for (i = 0; i < con->nr_delays; i++)
+ bpf_map_update_elem(fd, &con->delays[i].addr, &con->delays[i].time, BPF_ANY);
+ }
+
if (con->aggr_mode == LOCK_AGGR_CGROUP)
read_all_cgroups(&con->cgroups);
@@ -505,6 +603,11 @@ static const char *lock_contention_get_name(struct lock_contention *con,
return "rq_lock";
}
+ if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr_or_cgroup, &flags)) {
+ if (flags == LOCK_CLASS_ZONE_LOCK)
+ return "zone_lock";
+ }
+
/* look slab_hash for dynamic locks in a slab object */
if (hashmap__find(&slab_hash, flags & LCB_F_SLAB_ID_MASK, &slab_data)) {
snprintf(name_buf, sizeof(name_buf), "&%s", slab_data->name);
@@ -743,6 +846,7 @@ int lock_contention_finish(struct lock_contention *con)
}
exit_slab_cache_iter();
+ btf__free(con->btf);
return 0;
}
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index 4269b41d1771..c367fefe6ecb 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -13,6 +13,8 @@
#include "util/cgroup.h"
#include "util/strlist.h"
#include <bpf/bpf.h>
+#include <internal/xyarray.h>
+#include <linux/time64.h>
#include "bpf_skel/off_cpu.skel.h"
@@ -36,34 +38,25 @@ union off_cpu_data {
u64 array[1024 / sizeof(u64)];
};
+u64 off_cpu_raw[MAX_STACKS + 5];
+
static int off_cpu_config(struct evlist *evlist)
{
+ char off_cpu_event[64];
struct evsel *evsel;
- struct perf_event_attr attr = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_BPF_OUTPUT,
- .size = sizeof(attr), /* to capture ABI version */
- };
- char *evname = strdup(OFFCPU_EVENT);
- if (evname == NULL)
- return -ENOMEM;
-
- evsel = evsel__new(&attr);
- if (!evsel) {
- free(evname);
- return -ENOMEM;
+ scnprintf(off_cpu_event, sizeof(off_cpu_event), "bpf-output/name=%s/", OFFCPU_EVENT);
+ if (parse_event(evlist, off_cpu_event)) {
+ pr_err("Failed to open off-cpu event\n");
+ return -1;
}
- evsel->core.attr.freq = 1;
- evsel->core.attr.sample_period = 1;
- /* off-cpu analysis depends on stack trace */
- evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN;
-
- evlist__add(evlist, evsel);
-
- free(evsel->name);
- evsel->name = evname;
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__is_offcpu_event(evsel)) {
+ evsel->core.system_wide = true;
+ break;
+ }
+ }
return 0;
}
@@ -71,6 +64,9 @@ static int off_cpu_config(struct evlist *evlist)
static void off_cpu_start(void *arg)
{
struct evlist *evlist = arg;
+ struct evsel *evsel;
+ struct perf_cpu pcpu;
+ int i;
/* update task filter for the given workload */
if (skel->rodata->has_task && skel->rodata->uses_tgid &&
@@ -84,6 +80,26 @@ static void off_cpu_start(void *arg)
bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
}
+ /* update BPF perf_event map */
+ evsel = evlist__find_evsel_by_str(evlist, OFFCPU_EVENT);
+ if (evsel == NULL) {
+ pr_err("%s evsel not found\n", OFFCPU_EVENT);
+ return;
+ }
+
+ perf_cpu_map__for_each_cpu(pcpu, i, evsel->core.cpus) {
+ int err;
+ int cpu_nr = pcpu.cpu;
+
+ err = bpf_map__update_elem(skel->maps.offcpu_output, &cpu_nr, sizeof(int),
+ xyarray__entry(evsel->core.fd, cpu_nr, 0),
+ sizeof(int), BPF_ANY);
+ if (err) {
+ pr_err("Failed to update perf event map for direct off-cpu dumping\n");
+ return;
+ }
+ }
+
skel->bss->enabled = 1;
}
@@ -277,6 +293,8 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
}
}
+ skel->bss->offcpu_thresh_ns = opts->off_cpu_thresh_ns;
+
err = off_cpu_bpf__attach(skel);
if (err) {
pr_err("Failed to attach off-cpu BPF skeleton\n");
@@ -300,6 +318,7 @@ int off_cpu_write(struct perf_session *session)
{
int bytes = 0, size;
int fd, stack;
+ u32 raw_size;
u64 sample_type, val, sid = 0;
struct evsel *evsel;
struct perf_data_file *file = &session->data->file;
@@ -339,46 +358,54 @@ int off_cpu_write(struct perf_session *session)
while (!bpf_map_get_next_key(fd, &prev, &key)) {
int n = 1; /* start from perf_event_header */
- int ip_pos = -1;
bpf_map_lookup_elem(fd, &key, &val);
+ /* zero-fill some of the fields, will be overwritten by raw_data when parsing */
if (sample_type & PERF_SAMPLE_IDENTIFIER)
data.array[n++] = sid;
- if (sample_type & PERF_SAMPLE_IP) {
- ip_pos = n;
+ if (sample_type & PERF_SAMPLE_IP)
data.array[n++] = 0; /* will be updated */
- }
if (sample_type & PERF_SAMPLE_TID)
- data.array[n++] = (u64)key.pid << 32 | key.tgid;
+ data.array[n++] = 0;
if (sample_type & PERF_SAMPLE_TIME)
data.array[n++] = tstamp;
- if (sample_type & PERF_SAMPLE_ID)
- data.array[n++] = sid;
if (sample_type & PERF_SAMPLE_CPU)
data.array[n++] = 0;
if (sample_type & PERF_SAMPLE_PERIOD)
- data.array[n++] = val;
- if (sample_type & PERF_SAMPLE_CALLCHAIN) {
- int len = 0;
-
- /* data.array[n] is callchain->nr (updated later) */
- data.array[n + 1] = PERF_CONTEXT_USER;
- data.array[n + 2] = 0;
-
- bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]);
- while (data.array[n + 2 + len])
+ data.array[n++] = 0;
+ if (sample_type & PERF_SAMPLE_RAW) {
+ /*
+ * [ size ][ data ]
+ * [ data ]
+ * [ data ]
+ * [ data ]
+ * [ data ][ empty]
+ */
+ int len = 0, i = 0;
+ void *raw_data = (void *)data.array + n * sizeof(u64);
+
+ off_cpu_raw[i++] = (u64)key.pid << 32 | key.tgid;
+ off_cpu_raw[i++] = val;
+
+ /* off_cpu_raw[i] is callchain->nr (updated later) */
+ off_cpu_raw[i + 1] = PERF_CONTEXT_USER;
+ off_cpu_raw[i + 2] = 0;
+
+ bpf_map_lookup_elem(stack, &key.stack_id, &off_cpu_raw[i + 2]);
+ while (off_cpu_raw[i + 2 + len])
len++;
- /* update length of callchain */
- data.array[n] = len + 1;
+ off_cpu_raw[i] = len + 1;
+ i += len + 2;
+
+ off_cpu_raw[i++] = key.cgroup_id;
- /* update sample ip with the first callchain entry */
- if (ip_pos >= 0)
- data.array[ip_pos] = data.array[n + 2];
+ raw_size = i * sizeof(u64) + sizeof(u32); /* 4 bytes for alignment */
+ memcpy(raw_data, &raw_size, sizeof(raw_size));
+ memcpy(raw_data + sizeof(u32), off_cpu_raw, i * sizeof(u64));
- /* calculate sample callchain data array length */
- n += len + 2;
+ n += i + 1;
}
if (sample_type & PERF_SAMPLE_CGROUP)
data.array[n++] = key.cgroup_id;
diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index e4352881e3fa..cb86e261b4de 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -7,7 +7,6 @@
*/
#include "vmlinux.h"
-#include "../trace_augment.h"
#include <bpf/bpf_helpers.h>
#include <linux/limits.h>
@@ -27,6 +26,8 @@
#define MAX_CPUS 4096
+#define TRACE_AUG_MAX_BUF 32 /* for buffer augmentation in perf trace */
+
/* bpf-output associated map */
struct __augmented_syscalls__ {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index e731a79a753a..621e2022c8bc 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -52,34 +52,89 @@ const volatile unsigned int min_latency;
const volatile unsigned int max_latency;
const volatile unsigned int bucket_num = NUM_BUCKET;
-SEC("kprobe/func")
-int BPF_PROG(func_begin)
+static bool can_record(void)
{
- __u64 key, now;
-
- if (!enabled)
- return 0;
-
- key = bpf_get_current_pid_tgid();
-
if (has_cpu) {
__u32 cpu = bpf_get_smp_processor_id();
__u8 *ok;
ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
if (!ok)
- return 0;
+ return false;
}
if (has_task) {
- __u32 pid = key & 0xffffffff;
+ __u32 pid = bpf_get_current_pid_tgid();
__u8 *ok;
ok = bpf_map_lookup_elem(&task_filter, &pid);
if (!ok)
- return 0;
+ return false;
}
+ return true;
+}
+
+static void update_latency(__s64 delta)
+{
+ __u64 val = delta;
+ __u32 key = 0;
+ __u64 *hist;
+ __u64 cmp_base = use_nsec ? 1 : 1000;
+
+ if (delta < 0)
+ return;
+ if (bucket_range != 0) {
+ val = delta / cmp_base;
+
+ if (min_latency > 0) {
+ if (val > min_latency)
+ val -= min_latency;
+ else
+ goto do_lookup;
+ }
+
+ // Less than 1 unit (ms or ns), or, in the future,
+ // than the min latency desired.
+ if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
+ key = val / bucket_range + 1;
+ if (key >= bucket_num)
+ key = bucket_num - 1;
+ }
+
+ goto do_lookup;
+ }
+ // calculate index using delta
+ for (key = 0; key < (bucket_num - 1); key++) {
+ if (delta < (cmp_base << key))
+ break;
+ }
+
+do_lookup:
+ hist = bpf_map_lookup_elem(&latency, &key);
+ if (!hist)
+ return;
+
+ __sync_fetch_and_add(hist, 1);
+
+ __sync_fetch_and_add(&total, delta); // always in nsec
+ __sync_fetch_and_add(&count, 1);
+
+ if (delta > max)
+ max = delta;
+ if (delta < min)
+ min = delta;
+}
+
+SEC("kprobe/func")
+int BPF_PROG(func_begin)
+{
+ __u64 key, now;
+
+ if (!enabled || !can_record())
+ return 0;
+
+ key = bpf_get_current_pid_tgid();
now = bpf_ktime_get_ns();
// overwrite timestamp for nested functions
@@ -92,7 +147,6 @@ int BPF_PROG(func_end)
{
__u64 tid;
__u64 *start;
- __u64 cmp_base = use_nsec ? 1 : 1000;
if (!enabled)
return 0;
@@ -101,56 +155,44 @@ int BPF_PROG(func_end)
start = bpf_map_lookup_elem(&functime, &tid);
if (start) {
- __s64 delta = bpf_ktime_get_ns() - *start;
- __u64 val = delta;
- __u32 key = 0;
- __u64 *hist;
-
+ update_latency(bpf_ktime_get_ns() - *start);
bpf_map_delete_elem(&functime, &tid);
+ }
- if (delta < 0)
- return 0;
+ return 0;
+}
- if (bucket_range != 0) {
- val = delta / cmp_base;
+SEC("raw_tp")
+int BPF_PROG(event_begin)
+{
+ __u64 key, now;
- if (min_latency > 0) {
- if (val > min_latency)
- val -= min_latency;
- else
- goto do_lookup;
- }
+ if (!enabled || !can_record())
+ return 0;
- // Less than 1 unit (ms or ns), or, in the future,
- // than the min latency desired.
- if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
- key = val / bucket_range + 1;
- if (key >= bucket_num)
- key = bucket_num - 1;
- }
+ key = bpf_get_current_pid_tgid();
+ now = bpf_ktime_get_ns();
- goto do_lookup;
- }
- // calculate index using delta
- for (key = 0; key < (bucket_num - 1); key++) {
- if (delta < (cmp_base << key))
- break;
- }
+ // overwrite timestamp for nested events
+ bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
+ return 0;
+}
-do_lookup:
- hist = bpf_map_lookup_elem(&latency, &key);
- if (!hist)
- return 0;
+SEC("raw_tp")
+int BPF_PROG(event_end)
+{
+ __u64 tid;
+ __u64 *start;
- __sync_fetch_and_add(hist, 1);
+ if (!enabled)
+ return 0;
- __sync_fetch_and_add(&total, delta); // always in nsec
- __sync_fetch_and_add(&count, 1);
+ tid = bpf_get_current_pid_tgid();
- if (delta > max)
- max = delta;
- if (delta < min)
- min = delta;
+ start = bpf_map_lookup_elem(&functime, &tid);
+ if (start) {
+ update_latency(bpf_ktime_get_ns() - *start);
+ bpf_map_delete_elem(&functime, &tid);
}
return 0;
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 69be7a4234e0..96e7d853b9ed 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -11,6 +11,12 @@
/* for collect_lock_syms(). 4096 was rejected by the verifier */
#define MAX_CPUS 1024
+/* for collect_zone_lock(). It should be more than the actual zones. */
+#define MAX_ZONES 10
+
+/* for do_lock_delay(). Arbitrarily set to 1 million. */
+#define MAX_LOOP (1U << 20)
+
/* lock contention flags from include/trace/events/lock.h */
#define LCB_F_SPIN (1U << 0)
#define LCB_F_READ (1U << 1)
@@ -146,6 +152,13 @@ struct {
__uint(max_entries, 1);
} slab_caches SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(__u64));
+ __uint(value_size, sizeof(__u64));
+ __uint(max_entries, 1);
+} lock_delays SEC(".maps");
+
struct rw_semaphore___old {
struct task_struct *owner;
} __attribute__((preserve_access_index));
@@ -176,6 +189,7 @@ const volatile int stack_skip;
const volatile int lock_owner;
const volatile int use_cgroup_v2;
const volatile int max_stack;
+const volatile int lock_delay;
/* determine the key of lock stat */
const volatile int aggr_mode;
@@ -384,6 +398,35 @@ static inline __u32 check_lock_type(__u64 lock, __u32 flags)
return 0;
}
+static inline long delay_callback(__u64 idx, void *arg)
+{
+ __u64 target = *(__u64 *)arg;
+
+ if (target <= bpf_ktime_get_ns())
+ return 1;
+
+ /* just to kill time */
+ (void)bpf_get_prandom_u32();
+
+ return 0;
+}
+
+static inline void do_lock_delay(__u64 duration)
+{
+ __u64 target = bpf_ktime_get_ns() + duration;
+
+ bpf_loop(MAX_LOOP, delay_callback, &target, /*flags=*/0);
+}
+
+static inline void check_lock_delay(__u64 lock)
+{
+ __u64 *delay;
+
+ delay = bpf_map_lookup_elem(&lock_delays, &lock);
+ if (delay)
+ do_lock_delay(*delay);
+}
+
static inline struct tstamp_data *get_tstamp_elem(__u32 flags)
{
__u32 pid;
@@ -793,6 +836,9 @@ found:
update_contention_data(data, duration, 1);
out:
+ if (lock_delay)
+ check_lock_delay(pelem->lock);
+
pelem->lock = 0;
if (need_delete)
bpf_map_delete_elem(&tstamp, &pid);
@@ -801,6 +847,11 @@ out:
extern struct rq runqueues __ksym;
+const volatile __u64 contig_page_data_addr;
+const volatile __u64 node_data_addr;
+const volatile int nr_nodes;
+const volatile int sizeof_zone;
+
struct rq___old {
raw_spinlock_t lock;
} __attribute__((preserve_access_index));
@@ -809,6 +860,59 @@ struct rq___new {
raw_spinlock_t __lock;
} __attribute__((preserve_access_index));
+static void collect_zone_lock(void)
+{
+ __u64 nr_zones, zone_off;
+ __u64 lock_addr, lock_off;
+ __u32 lock_flag = LOCK_CLASS_ZONE_LOCK;
+
+ zone_off = offsetof(struct pglist_data, node_zones);
+ lock_off = offsetof(struct zone, lock);
+
+ if (contig_page_data_addr) {
+ struct pglist_data *contig_page_data;
+
+ contig_page_data = (void *)(long)contig_page_data_addr;
+ nr_zones = BPF_CORE_READ(contig_page_data, nr_zones);
+
+ for (int i = 0; i < MAX_ZONES; i++) {
+ __u64 zone_addr;
+
+ if (i >= nr_zones)
+ break;
+
+ zone_addr = contig_page_data_addr + (sizeof_zone * i) + zone_off;
+ lock_addr = zone_addr + lock_off;
+
+ bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY);
+ }
+ } else if (nr_nodes > 0) {
+ struct pglist_data **node_data = (void *)(long)node_data_addr;
+
+ for (int i = 0; i < nr_nodes; i++) {
+ struct pglist_data *pgdat = NULL;
+ int err;
+
+ err = bpf_core_read(&pgdat, sizeof(pgdat), &node_data[i]);
+ if (err < 0 || pgdat == NULL)
+ break;
+
+ nr_zones = BPF_CORE_READ(pgdat, nr_zones);
+ for (int k = 0; k < MAX_ZONES; k++) {
+ __u64 zone_addr;
+
+ if (k >= nr_zones)
+ break;
+
+ zone_addr = (__u64)(void *)pgdat + (sizeof_zone * k) + zone_off;
+ lock_addr = zone_addr + lock_off;
+
+ bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY);
+ }
+ }
+ }
+}
+
SEC("raw_tp/bpf_test_finish")
int BPF_PROG(collect_lock_syms)
{
@@ -830,6 +934,9 @@ int BPF_PROG(collect_lock_syms)
lock_flag = LOCK_CLASS_RQLOCK;
bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY);
}
+
+ collect_zone_lock();
+
return 0;
}
diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
index 15f5743bd409..28c5e5aced7f 100644
--- a/tools/perf/util/bpf_skel/lock_data.h
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -67,6 +67,7 @@ enum lock_aggr_mode {
enum lock_class_sym {
LOCK_CLASS_NONE,
LOCK_CLASS_RQLOCK,
+ LOCK_CLASS_ZONE_LOCK,
};
struct slab_cache_data {
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
index c152116df72f..72763bb8d1de 100644
--- a/tools/perf/util/bpf_skel/off_cpu.bpf.c
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -18,10 +18,19 @@
#define MAX_STACKS 32
#define MAX_ENTRIES 102400
+#define MAX_CPUS 4096
+#define MAX_OFFCPU_LEN 37
+
+// We have a 'struct stack' in vmlinux.h when building with GEN_VMLINUX_H=1
+struct __stack {
+ u64 array[MAX_STACKS];
+};
+
struct tstamp_data {
__u32 stack_id;
__u32 state;
__u64 timestamp;
+ struct __stack stack;
};
struct offcpu_key {
@@ -39,6 +48,24 @@ struct {
__uint(max_entries, MAX_ENTRIES);
} stacks SEC(".maps");
+struct offcpu_data {
+ u64 array[MAX_OFFCPU_LEN];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, MAX_CPUS);
+} offcpu_output SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct offcpu_data));
+ __uint(max_entries, 1);
+} offcpu_payload SEC(".maps");
+
struct {
__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
@@ -97,6 +124,8 @@ const volatile bool uses_cgroup_v1 = false;
int perf_subsys_id = -1;
+__u64 offcpu_thresh_ns;
+
/*
* Old kernel used to call it task_struct->state and now it's '__state'.
* Use BPF CO-RE "ignored suffix rule" to deal with it like below:
@@ -183,6 +212,47 @@ static inline int can_record(struct task_struct *t, int state)
return 1;
}
+static inline int copy_stack(struct __stack *from, struct offcpu_data *to, int n)
+{
+ int len = 0;
+
+ for (int i = 0; i < MAX_STACKS && from->array[i]; ++i, ++len)
+ to->array[n + 2 + i] = from->array[i];
+
+ return len;
+}
+
+/**
+ * off_cpu_dump - dump off-cpu samples to ring buffer
+ * @data: payload for dumping off-cpu samples
+ * @key: off-cpu data
+ * @stack: stack trace of the task before being scheduled out
+ *
+ * If the threshold of off-cpu time is reached, acquire tid, period, callchain, and cgroup id
+ * information of the task, and dump it as a raw sample to perf ring buffer
+ */
+static int off_cpu_dump(void *ctx, struct offcpu_data *data, struct offcpu_key *key,
+ struct __stack *stack, __u64 delta)
+{
+ int n = 0, len = 0;
+
+ data->array[n++] = (u64)key->tgid << 32 | key->pid;
+ data->array[n++] = delta;
+
+ /* data->array[n] is callchain->nr (updated later) */
+ data->array[n + 1] = PERF_CONTEXT_USER;
+ data->array[n + 2] = 0;
+ len = copy_stack(stack, data, n);
+
+ /* update length of callchain */
+ data->array[n] = len + 1;
+ n += len + 2;
+
+ data->array[n++] = key->cgroup_id;
+
+ return bpf_perf_event_output(ctx, &offcpu_output, BPF_F_CURRENT_CPU, data, n * sizeof(u64));
+}
+
static int off_cpu_stat(u64 *ctx, struct task_struct *prev,
struct task_struct *next, int state)
{
@@ -207,6 +277,16 @@ static int off_cpu_stat(u64 *ctx, struct task_struct *prev,
pelem->state = state;
pelem->stack_id = stack_id;
+ /*
+ * If stacks are successfully collected by bpf_get_stackid(), collect them once more
+ * in task_storage for direct off-cpu sample dumping
+ */
+ if (stack_id > 0 && bpf_get_stack(ctx, &pelem->stack, MAX_STACKS * sizeof(u64), BPF_F_USER_STACK)) {
+ /*
+ * This empty if block is used to avoid 'result unused warning' from bpf_get_stack().
+ * If the collection fails, continue with the logic for the next task.
+ */
+ }
next:
pelem = bpf_task_storage_get(&tstamp, next, NULL, 0);
@@ -221,11 +301,19 @@ next:
__u64 delta = ts - pelem->timestamp;
__u64 *total;
- total = bpf_map_lookup_elem(&off_cpu, &key);
- if (total)
- *total += delta;
- else
- bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY);
+ if (delta >= offcpu_thresh_ns) {
+ int zero = 0;
+ struct offcpu_data *data = bpf_map_lookup_elem(&offcpu_payload, &zero);
+
+ if (data)
+ off_cpu_dump(ctx, data, &key, &pelem->stack, delta);
+ } else {
+ total = bpf_map_lookup_elem(&off_cpu, &key);
+ if (total)
+ *total += delta;
+ else
+ bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY);
+ }
/* prevent to reuse the timestamp later */
pelem->timestamp = 0;
diff --git a/tools/perf/util/bpf_skel/perf_version.h b/tools/perf/util/bpf_skel/perf_version.h
new file mode 100644
index 000000000000..1ed5b2e59bf5
--- /dev/null
+++ b/tools/perf/util/bpf_skel/perf_version.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef __PERF_VERSION_H__
+#define __PERF_VERSION_H__
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+/*
+ * This is used by tests/shell/record_bpf_metadata.sh
+ * to verify that BPF metadata generation works.
+ *
+ * PERF_VERSION is defined by a build rule at compile time.
+ */
+const char bpf_metadata_perf_version[] SEC(".rodata") = PERF_VERSION;
+
+#endif /* __PERF_VERSION_H__ */
diff --git a/tools/perf/util/bpf_skel/syscall_summary.bpf.c b/tools/perf/util/bpf_skel/syscall_summary.bpf.c
new file mode 100644
index 000000000000..1bcd066a5199
--- /dev/null
+++ b/tools/perf/util/bpf_skel/syscall_summary.bpf.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trace raw_syscalls tracepoints to collect system call statistics.
+ */
+
+#include "vmlinux.h"
+#include "syscall_summary.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+/* This is to calculate a delta between sys-enter and sys-exit for each thread */
+struct syscall_trace {
+ int nr; /* syscall number is only available at sys-enter */
+ int unused;
+ u64 timestamp;
+};
+
+#define MAX_ENTRIES (128 * 1024)
+
+struct syscall_trace_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int); /* tid */
+ __type(value, struct syscall_trace);
+ __uint(max_entries, MAX_ENTRIES);
+} syscall_trace_map SEC(".maps");
+
+struct syscall_stats_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct syscall_key);
+ __type(value, struct syscall_stats);
+ __uint(max_entries, MAX_ENTRIES);
+} syscall_stats_map SEC(".maps");
+
+int enabled; /* controlled from userspace */
+
+const volatile enum syscall_aggr_mode aggr_mode;
+const volatile int use_cgroup_v2;
+
+int perf_subsys_id = -1;
+
+static inline __u64 get_current_cgroup_id(void)
+{
+ struct task_struct *task;
+ struct cgroup *cgrp;
+
+ if (use_cgroup_v2)
+ return bpf_get_current_cgroup_id();
+
+ task = bpf_get_current_task_btf();
+
+ if (perf_subsys_id == -1) {
+#if __has_builtin(__builtin_preserve_enum_value)
+ perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
+ perf_event_cgrp_id);
+#else
+ perf_subsys_id = perf_event_cgrp_id;
+#endif
+ }
+
+ cgrp = BPF_CORE_READ(task, cgroups, subsys[perf_subsys_id], cgroup);
+ return BPF_CORE_READ(cgrp, kn, id);
+}
+
+static void update_stats(int cpu_or_tid, u64 cgroup_id, int nr, s64 duration,
+ long ret)
+{
+ struct syscall_key key = {
+ .cpu_or_tid = cpu_or_tid,
+ .cgroup = cgroup_id,
+ .nr = nr,
+ };
+ struct syscall_stats *stats;
+
+ stats = bpf_map_lookup_elem(&syscall_stats_map, &key);
+ if (stats == NULL) {
+ struct syscall_stats zero = {};
+
+ bpf_map_update_elem(&syscall_stats_map, &key, &zero, BPF_NOEXIST);
+ stats = bpf_map_lookup_elem(&syscall_stats_map, &key);
+ if (stats == NULL)
+ return;
+ }
+
+ __sync_fetch_and_add(&stats->count, 1);
+ if (ret < 0)
+ __sync_fetch_and_add(&stats->error, 1);
+
+ if (duration > 0) {
+ __sync_fetch_and_add(&stats->total_time, duration);
+ __sync_fetch_and_add(&stats->squared_sum, duration * duration);
+ if (stats->max_time < duration)
+ stats->max_time = duration;
+ if (stats->min_time > duration || stats->min_time == 0)
+ stats->min_time = duration;
+ }
+
+ return;
+}
+
+SEC("tp_btf/sys_enter")
+int sys_enter(u64 *ctx)
+{
+ int tid;
+ struct syscall_trace st;
+
+ if (!enabled)
+ return 0;
+
+ st.nr = ctx[1]; /* syscall number */
+ st.unused = 0;
+ st.timestamp = bpf_ktime_get_ns();
+
+ tid = bpf_get_current_pid_tgid();
+ bpf_map_update_elem(&syscall_trace_map, &tid, &st, BPF_ANY);
+
+ return 0;
+}
+
+SEC("tp_btf/sys_exit")
+int sys_exit(u64 *ctx)
+{
+ int tid;
+ int key = 0;
+ u64 cgroup = 0;
+ long ret = ctx[1]; /* return value of the syscall */
+ struct syscall_trace *st;
+ s64 delta;
+
+ if (!enabled)
+ return 0;
+
+ tid = bpf_get_current_pid_tgid();
+ st = bpf_map_lookup_elem(&syscall_trace_map, &tid);
+ if (st == NULL)
+ return 0;
+
+ if (aggr_mode == SYSCALL_AGGR_THREAD)
+ key = tid;
+ else if (aggr_mode == SYSCALL_AGGR_CGROUP)
+ cgroup = get_current_cgroup_id();
+ else
+ key = bpf_get_smp_processor_id();
+
+ delta = bpf_ktime_get_ns() - st->timestamp;
+ update_stats(key, cgroup, st->nr, delta, ret);
+
+ bpf_map_delete_elem(&syscall_trace_map, &tid);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/perf/util/bpf_skel/syscall_summary.h b/tools/perf/util/bpf_skel/syscall_summary.h
new file mode 100644
index 000000000000..72ccccb45925
--- /dev/null
+++ b/tools/perf/util/bpf_skel/syscall_summary.h
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Data structures shared between BPF and tools. */
+#ifndef UTIL_BPF_SKEL_SYSCALL_SUMMARY_H
+#define UTIL_BPF_SKEL_SYSCALL_SUMMARY_H
+
+enum syscall_aggr_mode {
+ SYSCALL_AGGR_THREAD,
+ SYSCALL_AGGR_CPU,
+ SYSCALL_AGGR_CGROUP,
+};
+
+struct syscall_key {
+ u64 cgroup;
+ int cpu_or_tid;
+ int nr;
+};
+
+struct syscall_stats {
+ u64 total_time;
+ u64 squared_sum;
+ u64 max_time;
+ u64 min_time;
+ u32 count;
+ u32 error;
+};
+
+#endif /* UTIL_BPF_SKEL_SYSCALL_SUMMARY_H */
diff --git a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
index 7b81d3173917..a59ce912be18 100644
--- a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
+++ b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
@@ -203,4 +203,13 @@ struct bpf_iter__kmem_cache {
struct kmem_cache *s;
} __attribute__((preserve_access_index));
+struct zone {
+ spinlock_t lock;
+} __attribute__((preserve_access_index));
+
+struct pglist_data {
+ struct zone node_zones[6]; /* value for all possible config */
+ int nr_zones;
+} __attribute__((preserve_access_index));
+
#endif // __VMLINUX_H
diff --git a/tools/perf/util/bpf_trace_augment.c b/tools/perf/util/bpf_trace_augment.c
new file mode 100644
index 000000000000..56ed17534caa
--- /dev/null
+++ b/tools/perf/util/bpf_trace_augment.c
@@ -0,0 +1,143 @@
+#include <bpf/libbpf.h>
+#include <internal/xyarray.h>
+
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/trace_augment.h"
+
+#include "bpf_skel/augmented_raw_syscalls.skel.h"
+
+static struct augmented_raw_syscalls_bpf *skel;
+static struct evsel *bpf_output;
+
+int augmented_syscalls__prepare(void)
+{
+ struct bpf_program *prog;
+ char buf[128];
+ int err;
+
+ skel = augmented_raw_syscalls_bpf__open();
+ if (!skel) {
+ pr_debug("Failed to open augmented syscalls BPF skeleton\n");
+ return -errno;
+ }
+
+ /*
+ * Disable attaching the BPF programs except for sys_enter and
+ * sys_exit that tail call into this as necessary.
+ */
+ bpf_object__for_each_program(prog, skel->obj) {
+ if (prog != skel->progs.sys_enter && prog != skel->progs.sys_exit)
+ bpf_program__set_autoattach(prog, /*autoattach=*/false);
+ }
+
+ err = augmented_raw_syscalls_bpf__load(skel);
+ if (err < 0) {
+ libbpf_strerror(err, buf, sizeof(buf));
+ pr_debug("Failed to load augmented syscalls BPF skeleton: %s\n", buf);
+ return err;
+ }
+
+ augmented_raw_syscalls_bpf__attach(skel);
+ return 0;
+}
+
+int augmented_syscalls__create_bpf_output(struct evlist *evlist)
+{
+ int err = parse_event(evlist, "bpf-output/no-inherit=1,name=__augmented_syscalls__/");
+
+ if (err) {
+ pr_err("ERROR: Setup BPF output event failed: %d\n", err);
+ return err;
+ }
+
+ bpf_output = evlist__last(evlist);
+ assert(evsel__name_is(bpf_output, "__augmented_syscalls__"));
+
+ return 0;
+}
+
+void augmented_syscalls__setup_bpf_output(void)
+{
+ struct perf_cpu cpu;
+ int i;
+
+ if (bpf_output == NULL)
+ return;
+
+ /*
+ * Set up the __augmented_syscalls__ BPF map to hold for each
+ * CPU the bpf-output event's file descriptor.
+ */
+ perf_cpu_map__for_each_cpu(cpu, i, bpf_output->core.cpus) {
+ int mycpu = cpu.cpu;
+
+ bpf_map__update_elem(skel->maps.__augmented_syscalls__,
+ &mycpu, sizeof(mycpu),
+ xyarray__entry(bpf_output->core.fd,
+ mycpu, 0),
+ sizeof(__u32), BPF_ANY);
+ }
+}
+
+int augmented_syscalls__set_filter_pids(unsigned int nr, pid_t *pids)
+{
+ bool value = true;
+ int err = 0;
+
+ if (skel == NULL)
+ return 0;
+
+ for (size_t i = 0; i < nr; ++i) {
+ err = bpf_map__update_elem(skel->maps.pids_filtered, &pids[i],
+ sizeof(*pids), &value, sizeof(value),
+ BPF_ANY);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+int augmented_syscalls__get_map_fds(int *enter_fd, int *exit_fd, int *beauty_fd)
+{
+ if (skel == NULL)
+ return -1;
+
+ *enter_fd = bpf_map__fd(skel->maps.syscalls_sys_enter);
+ *exit_fd = bpf_map__fd(skel->maps.syscalls_sys_exit);
+ *beauty_fd = bpf_map__fd(skel->maps.beauty_map_enter);
+
+ if (*enter_fd < 0 || *exit_fd < 0 || *beauty_fd < 0) {
+ pr_err("Error: failed to get syscall or beauty map fd\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+struct bpf_program *augmented_syscalls__unaugmented(void)
+{
+ return skel->progs.syscall_unaugmented;
+}
+
+struct bpf_program *augmented_syscalls__find_by_title(const char *name)
+{
+ struct bpf_program *pos;
+ const char *sec_name;
+
+ if (skel->obj == NULL)
+ return NULL;
+
+ bpf_object__for_each_program(pos, skel->obj) {
+ sec_name = bpf_program__section_name(pos);
+ if (sec_name && !strcmp(sec_name, name))
+ return pos;
+ }
+
+ return NULL;
+}
+
+void augmented_syscalls__cleanup(void)
+{
+ augmented_raw_syscalls_bpf__destroy(skel);
+}
diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c
index ab760e267d41..3712be067464 100644
--- a/tools/perf/util/branch.c
+++ b/tools/perf/util/branch.c
@@ -46,7 +46,7 @@ const char *branch_new_type_name(int new_type)
"FAULT_DATA",
"FAULT_INST",
/*
- * TODO: This switch should happen on 'session->header.env.arch'
+ * TODO: This switch should happen on 'perf_session__env(session)->arch'
* instead, because an arm64 platform perf recording could be
* opened for analysis on other platforms as well.
*/
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index e763e8d99a43..a7018a3b0437 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -42,10 +42,20 @@
static bool no_buildid_cache;
+static int mark_dso_hit_callback(struct callchain_cursor_node *node, void *data __maybe_unused)
+{
+ struct map *map = node->ms.map;
+
+ if (map)
+ dso__set_hit(map__dso(map));
+
+ return 0;
+}
+
int build_id__mark_dso_hit(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
- struct evsel *evsel __maybe_unused,
+ struct evsel *evsel,
struct machine *machine)
{
struct addr_location al;
@@ -63,31 +73,29 @@ int build_id__mark_dso_hit(const struct perf_tool *tool __maybe_unused,
dso__set_hit(map__dso(al.map));
addr_location__exit(&al);
+
+ sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH,
+ /*symbols=*/false, mark_dso_hit_callback, /*data=*/NULL);
+
+
thread__put(thread);
return 0;
}
-int build_id__sprintf(const struct build_id *build_id, char *bf)
+int build_id__snprintf(const struct build_id *build_id, char *bf, size_t bf_size)
{
- char *bid = bf;
- const u8 *raw = build_id->data;
- size_t i;
+ size_t offs = 0;
- bf[0] = 0x0;
+ for (size_t i = 0; i < build_id->size && offs < bf_size; ++i)
+ offs += snprintf(bf + offs, bf_size - offs, "%02x", build_id->data[i]);
- for (i = 0; i < build_id->size; ++i) {
- sprintf(bid, "%02x", *raw);
- ++raw;
- bid += 2;
- }
-
- return (bid - bf) + 1;
+ return offs;
}
-int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id)
+int sysfs__snprintf_build_id(const char *root_dir, char *sbuild_id, size_t sbuild_id_size)
{
char notes[PATH_MAX];
- struct build_id bid;
+ struct build_id bid = { .size = 0, };
int ret;
if (!root_dir)
@@ -99,19 +107,19 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id)
if (ret < 0)
return ret;
- return build_id__sprintf(&bid, sbuild_id);
+ return build_id__snprintf(&bid, sbuild_id, sbuild_id_size);
}
-int filename__sprintf_build_id(const char *pathname, char *sbuild_id)
+int filename__snprintf_build_id(const char *pathname, char *sbuild_id, size_t sbuild_id_size)
{
- struct build_id bid;
+ struct build_id bid = { .size = 0, };
int ret;
ret = filename__read_build_id(pathname, &bid);
if (ret < 0)
return ret;
- return build_id__sprintf(&bid, sbuild_id);
+ return build_id__snprintf(&bid, sbuild_id, sbuild_id_size);
}
/* asnprintf consolidates asprintf and snprintf */
@@ -212,9 +220,9 @@ static bool build_id_cache__valid_id(char *sbuild_id)
return false;
if (!strcmp(pathname, DSO__NAME_KALLSYMS))
- ret = sysfs__sprintf_build_id("/", real_sbuild_id);
+ ret = sysfs__snprintf_build_id("/", real_sbuild_id, sizeof(real_sbuild_id));
else if (pathname[0] == '/')
- ret = filename__sprintf_build_id(pathname, real_sbuild_id);
+ ret = filename__snprintf_build_id(pathname, real_sbuild_id, sizeof(real_sbuild_id));
else
ret = -EINVAL; /* Should we support other special DSO cache? */
if (ret >= 0)
@@ -243,7 +251,7 @@ char *__dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
if (!dso__has_build_id(dso))
return NULL;
- build_id__sprintf(dso__bid_const(dso), sbuild_id);
+ build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id));
linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
if (!linkname)
return NULL;
@@ -326,7 +334,7 @@ static int machine__write_buildid_table_cb(struct dso *dso, void *data)
}
in_kernel = dso__kernel(dso) || is_kernel_module(name, PERF_RECORD_MISC_CPUMODE_UNKNOWN);
- return write_buildid(name, name_len, dso__bid(dso), args->machine->pid,
+ return write_buildid(name, name_len, &dso__id(dso)->build_id, args->machine->pid,
in_kernel ? args->kmisc : args->umisc, args->fd);
}
@@ -769,7 +777,7 @@ static int build_id_cache__add_b(const struct build_id *bid,
{
char sbuild_id[SBUILD_ID_SIZE];
- build_id__sprintf(bid, sbuild_id);
+ build_id__snprintf(bid, sbuild_id, sizeof(sbuild_id));
return __build_id_cache__add_s(sbuild_id, name, nsi, is_kallsyms,
is_vdso, proper_name, root_dir);
@@ -841,7 +849,7 @@ static int filename__read_build_id_ns(const char *filename,
static bool dso__build_id_mismatch(struct dso *dso, const char *name)
{
- struct build_id bid;
+ struct build_id bid = { .size = 0, };
bool ret = false;
mutex_lock(dso__lock(dso));
@@ -864,7 +872,7 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine,
char *allocated_name = NULL;
int ret = 0;
- if (!dso__has_build_id(dso))
+ if (!dso__has_build_id(dso) || !dso__hit(dso))
return 0;
if (dso__is_kcore(dso)) {
@@ -951,7 +959,10 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
void build_id__init(struct build_id *bid, const u8 *data, size_t size)
{
- WARN_ON(size > BUILD_ID_SIZE);
+ if (size > BUILD_ID_SIZE) {
+ pr_debug("Truncating build_id size from %zd\n", size);
+ size = BUILD_ID_SIZE;
+ }
memcpy(bid->data, data, size);
bid->size = size;
}
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index a212497bfdb0..47e621cebe1b 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -13,7 +13,7 @@
struct build_id {
u8 data[BUILD_ID_SIZE];
- size_t size;
+ u8 size;
};
struct dso;
@@ -21,10 +21,10 @@ struct feat_fd;
struct nsinfo;
void build_id__init(struct build_id *bid, const u8 *data, size_t size);
-int build_id__sprintf(const struct build_id *build_id, char *bf);
+int build_id__snprintf(const struct build_id *build_id, char *bf, size_t bf_size);
bool build_id__is_defined(const struct build_id *bid);
-int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
-int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
+int sysfs__snprintf_build_id(const char *root_dir, char *sbuild_id, size_t sbuild_id_size);
+int filename__snprintf_build_id(const char *pathname, char *sbuild_id, size_t sbuild_id_size);
char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
size_t size);
diff --git a/tools/perf/util/cap.c b/tools/perf/util/cap.c
index 69d9a2bcd40b..24a0ea7e6d97 100644
--- a/tools/perf/util/cap.c
+++ b/tools/perf/util/cap.c
@@ -7,7 +7,6 @@
#include "debug.h"
#include <errno.h>
#include <string.h>
-#include <linux/capability.h>
#include <sys/syscall.h>
#include <unistd.h>
diff --git a/tools/perf/util/cap.h b/tools/perf/util/cap.h
index 0c6a1ff55f07..c1b8ac033ccc 100644
--- a/tools/perf/util/cap.h
+++ b/tools/perf/util/cap.h
@@ -3,6 +3,7 @@
#define __PERF_CAP_H
#include <stdbool.h>
+#include <linux/capability.h>
/* For older systems */
#ifndef CAP_SYSLOG
@@ -13,6 +14,10 @@
#define CAP_PERFMON 38
#endif
+#ifndef CAP_BPF
+#define CAP_BPF 39
+#endif
+
/* Query if a capability is supported, used_root is set if the fallback root check was used. */
bool perf_cap__capable(int cap, bool *used_root);
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index fbcc0626f9ce..25e2769b5e74 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -413,8 +413,7 @@ static bool has_pattern_string(const char *str)
return !!strpbrk(str, "{}[]()|*+?^$");
}
-int evlist__expand_cgroup(struct evlist *evlist, const char *str,
- struct rblist *metric_events, bool open_cgroup)
+int evlist__expand_cgroup(struct evlist *evlist, const char *str, bool open_cgroup)
{
struct evlist *orig_list, *tmp_list;
struct evsel *pos, *evsel, *leader;
@@ -440,12 +439,8 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
evlist__splice_list_tail(orig_list, &evlist->core.entries);
evlist->core.nr_entries = 0;
- if (metric_events) {
- orig_metric_events = *metric_events;
- rblist__init(metric_events);
- } else {
- rblist__init(&orig_metric_events);
- }
+ orig_metric_events = evlist->metric_events;
+ metricgroup__rblist_init(&evlist->metric_events);
if (has_pattern_string(str))
prefix_len = match_cgroups(str);
@@ -490,12 +485,10 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
cgroup__put(cgrp);
nr_cgroups++;
- if (metric_events) {
- if (metricgroup__copy_metric_events(tmp_list, cgrp,
- metric_events,
- &orig_metric_events) < 0)
- goto out_err;
- }
+ if (metricgroup__copy_metric_events(tmp_list, cgrp,
+ &evlist->metric_events,
+ &orig_metric_events) < 0)
+ goto out_err;
evlist__splice_list_tail(evlist, &tmp_list->core.entries);
tmp_list->core.nr_entries = 0;
@@ -512,7 +505,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
out_err:
evlist__delete(orig_list);
evlist__delete(tmp_list);
- rblist__exit(&orig_metric_events);
+ metricgroup__rblist_exit(&orig_metric_events);
release_cgroup_list();
return ret;
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index de8882d6e8d3..7b1bda22878c 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -28,8 +28,7 @@ struct rblist;
struct cgroup *cgroup__new(const char *name, bool do_open);
struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name);
-int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups,
- struct rblist *metric_events, bool open_cgroup);
+int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups, bool open_cgroup);
void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup);
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index 8aa456d7c2cd..9880247a2c33 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -24,6 +24,7 @@ static struct comm_strs {
static void comm_strs__remove_if_last(struct comm_str *cs);
static void comm_strs__init(void)
+ NO_THREAD_SAFETY_ANALYSIS /* Inherently single threaded due to pthread_once. */
{
init_rwsem(&_comm_strs.lock);
_comm_strs.capacity = 16;
@@ -119,6 +120,7 @@ static void comm_strs__remove_if_last(struct comm_str *cs)
}
static struct comm_str *__comm_strs__find(struct comm_strs *comm_strs, const char *str)
+ SHARED_LOCKS_REQUIRED(comm_strs->lock)
{
struct comm_str **result;
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 5e7ff09fbc95..3d2e437e1354 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -1338,14 +1338,14 @@ static void cleanup_events(struct perf_session *session)
static int setup_streams(struct ctf_writer *cw, struct perf_session *session)
{
struct ctf_stream **stream;
- struct perf_header *ph = &session->header;
+ struct perf_env *env = perf_session__env(session);
int ncpus;
/*
* Try to get the number of cpus used in the data file,
* if not present fallback to the MAX_CPUS.
*/
- ncpus = ph->env.nr_cpus_avail ?: MAX_CPUS;
+ ncpus = env->nr_cpus_avail ?: MAX_CPUS;
stream = zalloc(sizeof(*stream) * ncpus);
if (!stream) {
@@ -1371,7 +1371,7 @@ static void free_streams(struct ctf_writer *cw)
static int ctf_writer__setup_env(struct ctf_writer *cw,
struct perf_session *session)
{
- struct perf_header *header = &session->header;
+ struct perf_env *env = perf_session__env(session);
struct bt_ctf_writer *writer = cw->writer;
#define ADD(__n, __v) \
@@ -1380,11 +1380,11 @@ do { \
return -1; \
} while (0)
- ADD("host", header->env.hostname);
+ ADD("host", env->hostname);
ADD("sysname", "Linux");
- ADD("release", header->env.os_release);
- ADD("version", header->env.version);
- ADD("machine", header->env.arch);
+ ADD("release", env->os_release);
+ ADD("version", env->version);
+ ADD("machine", env->arch);
ADD("domain", "kernel");
ADD("tracer_name", "perf");
@@ -1401,7 +1401,7 @@ static int ctf_writer__setup_clock(struct ctf_writer *cw,
int64_t offset = 0;
if (tod) {
- struct perf_env *env = &session->header.env;
+ struct perf_env *env = perf_session__env(session);
if (!env->clock.enabled) {
pr_err("Can't provide --tod time, missing clock data. "
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
index d9f805bf6fb0..9dc1e184cf3c 100644
--- a/tools/perf/util/data-convert-json.c
+++ b/tools/perf/util/data-convert-json.c
@@ -257,7 +257,8 @@ static int process_sample_event(const struct perf_tool *tool,
static void output_headers(struct perf_session *session, struct convert_json *c)
{
struct stat st;
- struct perf_header *header = &session->header;
+ const struct perf_header *header = &session->header;
+ const struct perf_env *env = perf_session__env(session);
int ret;
int fd = perf_data__fd(session->data);
int i;
@@ -280,32 +281,32 @@ static void output_headers(struct perf_session *session, struct convert_json *c)
output_json_key_format(out, true, 2, "data-size", "%" PRIu64, header->data_size);
output_json_key_format(out, true, 2, "feat-offset", "%" PRIu64, header->feat_offset);
- output_json_key_string(out, true, 2, "hostname", header->env.hostname);
- output_json_key_string(out, true, 2, "os-release", header->env.os_release);
- output_json_key_string(out, true, 2, "arch", header->env.arch);
+ output_json_key_string(out, true, 2, "hostname", env->hostname);
+ output_json_key_string(out, true, 2, "os-release", env->os_release);
+ output_json_key_string(out, true, 2, "arch", env->arch);
- if (header->env.cpu_desc)
- output_json_key_string(out, true, 2, "cpu-desc", header->env.cpu_desc);
+ if (env->cpu_desc)
+ output_json_key_string(out, true, 2, "cpu-desc", env->cpu_desc);
- output_json_key_string(out, true, 2, "cpuid", header->env.cpuid);
- output_json_key_format(out, true, 2, "nrcpus-online", "%u", header->env.nr_cpus_online);
- output_json_key_format(out, true, 2, "nrcpus-avail", "%u", header->env.nr_cpus_avail);
+ output_json_key_string(out, true, 2, "cpuid", env->cpuid);
+ output_json_key_format(out, true, 2, "nrcpus-online", "%u", env->nr_cpus_online);
+ output_json_key_format(out, true, 2, "nrcpus-avail", "%u", env->nr_cpus_avail);
- if (header->env.clock.enabled) {
+ if (env->clock.enabled) {
output_json_key_format(out, true, 2, "clockid",
- "%u", header->env.clock.clockid);
+ "%u", env->clock.clockid);
output_json_key_format(out, true, 2, "clock-time",
- "%" PRIu64, header->env.clock.clockid_ns);
+ "%" PRIu64, env->clock.clockid_ns);
output_json_key_format(out, true, 2, "real-time",
- "%" PRIu64, header->env.clock.tod_ns);
+ "%" PRIu64, env->clock.tod_ns);
}
- output_json_key_string(out, true, 2, "perf-version", header->env.version);
+ output_json_key_string(out, true, 2, "perf-version", env->version);
output_json_key_format(out, true, 2, "cmdline", "[");
- for (i = 0; i < header->env.nr_cmdline; i++) {
+ for (i = 0; i < env->nr_cmdline; i++) {
output_json_delimiters(out, i != 0, 3);
- output_json_string(c->out, header->env.cmdline_argv[i]);
+ output_json_string(c->out, env->cmdline_argv[i]);
}
output_json_format(out, false, 2, "]");
}
@@ -376,8 +377,7 @@ int bt_convert__perf2json(const char *input_name, const char *output_name,
fprintf(stderr, "Error creating perf session!\n");
goto err_fclose;
}
-
- if (symbol__init(&session->header.env) < 0) {
+ if (symbol__init(perf_session__env(session)) < 0) {
fprintf(stderr, "Symbol init error!\n");
goto err_session_delete;
}
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 50f916374d87..8f52e8cefcf3 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -181,7 +181,7 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al,
if (al->map) {
struct dso *dso = map__dso(al->map);
- err = db_export__dso(dbe, dso, maps__machine(al->maps));
+ err = db_export__dso(dbe, dso, maps__machine(thread__maps(al->thread)));
if (err)
return err;
*dso_db_id = dso__db_id(dso);
@@ -256,6 +256,7 @@ static struct call_path *call_path_from_sample(struct db_export *dbe,
al.map = map__get(node->ms.map);
al.maps = maps__get(thread__maps(thread));
al.addr = node->ip;
+ al.thread = thread__get(thread);
if (al.map && !al.sym)
al.sym = dso__find_symbol(map__dso(al.map), al.addr);
@@ -358,14 +359,18 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
};
struct thread *main_thread;
struct comm *comm = NULL;
- struct machine *machine;
+ struct machine *machine = NULL;
int err;
+ if (thread__maps(thread))
+ machine = maps__machine(thread__maps(thread));
+ if (!machine)
+ return -1;
+
err = db_export__evsel(dbe, evsel);
if (err)
return err;
- machine = maps__machine(al->maps);
err = db_export__machine(dbe, machine);
if (err)
return err;
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index f9ef7d045c92..1dfa4d0eec4d 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -14,11 +14,19 @@
#ifdef HAVE_BACKTRACE_SUPPORT
#include <execinfo.h>
#endif
+#include "addr_location.h"
#include "color.h"
-#include "event.h"
#include "debug.h"
+#include "env.h"
+#include "event.h"
+#include "machine.h"
+#include "map.h"
#include "print_binary.h"
+#include "srcline.h"
+#include "symbol.h"
+#include "synthetic-events.h"
#include "target.h"
+#include "thread.h"
#include "trace-event.h"
#include "ui/helpline.h"
#include "ui/ui.h"
@@ -298,21 +306,66 @@ void perf_debug_setup(void)
libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper);
}
+void __dump_stack(FILE *file, void **stackdump, size_t stackdump_size)
+{
+ /* TODO: async safety. printf, malloc, etc. aren't safe inside a signal handler. */
+ pid_t pid = getpid();
+ struct machine *machine;
+ struct thread *thread = NULL;
+ struct perf_env host_env;
+
+ perf_env__init(&host_env);
+ machine = machine__new_live(&host_env, /*kernel_maps=*/false, pid);
+
+ if (machine)
+ thread = machine__find_thread(machine, pid, pid);
+
+#ifdef HAVE_BACKTRACE_SUPPORT
+ if (!machine || !thread) {
+ /*
+ * Backtrace functions are async signal safe. Fall back on them
+ * if machine/thread creation fails.
+ */
+ backtrace_symbols_fd(stackdump, stackdump_size, fileno(file));
+ machine__delete(machine);
+ perf_env__exit(&host_env);
+ return;
+ }
+#endif
+
+ for (size_t i = 0; i < stackdump_size; i++) {
+ struct addr_location al;
+ u64 addr = (u64)(uintptr_t)stackdump[i];
+ bool printed = false;
+
+ addr_location__init(&al);
+ if (thread && thread__find_map(thread, PERF_RECORD_MISC_USER, addr, &al)) {
+ al.sym = map__find_symbol(al.map, al.addr);
+ if (al.sym) {
+ fprintf(file, " #%zd %p in %s ", i, stackdump[i], al.sym->name);
+ printed = true;
+ }
+ }
+ if (!printed)
+ fprintf(file, " #%zd %p ", i, stackdump[i]);
+
+ map__fprintf_srcline(al.map, al.addr, "", file);
+ fprintf(file, "\n");
+ addr_location__exit(&al);
+ }
+ thread__put(thread);
+ machine__delete(machine);
+ perf_env__exit(&host_env);
+}
+
/* Obtain a backtrace and print it to stdout. */
#ifdef HAVE_BACKTRACE_SUPPORT
void dump_stack(void)
{
- void *array[16];
- size_t size = backtrace(array, ARRAY_SIZE(array));
- char **strings = backtrace_symbols(array, size);
- size_t i;
-
- printf("Obtained %zd stack frames.\n", size);
-
- for (i = 0; i < size; i++)
- printf("%s\n", strings[i]);
+ void *stackdump[32];
+ size_t size = backtrace(stackdump, ARRAY_SIZE(stackdump));
- free(strings);
+ __dump_stack(stdout, stackdump, size);
}
#else
void dump_stack(void) {}
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index a4026d1fd6a3..6b737e195ce1 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -85,6 +85,7 @@ void debug_set_display_time(bool set);
void perf_debug_setup(void);
int perf_quiet_option(void);
+void __dump_stack(FILE *file, void **stackdump, size_t stackdump_size);
void dump_stack(void);
void sighandler_dump_stack(int sig);
diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c
index b5deea7cbdf2..a44c70f93156 100644
--- a/tools/perf/util/debuginfo.c
+++ b/tools/perf/util/debuginfo.c
@@ -103,7 +103,7 @@ struct debuginfo *debuginfo__new(const char *path)
char buf[PATH_MAX], nil = '\0';
struct dso *dso;
struct debuginfo *dinfo = NULL;
- struct build_id bid;
+ struct build_id bid = { .size = 0};
/* Try to open distro debuginfo files */
dso = dso__new(path);
diff --git a/tools/perf/util/demangle-cxx.h b/tools/perf/util/demangle-cxx.h
index 26b5b66c0b4e..9359937a881a 100644
--- a/tools/perf/util/demangle-cxx.h
+++ b/tools/perf/util/demangle-cxx.h
@@ -2,6 +2,8 @@
#ifndef __PERF_DEMANGLE_CXX
#define __PERF_DEMANGLE_CXX 1
+#include <stdbool.h>
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/tools/perf/util/demangle-rust-v0.c b/tools/perf/util/demangle-rust-v0.c
new file mode 100644
index 000000000000..19924d85407d
--- /dev/null
+++ b/tools/perf/util/demangle-rust-v0.c
@@ -0,0 +1,2042 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// The contents of this file come from the Rust rustc-demangle library, hosted
+// in the <https://github.com/rust-lang/rustc-demangle> repository, licensed
+// under "Apache-2.0 OR MIT". For copyright details, see
+// <https://github.com/rust-lang/rustc-demangle/blob/main/README.md>.
+// Please note that the file should be kept as close as possible to upstream.
+
+// Code for demangling Rust symbols. This code is mostly
+// a line-by-line translation of the Rust code in `rustc-demangle`.
+
+// you can find the latest version of this code in https://github.com/rust-lang/rustc-demangle
+
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdbool.h>
+#include <sys/param.h>
+#include <stdio.h>
+
+#include "demangle-rust-v0.h"
+
+#if defined(__GNUC__) || defined(__clang__)
+#define NODISCARD __attribute__((warn_unused_result))
+#else
+#define NODISCARD
+#endif
+
+#define MAX_DEPTH 500
+
+typedef enum {
+ DemangleOk,
+ DemangleInvalid,
+ DemangleRecursed,
+ DemangleBug,
+} demangle_status;
+
+struct demangle_v0 {
+ const char *mangled;
+ size_t mangled_len;
+};
+
+struct demangle_legacy {
+ const char *mangled;
+ size_t mangled_len;
+ size_t elements;
+};
+
+// private version of memrchr to avoid _GNU_SOURCE
+static void *demangle_memrchr(const void *s, int c, size_t n) {
+ const uint8_t *s_ = s;
+ for (; n != 0; n--) {
+ if (s_[n-1] == c) {
+ return (void*)&s_[n-1];
+ }
+ }
+ return NULL;
+}
+
+
+static bool unicode_iscontrol(uint32_t ch) {
+ // this is *technically* a unicode table, but
+ // some unicode properties are simpler than you might think
+ return ch < 0x20 || (ch >= 0x7f && ch < 0xa0);
+}
+
+// "good enough" tables, the only consequence is that when printing
+// *constant strings*, some characters are printed as `\u{abcd}` rather than themselves.
+//
+// I'm leaving these here to allow easily replacing them with actual
+// tables if desired.
+static bool unicode_isprint(uint32_t ch) {
+ if (ch < 0x20) {
+ return false;
+ }
+ if (ch < 0x7f) {
+ return true;
+ }
+ return false;
+}
+
+static bool unicode_isgraphemextend(uint32_t ch) {
+ (void)ch;
+ return false;
+}
+
+static bool str_isascii(const char *s, size_t s_len) {
+ for (size_t i = 0; i < s_len; i++) {
+ if (s[i] & 0x80) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+typedef enum {
+ PunycodeOk,
+ PunycodeError
+} punycode_status;
+
+struct parser {
+ // the parser assumes that `sym` has a safe "terminating byte". It might be NUL,
+ // but it might also be something else if a symbol is "truncated".
+ const char *sym;
+ size_t sym_len;
+ size_t next;
+ uint32_t depth;
+};
+
+struct printer {
+ demangle_status status; // if status == 0 parser is valid
+ struct parser parser;
+ char *out; // NULL for no output [in which case out_len is not decremented]
+ size_t out_len;
+ uint32_t bound_lifetime_depth;
+ bool alternate;
+};
+
+static NODISCARD overflow_status printer_print_path(struct printer *printer, bool in_value);
+static NODISCARD overflow_status printer_print_type(struct printer *printer);
+static NODISCARD overflow_status printer_print_const(struct printer *printer, bool in_value);
+
+static NODISCARD demangle_status try_parse_path(struct parser *parser) {
+ struct printer printer = {
+ DemangleOk,
+ *parser,
+ NULL,
+ SIZE_MAX,
+ 0,
+ false
+ };
+ overflow_status ignore = printer_print_path(&printer, false); // can't fail since no output
+ (void)ignore;
+ *parser = printer.parser;
+ return printer.status;
+}
+
+NODISCARD static demangle_status rust_demangle_v0_demangle(const char *s, size_t s_len, struct demangle_v0 *res, const char **rest) {
+ if (s_len > strlen(s)) {
+ // s_len only exists to shorten the string, this is not a buffer API
+ return DemangleInvalid;
+ }
+
+ const char *inner;
+ size_t inner_len;
+ if (s_len >= 2 && !strncmp(s, "_R", strlen("_R"))) {
+ inner = s+2;
+ inner_len = s_len - 2;
+ } else if (s_len >= 1 && !strncmp(s, "R", strlen("R"))) {
+ // On Windows, dbghelp strips leading underscores, so we accept "R..."
+ // form too.
+ inner = s+1;
+ inner_len = s_len - 1;
+ } else if (s_len >= 3 && !strncmp(s, "__R", strlen("__R"))) {
+ // On OSX, symbols are prefixed with an extra _
+ inner = s+3;
+ inner_len = s_len - 3;
+ } else {
+ return DemangleInvalid;
+ }
+
+ // Paths always start with uppercase characters.
+ if (*inner < 'A' || *inner > 'Z') {
+ return DemangleInvalid;
+ }
+
+ if (!str_isascii(inner, inner_len)) {
+ return DemangleInvalid;
+ }
+
+ struct parser parser = { inner, inner_len, 0, 0 };
+
+ demangle_status status = try_parse_path(&parser);
+ if (status != DemangleOk) return status;
+ char next = parser.sym[parser.next];
+
+ // Instantiating crate (paths always start with uppercase characters).
+ if (parser.next < parser.sym_len && next >= 'A' && next <= 'Z') {
+ status = try_parse_path(&parser);
+ if (status != DemangleOk) return status;
+ }
+
+ res->mangled = inner;
+ res->mangled_len = inner_len;
+ if (rest) {
+ *rest = parser.sym + parser.next;
+ }
+
+ return DemangleOk;
+}
+
+// This might require `len` to be up to 3 characters bigger than the real output len in case of utf-8
+NODISCARD static overflow_status rust_demangle_v0_display_demangle(struct demangle_v0 res, char *out, size_t len, bool alternate) {
+ struct printer printer = {
+ DemangleOk,
+ {
+ res.mangled,
+ res.mangled_len,
+ 0,
+ 0
+ },
+ out,
+ len,
+ 0,
+ alternate
+ };
+ if (printer_print_path(&printer, true) == OverflowOverflow) {
+ return OverflowOverflow;
+ }
+ if (printer.out_len < OVERFLOW_MARGIN) {
+ return OverflowOverflow;
+ }
+ *printer.out = '\0';
+ return OverflowOk;
+}
+
+static size_t code_to_utf8(unsigned char *buffer, uint32_t code)
+{
+ if (code <= 0x7F) {
+ buffer[0] = code;
+ return 1;
+ }
+ if (code <= 0x7FF) {
+ buffer[0] = 0xC0 | (code >> 6); /* 110xxxxx */
+ buffer[1] = 0x80 | (code & 0x3F); /* 10xxxxxx */
+ return 2;
+ }
+ if (code <= 0xFFFF) {
+ buffer[0] = 0xE0 | (code >> 12); /* 1110xxxx */
+ buffer[1] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */
+ buffer[2] = 0x80 | (code & 0x3F); /* 10xxxxxx */
+ return 3;
+ }
+ if (code <= 0x10FFFF) {
+ buffer[0] = 0xF0 | (code >> 18); /* 11110xxx */
+ buffer[1] = 0x80 | ((code >> 12) & 0x3F); /* 10xxxxxx */
+ buffer[2] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */
+ buffer[3] = 0x80 | (code & 0x3F); /* 10xxxxxx */
+ return 4;
+ }
+ return 0;
+}
+
+
+// return length of char at byte, or SIZE_MAX if invalid. buf should have 4 valid characters
+static NODISCARD size_t utf8_next_char(uint8_t *s, uint32_t *ch) {
+ uint8_t byte = *s;
+ // UTF8-1 = %x00-7F
+ // UTF8-2 = %xC2-DF UTF8-tail
+ // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
+ // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
+ // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
+ // %xF4 %x80-8F 2( UTF8-tail )
+ if (byte < 0x80) {
+ *ch = byte;
+ return 1;
+ } else if (byte < 0xc2) {
+ return SIZE_MAX;
+ } else if (byte < 0xe0) {
+ if (s[1] >= 0x80 && s[1] < 0xc0) {
+ *ch = ((byte&0x1f)<<6) + (s[1] & 0x3f);
+ return 2;
+ }
+ return SIZE_MAX;
+ } if (byte < 0xf0) {
+ if (!(s[1] >= 0x80 && s[1] < 0xc0) || !(s[2] >= 0x80 && s[2] < 0xc0)) {
+ return SIZE_MAX; // basic validation
+ }
+ if (byte == 0xe0 && s[1] < 0xa0) {
+ return SIZE_MAX; // overshort
+ }
+ if (byte == 0xed && s[1] >= 0xa0) {
+ return SIZE_MAX; // surrogate
+ }
+ *ch = ((byte&0x0f)<<12) + ((s[1] & 0x3f)<<6) + (s[2] & 0x3f);
+ return 3;
+ } else if (byte < 0xf5) {
+ if (!(s[1] >= 0x80 && s[1] < 0xc0) || !(s[2] >= 0x80 && s[2] < 0xc0) || !(s[3] >= 0x80 && s[3] < 0xc0)) {
+ return SIZE_MAX; // basic validation
+ }
+ if (byte == 0xf0 && s[1] < 0x90) {
+ return SIZE_MAX; // overshort
+ }
+ if (byte == 0xf4 && s[1] >= 0x90) {
+ return SIZE_MAX; // over max
+ }
+ *ch = ((byte&0x07)<<18) + ((s[1] & 0x3f)<<12) + ((s[2] & 0x3f)<<6) + (s[3]&0x3f);
+ return 4;
+ } else {
+ return SIZE_MAX;
+ }
+}
+
+static NODISCARD bool validate_char(uint32_t n) {
+ return ((n ^ 0xd800) - 0x800) < 0x110000 - 0x800;
+}
+
+#define SMALL_PUNYCODE_LEN 128
+
+static NODISCARD punycode_status punycode_decode(const char *start, size_t ascii_len, const char *punycode_start, size_t punycode_len, uint32_t (*out_)[SMALL_PUNYCODE_LEN], size_t *out_len) {
+ uint32_t *out = *out_;
+
+ if (punycode_len == 0) {
+ return PunycodeError;
+ }
+
+ if (ascii_len > SMALL_PUNYCODE_LEN) {
+ return PunycodeError;
+ }
+ for (size_t i = 0; i < ascii_len; i++) {
+ out[i] = start[i];
+ }
+ size_t len = ascii_len;
+
+ size_t base = 36, t_min = 1, t_max = 26, skew = 38, damp = 700, bias = 72, i = 0, n = 0x80;
+ for (;;) {
+ size_t delta = 0, w = 1, k = 0;
+ for (;;) {
+ k += base;
+ size_t biased = k < bias ? 0 : k - bias;
+ size_t t = MIN(MAX(biased, t_min), t_max);
+ size_t d;
+ if (punycode_len == 0) {
+ return PunycodeError;
+ }
+ char nx = *punycode_start++;
+ punycode_len--;
+ if ('a' <= nx && nx <= 'z') {
+ d = nx - 'a';
+ } else if ('0' <= nx && nx <= '9') {
+ d = 26 + (nx - '0');
+ } else {
+ return PunycodeError;
+ }
+ if (w == 0 || d > SIZE_MAX / w || d*w > SIZE_MAX - delta) {
+ return PunycodeError;
+ }
+ delta += d * w;
+ if (d < t) {
+ break;
+ }
+ if (base < t || w == 0 || (base - t) > SIZE_MAX / w) {
+ return PunycodeError;
+ }
+ w *= (base - t);
+ }
+
+ len += 1;
+ if (i > SIZE_MAX - delta) {
+ return PunycodeError;
+ }
+ i += delta;
+ if (n > SIZE_MAX - i / len) {
+ return PunycodeError;
+ }
+ n += i / len;
+ i %= len;
+
+ // char validation
+ if (n > UINT32_MAX || !validate_char((uint32_t)n)) {
+ return PunycodeError;
+ }
+
+ // insert new character
+ if (len > SMALL_PUNYCODE_LEN) {
+ return PunycodeError;
+ }
+ memmove(out + i + 1, out + i, (len - i - 1) * sizeof(uint32_t));
+ out[i] = (uint32_t)n;
+
+ // start i index at incremented position
+ i++;
+
+ // If there are no more deltas, decoding is complete.
+ if (punycode_len == 0) {
+ *out_len = len;
+ return PunycodeOk;
+ }
+
+ // Perform bias adaptation.
+ delta /= damp;
+ damp = 2;
+
+ delta += delta / len;
+ k = 0;
+ while (delta > ((base - t_min) * t_max) / 2) {
+ delta /= base - t_min;
+ k += base;
+ }
+ bias = k + ((base - t_min + 1) * delta) / (delta + skew);
+ }
+}
+
+struct ident {
+ const char *ascii_start;
+ size_t ascii_len;
+ const char *punycode_start;
+ size_t punycode_len;
+};
+
+static NODISCARD overflow_status display_ident(const char *ascii_start, size_t ascii_len, const char *punycode_start, size_t punycode_len, uint8_t *out, size_t *out_len) {
+ uint32_t outbuf[SMALL_PUNYCODE_LEN];
+
+ size_t wide_len;
+ size_t out_buflen = *out_len;
+
+ if (punycode_len == 0) {
+ if (ascii_len > out_buflen) {
+ return OverflowOverflow;
+ }
+ memcpy(out, ascii_start, ascii_len);
+ *out_len = ascii_len;
+ } else if (punycode_decode(ascii_start, ascii_len, punycode_start, punycode_len, &outbuf, &wide_len) == PunycodeOk) {
+ size_t narrow_len = 0;
+ for (size_t i = 0; i < wide_len; i++) {
+ if (out_buflen - narrow_len < 4) {
+ return OverflowOverflow;
+ }
+ unsigned char *pos = &out[narrow_len];
+ narrow_len += code_to_utf8(pos, outbuf[i]);
+ }
+ *out_len = narrow_len;
+ } else {
+ size_t narrow_len = 0;
+ if (out_buflen < strlen("punycode{")) {
+ return OverflowOverflow;
+ }
+ memcpy(out, "punycode{", strlen("punycode{"));
+ narrow_len = strlen("punycode{");
+ if (ascii_len > 0) {
+ if (out_buflen - narrow_len < ascii_len || out_buflen - narrow_len - ascii_len < 1) {
+ return OverflowOverflow;
+ }
+ memcpy(out + narrow_len, ascii_start, ascii_len);
+ narrow_len += ascii_len;
+ out[narrow_len] = '-';
+ narrow_len++;
+ }
+ if (out_buflen - narrow_len < punycode_len || out_buflen - narrow_len - punycode_len < 1) {
+ return OverflowOverflow;
+ }
+ memcpy(out + narrow_len, punycode_start, punycode_len);
+ narrow_len += punycode_len;
+ out[narrow_len] = '}';
+ narrow_len++;
+ *out_len = narrow_len;
+ }
+
+ return OverflowOk;
+}
+
+static NODISCARD bool try_parse_uint(const char *buf, size_t len, uint64_t *result) {
+ size_t cur = 0;
+ for(;cur < len && buf[cur] == '0';cur++);
+ uint64_t result_val = 0;
+ if (len - cur > 16) return false;
+ for(;cur < len;cur++) {
+ char c = buf[cur];
+ result_val <<= 4;
+ if ('0' <= c && c <= '9') {
+ result_val += c - '0';
+ } else if ('a' <= c && c <= 'f') {
+ result_val += 10 + (c - 'a');
+ } else {
+ return false;
+ }
+ }
+ *result = result_val;
+ return true;
+}
+
+static NODISCARD bool dinibble2int(const char *buf, uint8_t *result) {
+ uint8_t result_val = 0;
+ for (int i = 0; i < 2; i++) {
+ char c = buf[i];
+ result_val <<= 4;
+ if ('0' <= c && c <= '9') {
+ result_val += c - '0';
+ } else if ('a' <= c && c <= 'f') {
+ result_val += 10 + (c - 'a');
+ } else {
+ return false;
+ }
+ }
+ *result = result_val;
+ return true;
+}
+
+
+typedef enum {
+ NtsOk = 0,
+ NtsOverflow = 1,
+ NtsInvalid = 2
+} nibbles_to_string_status;
+
+// '\u{10ffff}', +margin
+#define ESCAPED_SIZE 12
+
+static NODISCARD size_t char_to_string(uint32_t ch, uint8_t quote, bool first, char (*buf)[ESCAPED_SIZE]) {
+ // encode the character
+ char *escaped_buf = *buf;
+ escaped_buf[0] = '\\';
+ size_t escaped_len = 2;
+ switch (ch) {
+ case '\0':
+ escaped_buf[1] = '0';
+ break;
+ case '\t':
+ escaped_buf[1] = 't';
+ break;
+ case '\r':
+ escaped_buf[1] = 'r';
+ break;
+ case '\n':
+ escaped_buf[1] = 'n';
+ break;
+ case '\\':
+ escaped_buf[1] = '\\';
+ break;
+ default:
+ if (ch == quote) {
+ escaped_buf[1] = ch;
+ } else if (!unicode_isprint(ch) || (first && unicode_isgraphemextend(ch))) {
+ int hexlen = snprintf(escaped_buf, ESCAPED_SIZE, "\\u{%x}", (unsigned int)ch);
+ if (hexlen < 0) {
+ return 0; // (snprintf shouldn't fail!)
+ }
+ escaped_len = hexlen;
+ } else {
+ // printable character
+ escaped_buf[0] = ch;
+ escaped_len = 1;
+ }
+ break;
+ }
+
+ return escaped_len;
+}
+
+// convert nibbles to a single/double-quoted string
+static NODISCARD nibbles_to_string_status nibbles_to_string(const char *buf, size_t len, uint8_t *out, size_t *out_len) {
+ uint8_t quote = '"';
+ bool first = true;
+
+ if ((len % 2) != 0) {
+ return NtsInvalid; // odd number of nibbles
+ }
+
+ size_t cur_out_len = 0;
+
+ // write starting quote
+ if (out != NULL) {
+ cur_out_len = *out_len;
+ if (cur_out_len == 0) {
+ return NtsOverflow;
+ }
+ *out++ = quote;
+ cur_out_len--;
+ }
+
+ uint8_t conv_buf[4] = {0};
+ size_t conv_buf_len = 0;
+ while (len > 1 || conv_buf_len > 0) {
+ while (len > 1 && conv_buf_len < sizeof(conv_buf)) {
+ if (!dinibble2int(buf, &conv_buf[conv_buf_len])) {
+ return NtsInvalid;
+ }
+ conv_buf_len++;
+ buf += 2;
+ len -= 2;
+ }
+
+ // conv_buf is full here if possible, process 1 UTF-8 character
+ uint32_t ch = 0;
+ size_t consumed = utf8_next_char(conv_buf, &ch);
+ if (consumed > conv_buf_len) {
+ // either SIZE_MAX (invalid UTF-8) or finished input buffer and
+ // there are still bytes remaining, in both cases invalid
+ return NtsInvalid;
+ }
+
+ // "consume" the character
+ memmove(conv_buf, conv_buf+consumed, conv_buf_len-consumed);
+ conv_buf_len -= consumed;
+
+ char escaped_buf[ESCAPED_SIZE];
+ size_t escaped_len = char_to_string(ch, '"', first, &escaped_buf);
+ if (out != NULL) {
+ if (cur_out_len < escaped_len) {
+ return NtsOverflow;
+ }
+ memcpy(out, escaped_buf, escaped_len);
+ out += escaped_len;
+ cur_out_len -= escaped_len;
+ }
+ first = false;
+ }
+
+ // write ending quote
+ if (out != NULL) {
+ if (cur_out_len == 0) {
+ return NtsOverflow;
+ }
+ *out++ = quote;
+ cur_out_len--;
+ *out_len -= cur_out_len; // subtract remaining space to get used space
+ }
+
+ return NtsOk;
+}
+
+static const char* basic_type(uint8_t tag) {
+ switch(tag) {
+ case 'b':
+ return "bool";
+ case 'c':
+ return "char";
+ case 'e':
+ return "str";
+ case 'u':
+ return "()";
+ case 'a':
+ return "i8";
+ case 's':
+ return "i16";
+ case 'l':
+ return "i32";
+ case 'x':
+ return "i64";
+ case 'n':
+ return "i128";
+ case 'i':
+ return "isize";
+ case 'h':
+ return "u8";
+ case 't':
+ return "u16";
+ case 'm':
+ return "u32";
+ case 'y':
+ return "u64";
+ case 'o':
+ return "u128";
+ case 'j':
+ return "usize";
+ case 'f':
+ return "f32";
+ case 'd':
+ return "f64";
+ case 'z':
+ return "!";
+ case 'p':
+ return "_";
+ case 'v':
+ return "...";
+ default:
+ return NULL;
+ }
+}
+
+static NODISCARD demangle_status parser_push_depth(struct parser *parser) {
+ parser->depth++;
+ if (parser->depth > MAX_DEPTH) {
+ return DemangleRecursed;
+ } else {
+ return DemangleOk;
+ }
+}
+
+static demangle_status parser_pop_depth(struct parser *parser) {
+ parser->depth--;
+ return DemangleOk;
+}
+
+static uint8_t parser_peek(struct parser const *parser) {
+ if (parser->next == parser->sym_len) {
+ return 0; // add a "pseudo nul terminator" to avoid peeking past the end of a symbol
+ } else {
+ return parser->sym[parser->next];
+ }
+}
+
+static bool parser_eat(struct parser *parser, uint8_t ch) {
+ if (parser_peek(parser) == ch) {
+ if (ch != 0) { // safety: make sure we don't skip past the NUL terminator
+ parser->next++;
+ }
+ return true;
+ } else {
+ return false;
+ }
+}
+
+static uint8_t parser_next(struct parser *parser) {
+ // don't advance after end of input, and return an imaginary NUL terminator
+ if (parser->next == parser->sym_len) {
+ return 0;
+ } else {
+ return parser->sym[parser->next++];
+ }
+}
+
+static NODISCARD demangle_status parser_ch(struct parser *parser, uint8_t *next) {
+ // don't advance after end of input
+ if (parser->next == parser->sym_len) {
+ return DemangleInvalid;
+ } else {
+ *next = parser->sym[parser->next++];
+ return DemangleOk;
+ }
+}
+
+struct buf {
+ const char *start;
+ size_t len;
+};
+
+static NODISCARD demangle_status parser_hex_nibbles(struct parser *parser, struct buf *buf) {
+ size_t start = parser->next;
+ for (;;) {
+ uint8_t ch = parser_next(parser);
+ if (ch == '_') {
+ break;
+ }
+ if (!(('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f'))) {
+ return DemangleInvalid;
+ }
+ }
+ buf->start = parser->sym + start;
+ buf->len = parser->next - start - 1; // skip final _
+ return DemangleOk;
+}
+
+static NODISCARD demangle_status parser_digit_10(struct parser *parser, uint8_t *out) {
+ uint8_t ch = parser_peek(parser);
+ if ('0' <= ch && ch <= '9') {
+ *out = ch - '0';
+ parser->next++;
+ return DemangleOk;
+ } else {
+ return DemangleInvalid;
+ }
+}
+
+static NODISCARD demangle_status parser_digit_62(struct parser *parser, uint64_t *out) {
+ uint8_t ch = parser_peek(parser);
+ if ('0' <= ch && ch <= '9') {
+ *out = ch - '0';
+ parser->next++;
+ return DemangleOk;
+ } else if ('a' <= ch && ch <= 'z') {
+ *out = 10 + (ch - 'a');
+ parser->next++;
+ return DemangleOk;
+ } else if ('A' <= ch && ch <= 'Z') {
+ *out = 10 + 26 + (ch - 'A');
+ parser->next++;
+ return DemangleOk;
+ } else {
+ return DemangleInvalid;
+ }
+}
+
+static NODISCARD demangle_status parser_integer_62(struct parser *parser, uint64_t *out) {
+ if (parser_eat(parser, '_')) {
+ *out = 0;
+ return DemangleOk;
+ }
+
+ uint64_t x = 0;
+ demangle_status status;
+ while (!parser_eat(parser, '_')) {
+ uint64_t d;
+ if ((status = parser_digit_62(parser, &d)) != DemangleOk) {
+ return status;
+ }
+ if (x > UINT64_MAX / 62) {
+ return DemangleInvalid;
+ }
+ x *= 62;
+ if (x > UINT64_MAX - d) {
+ return DemangleInvalid;
+ }
+ x += d;
+ }
+ if (x == UINT64_MAX) {
+ return DemangleInvalid;
+ }
+ *out = x + 1;
+ return DemangleOk;
+}
+
+static NODISCARD demangle_status parser_opt_integer_62(struct parser *parser, uint8_t tag, uint64_t *out) {
+ if (!parser_eat(parser, tag)) {
+ *out = 0;
+ return DemangleOk;
+ }
+
+ demangle_status status;
+ if ((status = parser_integer_62(parser, out)) != DemangleOk) {
+ return status;
+ }
+ if (*out == UINT64_MAX) {
+ return DemangleInvalid;
+ }
+ *out = *out + 1;
+ return DemangleOk;
+}
+
+static NODISCARD demangle_status parser_disambiguator(struct parser *parser, uint64_t *out) {
+ return parser_opt_integer_62(parser, 's', out);
+}
+
+typedef uint8_t parser_namespace_type;
+
+static NODISCARD demangle_status parser_namespace(struct parser *parser, parser_namespace_type *out) {
+ uint8_t next = parser_next(parser);
+ if ('A' <= next && next <= 'Z') {
+ *out = next;
+ return DemangleOk;
+ } else if ('a' <= next && next <= 'z') {
+ *out = 0;
+ return DemangleOk;
+ } else {
+ return DemangleInvalid;
+ }
+}
+
+static NODISCARD demangle_status parser_backref(struct parser *parser, struct parser *out) {
+ size_t start = parser->next;
+ if (start == 0) {
+ return DemangleBug;
+ }
+ size_t s_start = start - 1;
+ uint64_t i;
+ demangle_status status = parser_integer_62(parser, &i);
+ if (status != DemangleOk) {
+ return status;
+ }
+ if (i >= s_start) {
+ return DemangleInvalid;
+ }
+ struct parser res = {
+ .sym = parser->sym,
+ .sym_len = parser->sym_len,
+ .next = (size_t)i,
+ .depth = parser->depth
+ };
+ status = parser_push_depth(&res);
+ if (status != DemangleOk) {
+ return status;
+ }
+ *out = res;
+ return DemangleOk;
+}
+
+static NODISCARD demangle_status parser_ident(struct parser *parser, struct ident *out) {
+ bool is_punycode = parser_eat(parser, 'u');
+ size_t len;
+ uint8_t d;
+ demangle_status status = parser_digit_10(parser, &d);
+ len = d;
+ if (status != DemangleOk) {
+ return status;
+ }
+ if (len) {
+ for (;;) {
+ status = parser_digit_10(parser, &d);
+ if (status != DemangleOk) {
+ break;
+ }
+ if (len > SIZE_MAX / 10) {
+ return DemangleInvalid;
+ }
+ len *= 10;
+ if (len > SIZE_MAX - d) {
+ return DemangleInvalid;
+ }
+ len += d;
+ }
+ }
+
+ // Skip past the optional `_` separator.
+ parser_eat(parser, '_');
+
+ size_t start = parser->next;
+ if (parser->sym_len - parser->next < len) {
+ return DemangleInvalid;
+ }
+ parser->next += len;
+
+ const char *ident = &parser->sym[start];
+
+ if (is_punycode) {
+ const char *underscore = demangle_memrchr(ident, '_', (size_t)len);
+ if (underscore == NULL) {
+ *out = (struct ident){
+ .ascii_start="",
+ .ascii_len=0,
+ .punycode_start=ident,
+ .punycode_len=len
+ };
+ } else {
+ size_t ascii_len = underscore - ident;
+ // ascii_len <= len - 1 since `_` is in the first len bytes
+ size_t punycode_len = len - 1 - ascii_len;
+ *out = (struct ident){
+ .ascii_start=ident,
+ .ascii_len=ascii_len,
+ .punycode_start=underscore + 1,
+ .punycode_len=punycode_len
+ };
+ }
+ if (out->punycode_len == 0) {
+ return DemangleInvalid;
+ }
+ return DemangleOk;
+ } else {
+ *out = (struct ident) {
+ .ascii_start=ident,
+ .ascii_len=(size_t)len,
+ .punycode_start="",
+ .punycode_len=0,
+ };
+ return DemangleOk;
+ }
+}
+
+#define INVALID_SYNTAX "{invalid syntax}"
+
+static const char *demangle_error_message(demangle_status status) {
+ switch (status) {
+ case DemangleInvalid:
+ return INVALID_SYNTAX;
+ case DemangleBug:
+ return "{bug}";
+ case DemangleRecursed:
+ return "{recursion limit reached}";
+ default:
+ return "{unknown error}";
+ }
+}
+
+#define PRINT(print_fn) \
+ do { \
+ if ((print_fn) == OverflowOverflow) { \
+ return OverflowOverflow; \
+ } \
+ } while(0)
+
+#define PRINT_CH(printer, s) PRINT(printer_print_ch((printer), (s)))
+#define PRINT_STR(printer, s) PRINT(printer_print_str((printer), (s)))
+#define PRINT_U64(printer, s) PRINT(printer_print_u64((printer), (s)))
+#define PRINT_IDENT(printer, s) PRINT(printer_print_ident((printer), (s)))
+
+#define INVALID(printer) \
+ do { \
+ PRINT_STR((printer), INVALID_SYNTAX); \
+ (printer)->status = DemangleInvalid; \
+ return OverflowOk; \
+ } while(0)
+
+#define PARSE(printer, method, ...) \
+ do { \
+ if ((printer)->status != DemangleOk) { \
+ PRINT_STR((printer), "?"); \
+ return OverflowOk; \
+ } else { \
+ demangle_status _parse_status = method(&(printer)->parser, ## __VA_ARGS__); \
+ if (_parse_status != DemangleOk) { \
+ PRINT_STR((printer), demangle_error_message(_parse_status)); \
+ (printer)->status = _parse_status; \
+ return OverflowOk; \
+ } \
+ } \
+ } while(0)
+
+#define PRINT_SEP_LIST(printer, body, sep) \
+ do { \
+ size_t _sep_list_i; \
+ PRINT_SEP_LIST_COUNT(printer, _sep_list_i, body, sep); \
+ } while(0)
+
+#define PRINT_SEP_LIST_COUNT(printer, count, body, sep) \
+ do { \
+ count = 0; \
+ while ((printer)->status == DemangleOk && !printer_eat((printer), 'E')) { \
+ if (count > 0) { PRINT_STR(printer, sep); } \
+ body; \
+ count++; \
+ } \
+ } while(0)
+
+static bool printer_eat(struct printer *printer, uint8_t b) {
+ if (printer->status != DemangleOk) {
+ return false;
+ }
+
+ return parser_eat(&printer->parser, b);
+}
+
+static void printer_pop_depth(struct printer *printer) {
+ if (printer->status == DemangleOk) {
+ parser_pop_depth(&printer->parser);
+ }
+}
+
+static NODISCARD overflow_status printer_print_buf(struct printer *printer, const char *start, size_t len) {
+ if (printer->out == NULL) {
+ return OverflowOk;
+ }
+ if (printer->out_len < len) {
+ return OverflowOverflow;
+ }
+
+ memcpy(printer->out, start, len);
+ printer->out += len;
+ printer->out_len -= len;
+ return OverflowOk;
+}
+
+static NODISCARD overflow_status printer_print_str(struct printer *printer, const char *buf) {
+ return printer_print_buf(printer, buf, strlen(buf));
+}
+
+static NODISCARD overflow_status printer_print_ch(struct printer *printer, char ch) {
+ return printer_print_buf(printer, &ch, 1);
+}
+
+static NODISCARD overflow_status printer_print_u64(struct printer *printer, uint64_t n) {
+ char buf[32] = {0};
+ sprintf(buf, "%llu", (unsigned long long)n); // printing uint64 uses 21 < 32 chars
+ return printer_print_str(printer, buf);
+}
+
+static NODISCARD overflow_status printer_print_ident(struct printer *printer, struct ident *ident) {
+ if (printer->out == NULL) {
+ return OverflowOk;
+ }
+
+ size_t out_len = printer->out_len;
+ overflow_status status;
+ if ((status = display_ident(ident->ascii_start, ident->ascii_len, ident->punycode_start, ident->punycode_len, (uint8_t*)printer->out, &out_len)) != OverflowOk) {
+ return status;
+ }
+ printer->out += out_len;
+ printer->out_len -= out_len;
+ return OverflowOk;
+}
+
+typedef overflow_status (*printer_fn)(struct printer *printer);
+typedef overflow_status (*backref_fn)(struct printer *printer, bool *arg);
+
+static NODISCARD overflow_status printer_print_backref(struct printer *printer, backref_fn func, bool *arg) {
+ struct parser backref;
+ PARSE(printer, parser_backref, &backref);
+
+ if (printer->out == NULL) {
+ return OverflowOk;
+ }
+
+ struct parser orig_parser = printer->parser;
+ demangle_status orig_status = printer->status; // fixme not sure this is needed match for Ok on the Rust side
+ printer->parser = backref;
+ printer->status = DemangleOk;
+ overflow_status status = func(printer, arg);
+ printer->parser = orig_parser;
+ printer->status = orig_status;
+
+ return status;
+}
+
+static NODISCARD overflow_status printer_print_lifetime_from_index(struct printer *printer, uint64_t lt) {
+ // Bound lifetimes aren't tracked when skipping printing.
+ if (printer->out == NULL) {
+ return OverflowOk;
+ }
+
+ PRINT_STR(printer, "'");
+ if (lt == 0) {
+ PRINT_STR(printer, "_");
+ return OverflowOk;
+ }
+
+ if (printer->bound_lifetime_depth < lt) {
+ INVALID(printer);
+ } else {
+ uint64_t depth = printer->bound_lifetime_depth - lt;
+ if (depth < 26) {
+ PRINT_CH(printer, 'a' + depth);
+ } else {
+ PRINT_STR(printer, "_");
+ PRINT_U64(printer, depth);
+ }
+
+ return OverflowOk;
+ }
+}
+
+static NODISCARD overflow_status printer_in_binder(struct printer *printer, printer_fn func) {
+ uint64_t bound_lifetimes;
+ PARSE(printer, parser_opt_integer_62, 'G', &bound_lifetimes);
+
+ // Don't track bound lifetimes when skipping printing.
+ if (printer->out == NULL) {
+ return func(printer);
+ }
+
+ if (bound_lifetimes > 0) {
+ PRINT_STR(printer, "for<");
+ for (uint64_t i = 0; i < bound_lifetimes; i++) {
+ if (i > 0) {
+ PRINT_STR(printer, ", ");
+ }
+ printer->bound_lifetime_depth++;
+ PRINT(printer_print_lifetime_from_index(printer, 1));
+ }
+ PRINT_STR(printer, "> ");
+ }
+
+ overflow_status r = func(printer);
+ printer->bound_lifetime_depth -= bound_lifetimes;
+
+ return r;
+}
+
+static NODISCARD overflow_status printer_print_generic_arg(struct printer *printer) {
+ if (printer_eat(printer, 'L')) {
+ uint64_t lt;
+ PARSE(printer, parser_integer_62, &lt);
+ return printer_print_lifetime_from_index(printer, lt);
+ } else if (printer_eat(printer, 'K')) {
+ return printer_print_const(printer, false);
+ } else {
+ return printer_print_type(printer);
+ }
+}
+
+static NODISCARD overflow_status printer_print_generic_args(struct printer *printer) {
+ PRINT_STR(printer, "<");
+ PRINT_SEP_LIST(printer, PRINT(printer_print_generic_arg(printer)), ", ");
+ PRINT_STR(printer, ">");
+ return OverflowOk;
+}
+
+static NODISCARD overflow_status printer_print_path_out_of_value(struct printer *printer, bool *_arg) {
+ (void)_arg;
+ return printer_print_path(printer, false);
+}
+
+static NODISCARD overflow_status printer_print_path_in_value(struct printer *printer, bool *_arg) {
+ (void)_arg;
+ return printer_print_path(printer, true);
+}
+
+static NODISCARD overflow_status printer_print_path(struct printer *printer, bool in_value) {
+ PARSE(printer, parser_push_depth);
+ uint8_t tag;
+ PARSE(printer, parser_ch, &tag);
+
+ overflow_status st;
+ uint64_t dis;
+ struct ident name;
+ parser_namespace_type ns;
+ char *orig_out;
+
+ switch(tag) {
+ case 'C':
+ PARSE(printer, parser_disambiguator, &dis);
+ PARSE(printer, parser_ident, &name);
+
+ PRINT_IDENT(printer, &name);
+
+ if (printer->out != NULL && !printer->alternate && dis != 0) {
+ PRINT_STR(printer, "[");
+ char buf[24] = {0};
+ sprintf(buf, "%llx", (unsigned long long)dis);
+ PRINT_STR(printer, buf);
+ PRINT_STR(printer, "]");
+ }
+ break;
+ case 'N':
+ PARSE(printer, parser_namespace, &ns);
+ if ((st = printer_print_path(printer, in_value)) != OverflowOk) {
+ return st;
+ }
+
+ // HACK(eddyb) if the parser is already marked as having errored,
+ // `parse!` below will print a `?` without its preceding `::`
+ // (because printing the `::` is skipped in certain conditions,
+ // i.e. a lowercase namespace with an empty identifier),
+ // so in order to get `::?`, the `::` has to be printed here.
+ if (printer->status != DemangleOk) {
+ PRINT_STR(printer, "::");
+ }
+
+ PARSE(printer, parser_disambiguator, &dis);
+ PARSE(printer, parser_ident, &name);
+ // Special namespace, like closures and shims
+ if (ns) {
+ PRINT_STR(printer, "::{");
+ if (ns == 'C') {
+ PRINT_STR(printer, "closure");
+ } else if (ns == 'S') {
+ PRINT_STR(printer, "shim");
+ } else {
+ PRINT_CH(printer, ns);
+ }
+ if (name.ascii_len != 0 || name.punycode_len != 0) {
+ PRINT_STR(printer, ":");
+ PRINT_IDENT(printer, &name);
+ }
+ PRINT_STR(printer, "#");
+ PRINT_U64(printer, dis);
+ PRINT_STR(printer, "}");
+ } else {
+ // Implementation-specific/unspecified namespaces
+ if (name.ascii_len != 0 || name.punycode_len != 0) {
+ PRINT_STR(printer, "::");
+ PRINT_IDENT(printer, &name);
+ }
+ }
+ break;
+ case 'M':
+ case 'X':
+ // for impls, ignore the impls own path
+ PARSE(printer, parser_disambiguator, &dis);
+ orig_out = printer->out;
+ printer->out = NULL;
+ PRINT(printer_print_path(printer, false));
+ printer->out = orig_out;
+
+ // fallthru
+ case 'Y':
+ PRINT_STR(printer, "<");
+ PRINT(printer_print_type(printer));
+ if (tag != 'M') {
+ PRINT_STR(printer, " as ");
+ PRINT(printer_print_path(printer, false));
+ }
+ PRINT_STR(printer, ">");
+ break;
+ case 'I':
+ PRINT(printer_print_path(printer, in_value));
+ if (in_value) {
+ PRINT_STR(printer, "::");
+ }
+ PRINT(printer_print_generic_args(printer));
+ break;
+ case 'B':
+ PRINT(printer_print_backref(printer, in_value ? printer_print_path_in_value : printer_print_path_out_of_value, NULL));
+ break;
+ default:
+ INVALID(printer);
+ break;
+ }
+
+ printer_pop_depth(printer);
+ return OverflowOk;
+}
+
+static NODISCARD overflow_status printer_print_const_uint(struct printer *printer, uint8_t tag) {
+ struct buf hex;
+ PARSE(printer, parser_hex_nibbles, &hex);
+
+ uint64_t val;
+ if (try_parse_uint(hex.start, hex.len, &val)) {
+ PRINT_U64(printer, val);
+ } else {
+ PRINT_STR(printer, "0x");
+ PRINT(printer_print_buf(printer, hex.start, hex.len));
+ }
+
+ if (printer->out != NULL && !printer->alternate) {
+ const char *ty = basic_type(tag);
+ if (/* safety */ ty != NULL) {
+ PRINT_STR(printer, ty);
+ }
+ }
+
+ return OverflowOk;
+}
+
+static NODISCARD overflow_status printer_print_const_str_literal(struct printer *printer) {
+ struct buf hex;
+ PARSE(printer, parser_hex_nibbles, &hex);
+
+ size_t out_len = SIZE_MAX;
+ nibbles_to_string_status nts_status = nibbles_to_string(hex.start, hex.len, NULL, &out_len);
+ switch (nts_status) {
+ case NtsOk:
+ if (printer->out != NULL) {
+ out_len = printer->out_len;
+ nts_status = nibbles_to_string(hex.start, hex.len, (uint8_t*)printer->out, &out_len);
+ if (nts_status != NtsOk) {
+ return OverflowOverflow;
+ }
+ printer->out += out_len;
+ printer->out_len -= out_len;
+ }
+ return OverflowOk;
+ case NtsOverflow:
+ // technically if there is a string of size `SIZE_MAX/6` whose escaped version overflows
+ // SIZE_MAX but has an invalid char, this will be a "fake" overflow. In practice,
+ // that is not going to happen and a fuzzer will not generate strings of this length.
+ return OverflowOverflow;
+ case NtsInvalid:
+ default:
+ INVALID(printer);
+ }
+}
+
+static NODISCARD overflow_status printer_print_const_struct(struct printer *printer) {
+ uint64_t dis;
+ struct ident name;
+ PARSE(printer, parser_disambiguator, &dis);
+ PARSE(printer, parser_ident, &name);
+ PRINT_IDENT(printer, &name);
+ PRINT_STR(printer, ": ");
+ return printer_print_const(printer, true);
+}
+
+static NODISCARD overflow_status printer_print_const_out_of_value(struct printer *printer, bool *_arg) {
+ (void)_arg;
+ return printer_print_const(printer, false);
+}
+
+static NODISCARD overflow_status printer_print_const_in_value(struct printer *printer, bool *_arg) {
+ (void)_arg;
+ return printer_print_const(printer, true);
+}
+
+static NODISCARD overflow_status printer_print_const(struct printer *printer, bool in_value) {
+ uint8_t tag;
+
+ PARSE(printer, parser_ch, &tag);
+ PARSE(printer, parser_push_depth);
+
+ struct buf hex;
+ uint64_t val;
+ size_t count;
+
+ bool opened_brace = false;
+#define OPEN_BRACE_IF_OUTSIDE_EXPR \
+ do { if (!in_value) { \
+ opened_brace = true; \
+ PRINT_STR(printer, "{"); \
+ } } while(0)
+
+ switch(tag) {
+ case 'p':
+ PRINT_STR(printer, "_");
+ break;
+ // Primitive leaves with hex-encoded values (see `basic_type`).
+ case 'a':
+ case 's':
+ case 'l':
+ case 'x':
+ case 'n':
+ case 'i':
+ if (printer_eat(printer, 'n')) {
+ PRINT_STR(printer, "-");
+ }
+ /* fallthrough */
+ case 'h':
+ case 't':
+ case 'm':
+ case 'y':
+ case 'o':
+ case 'j':
+ PRINT(printer_print_const_uint(printer, tag));
+ break;
+ case 'b':
+ PARSE(printer, parser_hex_nibbles, &hex);
+ if (try_parse_uint(hex.start, hex.len, &val)) {
+ if (val == 0) {
+ PRINT_STR(printer, "false");
+ } else if (val == 1) {
+ PRINT_STR(printer, "true");
+ } else {
+ INVALID(printer);
+ }
+ } else {
+ INVALID(printer);
+ }
+ break;
+ case 'c':
+ PARSE(printer, parser_hex_nibbles, &hex);
+ if (try_parse_uint(hex.start, hex.len, &val)
+ && val < UINT32_MAX
+ && validate_char((uint32_t)val))
+ {
+ char escaped_buf[ESCAPED_SIZE];
+ size_t escaped_size = char_to_string((uint32_t)val, '\'', true, &escaped_buf);
+
+ PRINT_STR(printer, "'");
+ PRINT(printer_print_buf(printer, escaped_buf, escaped_size));
+ PRINT_STR(printer, "'");
+ } else {
+ INVALID(printer);
+ }
+ break;
+ case 'e':
+ OPEN_BRACE_IF_OUTSIDE_EXPR;
+ PRINT_STR(printer, "*");
+ PRINT(printer_print_const_str_literal(printer));
+ break;
+ case 'R':
+ case 'Q':
+ if (tag == 'R' && printer_eat(printer, 'e')) {
+ PRINT(printer_print_const_str_literal(printer));
+ } else {
+ OPEN_BRACE_IF_OUTSIDE_EXPR;
+ PRINT_STR(printer, "&");
+ if (tag != 'R') {
+ PRINT_STR(printer, "mut ");
+ }
+ PRINT(printer_print_const(printer, true));
+ }
+ break;
+ case 'A':
+ OPEN_BRACE_IF_OUTSIDE_EXPR;
+ PRINT_STR(printer, "[");
+ PRINT_SEP_LIST(printer, PRINT(printer_print_const(printer, true)), ", ");
+ PRINT_STR(printer, "]");
+ break;
+ case 'T':
+ OPEN_BRACE_IF_OUTSIDE_EXPR;
+ PRINT_STR(printer, "(");
+ PRINT_SEP_LIST_COUNT(printer, count, PRINT(printer_print_const(printer, true)), ", ");
+ if (count == 1) {
+ PRINT_STR(printer, ",");
+ }
+ PRINT_STR(printer, ")");
+ break;
+ case 'V':
+ OPEN_BRACE_IF_OUTSIDE_EXPR;
+ PRINT(printer_print_path(printer, true));
+ PARSE(printer, parser_ch, &tag);
+ switch(tag) {
+ case 'U':
+ break;
+ case 'T':
+ PRINT_STR(printer, "(");
+ PRINT_SEP_LIST(printer, PRINT(printer_print_const(printer, true)), ", ");
+ PRINT_STR(printer, ")");
+ break;
+ case 'S':
+ PRINT_STR(printer, " { ");
+ PRINT_SEP_LIST(printer, PRINT(printer_print_const_struct(printer)), ", ");
+ PRINT_STR(printer, " }");
+ break;
+ default:
+ INVALID(printer);
+ }
+ break;
+ case 'B':
+ PRINT(printer_print_backref(printer, in_value ? printer_print_const_in_value : printer_print_const_out_of_value, NULL));
+ break;
+ default:
+ INVALID(printer);
+ }
+#undef OPEN_BRACE_IF_OUTSIDE_EXPR
+
+ if (opened_brace) {
+ PRINT_STR(printer, "}");
+ }
+ printer_pop_depth(printer);
+
+ return OverflowOk;
+}
+
+/// A trait in a trait object may have some "existential projections"
+/// (i.e. associated type bindings) after it, which should be printed
+/// in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
+/// To this end, this method will keep the `<...>` of an 'I' path
+/// open, by omitting the `>`, and return `Ok(true)` in that case.
+static NODISCARD overflow_status printer_print_maybe_open_generics(struct printer *printer, bool *open) {
+ if (printer_eat(printer, 'B')) {
+ // NOTE(eddyb) the closure may not run if printing is being skipped,
+ // but in that case the returned boolean doesn't matter.
+ *open = false;
+ return printer_print_backref(printer, printer_print_maybe_open_generics, open);
+ } else if(printer_eat(printer, 'I')) {
+ PRINT(printer_print_path(printer, false));
+ PRINT_STR(printer, "<");
+ PRINT_SEP_LIST(printer, PRINT(printer_print_generic_arg(printer)), ", ");
+ *open = true;
+ return OverflowOk;
+ } else {
+ PRINT(printer_print_path(printer, false));
+ *open = false;
+ return OverflowOk;
+ }
+}
+
+static NODISCARD overflow_status printer_print_dyn_trait(struct printer *printer) {
+ bool open;
+ PRINT(printer_print_maybe_open_generics(printer, &open));
+
+ while (printer_eat(printer, 'p')) {
+ if (!open) {
+ PRINT_STR(printer, "<");
+ open = true;
+ } else {
+ PRINT_STR(printer, ", ");
+ }
+
+ struct ident name;
+ PARSE(printer, parser_ident, &name);
+
+ PRINT_IDENT(printer, &name);
+ PRINT_STR(printer, " = ");
+ PRINT(printer_print_type(printer));
+ }
+
+ if (open) {
+ PRINT_STR(printer, ">");
+ }
+
+ return OverflowOk;
+}
+
+static NODISCARD overflow_status printer_print_object_bounds(struct printer *printer) {
+ PRINT_SEP_LIST(printer, PRINT(printer_print_dyn_trait(printer)), " + ");
+ return OverflowOk;
+}
+
+static NODISCARD overflow_status printer_print_function_type(struct printer *printer) {
+ bool is_unsafe = printer_eat(printer, 'U');
+ const char *abi;
+ size_t abi_len;
+ if (printer_eat(printer, 'K')) {
+ if (printer_eat(printer, 'C')) {
+ abi = "C";
+ abi_len = 1;
+ } else {
+ struct ident abi_ident;
+ PARSE(printer, parser_ident, &abi_ident);
+ if (abi_ident.ascii_len == 0 || abi_ident.punycode_len != 0) {
+ INVALID(printer);
+ }
+ abi = abi_ident.ascii_start;
+ abi_len = abi_ident.ascii_len;
+ }
+ } else {
+ abi = NULL;
+ abi_len = 0;
+ }
+
+ if (is_unsafe) {
+ PRINT_STR(printer, "unsafe ");
+ }
+
+ if (abi != NULL) {
+ PRINT_STR(printer, "extern \"");
+
+ // replace _ with -
+ while (abi_len > 0) {
+ const char *minus = memchr(abi, '_', abi_len);
+ if (minus == NULL) {
+ PRINT(printer_print_buf(printer, (const char*)abi, abi_len));
+ break;
+ } else {
+ size_t space_to_minus = minus - abi;
+ PRINT(printer_print_buf(printer, (const char*)abi, space_to_minus));
+ PRINT_STR(printer, "-");
+ abi = minus + 1;
+ abi_len -= (space_to_minus + 1);
+ }
+ }
+
+ PRINT_STR(printer, "\" ");
+ }
+
+ PRINT_STR(printer, "fn(");
+ PRINT_SEP_LIST(printer, PRINT(printer_print_type(printer)), ", ");
+ PRINT_STR(printer, ")");
+
+ if (printer_eat(printer, 'u')) {
+ // Skip printing the return type if it's 'u', i.e. `()`.
+ } else {
+ PRINT_STR(printer, " -> ");
+ PRINT(printer_print_type(printer));
+ }
+
+ return OverflowOk;
+}
+
+static NODISCARD overflow_status printer_print_type_backref(struct printer *printer, bool *_arg) {
+ (void)_arg;
+ return printer_print_type(printer);
+}
+
+static NODISCARD overflow_status printer_print_type(struct printer *printer) {
+ uint8_t tag;
+ PARSE(printer, parser_ch, &tag);
+
+ const char *basic_ty = basic_type(tag);
+ if (basic_ty) {
+ return printer_print_str(printer, basic_ty);
+ }
+
+ uint64_t count;
+ uint64_t lt;
+
+ PARSE(printer, parser_push_depth);
+ switch (tag) {
+ case 'R':
+ case 'Q':
+ PRINT_STR(printer, "&");
+ if (printer_eat(printer, 'L')) {
+ PARSE(printer, parser_integer_62, &lt);
+ if (lt != 0) {
+ PRINT(printer_print_lifetime_from_index(printer, lt));
+ PRINT_STR(printer, " ");
+ }
+ }
+ if (tag != 'R') {
+ PRINT_STR(printer, "mut ");
+ }
+ PRINT(printer_print_type(printer));
+ break;
+ case 'P':
+ case 'O':
+ PRINT_STR(printer, "*");
+ if (tag != 'P') {
+ PRINT_STR(printer, "mut ");
+ } else {
+ PRINT_STR(printer, "const ");
+ }
+ PRINT(printer_print_type(printer));
+ break;
+ case 'A':
+ case 'S':
+ PRINT_STR(printer, "[");
+ PRINT(printer_print_type(printer));
+ if (tag == 'A') {
+ PRINT_STR(printer, "; ");
+ PRINT(printer_print_const(printer, true));
+ }
+ PRINT_STR(printer, "]");
+ break;
+ case 'T':
+ PRINT_STR(printer, "(");
+ PRINT_SEP_LIST_COUNT(printer, count, PRINT(printer_print_type(printer)), ", ");
+ if (count == 1) {
+ PRINT_STR(printer, ",");
+ }
+ PRINT_STR(printer, ")");
+ break;
+ case 'F':
+ PRINT(printer_in_binder(printer, printer_print_function_type));
+ break;
+ case 'D':
+ PRINT_STR(printer, "dyn ");
+ PRINT(printer_in_binder(printer, printer_print_object_bounds));
+
+ if (!printer_eat(printer, 'L')) {
+ INVALID(printer);
+ }
+ PARSE(printer, parser_integer_62, &lt);
+
+ if (lt != 0) {
+ PRINT_STR(printer, " + ");
+ PRINT(printer_print_lifetime_from_index(printer, lt));
+ }
+ break;
+ case 'B':
+ PRINT(printer_print_backref(printer, printer_print_type_backref, NULL));
+ break;
+ default:
+ // Go back to the tag, so `print_path` also sees it.
+ if (printer->status == DemangleOk && /* safety */ printer->parser.next > 0) {
+ printer->parser.next--;
+ }
+ PRINT(printer_print_path(printer, false));
+ }
+
+ printer_pop_depth(printer);
+ return OverflowOk;
+}
+
+NODISCARD static demangle_status rust_demangle_legacy_demangle(const char *s, size_t s_len, struct demangle_legacy *res, const char **rest)
+{
+ if (s_len > strlen(s)) {
+ // s_len only exists to shorten the string, this is not a buffer API
+ return DemangleInvalid;
+ }
+
+ const char *inner;
+ size_t inner_len;
+ if (s_len >= 3 && !strncmp(s, "_ZN", 3)) {
+ inner = s + 3;
+ inner_len = s_len - 3;
+ } else if (s_len >= 2 && !strncmp(s, "ZN", 2)) {
+ // On Windows, dbghelp strips leading underscores, so we accept "ZN...E"
+ // form too.
+ inner = s + 2;
+ inner_len = s_len - 2;
+ } else if (s_len >= 4 && !strncmp(s, "__ZN", 4)) {
+ // On OSX, symbols are prefixed with an extra _
+ inner = s + 4;
+ inner_len = s_len - 4;
+ } else {
+ return DemangleInvalid;
+ }
+
+ if (!str_isascii(inner, inner_len)) {
+ return DemangleInvalid;
+ }
+
+ size_t elements = 0;
+ const char *chars = inner;
+ size_t chars_len = inner_len;
+ if (chars_len == 0) {
+ return DemangleInvalid;
+ }
+ char c;
+ while ((c = *chars) != 'E') {
+ // Decode an identifier element's length
+ if (c < '0' || c > '9') {
+ return DemangleInvalid;
+ }
+ size_t len = 0;
+ while (c >= '0' && c <= '9') {
+ size_t d = c - '0';
+ if (len > SIZE_MAX / 10) {
+ return DemangleInvalid;
+ }
+ len *= 10;
+ if (len > SIZE_MAX - d) {
+ return DemangleInvalid;
+ }
+ len += d;
+
+ chars++;
+ chars_len--;
+ if (chars_len == 0) {
+ return DemangleInvalid;
+ }
+ c = *chars;
+ }
+
+ // Advance by the length
+ if (chars_len <= len) {
+ return DemangleInvalid;
+ }
+ chars += len;
+ chars_len -= len;
+ elements++;
+ }
+ *res = (struct demangle_legacy) { inner, inner_len, elements };
+ *rest = chars + 1;
+ return DemangleOk;
+}
+
+static bool is_rust_hash(const char *s, size_t len) {
+ if (len == 0 || s[0] != 'h') {
+ return false;
+ }
+
+ for (size_t i = 1; i < len; i++) {
+ if (!((s[i] >= '0' && s[i] <= '9') || (s[i] >= 'a' && s[i] <= 'f') || (s[i] >= 'A' && s[i] <= 'F'))) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+NODISCARD static overflow_status rust_demangle_legacy_display_demangle(struct demangle_legacy res, char *out, size_t len, bool alternate)
+{
+ struct printer printer = {
+ // not actually using the parser part of the printer, just keeping it to share the format functions
+ DemangleOk,
+ { NULL },
+ out,
+ len,
+ 0,
+ alternate
+ };
+ const char *inner = res.mangled;
+ for (size_t element = 0; element < res.elements; element++) {
+ size_t i = 0;
+ const char *rest;
+ for (rest = inner; rest < res.mangled + res.mangled_len && *rest >= '0' && *rest <= '9'; rest++) {
+ i *= 10;
+ i += *rest - '0';
+ }
+ if ((size_t)(res.mangled + res.mangled_len - rest) < i) {
+ // safety: shouldn't reach this place if the input string is validated. bail out.
+ // safety: we knwo rest <= res.mangled + res.mangled_len from the for-loop above
+ break;
+ }
+
+ size_t len = i;
+ inner = rest + len;
+
+ // From here on, inner contains a pointer to the next element, rest[:len] to the current one
+ if (alternate && element + 1 == res.elements && is_rust_hash(rest, i)) {
+ break;
+ }
+ if (element != 0) {
+ PRINT_STR(&printer, "::");
+ }
+
+ if (len >= 2 && !strncmp(rest, "_$", 2)) {
+ rest++;
+ len--;
+ }
+
+ while (len > 0) {
+ if (rest[0] == '.') {
+ if (len >= 2 && rest[1] == '.') {
+ PRINT_STR(&printer, "::");
+ rest += 2;
+ len -= 2;
+ } else {
+ PRINT_STR(&printer, ".");
+ rest += 1;
+ len -= 1;
+ }
+ } else if (rest[0] == '$') {
+ const char *escape = memchr(rest + 1, '$', len - 1);
+ if (escape == NULL) {
+ break;
+ }
+ const char *escape_start = rest + 1;
+ size_t escape_len = escape - (rest + 1);
+
+ size_t next_len = len - (escape + 1 - rest);
+ const char *next_rest = escape + 1;
+
+ char ch;
+ if ((escape_len == 2 && escape_start[0] == 'S' && escape_start[1] == 'P')) {
+ ch = '@';
+ } else if ((escape_len == 2 && escape_start[0] == 'B' && escape_start[1] == 'P')) {
+ ch = '*';
+ } else if ((escape_len == 2 && escape_start[0] == 'R' && escape_start[1] == 'F')) {
+ ch = '&';
+ } else if ((escape_len == 2 && escape_start[0] == 'L' && escape_start[1] == 'T')) {
+ ch = '<';
+ } else if ((escape_len == 2 && escape_start[0] == 'G' && escape_start[1] == 'T')) {
+ ch = '>';
+ } else if ((escape_len == 2 && escape_start[0] == 'L' && escape_start[1] == 'P')) {
+ ch = '(';
+ } else if ((escape_len == 2 && escape_start[0] == 'R' && escape_start[1] == 'P')) {
+ ch = ')';
+ } else if ((escape_len == 1 && escape_start[0] == 'C')) {
+ ch = ',';
+ } else {
+ if (escape_len > 1 && escape_start[0] == 'u') {
+ escape_start++;
+ escape_len--;
+ uint64_t val;
+ if (try_parse_uint(escape_start, escape_len, &val)
+ && val < UINT32_MAX
+ && validate_char((uint32_t)val))
+ {
+ if (!unicode_iscontrol(val)) {
+ uint8_t wchr[4];
+ size_t wchr_len = code_to_utf8(wchr, (uint32_t)val);
+ PRINT(printer_print_buf(&printer, (const char*)wchr, wchr_len));
+ len = next_len;
+ rest = next_rest;
+ continue;
+ }
+ }
+ }
+ break; // print the rest of this element raw
+ }
+ PRINT_CH(&printer, ch);
+ len = next_len;
+ rest = next_rest;
+ } else {
+ size_t j = 0;
+ for (;j < len && rest[j] != '$' && rest[j] != '.';j++);
+ if (j == len) {
+ break;
+ }
+ PRINT(printer_print_buf(&printer, rest, j));
+ rest += j;
+ len -= j;
+ }
+ }
+ PRINT(printer_print_buf(&printer, rest, len));
+ }
+
+ if (printer.out_len < OVERFLOW_MARGIN) {
+ return OverflowOverflow;
+ }
+ *printer.out = '\0';
+ return OverflowOk;
+}
+
+static bool is_symbol_like(const char *s, size_t len) {
+ // rust-demangle definition of symbol like: control characters and space are not symbol-like, all else is
+ for (size_t i = 0; i < len; i++) {
+ char ch = s[i];
+ if (!(ch >= 0x21 && ch <= 0x7e)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void rust_demangle_demangle(const char *s, struct demangle *res)
+{
+ // During ThinLTO LLVM may import and rename internal symbols, so strip out
+ // those endings first as they're one of the last manglings applied to symbol
+ // names.
+ const char *llvm = ".llvm.";
+ const char *found_llvm = strstr(s, llvm);
+ size_t s_len = strlen(s);
+ if (found_llvm) {
+ const char *all_hex_ptr = found_llvm + strlen(".llvm.");
+ bool all_hex = true;
+ for (;*all_hex_ptr;all_hex_ptr++) {
+ if (!(('0' <= *all_hex_ptr && *all_hex_ptr <= '9') ||
+ ('A' <= *all_hex_ptr && *all_hex_ptr <= 'F') ||
+ *all_hex_ptr == '@')) {
+ all_hex = false;
+ break;
+ }
+ }
+
+ if (all_hex) {
+ s_len = found_llvm - s;
+ }
+ }
+
+ const char *suffix;
+ struct demangle_legacy legacy;
+ demangle_status st = rust_demangle_legacy_demangle(s, s_len, &legacy, &suffix);
+ if (st == DemangleOk) {
+ *res = (struct demangle) {
+ .style=DemangleStyleLegacy,
+ .mangled=legacy.mangled,
+ .mangled_len=legacy.mangled_len,
+ .elements=legacy.elements,
+ .original=s,
+ .original_len=s_len,
+ .suffix=suffix,
+ .suffix_len=s_len - (suffix - s),
+ };
+ } else {
+ struct demangle_v0 v0;
+ st = rust_demangle_v0_demangle(s, s_len, &v0, &suffix);
+ if (st == DemangleOk) {
+ *res = (struct demangle) {
+ .style=DemangleStyleV0,
+ .mangled=v0.mangled,
+ .mangled_len=v0.mangled_len,
+ .elements=0,
+ .original=s,
+ .original_len=s_len,
+ .suffix=suffix,
+ .suffix_len=s_len - (suffix - s),
+ };
+ } else {
+ *res = (struct demangle) {
+ .style=DemangleStyleUnknown,
+ .mangled=NULL,
+ .mangled_len=0,
+ .elements=0,
+ .original=s,
+ .original_len=s_len,
+ .suffix=s,
+ .suffix_len=0,
+ };
+ }
+ }
+
+ // Output like LLVM IR adds extra period-delimited words. See if
+ // we are in that case and save the trailing words if so.
+ if (res->suffix_len) {
+ if (res->suffix[0] == '.' && is_symbol_like(res->suffix, res->suffix_len)) {
+ // Keep the suffix
+ } else {
+ // Reset the suffix and invalidate the demangling
+ res->style = DemangleStyleUnknown;
+ res->suffix_len = 0;
+ }
+ }
+}
+
+bool rust_demangle_is_known(struct demangle *res) {
+ return res->style != DemangleStyleUnknown;
+}
+
+overflow_status rust_demangle_display_demangle(struct demangle const *res, char *out, size_t len, bool alternate) {
+ size_t original_len = res->original_len;
+ size_t out_len;
+ switch (res->style) {
+ case DemangleStyleUnknown:
+ if (len < original_len) {
+ return OverflowOverflow;
+ } else {
+ memcpy(out, res->original, original_len);
+ out += original_len;
+ len -= original_len;
+ break;
+ }
+ break;
+ case DemangleStyleLegacy: {
+ struct demangle_legacy legacy = {
+ res->mangled,
+ res->mangled_len,
+ res->elements
+ };
+ if (rust_demangle_legacy_display_demangle(legacy, out, len, alternate) == OverflowOverflow) {
+ return OverflowOverflow;
+ }
+ out_len = strlen(out);
+ out += out_len;
+ len -= out_len;
+ break;
+ }
+ case DemangleStyleV0: {
+ struct demangle_v0 v0 = {
+ res->mangled,
+ res->mangled_len
+ };
+ if (rust_demangle_v0_display_demangle(v0, out, len, alternate) == OverflowOverflow) {
+ return OverflowOverflow;
+ }
+ out_len = strlen(out);
+ out += out_len;
+ len -= out_len;
+ break;
+ }
+ }
+ size_t suffix_len = res->suffix_len;
+ if (len < suffix_len || len - suffix_len < OVERFLOW_MARGIN) {
+ return OverflowOverflow;
+ }
+ memcpy(out, res->suffix, suffix_len);
+ out[suffix_len] = 0;
+ return OverflowOk;
+}
diff --git a/tools/perf/util/demangle-rust-v0.h b/tools/perf/util/demangle-rust-v0.h
new file mode 100644
index 000000000000..d0092818610a
--- /dev/null
+++ b/tools/perf/util/demangle-rust-v0.h
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+// The contents of this file come from the Rust rustc-demangle library, hosted
+// in the <https://github.com/rust-lang/rustc-demangle> repository, licensed
+// under "Apache-2.0 OR MIT". For copyright details, see
+// <https://github.com/rust-lang/rustc-demangle/blob/main/README.md>.
+// Please note that the file should be kept as close as possible to upstream.
+
+#ifndef _H_DEMANGLE_V0_H
+#define _H_DEMANGLE_V0_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+
+#if defined(__GNUC__) || defined(__clang__)
+#define DEMANGLE_NODISCARD __attribute__((warn_unused_result))
+#else
+#define DEMANGLE_NODISCARD
+#endif
+
+typedef enum {
+ OverflowOk,
+ OverflowOverflow
+} overflow_status;
+
+enum demangle_style {
+ DemangleStyleUnknown = 0,
+ DemangleStyleLegacy,
+ DemangleStyleV0,
+};
+
+// Not using a union here to make the struct easier to copy-paste if needed.
+struct demangle {
+ enum demangle_style style;
+ // points to the "mangled" part of the name,
+ // not including `ZN` or `R` prefixes.
+ const char *mangled;
+ size_t mangled_len;
+ // In DemangleStyleLegacy, is the number of path elements
+ size_t elements;
+ // while it's called "original", it will not contain `.llvm.9D1C9369@@16` suffixes
+ // that are to be ignored.
+ const char *original;
+ size_t original_len;
+ // Contains the part after the mangled name that is to be outputted,
+ // which can be `.exit.i.i` suffixes LLVM sometimes adds.
+ const char *suffix;
+ size_t suffix_len;
+};
+
+// if the length of the output buffer is less than `output_len-OVERFLOW_MARGIN`,
+// the demangler will return `OverflowOverflow` even if there is no overflow.
+#define OVERFLOW_MARGIN 4
+
+/// Demangle a C string that refers to a Rust symbol and put the demangle intermediate result in `res`.
+/// Beware that `res` contains references into `s`. If `s` is modified (or free'd) before calling
+/// `rust_demangle_display_demangle` behavior is undefined.
+///
+/// Use `rust_demangle_display_demangle` to convert it to an actual string.
+void rust_demangle_demangle(const char *s, struct demangle *res);
+
+/// Write the string in a `struct demangle` into a buffer.
+///
+/// Return `OverflowOk` if the output buffer was sufficiently big, `OverflowOverflow` if it wasn't.
+/// This function is `O(n)` in the length of the input + *output* [$], but the demangled output of demangling a symbol can
+/// be exponentially[$$] large, therefore it is recommended to have a sane bound (`rust-demangle`
+/// uses 1,000,000 bytes) on `len`.
+///
+/// `alternate`, if true, uses the less verbose alternate formatting (Rust `{:#}`) is used, which does not show
+/// symbol hashes and types of constant ints.
+///
+/// [$] It's `O(n * MAX_DEPTH)`, but `MAX_DEPTH` is a constant 300 and therefore it's `O(n)`
+/// [$$] Technically, bounded by `O(n^MAX_DEPTH)`, but this is practically exponential.
+DEMANGLE_NODISCARD overflow_status rust_demangle_display_demangle(struct demangle const *res, char *out, size_t len, bool alternate);
+
+/// Returns true if `res` refers to a known valid Rust demangling style, false if it's an unknown style.
+bool rust_demangle_is_known(struct demangle *res);
+
+#undef DEMANGLE_NODISCARD
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tools/perf/util/demangle-rust.c b/tools/perf/util/demangle-rust.c
deleted file mode 100644
index a659fc69f73a..000000000000
--- a/tools/perf/util/demangle-rust.c
+++ /dev/null
@@ -1,269 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <string.h>
-#include "debug.h"
-
-#include "demangle-rust.h"
-
-/*
- * Mangled Rust symbols look like this:
- *
- * _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
- *
- * The original symbol is:
- *
- * <std::sys::fd::FileDesc as core::ops::Drop>::drop
- *
- * The last component of the path is a 64-bit hash in lowercase hex, prefixed
- * with "h". Rust does not have a global namespace between crates, an illusion
- * which Rust maintains by using the hash to distinguish things that would
- * otherwise have the same symbol.
- *
- * Any path component not starting with a XID_Start character is prefixed with
- * "_".
- *
- * The following escape sequences are used:
- *
- * "," => $C$
- * "@" => $SP$
- * "*" => $BP$
- * "&" => $RF$
- * "<" => $LT$
- * ">" => $GT$
- * "(" => $LP$
- * ")" => $RP$
- * " " => $u20$
- * "'" => $u27$
- * "[" => $u5b$
- * "]" => $u5d$
- * "~" => $u7e$
- *
- * A double ".." means "::" and a single "." means "-".
- *
- * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
- */
-
-static const char *hash_prefix = "::h";
-static const size_t hash_prefix_len = 3;
-static const size_t hash_len = 16;
-
-static bool is_prefixed_hash(const char *start);
-static bool looks_like_rust(const char *sym, size_t len);
-static bool unescape(const char **in, char **out, const char *seq, char value);
-
-/*
- * INPUT:
- * sym: symbol that has been through BFD-demangling
- *
- * This function looks for the following indicators:
- *
- * 1. The hash must consist of "h" followed by 16 lowercase hex digits.
- *
- * 2. As a sanity check, the hash must use between 5 and 15 of the 16 possible
- * hex digits. This is true of 99.9998% of hashes so once in your life you
- * may see a false negative. The point is to notice path components that
- * could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In
- * this case a false positive (non-Rust symbol has an important path
- * component removed because it looks like a Rust hash) is worse than a
- * false negative (the rare Rust symbol is not demangled) so this sets the
- * balance in favor of false negatives.
- *
- * 3. There must be no characters other than a-zA-Z0-9 and _.:$
- *
- * 4. There must be no unrecognized $-sign sequences.
- *
- * 5. There must be no sequence of three or more dots in a row ("...").
- */
-bool
-rust_is_mangled(const char *sym)
-{
- size_t len, len_without_hash;
-
- if (!sym)
- return false;
-
- len = strlen(sym);
- if (len <= hash_prefix_len + hash_len)
- /* Not long enough to contain "::h" + hash + something else */
- return false;
-
- len_without_hash = len - (hash_prefix_len + hash_len);
- if (!is_prefixed_hash(sym + len_without_hash))
- return false;
-
- return looks_like_rust(sym, len_without_hash);
-}
-
-/*
- * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex
- * digits must comprise between 5 and 15 (inclusive) distinct digits.
- */
-static bool is_prefixed_hash(const char *str)
-{
- const char *end;
- bool seen[16];
- size_t i;
- int count;
-
- if (strncmp(str, hash_prefix, hash_prefix_len))
- return false;
- str += hash_prefix_len;
-
- memset(seen, false, sizeof(seen));
- for (end = str + hash_len; str < end; str++)
- if (*str >= '0' && *str <= '9')
- seen[*str - '0'] = true;
- else if (*str >= 'a' && *str <= 'f')
- seen[*str - 'a' + 10] = true;
- else
- return false;
-
- /* Count how many distinct digits seen */
- count = 0;
- for (i = 0; i < 16; i++)
- if (seen[i])
- count++;
-
- return count >= 5 && count <= 15;
-}
-
-static bool looks_like_rust(const char *str, size_t len)
-{
- const char *end = str + len;
-
- while (str < end)
- switch (*str) {
- case '$':
- if (!strncmp(str, "$C$", 3))
- str += 3;
- else if (!strncmp(str, "$SP$", 4)
- || !strncmp(str, "$BP$", 4)
- || !strncmp(str, "$RF$", 4)
- || !strncmp(str, "$LT$", 4)
- || !strncmp(str, "$GT$", 4)
- || !strncmp(str, "$LP$", 4)
- || !strncmp(str, "$RP$", 4))
- str += 4;
- else if (!strncmp(str, "$u20$", 5)
- || !strncmp(str, "$u27$", 5)
- || !strncmp(str, "$u5b$", 5)
- || !strncmp(str, "$u5d$", 5)
- || !strncmp(str, "$u7e$", 5))
- str += 5;
- else
- return false;
- break;
- case '.':
- /* Do not allow three or more consecutive dots */
- if (!strncmp(str, "...", 3))
- return false;
- /* Fall through */
- case 'a' ... 'z':
- case 'A' ... 'Z':
- case '0' ... '9':
- case '_':
- case ':':
- str++;
- break;
- default:
- return false;
- }
-
- return true;
-}
-
-/*
- * INPUT:
- * sym: symbol for which rust_is_mangled(sym) returns true
- *
- * The input is demangled in-place because the mangled name is always longer
- * than the demangled one.
- */
-void
-rust_demangle_sym(char *sym)
-{
- const char *in;
- char *out;
- const char *end;
-
- if (!sym)
- return;
-
- in = sym;
- out = sym;
- end = sym + strlen(sym) - (hash_prefix_len + hash_len);
-
- while (in < end)
- switch (*in) {
- case '$':
- if (!(unescape(&in, &out, "$C$", ',')
- || unescape(&in, &out, "$SP$", '@')
- || unescape(&in, &out, "$BP$", '*')
- || unescape(&in, &out, "$RF$", '&')
- || unescape(&in, &out, "$LT$", '<')
- || unescape(&in, &out, "$GT$", '>')
- || unescape(&in, &out, "$LP$", '(')
- || unescape(&in, &out, "$RP$", ')')
- || unescape(&in, &out, "$u20$", ' ')
- || unescape(&in, &out, "$u27$", '\'')
- || unescape(&in, &out, "$u5b$", '[')
- || unescape(&in, &out, "$u5d$", ']')
- || unescape(&in, &out, "$u7e$", '~'))) {
- pr_err("demangle-rust: unexpected escape sequence");
- goto done;
- }
- break;
- case '_':
- /*
- * If this is the start of a path component and the next
- * character is an escape sequence, ignore the
- * underscore. The mangler inserts an underscore to make
- * sure the path component begins with a XID_Start
- * character.
- */
- if ((in == sym || in[-1] == ':') && in[1] == '$')
- in++;
- else
- *out++ = *in++;
- break;
- case '.':
- if (in[1] == '.') {
- /* ".." becomes "::" */
- *out++ = ':';
- *out++ = ':';
- in += 2;
- } else {
- /* "." becomes "-" */
- *out++ = '-';
- in++;
- }
- break;
- case 'a' ... 'z':
- case 'A' ... 'Z':
- case '0' ... '9':
- case ':':
- *out++ = *in++;
- break;
- default:
- pr_err("demangle-rust: unexpected character '%c' in symbol\n",
- *in);
- goto done;
- }
-
-done:
- *out = '\0';
-}
-
-static bool unescape(const char **in, char **out, const char *seq, char value)
-{
- size_t len = strlen(seq);
-
- if (strncmp(*in, seq, len))
- return false;
-
- **out = value;
-
- *in += len;
- *out += 1;
-
- return true;
-}
diff --git a/tools/perf/util/demangle-rust.h b/tools/perf/util/demangle-rust.h
deleted file mode 100644
index 2fca618b1aa5..000000000000
--- a/tools/perf/util/demangle-rust.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __PERF_DEMANGLE_RUST
-#define __PERF_DEMANGLE_RUST 1
-
-bool rust_is_mangled(const char *str);
-void rust_demangle_sym(char *str);
-
-#endif /* __PERF_DEMANGLE_RUST */
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index 8f0eb56c6fc6..b1e4919d016f 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -1218,7 +1218,7 @@ int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, s
char *build_id_msg = NULL;
if (dso__has_build_id(dso)) {
- build_id__sprintf(dso__bid(dso), bf + 15);
+ build_id__snprintf(dso__bid(dso), bf + 15, sizeof(bf) - 15);
build_id_msg = bf;
}
scnprintf(buf, buflen,
@@ -2284,6 +2284,13 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
}
}
+ /* FIXME: LLVM and CAPSTONE should support source code */
+ if (options->annotate_src && !options->hide_src_code) {
+ err = symbol__disassemble_objdump(symfs_filename, sym, args);
+ if (err == 0)
+ goto out_remove_tmp;
+ }
+
err = -1;
for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) {
enum perf_disassembler dis = options->disassemblers[i];
diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c
index ddacef881af2..c0afcbd954f8 100644
--- a/tools/perf/util/dlfilter.c
+++ b/tools/perf/util/dlfilter.c
@@ -513,6 +513,7 @@ int dlfilter__do_filter_event(struct dlfilter *d,
d->d_addr_al = &d_addr_al;
d_sample.size = sizeof(d_sample);
+ d_sample.p_stage_cyc = sample->weight3;
d_ip_al.size = 0; /* To indicate d_ip_al is not initialized */
d_addr_al.size = 0; /* To indicate d_addr_al is not initialized */
@@ -526,7 +527,6 @@ int dlfilter__do_filter_event(struct dlfilter *d,
ASSIGN(period);
ASSIGN(weight);
ASSIGN(ins_lat);
- ASSIGN(p_stage_cyc);
ASSIGN(transaction);
ASSIGN(insn_cnt);
ASSIGN(cyc_cnt);
diff --git a/tools/perf/util/drm_pmu.c b/tools/perf/util/drm_pmu.c
new file mode 100644
index 000000000000..988890f37ba7
--- /dev/null
+++ b/tools/perf/util/drm_pmu.c
@@ -0,0 +1,686 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#include "drm_pmu.h"
+#include "counts.h"
+#include "cpumap.h"
+#include "debug.h"
+#include "evsel.h"
+#include "pmu.h"
+#include <perf/threadmap.h>
+#include <api/fs/fs.h>
+#include <api/io.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <linux/unistd.h>
+#include <linux/kcmp.h>
+#include <linux/zalloc.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+enum drm_pmu_unit {
+ DRM_PMU_UNIT_BYTES,
+ DRM_PMU_UNIT_CAPACITY,
+ DRM_PMU_UNIT_CYCLES,
+ DRM_PMU_UNIT_HZ,
+ DRM_PMU_UNIT_NS,
+
+ DRM_PMU_UNIT_MAX,
+};
+
+struct drm_pmu_event {
+ const char *name;
+ const char *desc;
+ enum drm_pmu_unit unit;
+};
+
+struct drm_pmu {
+ struct perf_pmu pmu;
+ struct drm_pmu_event *events;
+ int num_events;
+};
+
+static const char * const drm_pmu_unit_strs[DRM_PMU_UNIT_MAX] = {
+ "bytes",
+ "capacity",
+ "cycles",
+ "hz",
+ "ns",
+};
+
+static const char * const drm_pmu_scale_unit_strs[DRM_PMU_UNIT_MAX] = {
+ "1bytes",
+ "1capacity",
+ "1cycles",
+ "1hz",
+ "1ns",
+};
+
+bool perf_pmu__is_drm(const struct perf_pmu *pmu)
+{
+ return pmu && pmu->type >= PERF_PMU_TYPE_DRM_START &&
+ pmu->type <= PERF_PMU_TYPE_DRM_END;
+}
+
+bool evsel__is_drm(const struct evsel *evsel)
+{
+ return perf_pmu__is_drm(evsel->pmu);
+}
+
+static struct drm_pmu *add_drm_pmu(struct list_head *pmus, char *line, size_t line_len)
+{
+ struct drm_pmu *drm;
+ struct perf_pmu *pmu;
+ const char *name;
+ __u32 max_drm_pmu_type = 0, type;
+ int i = 12;
+
+ if (line[line_len - 1] == '\n')
+ line[line_len - 1] = '\0';
+ while (isspace(line[i]))
+ i++;
+
+ line[--i] = '_';
+ line[--i] = 'm';
+ line[--i] = 'r';
+ line[--i] = 'd';
+ name = &line[i];
+
+ list_for_each_entry(pmu, pmus, list) {
+ if (!perf_pmu__is_drm(pmu))
+ continue;
+ if (pmu->type > max_drm_pmu_type)
+ max_drm_pmu_type = pmu->type;
+ if (!strcmp(pmu->name, name)) {
+ /* PMU already exists. */
+ return NULL;
+ }
+ }
+
+ if (max_drm_pmu_type != 0)
+ type = max_drm_pmu_type + 1;
+ else
+ type = PERF_PMU_TYPE_DRM_START;
+
+ if (type > PERF_PMU_TYPE_DRM_END) {
+ zfree(&drm);
+ pr_err("Unable to encode DRM PMU type for %s\n", name);
+ return NULL;
+ }
+
+ drm = zalloc(sizeof(*drm));
+ if (!drm)
+ return NULL;
+
+ if (perf_pmu__init(&drm->pmu, type, name) != 0) {
+ perf_pmu__delete(&drm->pmu);
+ return NULL;
+ }
+
+ drm->pmu.cpus = perf_cpu_map__new("0");
+ if (!drm->pmu.cpus) {
+ perf_pmu__delete(&drm->pmu);
+ return NULL;
+ }
+ return drm;
+}
+
+
+static bool starts_with(const char *str, const char *prefix)
+{
+ return !strncmp(prefix, str, strlen(prefix));
+}
+
+static int add_event(struct drm_pmu_event **events, int *num_events,
+ const char *line, enum drm_pmu_unit unit, const char *desc)
+{
+ const char *colon = strchr(line, ':');
+ struct drm_pmu_event *tmp;
+
+ if (!colon)
+ return -EINVAL;
+
+ tmp = reallocarray(*events, *num_events + 1, sizeof(struct drm_pmu_event));
+ if (!tmp)
+ return -ENOMEM;
+ tmp[*num_events].unit = unit;
+ tmp[*num_events].desc = desc;
+ tmp[*num_events].name = strndup(line, colon - line);
+ if (!tmp[*num_events].name)
+ return -ENOMEM;
+ (*num_events)++;
+ *events = tmp;
+ return 0;
+}
+
+static int read_drm_pmus_cb(void *args, int fdinfo_dir_fd, const char *fd_name)
+{
+ struct list_head *pmus = args;
+ char buf[640];
+ struct io io;
+ char *line = NULL;
+ size_t line_len;
+ struct drm_pmu *drm = NULL;
+ struct drm_pmu_event *events = NULL;
+ int num_events = 0;
+
+ io__init(&io, openat(fdinfo_dir_fd, fd_name, O_RDONLY), buf, sizeof(buf));
+ if (io.fd == -1) {
+ /* Failed to open file, ignore. */
+ return 0;
+ }
+
+ while (io__getline(&io, &line, &line_len) > 0) {
+ if (starts_with(line, "drm-driver:")) {
+ drm = add_drm_pmu(pmus, line, line_len);
+ if (!drm)
+ break;
+ continue;
+ }
+ /*
+ * Note the string matching below is alphabetical, with more
+ * specific matches appearing before less specific.
+ */
+ if (starts_with(line, "drm-active-")) {
+ add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES,
+ "Total memory active in one or more engines");
+ continue;
+ }
+ if (starts_with(line, "drm-cycles-")) {
+ add_event(&events, &num_events, line, DRM_PMU_UNIT_CYCLES,
+ "Busy cycles");
+ continue;
+ }
+ if (starts_with(line, "drm-engine-capacity-")) {
+ add_event(&events, &num_events, line, DRM_PMU_UNIT_CAPACITY,
+ "Engine capacity");
+ continue;
+ }
+ if (starts_with(line, "drm-engine-")) {
+ add_event(&events, &num_events, line, DRM_PMU_UNIT_NS,
+ "Utilization in ns");
+ continue;
+ }
+ if (starts_with(line, "drm-maxfreq-")) {
+ add_event(&events, &num_events, line, DRM_PMU_UNIT_HZ,
+ "Maximum frequency");
+ continue;
+ }
+ if (starts_with(line, "drm-purgeable-")) {
+ add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES,
+ "Size of resident and purgeable memory buffers");
+ continue;
+ }
+ if (starts_with(line, "drm-resident-")) {
+ add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES,
+ "Size of resident memory buffers");
+ continue;
+ }
+ if (starts_with(line, "drm-shared-")) {
+ add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES,
+ "Size of shared memory buffers");
+ continue;
+ }
+ if (starts_with(line, "drm-total-cycles-")) {
+ add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES,
+ "Total busy cycles");
+ continue;
+ }
+ if (starts_with(line, "drm-total-")) {
+ add_event(&events, &num_events, line, DRM_PMU_UNIT_BYTES,
+ "Size of shared and private memory");
+ continue;
+ }
+ if (verbose > 1 && starts_with(line, "drm-") &&
+ !starts_with(line, "drm-client-id:") &&
+ !starts_with(line, "drm-pdev:"))
+ pr_debug("Unhandled DRM PMU fdinfo line match '%s'\n", line);
+ }
+ if (drm) {
+ drm->events = events;
+ drm->num_events = num_events;
+ list_add_tail(&drm->pmu.list, pmus);
+ }
+ free(line);
+ if (io.fd != -1)
+ close(io.fd);
+ return 0;
+}
+
+void drm_pmu__exit(struct perf_pmu *pmu)
+{
+ struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu);
+
+ free(drm->events);
+}
+
+bool drm_pmu__have_event(const struct perf_pmu *pmu, const char *name)
+{
+ struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu);
+
+ if (!starts_with(name, "drm-"))
+ return false;
+
+ for (int i = 0; i < drm->num_events; i++) {
+ if (!strcasecmp(drm->events[i].name, name))
+ return true;
+ }
+ return false;
+}
+
+int drm_pmu__for_each_event(const struct perf_pmu *pmu, void *state, pmu_event_callback cb)
+{
+ struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu);
+
+ for (int i = 0; i < drm->num_events; i++) {
+ char encoding_buf[128];
+ struct pmu_event_info info = {
+ .pmu = pmu,
+ .name = drm->events[i].name,
+ .alias = NULL,
+ .scale_unit = drm_pmu_scale_unit_strs[drm->events[i].unit],
+ .desc = drm->events[i].desc,
+ .long_desc = NULL,
+ .encoding_desc = encoding_buf,
+ .topic = "drm",
+ .pmu_name = pmu->name,
+ .event_type_desc = "DRM event",
+ };
+ int ret;
+
+ snprintf(encoding_buf, sizeof(encoding_buf), "%s/config=0x%x/", pmu->name, i);
+
+ ret = cb(state, &info);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+size_t drm_pmu__num_events(const struct perf_pmu *pmu)
+{
+ const struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu);
+
+ return drm->num_events;
+}
+
+static int drm_pmu__index_for_event(const struct drm_pmu *drm, const char *name)
+{
+ for (int i = 0; i < drm->num_events; i++) {
+ if (!strcmp(drm->events[i].name, name))
+ return i;
+ }
+ return -1;
+}
+
+static int drm_pmu__config_term(const struct drm_pmu *drm,
+ struct perf_event_attr *attr,
+ struct parse_events_term *term,
+ struct parse_events_error *err)
+{
+ if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) {
+ int i = drm_pmu__index_for_event(drm, term->config);
+
+ if (i >= 0) {
+ attr->config = i;
+ return 0;
+ }
+ }
+ if (err) {
+ char *err_str;
+
+ parse_events_error__handle(err, term->err_val,
+ asprintf(&err_str,
+ "unexpected drm event term (%s) %s",
+ parse_events__term_type_str(term->type_term),
+ term->config) < 0
+ ? strdup("unexpected drm event term")
+ : err_str,
+ NULL);
+ }
+ return -EINVAL;
+}
+
+int drm_pmu__config_terms(const struct perf_pmu *pmu,
+ struct perf_event_attr *attr,
+ struct parse_events_terms *terms,
+ struct parse_events_error *err)
+{
+ struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu);
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, &terms->terms, list) {
+ if (drm_pmu__config_term(drm, attr, term, err))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int drm_pmu__check_alias(const struct perf_pmu *pmu, struct parse_events_terms *terms,
+ struct perf_pmu_info *info, struct parse_events_error *err)
+{
+ struct drm_pmu *drm = container_of(pmu, struct drm_pmu, pmu);
+ struct parse_events_term *term =
+ list_first_entry(&terms->terms, struct parse_events_term, list);
+
+ if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) {
+ int i = drm_pmu__index_for_event(drm, term->config);
+
+ if (i >= 0) {
+ info->unit = drm_pmu_unit_strs[drm->events[i].unit];
+ info->scale = 1;
+ return 0;
+ }
+ }
+ if (err) {
+ char *err_str;
+
+ parse_events_error__handle(err, term->err_val,
+ asprintf(&err_str,
+ "unexpected drm event term (%s) %s",
+ parse_events__term_type_str(term->type_term),
+ term->config) < 0
+ ? strdup("unexpected drm event term")
+ : err_str,
+ NULL);
+ }
+ return -EINVAL;
+}
+
+struct minor_info {
+ unsigned int *minors;
+ int minors_num, minors_len;
+};
+
+static int for_each_drm_fdinfo_in_dir(int (*cb)(void *args, int fdinfo_dir_fd, const char *fd_name),
+ void *args, int proc_dir, const char *pid_name,
+ struct minor_info *minors)
+{
+ char buf[256];
+ DIR *fd_dir;
+ struct dirent *fd_entry;
+ int fd_dir_fd, fdinfo_dir_fd = -1;
+
+
+ scnprintf(buf, sizeof(buf), "%s/fd", pid_name);
+ fd_dir_fd = openat(proc_dir, buf, O_DIRECTORY);
+ if (fd_dir_fd == -1)
+ return 0; /* Presumably lost race to open. */
+ fd_dir = fdopendir(fd_dir_fd);
+ if (!fd_dir) {
+ close(fd_dir_fd);
+ return -ENOMEM;
+ }
+ while ((fd_entry = readdir(fd_dir)) != NULL) {
+ struct stat stat;
+ unsigned int minor;
+ bool is_dup = false;
+ int ret;
+
+ if (fd_entry->d_type != DT_LNK)
+ continue;
+
+ if (fstatat(fd_dir_fd, fd_entry->d_name, &stat, 0) != 0)
+ continue;
+
+ if ((stat.st_mode & S_IFMT) != S_IFCHR || major(stat.st_rdev) != 226)
+ continue;
+
+ minor = minor(stat.st_rdev);
+ for (int i = 0; i < minors->minors_num; i++) {
+ if (minor(stat.st_rdev) == minors->minors[i]) {
+ is_dup = true;
+ break;
+ }
+ }
+ if (is_dup)
+ continue;
+
+ if (minors->minors_num == minors->minors_len) {
+ unsigned int *tmp = reallocarray(minors->minors, minors->minors_len + 4,
+ sizeof(unsigned int));
+
+ if (tmp) {
+ minors->minors = tmp;
+ minors->minors_len += 4;
+ }
+ }
+ minors->minors[minors->minors_num++] = minor;
+ if (fdinfo_dir_fd == -1) {
+ /* Open fdinfo dir if we have a DRM fd. */
+ scnprintf(buf, sizeof(buf), "%s/fdinfo", pid_name);
+ fdinfo_dir_fd = openat(proc_dir, buf, O_DIRECTORY);
+ if (fdinfo_dir_fd == -1)
+ continue;
+ }
+ ret = cb(args, fdinfo_dir_fd, fd_entry->d_name);
+ if (ret)
+ return ret;
+ }
+ if (fdinfo_dir_fd != -1)
+ close(fdinfo_dir_fd);
+ closedir(fd_dir);
+ return 0;
+}
+
+static int for_each_drm_fdinfo(bool skip_all_duplicates,
+ int (*cb)(void *args, int fdinfo_dir_fd, const char *fd_name),
+ void *args)
+{
+ DIR *proc_dir;
+ struct dirent *proc_entry;
+ int ret;
+ /*
+ * minors maintains an array of DRM minor device numbers seen for a pid,
+ * or for all pids if skip_all_duplicates is true, so that duplicates
+ * are ignored.
+ */
+ struct minor_info minors = {
+ .minors = NULL,
+ .minors_num = 0,
+ .minors_len = 0,
+ };
+
+ proc_dir = opendir(procfs__mountpoint());
+ if (!proc_dir)
+ return 0;
+
+ /* Walk through the /proc directory. */
+ while ((proc_entry = readdir(proc_dir)) != NULL) {
+ if (proc_entry->d_type != DT_DIR ||
+ !isdigit(proc_entry->d_name[0]))
+ continue;
+ if (!skip_all_duplicates) {
+ /* Reset the seen minor numbers for each pid. */
+ minors.minors_num = 0;
+ }
+ ret = for_each_drm_fdinfo_in_dir(cb, args,
+ dirfd(proc_dir), proc_entry->d_name,
+ &minors);
+ if (ret)
+ break;
+ }
+ free(minors.minors);
+ closedir(proc_dir);
+ return ret;
+}
+
+int perf_pmus__read_drm_pmus(struct list_head *pmus)
+{
+ return for_each_drm_fdinfo(/*skip_all_duplicates=*/true, read_drm_pmus_cb, pmus);
+}
+
+int evsel__drm_pmu_open(struct evsel *evsel,
+ struct perf_thread_map *threads,
+ int start_cpu_map_idx, int end_cpu_map_idx)
+{
+ (void)evsel;
+ (void)threads;
+ (void)start_cpu_map_idx;
+ (void)end_cpu_map_idx;
+ return 0;
+}
+
+static uint64_t read_count_and_apply_unit(const char *count_and_unit, enum drm_pmu_unit unit)
+{
+ char *unit_ptr = NULL;
+ uint64_t count = strtoul(count_and_unit, &unit_ptr, 10);
+
+ if (!unit_ptr)
+ return 0;
+
+ while (isblank(*unit_ptr))
+ unit_ptr++;
+
+ switch (unit) {
+ case DRM_PMU_UNIT_BYTES:
+ if (*unit_ptr == '\0')
+ assert(count == 0); /* Generally undocumented, happens for 0. */
+ else if (!strcmp(unit_ptr, "KiB"))
+ count *= 1024;
+ else if (!strcmp(unit_ptr, "MiB"))
+ count *= 1024 * 1024;
+ else
+ pr_err("Unexpected bytes unit '%s'\n", unit_ptr);
+ break;
+ case DRM_PMU_UNIT_CAPACITY:
+ /* No units expected. */
+ break;
+ case DRM_PMU_UNIT_CYCLES:
+ /* No units expected. */
+ break;
+ case DRM_PMU_UNIT_HZ:
+ if (!strcmp(unit_ptr, "Hz"))
+ count *= 1;
+ else if (!strcmp(unit_ptr, "KHz"))
+ count *= 1000;
+ else if (!strcmp(unit_ptr, "MHz"))
+ count *= 1000000;
+ else
+ pr_err("Unexpected hz unit '%s'\n", unit_ptr);
+ break;
+ case DRM_PMU_UNIT_NS:
+ /* Only unit ns expected. */
+ break;
+ case DRM_PMU_UNIT_MAX:
+ default:
+ break;
+ }
+ return count;
+}
+
+static uint64_t read_drm_event(int fdinfo_dir_fd, const char *fd_name,
+ const char *match, enum drm_pmu_unit unit)
+{
+ char buf[640];
+ struct io io;
+ char *line = NULL;
+ size_t line_len;
+ uint64_t count = 0;
+
+ io__init(&io, openat(fdinfo_dir_fd, fd_name, O_RDONLY), buf, sizeof(buf));
+ if (io.fd == -1) {
+ /* Failed to open file, ignore. */
+ return 0;
+ }
+ while (io__getline(&io, &line, &line_len) > 0) {
+ size_t i = strlen(match);
+
+ if (strncmp(line, match, i))
+ continue;
+ if (line[i] != ':')
+ continue;
+ while (isblank(line[++i]))
+ ;
+ if (line[line_len - 1] == '\n')
+ line[line_len - 1] = '\0';
+ count = read_count_and_apply_unit(&line[i], unit);
+ break;
+ }
+ free(line);
+ close(io.fd);
+ return count;
+}
+
+struct read_drm_event_cb_args {
+ const char *match;
+ uint64_t count;
+ enum drm_pmu_unit unit;
+};
+
+static int read_drm_event_cb(void *vargs, int fdinfo_dir_fd, const char *fd_name)
+{
+ struct read_drm_event_cb_args *args = vargs;
+
+ args->count += read_drm_event(fdinfo_dir_fd, fd_name, args->match, args->unit);
+ return 0;
+}
+
+static uint64_t drm_pmu__read_system_wide(struct drm_pmu *drm, struct evsel *evsel)
+{
+ struct read_drm_event_cb_args args = {
+ .count = 0,
+ .match = drm->events[evsel->core.attr.config].name,
+ .unit = drm->events[evsel->core.attr.config].unit,
+ };
+
+ for_each_drm_fdinfo(/*skip_all_duplicates=*/false, read_drm_event_cb, &args);
+ return args.count;
+}
+
+static uint64_t drm_pmu__read_for_pid(struct drm_pmu *drm, struct evsel *evsel, int pid)
+{
+ struct read_drm_event_cb_args args = {
+ .count = 0,
+ .match = drm->events[evsel->core.attr.config].name,
+ .unit = drm->events[evsel->core.attr.config].unit,
+ };
+ struct minor_info minors = {
+ .minors = NULL,
+ .minors_num = 0,
+ .minors_len = 0,
+ };
+ int proc_dir = open(procfs__mountpoint(), O_DIRECTORY);
+ char pid_name[12];
+ int ret;
+
+ if (proc_dir < 0)
+ return 0;
+
+ snprintf(pid_name, sizeof(pid_name), "%d", pid);
+ ret = for_each_drm_fdinfo_in_dir(read_drm_event_cb, &args, proc_dir, pid_name, &minors);
+ free(minors.minors);
+ close(proc_dir);
+ return ret == 0 ? args.count : 0;
+}
+
+int evsel__drm_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
+{
+ struct drm_pmu *drm = container_of(evsel->pmu, struct drm_pmu, pmu);
+ struct perf_counts_values *count, *old_count = NULL;
+ int pid = perf_thread_map__pid(evsel->core.threads, thread);
+ uint64_t counter;
+
+ if (pid != -1)
+ counter = drm_pmu__read_for_pid(drm, evsel, pid);
+ else
+ counter = drm_pmu__read_system_wide(drm, evsel);
+
+ if (evsel->prev_raw_counts)
+ old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
+
+ count = perf_counts(evsel->counts, cpu_map_idx, thread);
+ if (old_count) {
+ count->val = old_count->val + counter;
+ count->run = old_count->run + 1;
+ count->ena = old_count->ena + 1;
+ } else {
+ count->val = counter;
+ count->run++;
+ count->ena++;
+ }
+ return 0;
+}
diff --git a/tools/perf/util/drm_pmu.h b/tools/perf/util/drm_pmu.h
new file mode 100644
index 000000000000..e7f366fca8a4
--- /dev/null
+++ b/tools/perf/util/drm_pmu.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __DRM_PMU_H
+#define __DRM_PMU_H
+/*
+ * Linux DRM clients expose information through usage stats as documented in
+ * Documentation/gpu/drm-usage-stats.rst (available online at
+ * https://docs.kernel.org/gpu/drm-usage-stats.html). This is a tool like PMU
+ * that exposes DRM information.
+ */
+
+#include "pmu.h"
+#include <stdbool.h>
+
+struct list_head;
+struct perf_thread_map;
+
+void drm_pmu__exit(struct perf_pmu *pmu);
+bool drm_pmu__have_event(const struct perf_pmu *pmu, const char *name);
+int drm_pmu__for_each_event(const struct perf_pmu *pmu, void *state, pmu_event_callback cb);
+size_t drm_pmu__num_events(const struct perf_pmu *pmu);
+int drm_pmu__config_terms(const struct perf_pmu *pmu,
+ struct perf_event_attr *attr,
+ struct parse_events_terms *terms,
+ struct parse_events_error *err);
+int drm_pmu__check_alias(const struct perf_pmu *pmu, struct parse_events_terms *terms,
+ struct perf_pmu_info *info, struct parse_events_error *err);
+
+
+bool perf_pmu__is_drm(const struct perf_pmu *pmu);
+bool evsel__is_drm(const struct evsel *evsel);
+
+int perf_pmus__read_drm_pmus(struct list_head *pmus);
+
+int evsel__drm_pmu_open(struct evsel *evsel,
+ struct perf_thread_map *threads,
+ int start_cpu_map_idx, int end_cpu_map_idx);
+int evsel__drm_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread);
+
+#endif /* __DRM_PMU_H */
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 8619b6eea62d..282e3af85d5a 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -217,7 +217,7 @@ int dso__read_binary_type_filename(const struct dso *dso,
break;
}
- build_id__sprintf(dso__bid_const(dso), build_id_hex);
+ build_id__snprintf(dso__bid(dso), build_id_hex, sizeof(build_id_hex));
len = __symbol__join_symfs(filename, size, "/usr/lib/debug/.build-id/");
snprintf(filename + len, size - len, "%.2s/%s.debug",
build_id_hex, build_id_hex + 2);
@@ -1349,6 +1349,16 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
return dso;
}
+static void __dso__set_long_name_id(struct dso *dso, const char *name, bool name_allocated)
+{
+ if (dso__long_name_allocated(dso))
+ free((char *)dso__long_name(dso));
+
+ RC_CHK_ACCESS(dso)->long_name = name;
+ RC_CHK_ACCESS(dso)->long_name_len = strlen(name);
+ dso__set_long_name_allocated(dso, name_allocated);
+}
+
static void dso__set_long_name_id(struct dso *dso, const char *name, bool name_allocated)
{
struct dsos *dsos = dso__dsos(dso);
@@ -1362,81 +1372,86 @@ static void dso__set_long_name_id(struct dso *dso, const char *name, bool name_a
* renaming the dso.
*/
down_write(&dsos->lock);
- }
-
- if (dso__long_name_allocated(dso))
- free((char *)dso__long_name(dso));
-
- RC_CHK_ACCESS(dso)->long_name = name;
- RC_CHK_ACCESS(dso)->long_name_len = strlen(name);
- dso__set_long_name_allocated(dso, name_allocated);
-
- if (dsos) {
+ __dso__set_long_name_id(dso, name, name_allocated);
dsos->sorted = false;
up_write(&dsos->lock);
+ } else {
+ __dso__set_long_name_id(dso, name, name_allocated);
}
}
static int __dso_id__cmp(const struct dso_id *a, const struct dso_id *b)
{
- if (a->maj > b->maj) return -1;
- if (a->maj < b->maj) return 1;
+ if (a->mmap2_valid && b->mmap2_valid) {
+ if (a->maj > b->maj) return -1;
+ if (a->maj < b->maj) return 1;
- if (a->min > b->min) return -1;
- if (a->min < b->min) return 1;
+ if (a->min > b->min) return -1;
+ if (a->min < b->min) return 1;
- if (a->ino > b->ino) return -1;
- if (a->ino < b->ino) return 1;
-
- /*
- * Synthesized MMAP events have zero ino_generation, avoid comparing
- * them with MMAP events with actual ino_generation.
- *
- * I found it harmful because the mismatch resulted in a new
- * dso that did not have a build ID whereas the original dso did have a
- * build ID. The build ID was essential because the object was not found
- * otherwise. - Adrian
- */
- if (a->ino_generation && b->ino_generation) {
+ if (a->ino > b->ino) return -1;
+ if (a->ino < b->ino) return 1;
+ }
+ if (a->mmap2_ino_generation_valid && b->mmap2_ino_generation_valid) {
if (a->ino_generation > b->ino_generation) return -1;
if (a->ino_generation < b->ino_generation) return 1;
}
-
+ if (build_id__is_defined(&a->build_id) && build_id__is_defined(&b->build_id)) {
+ if (a->build_id.size != b->build_id.size)
+ return a->build_id.size < b->build_id.size ? -1 : 1;
+ return memcmp(a->build_id.data, b->build_id.data, a->build_id.size);
+ }
return 0;
}
-bool dso_id__empty(const struct dso_id *id)
-{
- if (!id)
- return true;
-
- return !id->maj && !id->min && !id->ino && !id->ino_generation;
-}
+const struct dso_id dso_id_empty = {
+ {
+ .maj = 0,
+ .min = 0,
+ .ino = 0,
+ .ino_generation = 0,
+ },
+ .mmap2_valid = false,
+ .mmap2_ino_generation_valid = false,
+ {
+ .size = 0,
+ }
+};
-void __dso__inject_id(struct dso *dso, const struct dso_id *id)
+void __dso__improve_id(struct dso *dso, const struct dso_id *id)
{
struct dsos *dsos = dso__dsos(dso);
struct dso_id *dso_id = dso__id(dso);
+ bool changed = false;
/* dsos write lock held by caller. */
- dso_id->maj = id->maj;
- dso_id->min = id->min;
- dso_id->ino = id->ino;
- dso_id->ino_generation = id->ino_generation;
-
- if (dsos)
+ if (id->mmap2_valid && !dso_id->mmap2_valid) {
+ dso_id->maj = id->maj;
+ dso_id->min = id->min;
+ dso_id->ino = id->ino;
+ dso_id->mmap2_valid = true;
+ changed = true;
+ }
+ if (id->mmap2_ino_generation_valid && !dso_id->mmap2_ino_generation_valid) {
+ dso_id->ino_generation = id->ino_generation;
+ dso_id->mmap2_ino_generation_valid = true;
+ changed = true;
+ }
+ if (build_id__is_defined(&id->build_id) && !build_id__is_defined(&dso_id->build_id)) {
+ dso_id->build_id = id->build_id;
+ changed = true;
+ }
+ if (changed && dsos)
dsos->sorted = false;
}
int dso_id__cmp(const struct dso_id *a, const struct dso_id *b)
{
- /*
- * The second is always dso->id, so zeroes if not set, assume passing
- * NULL for a means a zeroed id
- */
- if (dso_id__empty(a) || dso_id__empty(b))
+ if (a == &dso_id_empty || b == &dso_id_empty) {
+ /* There is no valid data to compare so the comparison always returns identical. */
return 0;
+ }
return __dso_id__cmp(a, b);
}
@@ -1451,6 +1466,16 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
dso__set_long_name_id(dso, name, name_allocated);
}
+static void __dso__set_short_name(struct dso *dso, const char *name, bool name_allocated)
+{
+ if (dso__short_name_allocated(dso))
+ free((char *)dso__short_name(dso));
+
+ RC_CHK_ACCESS(dso)->short_name = name;
+ RC_CHK_ACCESS(dso)->short_name_len = strlen(name);
+ dso__set_short_name_allocated(dso, name_allocated);
+}
+
void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated)
{
struct dsos *dsos = dso__dsos(dso);
@@ -1464,17 +1489,11 @@ void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated)
* renaming the dso.
*/
down_write(&dsos->lock);
- }
- if (dso__short_name_allocated(dso))
- free((char *)dso__short_name(dso));
-
- RC_CHK_ACCESS(dso)->short_name = name;
- RC_CHK_ACCESS(dso)->short_name_len = strlen(name);
- dso__set_short_name_allocated(dso, name_allocated);
-
- if (dsos) {
+ __dso__set_short_name(dso, name, name_allocated);
dsos->sorted = false;
up_write(&dsos->lock);
+ } else {
+ __dso__set_short_name(dso, name, name_allocated);
}
}
@@ -1533,7 +1552,6 @@ struct dso *dso__new_id(const char *name, const struct dso_id *id)
dso->loaded = 0;
dso->rel = 0;
dso->sorted_by_name = 0;
- dso->has_build_id = 0;
dso->has_srcline = 1;
dso->a2l_fails = 1;
dso->kernel = DSO_SPACE__USER;
@@ -1605,6 +1623,10 @@ struct dso *dso__get(struct dso *dso)
void dso__put(struct dso *dso)
{
+#ifdef REFCNT_CHECKING
+ if (dso && dso__data(dso) && refcount_read(&RC_CHK_ACCESS(dso)->refcnt) == 2)
+ dso__data_close(dso);
+#endif
if (dso && refcount_dec_and_test(&RC_CHK_ACCESS(dso)->refcnt))
dso__delete(dso);
else
@@ -1638,15 +1660,14 @@ int dso__swap_init(struct dso *dso, unsigned char eidata)
return 0;
}
-void dso__set_build_id(struct dso *dso, struct build_id *bid)
+void dso__set_build_id(struct dso *dso, const struct build_id *bid)
{
- RC_CHK_ACCESS(dso)->bid = *bid;
- RC_CHK_ACCESS(dso)->has_build_id = 1;
+ dso__id(dso)->build_id = *bid;
}
-bool dso__build_id_equal(const struct dso *dso, struct build_id *bid)
+bool dso__build_id_equal(const struct dso *dso, const struct build_id *bid)
{
- const struct build_id *dso_bid = dso__bid_const(dso);
+ const struct build_id *dso_bid = dso__bid(dso);
if (dso_bid->size > bid->size && dso_bid->size == BUILD_ID_SIZE) {
/*
@@ -1665,18 +1686,20 @@ bool dso__build_id_equal(const struct dso *dso, struct build_id *bid)
void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine)
{
char path[PATH_MAX];
+ struct build_id bid = { .size = 0, };
if (machine__is_default_guest(machine))
return;
sprintf(path, "%s/sys/kernel/notes", machine->root_dir);
- if (sysfs__read_build_id(path, dso__bid(dso)) == 0)
- dso__set_has_build_id(dso);
+ sysfs__read_build_id(path, &bid);
+ dso__set_build_id(dso, &bid);
}
int dso__kernel_module_get_build_id(struct dso *dso,
const char *root_dir)
{
char filename[PATH_MAX];
+ struct build_id bid = { .size = 0, };
/*
* kernel module short names are of the form "[module]" and
* we need just "module" here.
@@ -1687,9 +1710,8 @@ int dso__kernel_module_get_build_id(struct dso *dso,
"%s/sys/module/%.*s/notes/.note.gnu.build-id",
root_dir, (int)strlen(name) - 1, name);
- if (sysfs__read_build_id(filename, dso__bid(dso)) == 0)
- dso__set_has_build_id(dso);
-
+ sysfs__read_build_id(filename, &bid);
+ dso__set_build_id(dso, &bid);
return 0;
}
@@ -1697,7 +1719,7 @@ static size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
{
char sbuild_id[SBUILD_ID_SIZE];
- build_id__sprintf(dso__bid(dso), sbuild_id);
+ build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id));
return fprintf(fp, "%s", sbuild_id);
}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index c87564471f9b..3457d713d3c5 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -185,14 +185,33 @@ enum dso_load_errno {
#define DSO__DATA_CACHE_SIZE 4096
#define DSO__DATA_CACHE_MASK ~(DSO__DATA_CACHE_SIZE - 1)
-/*
- * Data about backing storage DSO, comes from PERF_RECORD_MMAP2 meta events
+/**
+ * struct dso_id
+ *
+ * Data about backing storage DSO, comes from PERF_RECORD_MMAP2 meta events,
+ * reading from /proc/pid/maps or synthesis of build_ids from DSOs. Possibly
+ * incomplete at any particular use.
*/
struct dso_id {
- u32 maj;
- u32 min;
- u64 ino;
- u64 ino_generation;
+ /* Data related to the mmap2 event or read from /proc/pid/maps. */
+ struct {
+ u32 maj;
+ u32 min;
+ u64 ino;
+ u64 ino_generation;
+ };
+ /** @mmap2_valid: Are the maj, min and ino fields valid? */
+ bool mmap2_valid;
+ /**
+ * @mmap2_ino_generation_valid: Is the ino_generation valid? Generally
+ * false for /proc/pid/maps mmap event.
+ */
+ bool mmap2_ino_generation_valid;
+ /**
+ * @build_id: A possibly populated build_id. build_id__is_defined checks
+ * whether it is populated.
+ */
+ struct build_id build_id;
};
struct dso_cache {
@@ -243,7 +262,6 @@ DECLARE_RC_STRUCT(dso) {
u64 addr;
struct symbol *symbol;
} last_find_result;
- struct build_id bid;
u64 text_offset;
u64 text_end;
const char *short_name;
@@ -276,7 +294,6 @@ DECLARE_RC_STRUCT(dso) {
enum dso_swap_type needs_swap:2;
bool is_kmod:1;
u8 adjust_symbols:1;
- u8 has_build_id:1;
u8 header_build_id:1;
u8 has_srcline:1;
u8 hit:1;
@@ -292,6 +309,9 @@ DECLARE_RC_STRUCT(dso) {
};
extern struct mutex _dso__data_open_lock;
+extern const struct dso_id dso_id_empty;
+
+int dso_id__cmp(const struct dso_id *a, const struct dso_id *b);
/* dso__for_each_symbol - iterate over the symbols of given type
*
@@ -362,31 +382,11 @@ static inline void dso__set_auxtrace_cache(struct dso *dso, struct auxtrace_cach
RC_CHK_ACCESS(dso)->auxtrace_cache = cache;
}
-static inline struct build_id *dso__bid(struct dso *dso)
-{
- return &RC_CHK_ACCESS(dso)->bid;
-}
-
-static inline const struct build_id *dso__bid_const(const struct dso *dso)
-{
- return &RC_CHK_ACCESS(dso)->bid;
-}
-
static inline struct dso_bpf_prog *dso__bpf_prog(struct dso *dso)
{
return &RC_CHK_ACCESS(dso)->bpf_prog;
}
-static inline bool dso__has_build_id(const struct dso *dso)
-{
- return RC_CHK_ACCESS(dso)->has_build_id;
-}
-
-static inline void dso__set_has_build_id(struct dso *dso)
-{
- RC_CHK_ACCESS(dso)->has_build_id = true;
-}
-
static inline bool dso__has_srcline(const struct dso *dso)
{
return RC_CHK_ACCESS(dso)->has_srcline;
@@ -462,6 +462,16 @@ static inline const struct dso_id *dso__id_const(const struct dso *dso)
return &RC_CHK_ACCESS(dso)->id;
}
+static inline const struct build_id *dso__bid(const struct dso *dso)
+{
+ return &dso__id_const(dso)->build_id;
+}
+
+static inline bool dso__has_build_id(const struct dso *dso)
+{
+ return build_id__is_defined(dso__bid(dso));
+}
+
static inline struct rb_root_cached *dso__inlined_nodes(struct dso *dso)
{
return &RC_CHK_ACCESS(dso)->inlined_nodes;
@@ -699,9 +709,6 @@ static inline void dso__set_text_offset(struct dso *dso, u64 val)
RC_CHK_ACCESS(dso)->text_offset = val;
}
-int dso_id__cmp(const struct dso_id *a, const struct dso_id *b);
-bool dso_id__empty(const struct dso_id *id);
-
struct dso *dso__new_id(const char *name, const struct dso_id *id);
struct dso *dso__new(const char *name);
void dso__delete(struct dso *dso);
@@ -709,7 +716,7 @@ void dso__delete(struct dso *dso);
int dso__cmp_id(struct dso *a, struct dso *b);
void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated);
void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated);
-void __dso__inject_id(struct dso *dso, const struct dso_id *id);
+void __dso__improve_id(struct dso *dso, const struct dso_id *id);
int dso__name_len(const struct dso *dso);
@@ -739,8 +746,8 @@ void dso__sort_by_name(struct dso *dso);
int dso__swap_init(struct dso *dso, unsigned char eidata);
-void dso__set_build_id(struct dso *dso, struct build_id *bid);
-bool dso__build_id_equal(const struct dso *dso, struct build_id *bid);
+void dso__set_build_id(struct dso *dso, const struct build_id *bid);
+bool dso__build_id_equal(const struct dso *dso, const struct build_id *bid);
void dso__read_running_kernel_build_id(struct dso *dso,
struct machine *machine);
int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir);
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
index e0998e2a7c4e..0a7645c7fae7 100644
--- a/tools/perf/util/dsos.c
+++ b/tools/perf/util/dsos.c
@@ -72,6 +72,7 @@ static int dsos__read_build_ids_cb(struct dso *dso, void *data)
{
struct dsos__read_build_ids_cb_args *args = data;
struct nscookie nsc;
+ struct build_id bid = { .size = 0, };
if (args->with_hits && !dso__hit(dso) && !dso__is_vdso(dso))
return 0;
@@ -80,15 +81,15 @@ static int dsos__read_build_ids_cb(struct dso *dso, void *data)
return 0;
}
nsinfo__mountns_enter(dso__nsinfo(dso), &nsc);
- if (filename__read_build_id(dso__long_name(dso), dso__bid(dso)) > 0) {
+ if (filename__read_build_id(dso__long_name(dso), &bid) > 0) {
+ dso__set_build_id(dso, &bid);
args->have_build_id = true;
- dso__set_has_build_id(dso);
} else if (errno == ENOENT && dso__nsinfo(dso)) {
char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso));
- if (new_name && filename__read_build_id(new_name, dso__bid(dso)) > 0) {
+ if (new_name && filename__read_build_id(new_name, &bid) > 0) {
+ dso__set_build_id(dso, &bid);
args->have_build_id = true;
- dso__set_has_build_id(dso);
}
free(new_name);
}
@@ -157,6 +158,7 @@ static struct dso *__dsos__find_by_longname_id(struct dsos *dsos,
const char *name,
const struct dso_id *id,
bool write_locked)
+ SHARED_LOCKS_REQUIRED(dsos->lock)
{
struct dsos__key key = {
.long_name = name,
@@ -262,6 +264,7 @@ static int dsos__find_id_cb(struct dso *dso, void *data)
static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, const struct dso_id *id,
bool cmp_short, bool write_locked)
+ SHARED_LOCKS_REQUIRED(dsos->lock)
{
struct dso *res;
@@ -284,7 +287,7 @@ struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
struct dso *res;
down_read(&dsos->lock);
- res = __dsos__find_id(dsos, name, NULL, cmp_short, /*write_locked=*/false);
+ res = __dsos__find_id(dsos, name, &dso_id_empty, cmp_short, /*write_locked=*/false);
up_read(&dsos->lock);
return res;
}
@@ -338,11 +341,12 @@ static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, const
}
static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, const struct dso_id *id)
+ SHARED_LOCKS_REQUIRED(dsos->lock)
{
struct dso *dso = __dsos__find_id(dsos, name, id, false, /*write_locked=*/true);
- if (dso && dso_id__empty(dso__id(dso)) && !dso_id__empty(id))
- __dso__inject_id(dso, id);
+ if (dso)
+ __dso__improve_id(dso, id);
return dso ? dso : __dsos__addnew_id(dsos, name, id);
}
@@ -370,7 +374,7 @@ static int dsos__fprintf_buildid_cb(struct dso *dso, void *data)
if (args->skip && args->skip(dso, args->parm))
return 0;
- build_id__sprintf(dso__bid(dso), sbuild_id);
+ build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id));
args->ret += fprintf(args->fp, "%-40s %s\n", sbuild_id, dso__long_name(dso));
return 0;
}
@@ -433,7 +437,8 @@ struct dso *dsos__findnew_module_dso(struct dsos *dsos,
down_write(&dsos->lock);
- dso = __dsos__find_id(dsos, m->name, NULL, /*cmp_short=*/true, /*write_locked=*/true);
+ dso = __dsos__find_id(dsos, m->name, &dso_id_empty, /*cmp_short=*/true,
+ /*write_locked=*/true);
if (dso) {
up_write(&dsos->lock);
return dso;
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 36411749e007..c8c248754621 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -3,8 +3,10 @@
#include "debug.h"
#include "env.h"
#include "util/header.h"
-#include "linux/compiler.h"
+#include "util/rwsem.h"
+#include <linux/compiler.h>
#include <linux/ctype.h>
+#include <linux/rbtree.h>
#include <linux/string.h>
#include <linux/zalloc.h>
#include "cgroup.h"
@@ -17,8 +19,6 @@
#include "strbuf.h"
#include "trace/beauty/beauty.h"
-struct perf_env perf_env;
-
#ifdef HAVE_LIBBPF_SUPPORT
#include "bpf-event.h"
#include "bpf-utils.h"
@@ -89,6 +89,20 @@ out:
return node;
}
+void perf_env__iterate_bpf_prog_info(struct perf_env *env,
+ void (*cb)(struct bpf_prog_info_node *node,
+ void *data),
+ void *data)
+{
+ struct rb_node *first;
+
+ down_read(&env->bpf_progs.lock);
+ first = rb_first(&env->bpf_progs.infos);
+ for (struct rb_node *node = first; node != NULL; node = rb_next(node))
+ (*cb)(rb_entry(node, struct bpf_prog_info_node, rb_node), data);
+ up_read(&env->bpf_progs.lock);
+}
+
bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
{
bool ret;
@@ -174,6 +188,7 @@ static void perf_env__purge_bpf(struct perf_env *env)
next = rb_next(&node->rb_node);
rb_erase(&node->rb_node, root);
zfree(&node->info_linear);
+ bpf_metadata_free(node->metadata);
free(node);
}
@@ -254,6 +269,7 @@ void perf_env__exit(struct perf_env *env)
void perf_env__init(struct perf_env *env)
{
+ memset(env, 0, sizeof(*env));
#ifdef HAVE_LIBBPF_SUPPORT
env->bpf_progs.infos = RB_ROOT;
env->bpf_progs.btfs = RB_ROOT;
@@ -416,6 +432,116 @@ static int perf_env__read_nr_cpus_avail(struct perf_env *env)
return env->nr_cpus_avail ? 0 : -ENOENT;
}
+static int __perf_env__read_core_pmu_caps(const struct perf_pmu *pmu,
+ int *nr_caps, char ***caps,
+ unsigned int *max_branches,
+ unsigned int *br_cntr_nr,
+ unsigned int *br_cntr_width)
+{
+ struct perf_pmu_caps *pcaps = NULL;
+ char *ptr, **tmp;
+ int ret = 0;
+
+ *nr_caps = 0;
+ *caps = NULL;
+
+ if (!pmu->nr_caps)
+ return 0;
+
+ *caps = calloc(pmu->nr_caps, sizeof(char *));
+ if (!*caps)
+ return -ENOMEM;
+
+ tmp = *caps;
+ list_for_each_entry(pcaps, &pmu->caps, list) {
+ if (asprintf(&ptr, "%s=%s", pcaps->name, pcaps->value) < 0) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ *tmp++ = ptr;
+
+ if (!strcmp(pcaps->name, "branches"))
+ *max_branches = atoi(pcaps->value);
+ else if (!strcmp(pcaps->name, "branch_counter_nr"))
+ *br_cntr_nr = atoi(pcaps->value);
+ else if (!strcmp(pcaps->name, "branch_counter_width"))
+ *br_cntr_width = atoi(pcaps->value);
+ }
+ *nr_caps = pmu->nr_caps;
+ return 0;
+error:
+ while (tmp-- != *caps)
+ zfree(tmp);
+ zfree(caps);
+ *nr_caps = 0;
+ return ret;
+}
+
+int perf_env__read_core_pmu_caps(struct perf_env *env)
+{
+ struct pmu_caps *pmu_caps;
+ struct perf_pmu *pmu = NULL;
+ int nr_pmu, i = 0, j;
+ int ret;
+
+ nr_pmu = perf_pmus__num_core_pmus();
+
+ if (!nr_pmu)
+ return -ENODEV;
+
+ if (nr_pmu == 1) {
+ pmu = perf_pmus__find_core_pmu();
+ if (!pmu)
+ return -ENODEV;
+ ret = perf_pmu__caps_parse(pmu);
+ if (ret < 0)
+ return ret;
+ return __perf_env__read_core_pmu_caps(pmu, &env->nr_cpu_pmu_caps,
+ &env->cpu_pmu_caps,
+ &env->max_branches,
+ &env->br_cntr_nr,
+ &env->br_cntr_width);
+ }
+
+ pmu_caps = calloc(nr_pmu, sizeof(*pmu_caps));
+ if (!pmu_caps)
+ return -ENOMEM;
+
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ if (perf_pmu__caps_parse(pmu) <= 0)
+ continue;
+ ret = __perf_env__read_core_pmu_caps(pmu, &pmu_caps[i].nr_caps,
+ &pmu_caps[i].caps,
+ &pmu_caps[i].max_branches,
+ &pmu_caps[i].br_cntr_nr,
+ &pmu_caps[i].br_cntr_width);
+ if (ret)
+ goto error;
+
+ pmu_caps[i].pmu_name = strdup(pmu->name);
+ if (!pmu_caps[i].pmu_name) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ i++;
+ }
+
+ env->nr_pmus_with_caps = nr_pmu;
+ env->pmu_caps = pmu_caps;
+
+ return 0;
+error:
+ for (i = 0; i < nr_pmu; i++) {
+ for (j = 0; j < pmu_caps[i].nr_caps; j++)
+ zfree(&pmu_caps[i].caps[j]);
+ zfree(&pmu_caps[i].caps);
+ zfree(&pmu_caps[i].pmu_name);
+ }
+ zfree(&pmu_caps);
+ return ret;
+}
+
const char *perf_env__raw_arch(struct perf_env *env)
{
return env && !perf_env__read_arch(env) ? env->arch : "unknown";
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index d90e343cf1fa..e00179787a34 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -150,8 +150,7 @@ enum perf_compress_type {
struct bpf_prog_info_node;
struct btf_node;
-extern struct perf_env perf_env;
-
+int perf_env__read_core_pmu_caps(struct perf_env *env);
void perf_env__exit(struct perf_env *env);
int perf_env__kernel_is_64_bit(struct perf_env *env);
@@ -174,16 +173,22 @@ const char *perf_env__raw_arch(struct perf_env *env);
int perf_env__nr_cpus_avail(struct perf_env *env);
void perf_env__init(struct perf_env *env);
+#ifdef HAVE_LIBBPF_SUPPORT
bool __perf_env__insert_bpf_prog_info(struct perf_env *env,
struct bpf_prog_info_node *info_node);
bool perf_env__insert_bpf_prog_info(struct perf_env *env,
struct bpf_prog_info_node *info_node);
struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
__u32 prog_id);
+void perf_env__iterate_bpf_prog_info(struct perf_env *env,
+ void (*cb)(struct bpf_prog_info_node *node,
+ void *data),
+ void *data);
bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id);
+#endif // HAVE_LIBBPF_SUPPORT
int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu);
char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index c23b77f8f854..fcf44149feb2 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1,9 +1,12 @@
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
+#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <perf/cpumap.h>
+#include <perf/event.h>
+#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
@@ -77,6 +80,8 @@ static const char *perf_event__names[] = {
[PERF_RECORD_HEADER_FEATURE] = "FEATURE",
[PERF_RECORD_COMPRESSED] = "COMPRESSED",
[PERF_RECORD_FINISHED_INIT] = "FINISHED_INIT",
+ [PERF_RECORD_COMPRESSED2] = "COMPRESSED2",
+ [PERF_RECORD_BPF_METADATA] = "BPF_METADATA",
};
const char *perf_event__name(unsigned int id)
@@ -329,7 +334,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp)
build_id__init(&bid, event->mmap2.build_id,
event->mmap2.build_id_size);
- build_id__sprintf(&bid, sbuild_id);
+ build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id));
return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64
" <%s>]: %c%c%c%c %s\n",
@@ -448,12 +453,13 @@ int perf_event__exit_del_thread(const struct perf_tool *tool __maybe_unused,
size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp)
{
- return fprintf(fp, " offset: %#"PRI_lx64" size: %#"PRI_lx64" flags: %#"PRI_lx64" [%s%s%s]\n",
+ return fprintf(fp, " offset: %#"PRI_lx64" size: %#"PRI_lx64" flags: %#"PRI_lx64" [%s%s%s%s]\n",
event->aux.aux_offset, event->aux.aux_size,
event->aux.flags,
event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "",
event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "",
- event->aux.flags & PERF_AUX_FLAG_PARTIAL ? "P" : "");
+ event->aux.flags & PERF_AUX_FLAG_PARTIAL ? "P" : "",
+ event->aux.flags & PERF_AUX_FLAG_COLLISION ? "C" : "");
}
size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp)
@@ -503,6 +509,20 @@ size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp)
event->bpf.type, event->bpf.flags, event->bpf.id);
}
+size_t perf_event__fprintf_bpf_metadata(union perf_event *event, FILE *fp)
+{
+ struct perf_record_bpf_metadata *metadata = &event->bpf_metadata;
+ size_t ret;
+
+ ret = fprintf(fp, " prog %s\n", metadata->prog_name);
+ for (__u32 i = 0; i < metadata->nr_entries; i++) {
+ ret += fprintf(fp, " entry %d: %20s = %s\n", i,
+ metadata->entries[i].key,
+ metadata->entries[i].value);
+ }
+ return ret;
+}
+
static int text_poke_printer(enum binary_printer_ops op, unsigned int val,
void *extra, FILE *fp)
{
@@ -600,6 +620,9 @@ size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FIL
case PERF_RECORD_AUX_OUTPUT_HW_ID:
ret += perf_event__fprintf_aux_output_hw_id(event, fp);
break;
+ case PERF_RECORD_BPF_METADATA:
+ ret += perf_event__fprintf_bpf_metadata(event, fp);
+ break;
default:
ret += fprintf(fp, "\n");
}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 664bf39567ce..e40d16d3246c 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -370,6 +370,7 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_bpf_metadata(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine,FILE *fp);
size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp);
@@ -390,11 +391,6 @@ extern unsigned int proc_map_timeout;
#define PAGE_SIZE_NAME_LEN 32
char *get_page_size_name(u64 size, char *str);
-void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type);
-void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type);
-const char *arch_perf_header_entry(const char *se_header);
-int arch_support_sort_key(const char *sort_key);
-
static inline bool perf_event_header__cpumode_is_guest(u8 cpumode)
{
return cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index c1a04141aed0..80d8387e6b97 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -35,6 +35,8 @@
#include "util/util.h"
#include "util/env.h"
#include "util/intel-tpebs.h"
+#include "util/metricgroup.h"
+#include "util/strbuf.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
@@ -82,6 +84,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
evlist->ctl_fd.ack = -1;
evlist->ctl_fd.pos = -1;
evlist->nr_br_cntr = -1;
+ metricgroup__rblist_init(&evlist->metric_events);
}
struct evlist *evlist__new(void)
@@ -172,6 +175,7 @@ static void evlist__purge(struct evlist *evlist)
void evlist__exit(struct evlist *evlist)
{
+ metricgroup__rblist_exit(&evlist->metric_events);
event_enable_timer__exit(&evlist->eet);
zfree(&evlist->mmap);
zfree(&evlist->overwrite_mmap);
@@ -183,7 +187,6 @@ void evlist__delete(struct evlist *evlist)
if (evlist == NULL)
return;
- tpebs_delete();
evlist__free_stats(evlist);
evlist__munmap(evlist);
evlist__close(evlist);
@@ -1006,8 +1009,7 @@ int evlist__create_maps(struct evlist *evlist, struct target *target)
* per-thread data. thread_map__new_str will call
* thread_map__new_all_cpus to enumerate all threads.
*/
- threads = thread_map__new_str(target->pid, target->tid, target->uid,
- all_threads);
+ threads = thread_map__new_str(target->pid, target->tid, all_threads);
if (!threads)
return -1;
@@ -2468,23 +2470,36 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx)
return NULL;
}
-int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf)
+void evlist__format_evsels(struct evlist *evlist, struct strbuf *sb, size_t max_length)
{
- struct evsel *evsel;
- int printed = 0;
+ struct evsel *evsel, *leader = NULL;
+ bool first = true;
evlist__for_each_entry(evlist, evsel) {
+ struct evsel *new_leader = evsel__leader(evsel);
+
if (evsel__is_dummy_event(evsel))
continue;
- if (size > (strlen(evsel__name(evsel)) + (printed ? 2 : 1))) {
- printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "," : "", evsel__name(evsel));
- } else {
- printed += scnprintf(bf + printed, size - printed, "%s...", printed ? "," : "");
- break;
+
+ if (leader != new_leader && leader && leader->core.nr_members > 1)
+ strbuf_addch(sb, '}');
+
+ if (!first)
+ strbuf_addch(sb, ',');
+
+ if (sb->len > max_length) {
+ strbuf_addstr(sb, "...");
+ return;
}
- }
+ if (leader != new_leader && new_leader->core.nr_members > 1)
+ strbuf_addch(sb, '{');
- return printed;
+ strbuf_addstr(sb, evsel__name(evsel));
+ first = false;
+ leader = new_leader;
+ }
+ if (leader && leader->core.nr_members > 1)
+ strbuf_addch(sb, '}');
}
void evlist__check_mem_load_aux(struct evlist *evlist)
@@ -2534,52 +2549,61 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis
return;
evlist__for_each_entry(evlist, pos) {
- struct perf_cpu_map *intersect, *to_test, *online = cpu_map__online();
- const struct perf_pmu *pmu = evsel__find_pmu(pos);
+ evsel__warn_user_requested_cpus(pos, user_requested_cpus);
+ }
+ perf_cpu_map__put(user_requested_cpus);
+}
- to_test = pmu && pmu->is_core ? pmu->cpus : online;
- intersect = perf_cpu_map__intersect(to_test, user_requested_cpus);
- if (!perf_cpu_map__equal(intersect, user_requested_cpus)) {
- char buf[128];
+/* Should uniquify be disabled for the evlist? */
+static bool evlist__disable_uniquify(const struct evlist *evlist)
+{
+ struct evsel *counter;
+ struct perf_pmu *last_pmu = NULL;
+ bool first = true;
- cpu_map__snprint(to_test, buf, sizeof(buf));
- pr_warning("WARNING: A requested CPU in '%s' is not supported by PMU '%s' (CPUs %s) for event '%s'\n",
- cpu_list, pmu ? pmu->name : "cpu", buf, evsel__name(pos));
+ evlist__for_each_entry(evlist, counter) {
+ /* If PMUs vary then uniquify can be useful. */
+ if (!first && counter->pmu != last_pmu)
+ return false;
+ first = false;
+ if (counter->pmu) {
+ /* Allow uniquify for uncore PMUs. */
+ if (!counter->pmu->is_core)
+ return false;
+ /* Keep hybrid event names uniquified for clarity. */
+ if (perf_pmus__num_core_pmus() > 1)
+ return false;
}
- perf_cpu_map__put(intersect);
- perf_cpu_map__put(online);
+ last_pmu = counter->pmu;
}
- perf_cpu_map__put(user_requested_cpus);
+ return true;
}
-void evlist__uniquify_name(struct evlist *evlist)
+static bool evlist__set_needs_uniquify(struct evlist *evlist, const struct perf_stat_config *config)
{
- char *new_name, empty_attributes[2] = ":", *attributes;
- struct evsel *pos;
+ struct evsel *counter;
+ bool needs_uniquify = false;
- if (perf_pmus__num_core_pmus() == 1)
- return;
-
- evlist__for_each_entry(evlist, pos) {
- if (!evsel__is_hybrid(pos))
- continue;
+ if (evlist__disable_uniquify(evlist)) {
+ evlist__for_each_entry(evlist, counter)
+ counter->uniquified_name = true;
+ return false;
+ }
- if (strchr(pos->name, '/'))
- continue;
+ evlist__for_each_entry(evlist, counter) {
+ if (evsel__set_needs_uniquify(counter, config))
+ needs_uniquify = true;
+ }
+ return needs_uniquify;
+}
- attributes = strchr(pos->name, ':');
- if (attributes)
- *attributes = '\0';
- else
- attributes = empty_attributes;
+void evlist__uniquify_evsel_names(struct evlist *evlist, const struct perf_stat_config *config)
+{
+ if (evlist__set_needs_uniquify(evlist, config)) {
+ struct evsel *pos;
- if (asprintf(&new_name, "%s/%s/%s", pos->pmu ? pos->pmu->name : "",
- pos->name, attributes + 1)) {
- free(pos->name);
- pos->name = new_name;
- } else {
- *attributes = ':';
- }
+ evlist__for_each_entry(evlist, pos)
+ evsel__uniquify_counter(pos);
}
}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index edcbf1c10e92..5e71e3dc6042 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -12,6 +12,7 @@
#include <perf/evlist.h>
#include "events_stats.h"
#include "evsel.h"
+#include "rblist.h"
#include <pthread.h>
#include <signal.h>
#include <unistd.h>
@@ -19,7 +20,9 @@
struct pollfd;
struct thread_map;
struct perf_cpu_map;
+struct perf_stat_config;
struct record_opts;
+struct strbuf;
struct target;
/*
@@ -68,7 +71,7 @@ struct evlist {
struct mmap *overwrite_mmap;
struct evsel *selected;
struct events_stats stats;
- struct perf_env *env;
+ struct perf_session *session;
void (*trace_event_sample_raw)(struct evlist *evlist,
union perf_event *event,
struct perf_sample *sample);
@@ -84,6 +87,11 @@ struct evlist {
int pos; /* index at evlist core object to check signals */
} ctl_fd;
struct event_enable_timer *eet;
+ /**
+ * @metric_events: A list of struct metric_event which each have a list
+ * of struct metric_expr.
+ */
+ struct rblist metric_events;
};
struct evsel_str_handler {
@@ -103,6 +111,7 @@ void evlist__add(struct evlist *evlist, struct evsel *entry);
void evlist__remove(struct evlist *evlist, struct evsel *evsel);
int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs);
+int arch_evlist__add_required_events(struct list_head *list);
int evlist__add_dummy(struct evlist *evlist);
struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide);
@@ -430,10 +439,10 @@ int event_enable_timer__process(struct event_enable_timer *eet);
struct evsel *evlist__find_evsel(struct evlist *evlist, int idx);
-int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf);
+void evlist__format_evsels(struct evlist *evlist, struct strbuf *sb, size_t max_length);
void evlist__check_mem_load_aux(struct evlist *evlist);
void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list);
-void evlist__uniquify_name(struct evlist *evlist);
+void evlist__uniquify_evsel_names(struct evlist *evlist, const struct perf_stat_config *config);
bool evlist__has_bpf_output(struct evlist *evlist);
bool evlist__needs_bpf_sb_event(struct evlist *evlist);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 3c030da2e477..d264c143b592 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -48,6 +48,7 @@
#include "record.h"
#include "debug.h"
#include "trace-event.h"
+#include "session.h"
#include "stat.h"
#include "string2.h"
#include "memswap.h"
@@ -56,8 +57,10 @@
#include "off_cpu.h"
#include "pmu.h"
#include "pmus.h"
+#include "drm_pmu.h"
#include "hwmon_pmu.h"
#include "tool_pmu.h"
+#include "tp_pmu.h"
#include "rlimit.h"
#include "../perf-sys.h"
#include "util/parse-branch-options.h"
@@ -487,7 +490,7 @@ struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig)
return NULL;
evsel->core.cpus = perf_cpu_map__get(orig->core.cpus);
- evsel->core.own_cpus = perf_cpu_map__get(orig->core.own_cpus);
+ evsel->core.pmu_cpus = perf_cpu_map__get(orig->core.pmu_cpus);
evsel->core.threads = perf_thread_map__get(orig->core.threads);
evsel->core.nr_members = orig->core.nr_members;
evsel->core.system_wide = orig->core.system_wide;
@@ -552,11 +555,11 @@ struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig)
evsel->exclude_GH = orig->exclude_GH;
evsel->sample_read = orig->sample_read;
- evsel->auto_merge_stats = orig->auto_merge_stats;
evsel->collect_stat = orig->collect_stat;
evsel->weak_group = orig->weak_group;
evsel->use_config_name = orig->use_config_name;
evsel->pmu = orig->pmu;
+ evsel->first_wildcard_match = orig->first_wildcard_match;
if (evsel__copy_config_terms(evsel, orig) < 0)
goto out_err;
@@ -570,24 +573,6 @@ out_err:
return NULL;
}
-static int trace_event__id(const char *sys, const char *name)
-{
- char *tp_dir = get_events_file(sys);
- char path[PATH_MAX];
- int id, err;
-
- if (!tp_dir)
- return -1;
-
- scnprintf(path, PATH_MAX, "%s/%s/id", tp_dir, name);
- put_events_file(tp_dir);
- err = filename__read_int(path, &id);
- if (err)
- return err;
-
- return id;
-}
-
/*
* Returns pointer with encoded error via <linux/err.h> interface.
*/
@@ -621,7 +606,7 @@ struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool
event_attr_init(&attr);
if (format) {
- id = trace_event__id(sys, name);
+ id = tp_pmu__id(sys, name);
if (id < 0) {
err = id;
goto out_free;
@@ -1275,9 +1260,10 @@ static void evsel__set_default_freq_period(struct record_opts *opts,
}
}
-static bool evsel__is_offcpu_event(struct evsel *evsel)
+bool evsel__is_offcpu_event(struct evsel *evsel)
{
- return evsel__is_bpf_output(evsel) && evsel__name_is(evsel, OFFCPU_EVENT);
+ return evsel__is_bpf_output(evsel) && evsel__name_is(evsel, OFFCPU_EVENT) &&
+ evsel->core.attr.sample_type & PERF_SAMPLE_RAW;
}
/*
@@ -1425,7 +1411,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
evsel__set_sample_bit(evsel, CPU);
}
- if (opts->sample_address)
+ if (opts->sample_data_src)
evsel__set_sample_bit(evsel, DATA_SRC);
if (opts->sample_phys_addr)
@@ -1440,9 +1426,10 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
attr->branch_sample_type = opts->branch_stack;
}
- if (opts->sample_weight)
+ if (opts->sample_weight || evsel->retire_lat) {
arch_evsel__set_sample_weight(evsel);
-
+ evsel->retire_lat = false;
+ }
attr->task = track;
attr->mmap = track;
attr->mmap2 = track && !perf_missing_features.mmap2;
@@ -1524,7 +1511,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
attr->exclude_user = 1;
}
- if (evsel->core.own_cpus || evsel->unit)
+ if (evsel->core.pmu_cpus || evsel->unit)
evsel->core.attr.read_format |= PERF_FORMAT_ID;
/*
@@ -1554,8 +1541,10 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
if (evsel__is_dummy_event(evsel))
evsel__reset_sample_bit(evsel, BRANCH_STACK);
- if (evsel__is_offcpu_event(evsel))
+ if (evsel__is_offcpu_event(evsel)) {
evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES;
+ attr->inherit = 0;
+ }
arch__post_evsel_config(evsel, attr);
}
@@ -1652,10 +1641,21 @@ static void evsel__free_config_terms(struct evsel *evsel)
free_config_terms(&evsel->config_terms);
}
+static void (*evsel__priv_destructor)(void *priv);
+
+void evsel__set_priv_destructor(void (*destructor)(void *priv))
+{
+ assert(evsel__priv_destructor == NULL);
+
+ evsel__priv_destructor = destructor;
+}
+
void evsel__exit(struct evsel *evsel)
{
assert(list_empty(&evsel->core.node));
assert(evsel->evlist == NULL);
+ if (evsel__is_retire_lat(evsel))
+ evsel__tpebs_close(evsel);
bpf_counter__destroy(evsel);
perf_bpf_filter__destroy(evsel);
evsel__free_counts(evsel);
@@ -1663,9 +1663,7 @@ void evsel__exit(struct evsel *evsel)
perf_evsel__free_id(&evsel->core);
evsel__free_config_terms(evsel);
cgroup__put(evsel->cgrp);
- perf_cpu_map__put(evsel->core.cpus);
- perf_cpu_map__put(evsel->core.own_cpus);
- perf_thread_map__put(evsel->core.threads);
+ perf_evsel__exit(&evsel->core);
zfree(&evsel->group_name);
zfree(&evsel->name);
#ifdef HAVE_LIBTRACEEVENT
@@ -1680,6 +1678,8 @@ void evsel__exit(struct evsel *evsel)
hashmap__free(evsel->per_pkg_mask);
evsel->per_pkg_mask = NULL;
zfree(&evsel->metric_events);
+ if (evsel__priv_destructor)
+ evsel__priv_destructor(evsel->priv);
perf_evsel__object.fini(evsel);
if (evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME ||
evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME)
@@ -1718,11 +1718,6 @@ static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
}
-static int evsel__read_retire_lat(struct evsel *evsel, int cpu_map_idx, int thread)
-{
- return tpebs_set_evsel(evsel, cpu_map_idx, thread);
-}
-
static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
u64 val, u64 ena, u64 run, u64 lost)
{
@@ -1730,8 +1725,8 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
count = perf_counts(counter->counts, cpu_map_idx, thread);
- if (counter->retire_lat) {
- evsel__read_retire_lat(counter, cpu_map_idx, thread);
+ if (evsel__is_retire_lat(counter)) {
+ evsel__tpebs_read(counter, cpu_map_idx, thread);
perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
return;
}
@@ -1888,8 +1883,11 @@ int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
if (evsel__is_hwmon(evsel))
return evsel__hwmon_pmu_read(evsel, cpu_map_idx, thread);
+ if (evsel__is_drm(evsel))
+ return evsel__drm_pmu_read(evsel, cpu_map_idx, thread);
+
if (evsel__is_retire_lat(evsel))
- return evsel__read_retire_lat(evsel, cpu_map_idx, thread);
+ return evsel__tpebs_read(evsel, cpu_map_idx, thread);
if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
return evsel__read_group(evsel, cpu_map_idx, thread);
@@ -2576,7 +2574,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_cpu cpu;
if (evsel__is_retire_lat(evsel))
- return tpebs_start(evsel->evlist);
+ return evsel__tpebs_open(evsel);
err = __evsel__prepare_open(evsel, cpus, threads);
if (err)
@@ -2609,6 +2607,11 @@ fallback_missing_features:
start_cpu_map_idx,
end_cpu_map_idx);
}
+ if (evsel__is_drm(evsel)) {
+ return evsel__drm_pmu_open(evsel, threads,
+ start_cpu_map_idx,
+ end_cpu_map_idx);
+ }
for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
cpu = perf_cpu_map__cpu(cpus, idx);
@@ -2737,22 +2740,37 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
void evsel__close(struct evsel *evsel)
{
if (evsel__is_retire_lat(evsel))
- tpebs_delete();
+ evsel__tpebs_close(evsel);
perf_evsel__close(&evsel->core);
perf_evsel__free_id(&evsel->core);
}
-int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx)
+int evsel__open_per_cpu_and_thread(struct evsel *evsel,
+ struct perf_cpu_map *cpus, int cpu_map_idx,
+ struct perf_thread_map *threads)
{
if (cpu_map_idx == -1)
- return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus));
+ return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus));
- return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1);
+ return evsel__open_cpu(evsel, cpus, threads, cpu_map_idx, cpu_map_idx + 1);
+}
+
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx)
+{
+ struct perf_thread_map *threads = thread_map__new_by_tid(-1);
+ int ret = evsel__open_per_cpu_and_thread(evsel, cpus, cpu_map_idx, threads);
+
+ perf_thread_map__put(threads);
+ return ret;
}
int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
{
- return evsel__open(evsel, NULL, threads);
+ struct perf_cpu_map *cpus = perf_cpu_map__new_any_cpu();
+ int ret = evsel__open_per_cpu_and_thread(evsel, cpus, -1, threads);
+
+ perf_cpu_map__put(cpus);
+ return ret;
}
static int perf_evsel__parse_id_sample(const struct evsel *evsel,
@@ -2845,11 +2863,18 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size)
return 0;
}
-void __weak arch_perf_parse_sample_weight(struct perf_sample *data,
- const __u64 *array,
- u64 type __maybe_unused)
+static void perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type)
{
- data->weight = *array;
+ union perf_sample_weight weight;
+
+ weight.full = *array;
+ if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
+ data->weight = weight.var1_dw;
+ data->ins_lat = weight.var2_w;
+ data->weight3 = weight.var3_w;
+ } else {
+ data->weight = weight.full;
+ }
}
u64 evsel__bitfield_swap_branch_flags(u64 value)
@@ -2923,6 +2948,35 @@ static inline bool evsel__has_branch_counters(const struct evsel *evsel)
return false;
}
+static int __set_offcpu_sample(struct perf_sample *data)
+{
+ u64 *array = data->raw_data;
+ u32 max_size = data->raw_size, *p32;
+ const void *endp = (void *)array + max_size;
+
+ if (array == NULL)
+ return -EFAULT;
+
+ OVERFLOW_CHECK_u64(array);
+ p32 = (void *)array++;
+ data->pid = p32[0];
+ data->tid = p32[1];
+
+ OVERFLOW_CHECK_u64(array);
+ data->period = *array++;
+
+ OVERFLOW_CHECK_u64(array);
+ data->callchain = (struct ip_callchain *)array++;
+ OVERFLOW_CHECK(array, data->callchain->nr * sizeof(u64), max_size);
+ data->ip = data->callchain->ips[1];
+ array += data->callchain->nr;
+
+ OVERFLOW_CHECK_u64(array);
+ data->cgroup = *array;
+
+ return 0;
+}
+
int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
struct perf_sample *data)
{
@@ -3206,7 +3260,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
if (type & PERF_SAMPLE_WEIGHT_TYPE) {
OVERFLOW_CHECK_u64(array);
- arch_perf_parse_sample_weight(data, array, type);
+ perf_parse_sample_weight(data, array, type);
array++;
}
@@ -3277,6 +3331,9 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
array = (void *)array + sz;
}
+ if (evsel__is_offcpu_event(evsel))
+ return __set_offcpu_sample(data);
+
return 0;
}
@@ -3752,6 +3809,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size, "%s",
"No hardware sampling interrupt available.\n");
#endif
+ if (!target__has_cpu(target))
+ return scnprintf(msg, size,
+ "Unsupported event (%s) in per-thread mode, enable system wide with '-a'.",
+ evsel__name(evsel));
break;
case EBUSY:
if (find_process("oprofiled"))
@@ -3802,11 +3863,16 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel));
}
+struct perf_session *evsel__session(struct evsel *evsel)
+{
+ return evsel && evsel->evlist ? evsel->evlist->session : NULL;
+}
+
struct perf_env *evsel__env(struct evsel *evsel)
{
- if (evsel && evsel->evlist && evsel->evlist->env)
- return evsel->evlist->env;
- return &perf_env;
+ struct perf_session *session = evsel__session(evsel);
+
+ return session ? perf_session__env(session) : NULL;
}
static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
@@ -3917,3 +3983,144 @@ void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader)
leader->core.nr_members--;
}
}
+
+bool evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config)
+{
+ struct evsel *evsel;
+
+ if (counter->needs_uniquify) {
+ /* Already set. */
+ return true;
+ }
+
+ if (counter->use_config_name || counter->is_libpfm_event) {
+ /* Original name will be used. */
+ return false;
+ }
+
+ if (!config->hybrid_merge && evsel__is_hybrid(counter)) {
+ /* Unique hybrid counters necessary. */
+ counter->needs_uniquify = true;
+ return true;
+ }
+
+ if (counter->core.attr.type < PERF_TYPE_MAX && counter->core.attr.type != PERF_TYPE_RAW) {
+ /* Legacy event, don't uniquify. */
+ return false;
+ }
+
+ if (counter->pmu && counter->pmu->is_core &&
+ counter->alternate_hw_config != PERF_COUNT_HW_MAX) {
+ /* A sysfs or json event replacing a legacy event, don't uniquify. */
+ return false;
+ }
+
+ if (config->aggr_mode == AGGR_NONE) {
+ /* Always unique with no aggregation. */
+ counter->needs_uniquify = true;
+ return true;
+ }
+
+ if (counter->first_wildcard_match != NULL) {
+ /*
+ * If stats are merged then only the first_wildcard_match is
+ * displayed, there is no need to uniquify this evsel as the
+ * name won't be shown.
+ */
+ return false;
+ }
+
+ /*
+ * Do other non-merged events in the evlist have the same name? If so
+ * uniquify is necessary.
+ */
+ evlist__for_each_entry(counter->evlist, evsel) {
+ if (evsel == counter || evsel->first_wildcard_match || evsel->pmu == counter->pmu)
+ continue;
+
+ if (evsel__name_is(counter, evsel__name(evsel))) {
+ counter->needs_uniquify = true;
+ return true;
+ }
+ }
+ return false;
+}
+
+void evsel__uniquify_counter(struct evsel *counter)
+{
+ const char *name, *pmu_name;
+ char *new_name, *config;
+ int ret;
+
+ /* No uniquification necessary. */
+ if (!counter->needs_uniquify)
+ return;
+
+ /* The evsel was already uniquified. */
+ if (counter->uniquified_name)
+ return;
+
+ /* Avoid checking to uniquify twice. */
+ counter->uniquified_name = true;
+
+ name = evsel__name(counter);
+ pmu_name = counter->pmu->name;
+ /* Already prefixed by the PMU name. */
+ if (!strncmp(name, pmu_name, strlen(pmu_name)))
+ return;
+
+ config = strchr(name, '/');
+ if (config) {
+ int len = config - name;
+
+ if (config[1] == '/') {
+ /* case: event// */
+ ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 2);
+ } else {
+ /* case: event/.../ */
+ ret = asprintf(&new_name, "%s/%.*s,%s", pmu_name, len, name, config + 1);
+ }
+ } else {
+ config = strchr(name, ':');
+ if (config) {
+ /* case: event:.. */
+ int len = config - name;
+
+ ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 1);
+ } else {
+ /* case: event */
+ ret = asprintf(&new_name, "%s/%s/", pmu_name, name);
+ }
+ }
+ if (ret > 0) {
+ free(counter->name);
+ counter->name = new_name;
+ } else {
+ /* ENOMEM from asprintf. */
+ counter->uniquified_name = false;
+ }
+}
+
+void evsel__warn_user_requested_cpus(struct evsel *evsel, struct perf_cpu_map *user_requested_cpus)
+{
+ struct perf_cpu_map *intersect, *online = NULL;
+ const struct perf_pmu *pmu = evsel__find_pmu(evsel);
+
+ if (pmu && pmu->is_core) {
+ intersect = perf_cpu_map__intersect(pmu->cpus, user_requested_cpus);
+ } else {
+ online = cpu_map__online();
+ intersect = perf_cpu_map__intersect(online, user_requested_cpus);
+ }
+ if (!perf_cpu_map__equal(intersect, user_requested_cpus)) {
+ char buf1[128];
+ char buf2[128];
+
+ cpu_map__snprint(user_requested_cpus, buf1, sizeof(buf1));
+ cpu_map__snprint(online ?: pmu->cpus, buf2, sizeof(buf2));
+ pr_warning("WARNING: A requested CPU in '%s' is not supported by PMU '%s' (CPUs %s) for event '%s'\n",
+ buf1, pmu ? pmu->name : "cpu", buf2, evsel__name(evsel));
+ }
+ perf_cpu_map__put(intersect);
+ perf_cpu_map__put(online);
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index aae431d63d64..5797a02e5d6a 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -16,6 +16,7 @@
struct bpf_object;
struct cgroup;
struct perf_counts;
+struct perf_stat_config;
struct perf_stat_evsel;
union perf_event;
struct bpf_counter_ops;
@@ -69,6 +70,11 @@ struct evsel {
const char *unit;
struct cgroup *cgrp;
const char *metric_id;
+ /*
+ * This point to the first evsel with the same name, intended to store the
+ * aggregated counts in aggregation mode.
+ */
+ struct evsel *first_wildcard_match;
/* parse modifier helper */
int exclude_GH;
int sample_read;
@@ -77,7 +83,6 @@ struct evsel {
bool percore;
bool precise_max;
bool is_libpfm_event;
- bool auto_merge_stats;
bool collect_stat;
bool weak_group;
bool bpf_counter;
@@ -114,7 +119,6 @@ struct evsel {
bool ignore_missing_thread;
bool forced_leader;
bool cmdline_group_boundary;
- bool merged_stat;
bool reset_group;
bool errored;
bool needs_auxtrace_mmap;
@@ -177,6 +181,12 @@ struct evsel {
/* For tool events */
/* Beginning time subtracted when the counter is read. */
union {
+ /* Defaults for retirement latency events. */
+ struct _retirement_latency {
+ double mean;
+ double min;
+ double max;
+ } retirement_latency;
/* duration_time is a single global time. */
__u64 start_time;
/*
@@ -270,6 +280,8 @@ void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx);
void evsel__exit(struct evsel *evsel);
void evsel__delete(struct evsel *evsel);
+void evsel__set_priv_destructor(void (*destructor)(void *priv));
+
struct callchain_param;
void evsel__config(struct evsel *evsel, struct record_opts *opts,
@@ -339,6 +351,9 @@ int evsel__enable(struct evsel *evsel);
int evsel__disable(struct evsel *evsel);
int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx);
+int evsel__open_per_cpu_and_thread(struct evsel *evsel,
+ struct perf_cpu_map *cpus, int cpu_map_idx,
+ struct perf_thread_map *threads);
int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx);
int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads);
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
@@ -527,6 +542,7 @@ static inline bool evsel__is_dummy_event(struct evsel *evsel)
(evsel->core.attr.config == PERF_COUNT_SW_DUMMY);
}
+struct perf_session *evsel__session(struct evsel *evsel);
struct perf_env *evsel__env(struct evsel *evsel);
int evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
@@ -542,6 +558,9 @@ void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader);
bool arch_evsel__must_be_in_group(const struct evsel *evsel);
+bool evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config);
+void evsel__uniquify_counter(struct evsel *counter);
+
/*
* Macro to swap the bit-field postition and size.
* Used when,
@@ -557,4 +576,8 @@ u64 evsel__bitfield_swap_branch_flags(u64 value);
void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel,
const char *config_name, u64 val);
+bool evsel__is_offcpu_event(struct evsel *evsel);
+
+void evsel__warn_user_requested_cpus(struct evsel *evsel, struct perf_cpu_map *user_requested_cpus);
+
#endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h
index af52a1516d0b..94a1e9cf73d6 100644
--- a/tools/perf/util/evsel_config.h
+++ b/tools/perf/util/evsel_config.h
@@ -48,6 +48,7 @@ struct evsel_config_term {
u32 aux_sample_size;
u64 cfg_chg;
char *str;
+ int cpu;
} val;
bool weak;
};
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index 6413537442aa..7fda0ff89c16 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -166,8 +166,12 @@ int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id,
data_ptr->kind = EXPR_ID_DATA__VALUE;
ret = hashmap__set(ctx->ids, id, data_ptr, &old_key, &old_data);
- if (ret)
+ if (ret) {
free(data_ptr);
+ } else if (old_data) {
+ data_ptr->val.val += old_data->val.val;
+ data_ptr->val.source_count += old_data->val.source_count;
+ }
free(old_key);
free(old_data);
return ret;
@@ -397,7 +401,7 @@ double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx
if (ev != TOOL_PMU__EVENT_NONE) {
u64 count;
- if (tool_pmu__read_event(ev, &count))
+ if (tool_pmu__read_event(ev, /*evsel=*/NULL, &count))
result = count;
else
pr_err("Failure to read '%s'", literal);
diff --git a/tools/perf/util/fncache.c b/tools/perf/util/fncache.c
index 6225cbc52310..bf9559c55c63 100644
--- a/tools/perf/util/fncache.c
+++ b/tools/perf/util/fncache.c
@@ -1,53 +1,58 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Manage a cache of file names' existence */
+#include <pthread.h>
#include <stdlib.h>
-#include <unistd.h>
#include <string.h>
-#include <linux/list.h>
+#include <unistd.h>
+#include <linux/compiler.h>
#include "fncache.h"
+#include "hashmap.h"
-struct fncache {
- struct hlist_node nd;
- bool res;
- char name[];
-};
+static struct hashmap *fncache;
-#define FNHSIZE 61
+static size_t fncache__hash(long key, void *ctx __maybe_unused)
+{
+ return str_hash((const char *)key);
+}
-static struct hlist_head fncache_hash[FNHSIZE];
+static bool fncache__equal(long key1, long key2, void *ctx __maybe_unused)
+{
+ return strcmp((const char *)key1, (const char *)key2) == 0;
+}
-unsigned shash(const unsigned char *s)
+static void fncache__init(void)
{
- unsigned h = 0;
- while (*s)
- h = 65599 * h + *s++;
- return h ^ (h >> 16);
+ fncache = hashmap__new(fncache__hash, fncache__equal, /*ctx=*/NULL);
+}
+
+static struct hashmap *fncache__get(void)
+{
+ static pthread_once_t fncache_once = PTHREAD_ONCE_INIT;
+
+ pthread_once(&fncache_once, fncache__init);
+
+ return fncache;
}
static bool lookup_fncache(const char *name, bool *res)
{
- int h = shash((const unsigned char *)name) % FNHSIZE;
- struct fncache *n;
-
- hlist_for_each_entry(n, &fncache_hash[h], nd) {
- if (!strcmp(n->name, name)) {
- *res = n->res;
- return true;
- }
- }
- return false;
+ long val;
+
+ if (!hashmap__find(fncache__get(), name, &val))
+ return false;
+
+ *res = (val != 0);
+ return true;
}
static void update_fncache(const char *name, bool res)
{
- struct fncache *n = malloc(sizeof(struct fncache) + strlen(name) + 1);
- int h = shash((const unsigned char *)name) % FNHSIZE;
-
- if (!n)
- return;
- strcpy(n->name, name);
- n->res = res;
- hlist_add_head(&n->nd, &fncache_hash[h]);
+ char *old_key = NULL, *key = strdup(name);
+
+ if (key) {
+ hashmap__set(fncache__get(), key, res, &old_key, /*old_value*/NULL);
+ free(old_key);
+ }
}
/* No LRU, only use when bounded in some other way. */
diff --git a/tools/perf/util/fncache.h b/tools/perf/util/fncache.h
index fe020beaefb1..b6a0f209493e 100644
--- a/tools/perf/util/fncache.h
+++ b/tools/perf/util/fncache.h
@@ -1,7 +1,6 @@
#ifndef _FCACHE_H
#define _FCACHE_H 1
-unsigned shash(const unsigned char *s);
bool file_available(const char *name);
#endif
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index a9bc47da83a5..950f2efafad2 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -17,6 +17,7 @@ struct perf_ftrace {
struct list_head notrace;
struct list_head graph_funcs;
struct list_head nograph_funcs;
+ struct list_head event_pair;
struct hashmap *profile_hash;
unsigned long percpu_buffer_size;
bool inherit;
@@ -29,6 +30,10 @@ struct perf_ftrace {
int graph_depth;
int func_stack_trace;
int func_irq_info;
+ int graph_args;
+ int graph_retval;
+ int graph_retval_hex;
+ int graph_retaddr;
int graph_nosleep_time;
int graph_noirqs;
int graph_verbose;
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index cdce7f173d00..591548b10e34 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -12,15 +12,14 @@
#include <libelf.h>
#include <string.h>
#include <stdlib.h>
-#include <unistd.h>
#include <inttypes.h>
-#include <fcntl.h>
#include <err.h>
#ifdef HAVE_LIBDW_SUPPORT
#include <dwarf.h>
#endif
#include "genelf.h"
+#include "sha1.h"
#include "../util/jitdump.h"
#include <linux/compiler.h>
@@ -28,25 +27,6 @@
#define NT_GNU_BUILD_ID 3
#endif
-#define BUILD_ID_URANDOM /* different uuid for each run */
-
-#ifdef HAVE_LIBCRYPTO_SUPPORT
-
-#define BUILD_ID_MD5
-#undef BUILD_ID_SHA /* does not seem to work well when linked with Java */
-#undef BUILD_ID_URANDOM /* different uuid for each run */
-
-#ifdef BUILD_ID_SHA
-#include <openssl/sha.h>
-#endif
-
-#ifdef BUILD_ID_MD5
-#include <openssl/evp.h>
-#include <openssl/md5.h>
-#endif
-#endif
-
-
typedef struct {
unsigned int namesz; /* Size of entry's owner string */
unsigned int descsz; /* Size of the note descriptor */
@@ -71,7 +51,7 @@ static char shd_string_table[] = {
static struct buildid_note {
Elf_Note desc; /* descsz: size of build-id, must be multiple of 4 */
char name[4]; /* GNU\0 */
- char build_id[20];
+ u8 build_id[SHA1_DIGEST_SIZE];
} bnote;
static Elf_Sym symtab[]={
@@ -92,65 +72,6 @@ static Elf_Sym symtab[]={
}
};
-#ifdef BUILD_ID_URANDOM
-static void
-gen_build_id(struct buildid_note *note,
- unsigned long load_addr __maybe_unused,
- const void *code __maybe_unused,
- size_t csize __maybe_unused)
-{
- int fd;
- size_t sz = sizeof(note->build_id);
- ssize_t sret;
-
- fd = open("/dev/urandom", O_RDONLY);
- if (fd == -1)
- err(1, "cannot access /dev/urandom for buildid");
-
- sret = read(fd, note->build_id, sz);
-
- close(fd);
-
- if (sret != (ssize_t)sz)
- memset(note->build_id, 0, sz);
-}
-#endif
-
-#ifdef BUILD_ID_SHA
-static void
-gen_build_id(struct buildid_note *note,
- unsigned long load_addr __maybe_unused,
- const void *code,
- size_t csize)
-{
- if (sizeof(note->build_id) < SHA_DIGEST_LENGTH)
- errx(1, "build_id too small for SHA1");
-
- SHA1(code, csize, (unsigned char *)note->build_id);
-}
-#endif
-
-#ifdef BUILD_ID_MD5
-static void
-gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize)
-{
- EVP_MD_CTX *mdctx;
-
- if (sizeof(note->build_id) < 16)
- errx(1, "build_id too small for MD5");
-
- mdctx = EVP_MD_CTX_new();
- if (!mdctx)
- errx(2, "failed to create EVP_MD_CTX");
-
- EVP_DigestInit_ex(mdctx, EVP_md5(), NULL);
- EVP_DigestUpdate(mdctx, &load_addr, sizeof(load_addr));
- EVP_DigestUpdate(mdctx, code, csize);
- EVP_DigestFinal_ex(mdctx, (unsigned char *)note->build_id, NULL);
- EVP_MD_CTX_free(mdctx);
-}
-#endif
-
static int
jit_add_eh_frame_info(Elf *e, void* unwinding, uint64_t unwinding_header_size,
uint64_t unwinding_size, uint64_t base_offset)
@@ -239,7 +160,7 @@ jit_add_eh_frame_info(Elf *e, void* unwinding, uint64_t unwinding_header_size,
* csize: the code size in bytes
*/
int
-jit_write_elf(int fd, uint64_t load_addr, const char *sym,
+jit_write_elf(int fd, uint64_t load_addr __maybe_unused, const char *sym,
const void *code, int csize,
void *debug __maybe_unused, int nr_debug_entries __maybe_unused,
void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size)
@@ -473,7 +394,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym,
/*
* build-id generation
*/
- gen_build_id(&bnote, load_addr, code, csize);
+ sha1(code, csize, bnote.build_id);
bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */
bnote.desc.descsz = sizeof(bnote.build_id);
bnote.desc.type = NT_GNU_BUILD_ID;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index e3cdc3b7b4ab..4f2a6e10ed5c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -557,6 +557,7 @@ static int write_event_desc(struct feat_fd *ff,
static int write_cmdline(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
{
+ struct perf_env *env = &ff->ph->env;
char pbuf[MAXPATHLEN], *buf;
int i, ret, n;
@@ -564,7 +565,7 @@ static int write_cmdline(struct feat_fd *ff,
buf = perf_exe(pbuf, MAXPATHLEN);
/* account for binary path */
- n = perf_env.nr_cmdline + 1;
+ n = env->nr_cmdline + 1;
ret = do_write(ff, &n, sizeof(n));
if (ret < 0)
@@ -574,8 +575,8 @@ static int write_cmdline(struct feat_fd *ff,
if (ret < 0)
return ret;
- for (i = 0 ; i < perf_env.nr_cmdline; i++) {
- ret = do_write_string(ff, perf_env.cmdline_argv[i]);
+ for (i = 0 ; i < env->nr_cmdline; i++) {
+ ret = do_write_string(ff, env->cmdline_argv[i]);
if (ret < 0)
return ret;
}
@@ -586,6 +587,7 @@ static int write_cmdline(struct feat_fd *ff,
static int write_cpu_topology(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
{
+ struct perf_env *env = &ff->ph->env;
struct cpu_topology *tp;
u32 i;
int ret, j;
@@ -613,17 +615,17 @@ static int write_cpu_topology(struct feat_fd *ff,
break;
}
- ret = perf_env__read_cpu_topology_map(&perf_env);
+ ret = perf_env__read_cpu_topology_map(env);
if (ret < 0)
goto done;
- for (j = 0; j < perf_env.nr_cpus_avail; j++) {
- ret = do_write(ff, &perf_env.cpu[j].core_id,
- sizeof(perf_env.cpu[j].core_id));
+ for (j = 0; j < env->nr_cpus_avail; j++) {
+ ret = do_write(ff, &env->cpu[j].core_id,
+ sizeof(env->cpu[j].core_id));
if (ret < 0)
return ret;
- ret = do_write(ff, &perf_env.cpu[j].socket_id,
- sizeof(perf_env.cpu[j].socket_id));
+ ret = do_write(ff, &env->cpu[j].socket_id,
+ sizeof(env->cpu[j].socket_id));
if (ret < 0)
return ret;
}
@@ -641,9 +643,9 @@ static int write_cpu_topology(struct feat_fd *ff,
goto done;
}
- for (j = 0; j < perf_env.nr_cpus_avail; j++) {
- ret = do_write(ff, &perf_env.cpu[j].die_id,
- sizeof(perf_env.cpu[j].die_id));
+ for (j = 0; j < env->nr_cpus_avail; j++) {
+ ret = do_write(ff, &env->cpu[j].die_id,
+ sizeof(env->cpu[j].die_id));
if (ret < 0)
return ret;
}
@@ -1016,10 +1018,13 @@ static int write_bpf_prog_info(struct feat_fd *ff,
struct perf_env *env = &ff->ph->env;
struct rb_root *root;
struct rb_node *next;
- int ret;
+ int ret = 0;
down_read(&env->bpf_progs.lock);
+ if (env->bpf_progs.infos_cnt == 0)
+ goto out;
+
ret = do_write(ff, &env->bpf_progs.infos_cnt,
sizeof(env->bpf_progs.infos_cnt));
if (ret < 0)
@@ -1058,10 +1063,13 @@ static int write_bpf_btf(struct feat_fd *ff,
struct perf_env *env = &ff->ph->env;
struct rb_root *root;
struct rb_node *next;
- int ret;
+ int ret = 0;
down_read(&env->bpf_progs.lock);
+ if (env->bpf_progs.btfs_cnt == 0)
+ goto out;
+
ret = do_write(ff, &env->bpf_progs.btfs_cnt,
sizeof(env->bpf_progs.btfs_cnt));
@@ -1814,6 +1822,9 @@ static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp)
root = &env->bpf_progs.infos;
next = rb_first(root);
+ if (!next)
+ printf("# bpf_prog_info empty\n");
+
while (next) {
struct bpf_prog_info_node *node;
@@ -1838,6 +1849,9 @@ static void print_bpf_btf(struct feat_fd *ff, FILE *fp)
root = &env->bpf_progs.btfs;
next = rb_first(root);
+ if (!next)
+ printf("# btf info empty\n");
+
while (next) {
struct btf_node *node;
@@ -2111,17 +2125,18 @@ static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp)
static void print_pmu_caps(struct feat_fd *ff, FILE *fp)
{
+ struct perf_env *env = &ff->ph->env;
struct pmu_caps *pmu_caps;
- for (int i = 0; i < ff->ph->env.nr_pmus_with_caps; i++) {
- pmu_caps = &ff->ph->env.pmu_caps[i];
+ for (int i = 0; i < env->nr_pmus_with_caps; i++) {
+ pmu_caps = &env->pmu_caps[i];
__print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps,
pmu_caps->pmu_name);
}
- if (strcmp(perf_env__arch(&ff->ph->env), "x86") == 0 &&
- perf_env__has_pmu_mapping(&ff->ph->env, "ibs_op")) {
- char *max_precise = perf_env__find_pmu_cap(&ff->ph->env, "cpu", "max_precise");
+ if (strcmp(perf_env__arch(env), "x86") == 0 &&
+ perf_env__has_pmu_mapping(env, "ibs_op")) {
+ char *max_precise = perf_env__find_pmu_cap(env, "cpu", "max_precise");
if (max_precise != NULL && atoi(max_precise) == 0)
fprintf(fp, "# AMD systems uses ibs_op// PMU for some precise events, e.g.: cycles:p, see the 'perf list' man page for further details.\n");
@@ -2130,18 +2145,19 @@ static void print_pmu_caps(struct feat_fd *ff, FILE *fp)
static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
{
+ struct perf_env *env = &ff->ph->env;
const char *delimiter = "# pmu mappings: ";
char *str, *tmp;
u32 pmu_num;
u32 type;
- pmu_num = ff->ph->env.nr_pmu_mappings;
+ pmu_num = env->nr_pmu_mappings;
if (!pmu_num) {
fprintf(fp, "# pmu mappings: not available\n");
return;
}
- str = ff->ph->env.pmu_mappings;
+ str = env->pmu_mappings;
while (pmu_num) {
type = strtoul(str, &tmp, 0);
@@ -2223,17 +2239,18 @@ static void memory_node__fprintf(struct memory_node *n,
static void print_mem_topology(struct feat_fd *ff, FILE *fp)
{
+ struct perf_env *env = &ff->ph->env;
struct memory_node *nodes;
int i, nr;
- nodes = ff->ph->env.memory_nodes;
- nr = ff->ph->env.nr_memory_nodes;
+ nodes = env->memory_nodes;
+ nr = env->nr_memory_nodes;
fprintf(fp, "# memory nodes (nr %d, block size 0x%llx):\n",
- nr, ff->ph->env.memory_bsize);
+ nr, env->memory_bsize);
for (i = 0; i < nr; i++) {
- memory_node__fprintf(&nodes[i], ff->ph->env.memory_bsize, fp);
+ memory_node__fprintf(&nodes[i], env->memory_bsize, fp);
}
}
@@ -2291,7 +2308,7 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev,
free(m.name);
}
- build_id__sprintf(dso__bid(dso), sbuild_id);
+ build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id));
pr_debug("build id event received for %s: %s [%zu]\n",
dso__long_name(dso), sbuild_id, size);
dso__put(dso);
@@ -2431,6 +2448,7 @@ static int process_build_id(struct feat_fd *ff, void *data __maybe_unused)
static int process_nrcpus(struct feat_fd *ff, void *data __maybe_unused)
{
+ struct perf_env *env = &ff->ph->env;
int ret;
u32 nr_cpus_avail, nr_cpus_online;
@@ -2441,20 +2459,21 @@ static int process_nrcpus(struct feat_fd *ff, void *data __maybe_unused)
ret = do_read_u32(ff, &nr_cpus_online);
if (ret)
return ret;
- ff->ph->env.nr_cpus_avail = (int)nr_cpus_avail;
- ff->ph->env.nr_cpus_online = (int)nr_cpus_online;
+ env->nr_cpus_avail = (int)nr_cpus_avail;
+ env->nr_cpus_online = (int)nr_cpus_online;
return 0;
}
static int process_total_mem(struct feat_fd *ff, void *data __maybe_unused)
{
+ struct perf_env *env = &ff->ph->env;
u64 total_mem;
int ret;
ret = do_read_u64(ff, &total_mem);
if (ret)
return -1;
- ff->ph->env.total_mem = (unsigned long long)total_mem;
+ env->total_mem = (unsigned long long)total_mem;
return 0;
}
@@ -2515,13 +2534,14 @@ process_event_desc(struct feat_fd *ff, void *data __maybe_unused)
static int process_cmdline(struct feat_fd *ff, void *data __maybe_unused)
{
+ struct perf_env *env = &ff->ph->env;
char *str, *cmdline = NULL, **argv = NULL;
u32 nr, i, len = 0;
if (do_read_u32(ff, &nr))
return -1;
- ff->ph->env.nr_cmdline = nr;
+ env->nr_cmdline = nr;
cmdline = zalloc(ff->size + nr + 1);
if (!cmdline)
@@ -2541,8 +2561,8 @@ static int process_cmdline(struct feat_fd *ff, void *data __maybe_unused)
len += strlen(str) + 1;
free(str);
}
- ff->ph->env.cmdline = cmdline;
- ff->ph->env.cmdline_argv = (const char **) argv;
+ env->cmdline = cmdline;
+ env->cmdline_argv = (const char **) argv;
return 0;
error:
@@ -2556,19 +2576,18 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
u32 nr, i;
char *str = NULL;
struct strbuf sb;
- int cpu_nr = ff->ph->env.nr_cpus_avail;
+ struct perf_env *env = &ff->ph->env;
+ int cpu_nr = env->nr_cpus_avail;
u64 size = 0;
- struct perf_header *ph = ff->ph;
- bool do_core_id_test = true;
- ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu));
- if (!ph->env.cpu)
+ env->cpu = calloc(cpu_nr, sizeof(*env->cpu));
+ if (!env->cpu)
return -1;
if (do_read_u32(ff, &nr))
goto free_cpu;
- ph->env.nr_sibling_cores = nr;
+ env->nr_sibling_cores = nr;
size += sizeof(u32);
if (strbuf_init(&sb, 128) < 0)
goto free_cpu;
@@ -2584,12 +2603,12 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
size += string_size(str);
zfree(&str);
}
- ph->env.sibling_cores = strbuf_detach(&sb, NULL);
+ env->sibling_cores = strbuf_detach(&sb, NULL);
if (do_read_u32(ff, &nr))
return -1;
- ph->env.nr_sibling_threads = nr;
+ env->nr_sibling_threads = nr;
size += sizeof(u32);
for (i = 0; i < nr; i++) {
@@ -2603,43 +2622,28 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
size += string_size(str);
zfree(&str);
}
- ph->env.sibling_threads = strbuf_detach(&sb, NULL);
+ env->sibling_threads = strbuf_detach(&sb, NULL);
/*
* The header may be from old perf,
* which doesn't include core id and socket id information.
*/
if (ff->size <= size) {
- zfree(&ph->env.cpu);
+ zfree(&env->cpu);
return 0;
}
- /* On s390 the socket_id number is not related to the numbers of cpus.
- * The socket_id number might be higher than the numbers of cpus.
- * This depends on the configuration.
- * AArch64 is the same.
- */
- if (ph->env.arch && (!strncmp(ph->env.arch, "s390", 4)
- || !strncmp(ph->env.arch, "aarch64", 7)))
- do_core_id_test = false;
-
for (i = 0; i < (u32)cpu_nr; i++) {
if (do_read_u32(ff, &nr))
goto free_cpu;
- ph->env.cpu[i].core_id = nr;
+ env->cpu[i].core_id = nr;
size += sizeof(u32);
if (do_read_u32(ff, &nr))
goto free_cpu;
- if (do_core_id_test && nr != (u32)-1 && nr > (u32)cpu_nr) {
- pr_debug("socket_id number is too big."
- "You may need to upgrade the perf tool.\n");
- goto free_cpu;
- }
-
- ph->env.cpu[i].socket_id = nr;
+ env->cpu[i].socket_id = nr;
size += sizeof(u32);
}
@@ -2653,7 +2657,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
if (do_read_u32(ff, &nr))
return -1;
- ph->env.nr_sibling_dies = nr;
+ env->nr_sibling_dies = nr;
size += sizeof(u32);
for (i = 0; i < nr; i++) {
@@ -2667,13 +2671,13 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
size += string_size(str);
zfree(&str);
}
- ph->env.sibling_dies = strbuf_detach(&sb, NULL);
+ env->sibling_dies = strbuf_detach(&sb, NULL);
for (i = 0; i < (u32)cpu_nr; i++) {
if (do_read_u32(ff, &nr))
goto free_cpu;
- ph->env.cpu[i].die_id = nr;
+ env->cpu[i].die_id = nr;
}
return 0;
@@ -2682,12 +2686,13 @@ error:
strbuf_release(&sb);
zfree(&str);
free_cpu:
- zfree(&ph->env.cpu);
+ zfree(&env->cpu);
return -1;
}
static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused)
{
+ struct perf_env *env = &ff->ph->env;
struct numa_node *nodes, *n;
u32 nr, i;
char *str;
@@ -2722,8 +2727,8 @@ static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused)
if (!n->map)
goto error;
}
- ff->ph->env.nr_numa_nodes = nr;
- ff->ph->env.numa_nodes = nodes;
+ env->nr_numa_nodes = nr;
+ env->numa_nodes = nodes;
return 0;
error:
@@ -2733,6 +2738,7 @@ error:
static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused)
{
+ struct perf_env *env = &ff->ph->env;
char *name;
u32 pmu_num;
u32 type;
@@ -2746,7 +2752,7 @@ static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused)
return 0;
}
- ff->ph->env.nr_pmu_mappings = pmu_num;
+ env->nr_pmu_mappings = pmu_num;
if (strbuf_init(&sb, 128) < 0)
return -1;
@@ -2765,14 +2771,14 @@ static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused)
goto error;
if (!strcmp(name, "msr"))
- ff->ph->env.msr_pmu_type = type;
+ env->msr_pmu_type = type;
free(name);
pmu_num--;
}
/* AMD may set it by evlist__has_amd_ibs() from perf_session__new() */
- free(ff->ph->env.pmu_mappings);
- ff->ph->env.pmu_mappings = strbuf_detach(&sb, NULL);
+ free(env->pmu_mappings);
+ env->pmu_mappings = strbuf_detach(&sb, NULL);
return 0;
error:
@@ -2782,6 +2788,7 @@ error:
static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused)
{
+ struct perf_env *env = &ff->ph->env;
size_t ret = -1;
u32 i, nr, nr_groups;
struct perf_session *session;
@@ -2795,7 +2802,7 @@ static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused)
if (do_read_u32(ff, &nr_groups))
return -1;
- ff->ph->env.nr_groups = nr_groups;
+ env->nr_groups = nr_groups;
if (!nr_groups) {
pr_debug("group desc not available\n");
return 0;
@@ -2879,6 +2886,7 @@ static int process_auxtrace(struct feat_fd *ff, void *data __maybe_unused)
static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
{
+ struct perf_env *env = &ff->ph->env;
struct cpu_cache_level *caches;
u32 cnt, i, version;
@@ -2919,8 +2927,8 @@ static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
#undef _R
}
- ff->ph->env.caches = caches;
- ff->ph->env.caches_cnt = cnt;
+ env->caches = caches;
+ env->caches_cnt = cnt;
return 0;
out_free_caches:
for (i = 0; i < cnt; i++) {
@@ -2956,6 +2964,7 @@ static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
static int process_mem_topology(struct feat_fd *ff,
void *data __maybe_unused)
{
+ struct perf_env *env = &ff->ph->env;
struct memory_node *nodes;
u64 version, i, nr, bsize;
int ret = -1;
@@ -2994,9 +3003,9 @@ static int process_mem_topology(struct feat_fd *ff,
nodes[i] = n;
}
- ff->ph->env.memory_bsize = bsize;
- ff->ph->env.memory_nodes = nodes;
- ff->ph->env.nr_memory_nodes = nr;
+ env->memory_bsize = bsize;
+ env->memory_nodes = nodes;
+ env->nr_memory_nodes = nr;
ret = 0;
out:
@@ -3008,7 +3017,9 @@ out:
static int process_clockid(struct feat_fd *ff,
void *data __maybe_unused)
{
- if (do_read_u64(ff, &ff->ph->env.clock.clockid_res_ns))
+ struct perf_env *env = &ff->ph->env;
+
+ if (do_read_u64(ff, &env->clock.clockid_res_ns))
return -1;
return 0;
@@ -3017,6 +3028,7 @@ static int process_clockid(struct feat_fd *ff,
static int process_clock_data(struct feat_fd *ff,
void *_data __maybe_unused)
{
+ struct perf_env *env = &ff->ph->env;
u32 data32;
u64 data64;
@@ -3031,26 +3043,27 @@ static int process_clock_data(struct feat_fd *ff,
if (do_read_u32(ff, &data32))
return -1;
- ff->ph->env.clock.clockid = data32;
+ env->clock.clockid = data32;
/* TOD ref time */
if (do_read_u64(ff, &data64))
return -1;
- ff->ph->env.clock.tod_ns = data64;
+ env->clock.tod_ns = data64;
/* clockid ref time */
if (do_read_u64(ff, &data64))
return -1;
- ff->ph->env.clock.clockid_ns = data64;
- ff->ph->env.clock.enabled = true;
+ env->clock.clockid_ns = data64;
+ env->clock.enabled = true;
return 0;
}
static int process_hybrid_topology(struct feat_fd *ff,
void *data __maybe_unused)
{
+ struct perf_env *env = &ff->ph->env;
struct hybrid_node *nodes, *n;
u32 nr, i;
@@ -3074,8 +3087,8 @@ static int process_hybrid_topology(struct feat_fd *ff,
goto error;
}
- ff->ph->env.nr_hybrid_nodes = nr;
- ff->ph->env.hybrid_nodes = nodes;
+ env->nr_hybrid_nodes = nr;
+ env->hybrid_nodes = nodes;
return 0;
error:
@@ -3161,6 +3174,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused)
/* after reading from file, translate offset to address */
bpil_offs_to_addr(info_linear);
info_node->info_linear = info_linear;
+ info_node->metadata = NULL;
if (!__perf_env__insert_bpf_prog_info(env, info_node)) {
free(info_linear);
free(info_node);
@@ -3227,19 +3241,21 @@ out:
static int process_compressed(struct feat_fd *ff,
void *data __maybe_unused)
{
- if (do_read_u32(ff, &(ff->ph->env.comp_ver)))
+ struct perf_env *env = &ff->ph->env;
+
+ if (do_read_u32(ff, &(env->comp_ver)))
return -1;
- if (do_read_u32(ff, &(ff->ph->env.comp_type)))
+ if (do_read_u32(ff, &(env->comp_type)))
return -1;
- if (do_read_u32(ff, &(ff->ph->env.comp_level)))
+ if (do_read_u32(ff, &(env->comp_level)))
return -1;
- if (do_read_u32(ff, &(ff->ph->env.comp_ratio)))
+ if (do_read_u32(ff, &(env->comp_ratio)))
return -1;
- if (do_read_u32(ff, &(ff->ph->env.comp_mmap_len)))
+ if (do_read_u32(ff, &(env->comp_mmap_len)))
return -1;
return 0;
@@ -3311,19 +3327,21 @@ error:
static int process_cpu_pmu_caps(struct feat_fd *ff,
void *data __maybe_unused)
{
- int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps,
- &ff->ph->env.cpu_pmu_caps,
- &ff->ph->env.max_branches,
- &ff->ph->env.br_cntr_nr,
- &ff->ph->env.br_cntr_width);
+ struct perf_env *env = &ff->ph->env;
+ int ret = __process_pmu_caps(ff, &env->nr_cpu_pmu_caps,
+ &env->cpu_pmu_caps,
+ &env->max_branches,
+ &env->br_cntr_nr,
+ &env->br_cntr_width);
- if (!ret && !ff->ph->env.cpu_pmu_caps)
+ if (!ret && !env->cpu_pmu_caps)
pr_debug("cpu pmu capabilities not available\n");
return ret;
}
static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused)
{
+ struct perf_env *env = &ff->ph->env;
struct pmu_caps *pmu_caps;
u32 nr_pmu, i;
int ret;
@@ -3361,8 +3379,8 @@ static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused)
}
}
- ff->ph->env.nr_pmus_with_caps = nr_pmu;
- ff->ph->env.pmu_caps = pmu_caps;
+ env->nr_pmus_with_caps = nr_pmu;
+ env->pmu_caps = pmu_caps;
return 0;
err:
@@ -3660,6 +3678,7 @@ static int perf_session__do_write_header(struct perf_session *session,
struct perf_header *header = &session->header;
struct evsel *evsel;
struct feat_fd ff = {
+ .ph = header,
.fd = fd,
};
u64 attr_offset = sizeof(f_header), attr_size = 0;
@@ -4228,7 +4247,7 @@ int perf_session__read_header(struct perf_session *session)
if (session->evlist == NULL)
return -ENOMEM;
- session->evlist->env = &header->env;
+ session->evlist->session = session;
session->machines.host.env = &header->env;
/*
@@ -4341,12 +4360,12 @@ out_delete_evlist:
int perf_event__process_feature(struct perf_session *session,
union perf_event *event)
{
- const struct perf_tool *tool = session->tool;
struct feat_fd ff = { .fd = 0 };
struct perf_record_header_feature *fe = (struct perf_record_header_feature *)event;
int type = fe->header.type;
u64 feat = fe->feat_id;
int ret = 0;
+ bool print = dump_trace;
if (type < 0 || type >= PERF_RECORD_HEADER_MAX) {
pr_warning("invalid record type %d in pipe-mode\n", type);
@@ -4357,28 +4376,35 @@ int perf_event__process_feature(struct perf_session *session,
return -1;
}
- if (!feat_ops[feat].process)
- return 0;
-
ff.buf = (void *)fe->data;
ff.size = event->header.size - sizeof(*fe);
ff.ph = &session->header;
- if (feat_ops[feat].process(&ff, NULL)) {
+ if (feat_ops[feat].process && feat_ops[feat].process(&ff, NULL)) {
ret = -1;
goto out;
}
- if (!feat_ops[feat].print || !tool->show_feat_hdr)
- goto out;
+ if (session->tool->show_feat_hdr) {
+ if (!feat_ops[feat].full_only ||
+ session->tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) {
+ print = true;
+ } else {
+ fprintf(stdout, "# %s info available, use -I to display\n",
+ feat_ops[feat].name);
+ }
+ }
- if (!feat_ops[feat].full_only ||
- tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) {
- feat_ops[feat].print(&ff, stdout);
- } else {
- fprintf(stdout, "# %s info available, use -I to display\n",
- feat_ops[feat].name);
+ if (dump_trace)
+ printf(", ");
+
+ if (print) {
+ if (feat_ops[feat].print)
+ feat_ops[feat].print(&ff, stdout);
+ else
+ printf("# %s", feat_ops[feat].name);
}
+
out:
free_event_desc(ff.events);
return ret;
@@ -4420,6 +4446,11 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
return ret;
}
+size_t perf_event__fprintf_attr(union perf_event *event, FILE *fp)
+{
+ return perf_event_attr__fprintf(fp, &event->attr.attr, __desc_attr__fprintf, NULL);
+}
+
int perf_event__process_attr(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct evlist **pevlist)
@@ -4429,6 +4460,9 @@ int perf_event__process_attr(const struct perf_tool *tool __maybe_unused,
struct evsel *evsel;
struct evlist *evlist = *pevlist;
+ if (dump_trace)
+ perf_event__fprintf_attr(event, stdout);
+
if (evlist == NULL) {
*pevlist = evlist = evlist__new();
if (evlist == NULL)
@@ -4495,8 +4529,8 @@ int perf_event__process_event_update(const struct perf_tool *tool __maybe_unused
case PERF_EVENT_UPDATE__CPUS:
map = cpu_map__new_data(&ev->cpus.cpus);
if (map) {
- perf_cpu_map__put(evsel->core.own_cpus);
- evsel->core.own_cpus = map;
+ perf_cpu_map__put(evsel->core.pmu_cpus);
+ evsel->core.pmu_cpus = map;
} else
pr_err("failed to get event_update cpus\n");
default:
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 5201af6305f4..d16dfceccd74 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -175,6 +175,7 @@ int perf_event__process_attr(const struct perf_tool *tool, union perf_event *eve
int perf_event__process_event_update(const struct perf_tool *tool,
union perf_event *event,
struct evlist **pevlist);
+size_t perf_event__fprintf_attr(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
#ifdef HAVE_LIBTRACEEVENT
int perf_event__process_tracing_data(struct perf_session *session,
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index d65228c11412..64ff427040c3 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -336,6 +336,69 @@ static void he_stat__decay(struct he_stat *he_stat)
he_stat->latency = (he_stat->latency * 7) / 8;
}
+static int hists__update_mem_stat(struct hists *hists, struct hist_entry *he,
+ struct mem_info *mi, u64 period)
+{
+ if (hists->nr_mem_stats == 0)
+ return 0;
+
+ if (he->mem_stat == NULL) {
+ he->mem_stat = calloc(hists->nr_mem_stats, sizeof(*he->mem_stat));
+ if (he->mem_stat == NULL)
+ return -1;
+ }
+
+ for (int i = 0; i < hists->nr_mem_stats; i++) {
+ int idx = mem_stat_index(hists->mem_stat_types[i],
+ mem_info__const_data_src(mi)->val);
+
+ assert(0 <= idx && idx < MEM_STAT_LEN);
+ he->mem_stat[i].entries[idx] += period;
+ hists->mem_stat_total[i].entries[idx] += period;
+ }
+ return 0;
+}
+
+static void hists__add_mem_stat(struct hists *hists, struct hist_entry *dst,
+ struct hist_entry *src)
+{
+ if (hists->nr_mem_stats == 0)
+ return;
+
+ for (int i = 0; i < hists->nr_mem_stats; i++) {
+ for (int k = 0; k < MEM_STAT_LEN; k++)
+ dst->mem_stat[i].entries[k] += src->mem_stat[i].entries[k];
+ }
+}
+
+static int hists__clone_mem_stat(struct hists *hists, struct hist_entry *dst,
+ struct hist_entry *src)
+{
+ if (hists->nr_mem_stats == 0)
+ return 0;
+
+ dst->mem_stat = calloc(hists->nr_mem_stats, sizeof(*dst->mem_stat));
+ if (dst->mem_stat == NULL)
+ return -1;
+
+ for (int i = 0; i < hists->nr_mem_stats; i++) {
+ for (int k = 0; k < MEM_STAT_LEN; k++)
+ dst->mem_stat[i].entries[k] = src->mem_stat[i].entries[k];
+ }
+ return 0;
+}
+
+static void hists__decay_mem_stat(struct hists *hists, struct hist_entry *he)
+{
+ if (hists->nr_mem_stats == 0)
+ return;
+
+ for (int i = 0; i < hists->nr_mem_stats; i++) {
+ for (int k = 0; k < MEM_STAT_LEN; k++)
+ he->mem_stat[i].entries[k] = (he->mem_stat[i].entries[k] * 7) / 8;
+ }
+}
+
static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
@@ -350,6 +413,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
if (symbol_conf.cumulate_callchain)
he_stat__decay(he->stat_acc);
decay_callchain(he->callchain);
+ hists__decay_mem_stat(hists, he);
if (!he->depth) {
u64 period_diff = prev_period - he->stat.period;
@@ -693,6 +757,10 @@ out:
he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
if (symbol_conf.cumulate_callchain)
he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
+ if (hists__update_mem_stat(hists, he, entry->mem_info, period) < 0) {
+ hist_entry__delete(he);
+ return NULL;
+ }
return he;
}
@@ -761,7 +829,7 @@ __hists__add_entry(struct hists *hists,
.period = sample->period,
.weight1 = sample->weight,
.weight2 = sample->ins_lat,
- .weight3 = sample->p_stage_cyc,
+ .weight3 = sample->weight3,
.latency = al->latency,
},
.parent = sym_parent,
@@ -778,7 +846,7 @@ __hists__add_entry(struct hists *hists,
.time = hist_time(sample->time),
.weight = sample->weight,
.ins_lat = sample->ins_lat,
- .p_stage_cyc = sample->p_stage_cyc,
+ .weight3 = sample->weight3,
.simd_flags = sample->simd_flags,
}, *he = hists__findnew_entry(hists, &entry, al, sample_self);
@@ -1423,6 +1491,7 @@ void hist_entry__delete(struct hist_entry *he)
free_callchain(he->callchain);
zfree(&he->trace_output);
zfree(&he->raw_data);
+ zfree(&he->mem_stat);
ops->free(he);
}
@@ -1572,6 +1641,7 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he);
if (!cmp) {
he_stat__add_stat(&iter->stat, &he->stat);
+ hists__add_mem_stat(hists, iter, he);
return iter;
}
@@ -1613,6 +1683,11 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
new->srcfile = NULL;
}
+ if (hists__clone_mem_stat(hists, new, he) < 0) {
+ hist_entry__delete(new);
+ return NULL;
+ }
+
rb_link_node(&new->rb_node_in, parent, p);
rb_insert_color_cached(&new->rb_node_in, root, leftmost);
return new;
@@ -1695,6 +1770,7 @@ static int hists__collapse_insert_entry(struct hists *hists,
he_stat__add_stat(&iter->stat, &he->stat);
if (symbol_conf.cumulate_callchain)
he_stat__add_stat(iter->stat_acc, he->stat_acc);
+ hists__add_mem_stat(hists, iter, he);
if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
struct callchain_cursor *cursor = get_tls_callchain_cursor();
@@ -2978,6 +3054,8 @@ static void hists_evsel__exit(struct evsel *evsel)
struct perf_hpp_list_node *node, *tmp;
hists__delete_all_entries(hists);
+ zfree(&hists->mem_stat_types);
+ zfree(&hists->mem_stat_total);
list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) {
perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) {
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 317d06cca8b8..70438d03ca9c 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -9,6 +9,7 @@
#include "events_stats.h"
#include "evsel.h"
#include "map_symbol.h"
+#include "mem-events.h"
#include "mutex.h"
#include "sample.h"
#include "spark.h"
@@ -41,6 +42,7 @@ enum hist_column {
HISTC_TIME,
HISTC_DSO,
HISTC_THREAD,
+ HISTC_TGID,
HISTC_COMM,
HISTC_CGROUP_ID,
HISTC_CGROUP,
@@ -100,6 +102,13 @@ enum hist_column {
struct thread;
struct dso;
+#define MEM_STAT_LEN 8
+
+struct he_mem_stat {
+ /* meaning of entries depends on enum mem_stat_type */
+ u64 entries[MEM_STAT_LEN];
+};
+
struct hists {
struct rb_root_cached entries_in_array[2];
struct rb_root_cached *entries_in;
@@ -125,6 +134,9 @@ struct hists {
struct perf_hpp_list *hpp_list;
struct list_head hpp_formats;
int nr_hpp_node;
+ int nr_mem_stats;
+ enum mem_stat_type *mem_stat_types;
+ struct he_mem_stat *mem_stat_total;
};
#define hists__has(__h, __f) (__h)->hpp_list->__f
@@ -232,6 +244,7 @@ struct hist_entry {
} pairs;
struct he_stat stat;
struct he_stat *stat_acc;
+ struct he_mem_stat *mem_stat;
struct map_symbol ms;
struct thread *thread;
struct comm *comm;
@@ -242,7 +255,8 @@ struct hist_entry {
u64 code_page_size;
u64 weight;
u64 ins_lat;
- u64 p_stage_cyc;
+ /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */
+ u64 weight3;
s32 socket;
s32 cpu;
int parallelism;
@@ -576,18 +590,25 @@ enum {
PERF_HPP__WEIGHT1,
PERF_HPP__WEIGHT2,
PERF_HPP__WEIGHT3,
+ PERF_HPP__MEM_STAT_OP,
+ PERF_HPP__MEM_STAT_CACHE,
+ PERF_HPP__MEM_STAT_MEMORY,
+ PERF_HPP__MEM_STAT_SNOOP,
+ PERF_HPP__MEM_STAT_DTLB,
PERF_HPP__MAX_INDEX
};
void perf_hpp__init(void);
-void perf_hpp__cancel_cumulate(void);
-void perf_hpp__cancel_latency(void);
+void perf_hpp__cancel_cumulate(struct evlist *evlist);
+void perf_hpp__cancel_latency(struct evlist *evlist);
void perf_hpp__setup_output_field(struct perf_hpp_list *list);
void perf_hpp__reset_output_field(struct perf_hpp_list *list);
void perf_hpp__append_sort_keys(struct perf_hpp_list *list);
int perf_hpp__setup_hists_formats(struct perf_hpp_list *list,
struct evlist *evlist);
+int perf_hpp__alloc_mem_stats(struct perf_hpp_list *list,
+ struct evlist *evlist);
bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
@@ -643,6 +664,9 @@ int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
struct hist_entry *he, hpp_field_fn get_field,
const char *fmtstr, hpp_snprint_fn print_fn,
enum perf_hpp_fmt_type fmtype);
+int hpp__fmt_mem_stat(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he, enum mem_stat_type mst,
+ const char *fmtstr, hpp_snprint_fn print_fn);
static inline void advance_hpp(struct perf_hpp *hpp, int inc)
{
diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c
index 3cce77fc8004..416dfea9ffff 100644
--- a/tools/perf/util/hwmon_pmu.c
+++ b/tools/perf/util/hwmon_pmu.c
@@ -104,7 +104,7 @@ static const char *const hwmon_units[HWMON_TYPE_MAX] = {
struct hwmon_pmu {
struct perf_pmu pmu;
struct hashmap events;
- int hwmon_dir_fd;
+ char *hwmon_dir;
};
/**
@@ -245,7 +245,7 @@ static int hwmon_pmu__read_events(struct hwmon_pmu *pmu)
return 0;
/* Use openat so that the directory contents are refreshed. */
- io_dir__init(&dir, openat(pmu->hwmon_dir_fd, ".", O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ io_dir__init(&dir, open(pmu->hwmon_dir, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
if (dir.dirfd < 0)
return -ENOENT;
@@ -283,7 +283,7 @@ static int hwmon_pmu__read_events(struct hwmon_pmu *pmu)
__set_bit(item, alarm ? value->alarm_items : value->items);
if (item == HWMON_ITEM_LABEL) {
char buf[128];
- int fd = openat(pmu->hwmon_dir_fd, ent->d_name, O_RDONLY);
+ int fd = openat(dir.dirfd, ent->d_name, O_RDONLY);
ssize_t read_len;
if (fd < 0)
@@ -342,46 +342,52 @@ err_out:
return err;
}
-struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, int hwmon_dir, const char *sysfs_name, const char *name)
+struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, const char *hwmon_dir,
+ const char *sysfs_name, const char *name)
{
- char buf[32];
+ char buf[64];
struct hwmon_pmu *hwm;
+ __u32 type = PERF_PMU_TYPE_HWMON_START + strtoul(sysfs_name + 5, NULL, 10);
+
+ if (type > PERF_PMU_TYPE_HWMON_END) {
+ pr_err("Unable to encode hwmon type from %s in valid PMU type\n", sysfs_name);
+ return NULL;
+ }
+
+ snprintf(buf, sizeof(buf), "hwmon_%s", name);
+ fix_name(buf + 6);
hwm = zalloc(sizeof(*hwm));
if (!hwm)
return NULL;
- hwm->hwmon_dir_fd = hwmon_dir;
- hwm->pmu.type = PERF_PMU_TYPE_HWMON_START + strtoul(sysfs_name + 5, NULL, 10);
- if (hwm->pmu.type > PERF_PMU_TYPE_HWMON_END) {
- pr_err("Unable to encode hwmon type from %s in valid PMU type\n", sysfs_name);
- goto err_out;
+ if (perf_pmu__init(&hwm->pmu, type, buf) != 0) {
+ perf_pmu__delete(&hwm->pmu);
+ return NULL;
+ }
+
+ hwm->hwmon_dir = strdup(hwmon_dir);
+ if (!hwm->hwmon_dir) {
+ perf_pmu__delete(&hwm->pmu);
+ return NULL;
}
- snprintf(buf, sizeof(buf), "hwmon_%s", name);
- fix_name(buf + 6);
- hwm->pmu.name = strdup(buf);
- if (!hwm->pmu.name)
- goto err_out;
hwm->pmu.alias_name = strdup(sysfs_name);
- if (!hwm->pmu.alias_name)
- goto err_out;
+ if (!hwm->pmu.alias_name) {
+ perf_pmu__delete(&hwm->pmu);
+ return NULL;
+ }
hwm->pmu.cpus = perf_cpu_map__new("0");
- if (!hwm->pmu.cpus)
- goto err_out;
+ if (!hwm->pmu.cpus) {
+ perf_pmu__delete(&hwm->pmu);
+ return NULL;
+ }
INIT_LIST_HEAD(&hwm->pmu.format);
- INIT_LIST_HEAD(&hwm->pmu.aliases);
INIT_LIST_HEAD(&hwm->pmu.caps);
hashmap__init(&hwm->events, hwmon_pmu__event_hashmap_hash,
hwmon_pmu__event_hashmap_equal, /*ctx=*/NULL);
list_add_tail(&hwm->pmu.list, pmus);
return &hwm->pmu;
-err_out:
- free((char *)hwm->pmu.name);
- free(hwm->pmu.alias_name);
- free(hwm);
- close(hwmon_dir);
- return NULL;
}
void hwmon_pmu__exit(struct perf_pmu *pmu)
@@ -398,7 +404,7 @@ void hwmon_pmu__exit(struct perf_pmu *pmu)
free(value);
}
hashmap__clear(&hwm->events);
- close(hwm->hwmon_dir_fd);
+ zfree(&hwm->hwmon_dir);
}
static size_t hwmon_pmu__describe_items(struct hwmon_pmu *hwm, char *out_buf, size_t out_buf_len,
@@ -408,6 +414,10 @@ static size_t hwmon_pmu__describe_items(struct hwmon_pmu *hwm, char *out_buf, si
size_t bit;
char buf[64];
size_t len = 0;
+ int dir = open(hwm->hwmon_dir, O_CLOEXEC | O_DIRECTORY | O_RDONLY);
+
+ if (dir < 0)
+ return 0;
for_each_set_bit(bit, items, HWMON_ITEM__MAX) {
int fd;
@@ -420,7 +430,7 @@ static size_t hwmon_pmu__describe_items(struct hwmon_pmu *hwm, char *out_buf, si
key.num,
hwmon_item_strs[bit],
is_alarm ? "_alarm" : "");
- fd = openat(hwm->hwmon_dir_fd, buf, O_RDONLY);
+ fd = openat(dir, buf, O_RDONLY);
if (fd > 0) {
ssize_t read_len = read(fd, buf, sizeof(buf));
@@ -442,6 +452,7 @@ static size_t hwmon_pmu__describe_items(struct hwmon_pmu *hwm, char *out_buf, si
close(fd);
}
}
+ close(dir);
return len;
}
@@ -711,6 +722,7 @@ int perf_pmus__read_hwmon_pmus(struct list_head *pmus)
size_t line_len;
int hwmon_dir, name_fd;
struct io io;
+ char buf2[128];
if (class_hwmon_ent->d_type != DT_LNK)
continue;
@@ -729,12 +741,13 @@ int perf_pmus__read_hwmon_pmus(struct list_head *pmus)
close(hwmon_dir);
continue;
}
- io__init(&io, name_fd, buf, sizeof(buf));
+ io__init(&io, name_fd, buf2, sizeof(buf2));
io__getline(&io, &line, &line_len);
if (line_len > 0 && line[line_len - 1] == '\n')
line[line_len - 1] = '\0';
- hwmon_pmu__new(pmus, hwmon_dir, class_hwmon_ent->d_name, line);
+ hwmon_pmu__new(pmus, buf, class_hwmon_ent->d_name, line);
close(name_fd);
+ close(hwmon_dir);
}
free(line);
close(class_hwmon_dir.dirfd);
@@ -752,6 +765,10 @@ int evsel__hwmon_pmu_open(struct evsel *evsel,
.type_and_num = evsel->core.attr.config,
};
int idx = 0, thread = 0, nthreads, err = 0;
+ int dir = open(hwm->hwmon_dir, O_CLOEXEC | O_DIRECTORY | O_RDONLY);
+
+ if (dir < 0)
+ return -errno;
nthreads = perf_thread_map__nr(threads);
for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
@@ -762,7 +779,7 @@ int evsel__hwmon_pmu_open(struct evsel *evsel,
snprintf(buf, sizeof(buf), "%s%d_input",
hwmon_type_strs[key.type], key.num);
- fd = openat(hwm->hwmon_dir_fd, buf, O_RDONLY);
+ fd = openat(dir, buf, O_RDONLY);
FD(evsel, idx, thread) = fd;
if (fd < 0) {
err = -errno;
@@ -770,6 +787,7 @@ int evsel__hwmon_pmu_open(struct evsel *evsel,
}
}
}
+ close(dir);
return 0;
out_close:
if (err)
@@ -783,6 +801,7 @@ out_close:
}
thread = nthreads;
} while (--idx >= 0);
+ close(dir);
return err;
}
diff --git a/tools/perf/util/hwmon_pmu.h b/tools/perf/util/hwmon_pmu.h
index b3329774d2b2..dc711b289ff5 100644
--- a/tools/perf/util/hwmon_pmu.h
+++ b/tools/perf/util/hwmon_pmu.h
@@ -135,14 +135,14 @@ bool parse_hwmon_filename(const char *filename,
* hwmon_pmu__new() - Allocate and construct a hwmon PMU.
*
* @pmus: The list of PMUs to be added to.
- * @hwmon_dir: An O_DIRECTORY file descriptor for a hwmon directory.
+ * @hwmon_dir: The path to a hwmon directory.
* @sysfs_name: Name of the hwmon sysfs directory like hwmon0.
* @name: The contents of the "name" file in the hwmon directory.
*
* Exposed for testing. Regular construction should happen via
* perf_pmus__read_hwmon_pmus.
*/
-struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, int hwmon_dir,
+struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, const char *hwmon_dir,
const char *sysfs_name, const char *name);
void hwmon_pmu__exit(struct perf_pmu *pmu);
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
index 178b00205fe6..89979ca23c3f 100644
--- a/tools/perf/util/include/linux/linkage.h
+++ b/tools/perf/util/include/linux/linkage.h
@@ -132,4 +132,8 @@
SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#endif
+#ifndef SYM_PIC_ALIAS
+#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_T_FUNC, SYM_L_GLOBAL)
+#endif
+
#endif /* PERF_LINUX_LINKAGE_H_ */
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 4e8a9b172fbc..9b1011fe4826 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -127,6 +127,7 @@ struct intel_pt {
bool single_pebs;
bool sample_pebs;
+ int pebs_data_src_fmt;
struct evsel *pebs_evsel;
u64 evt_sample_type;
@@ -175,6 +176,7 @@ enum switch_state {
struct intel_pt_pebs_event {
struct evsel *evsel;
u64 id;
+ int data_src_fmt;
};
struct intel_pt_queue {
@@ -2272,7 +2274,146 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack,
}
}
-static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id)
+#define P(a, b) PERF_MEM_S(a, b)
+#define OP_LH (P(OP, LOAD) | P(LVL, HIT))
+#define LEVEL(x) P(LVLNUM, x)
+#define REM P(REMOTE, REMOTE)
+#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
+
+#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
+#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
+
+/* Based on kernel __intel_pmu_pebs_data_source_grt() and pebs_data_source */
+static const u64 pebs_data_source_grt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = {
+ P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */
+ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */
+ OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */
+ OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* RAM hit|SNP Hit */
+ OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* Remote L3 hit|SNP Hit */
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* RAM hit|SNP None or Miss */
+ OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* Remote RAM hit|SNP None or Miss */
+ OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */
+ OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */
+};
+
+/* Based on kernel __intel_pmu_pebs_data_source_cmt() and pebs_data_source */
+static const u64 pebs_data_source_cmt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = {
+ P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */
+ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */
+ OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* L3 hit|SNP Hit */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP HitM */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP HitM */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP Fwd */
+ OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* RAM hit|SNP Hit */
+ OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE), /* Remote L3 hit|SNP Hit */
+ OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD), /* RAM hit|SNP None or Miss */
+ OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM), /* Remote RAM hit|SNP None or Miss */
+ OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */
+ OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */
+};
+
+/* Based on kernel pebs_set_tlb_lock() */
+static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
+{
+ /*
+ * TLB access
+ * 0 = did not miss 2nd level TLB
+ * 1 = missed 2nd level TLB
+ */
+ if (tlb)
+ *val |= P(TLB, MISS) | P(TLB, L2);
+ else
+ *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+ /* locked prefix */
+ if (lock)
+ *val |= P(LOCK, LOCKED);
+}
+
+/* Based on kernel __grt_latency_data() */
+static u64 intel_pt_grt_latency_data(u8 dse, bool tlb, bool lock, bool blk,
+ const u64 *pebs_data_source)
+{
+ u64 val;
+
+ dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
+ val = pebs_data_source[dse];
+
+ pebs_set_tlb_lock(&val, tlb, lock);
+
+ if (blk)
+ val |= P(BLK, DATA);
+ else
+ val |= P(BLK, NA);
+
+ return val;
+}
+
+/* Default value for data source */
+#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\
+ PERF_MEM_S(LVL, NA) |\
+ PERF_MEM_S(SNOOP, NA) |\
+ PERF_MEM_S(LOCK, NA) |\
+ PERF_MEM_S(TLB, NA) |\
+ PERF_MEM_S(LVLNUM, NA))
+
+enum DATA_SRC_FORMAT {
+ DATA_SRC_FORMAT_ERR = -1,
+ DATA_SRC_FORMAT_NA = 0,
+ DATA_SRC_FORMAT_GRT = 1,
+ DATA_SRC_FORMAT_CMT = 2,
+};
+
+/* Based on kernel grt_latency_data() and cmt_latency_data */
+static u64 intel_pt_get_data_src(u64 mem_aux_info, int data_src_fmt)
+{
+ switch (data_src_fmt) {
+ case DATA_SRC_FORMAT_GRT: {
+ union {
+ u64 val;
+ struct {
+ unsigned int dse:4;
+ unsigned int locked:1;
+ unsigned int stlb_miss:1;
+ unsigned int fwd_blk:1;
+ unsigned int reserved:25;
+ };
+ } x = {.val = mem_aux_info};
+ return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk,
+ pebs_data_source_grt);
+ }
+ case DATA_SRC_FORMAT_CMT: {
+ union {
+ u64 val;
+ struct {
+ unsigned int dse:5;
+ unsigned int locked:1;
+ unsigned int stlb_miss:1;
+ unsigned int fwd_blk:1;
+ unsigned int reserved:24;
+ };
+ } x = {.val = mem_aux_info};
+ return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk,
+ pebs_data_source_cmt);
+ }
+ default:
+ return PERF_MEM_NA;
+ }
+}
+
+static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel,
+ u64 id, int data_src_fmt)
{
const struct intel_pt_blk_items *items = &ptq->state->items;
struct perf_sample sample;
@@ -2393,6 +2534,18 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse
}
}
+ if (sample_type & PERF_SAMPLE_DATA_SRC) {
+ if (items->has_mem_aux_info && data_src_fmt) {
+ if (data_src_fmt < 0) {
+ pr_err("Intel PT missing data_src info\n");
+ return -1;
+ }
+ sample.data_src = intel_pt_get_data_src(items->mem_aux_info, data_src_fmt);
+ } else {
+ sample.data_src = PERF_MEM_NA;
+ }
+ }
+
if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
u64 ax = items->has_rax ? items->rax : 0;
/* Refer kernel's intel_hsw_transaction() */
@@ -2413,9 +2566,10 @@ static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
struct evsel *evsel = pt->pebs_evsel;
+ int data_src_fmt = pt->pebs_data_src_fmt;
u64 id = evsel->core.id[0];
- return intel_pt_do_synth_pebs_sample(ptq, evsel, id);
+ return intel_pt_do_synth_pebs_sample(ptq, evsel, id, data_src_fmt);
}
static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
@@ -2440,7 +2594,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
hw_id);
return intel_pt_synth_single_pebs_sample(ptq);
}
- err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id);
+ err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id, pe->data_src_fmt);
if (err)
return err;
}
@@ -3407,6 +3561,49 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt,
event->itrace_start.tid);
}
+/*
+ * Events with data_src are identified by L1_Hit_Indication
+ * refer https://github.com/intel/perfmon
+ */
+static int intel_pt_data_src_fmt(struct intel_pt *pt, struct evsel *evsel)
+{
+ struct perf_env *env = pt->machine->env;
+ int fmt = DATA_SRC_FORMAT_NA;
+
+ if (!env->cpuid)
+ return DATA_SRC_FORMAT_ERR;
+
+ /*
+ * PEBS-via-PT is only supported on E-core non-hybrid. Of those only
+ * Gracemont and Crestmont have data_src. Check for:
+ * Alderlake N (Gracemont)
+ * Sierra Forest (Crestmont)
+ * Grand Ridge (Crestmont)
+ */
+
+ if (!strncmp(env->cpuid, "GenuineIntel,6,190,", 19))
+ fmt = DATA_SRC_FORMAT_GRT;
+
+ if (!strncmp(env->cpuid, "GenuineIntel,6,175,", 19) ||
+ !strncmp(env->cpuid, "GenuineIntel,6,182,", 19))
+ fmt = DATA_SRC_FORMAT_CMT;
+
+ if (fmt == DATA_SRC_FORMAT_NA)
+ return fmt;
+
+ /*
+ * Only data_src events are:
+ * mem-loads event=0xd0,umask=0x5
+ * mem-stores event=0xd0,umask=0x6
+ */
+ if (evsel->core.attr.type == PERF_TYPE_RAW &&
+ ((evsel->core.attr.config & 0xffff) == 0x5d0 ||
+ (evsel->core.attr.config & 0xffff) == 0x6d0))
+ return fmt;
+
+ return DATA_SRC_FORMAT_NA;
+}
+
static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt,
union perf_event *event,
struct perf_sample *sample)
@@ -3427,6 +3624,7 @@ static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt,
ptq->pebs[hw_id].evsel = evsel;
ptq->pebs[hw_id].id = sample->id;
+ ptq->pebs[hw_id].data_src_fmt = intel_pt_data_src_fmt(pt, evsel);
return 0;
}
@@ -3976,6 +4174,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt)
}
pt->single_pebs = true;
pt->sample_pebs = true;
+ pt->pebs_data_src_fmt = intel_pt_data_src_fmt(pt, evsel);
pt->pebs_evsel = evsel;
}
}
diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c
index 2c421b475b3b..8c9aee157ec4 100644
--- a/tools/perf/util/intel-tpebs.c
+++ b/tools/perf/util/intel-tpebs.c
@@ -3,7 +3,7 @@
* intel_tpebs.c: Intel TPEBS support
*/
-
+#include <api/fs/fs.h>
#include <sys/param.h>
#include <subcmd/run-command.h>
#include <thread.h>
@@ -12,13 +12,17 @@
#include <linux/zalloc.h>
#include <linux/err.h>
#include "sample.h"
+#include "counts.h"
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
+#include "mutex.h"
#include "session.h"
+#include "stat.h"
#include "tool.h"
#include "cpumap.h"
#include "metricgroup.h"
+#include "stat.h"
#include <sys/stat.h>
#include <sys/file.h>
#include <poll.h>
@@ -27,95 +31,155 @@
#define PERF_DATA "-"
bool tpebs_recording;
-static pid_t tpebs_pid = -1;
-static size_t tpebs_event_size;
+enum tpebs_mode tpebs_mode;
static LIST_HEAD(tpebs_results);
static pthread_t tpebs_reader_thread;
-static struct child_process *tpebs_cmd;
+static struct child_process tpebs_cmd;
+static int control_fd[2], ack_fd[2];
+static struct mutex tpebs_mtx;
struct tpebs_retire_lat {
struct list_head nd;
- /* Event name */
- const char *name;
- /* Event name with the TPEBS modifier R */
- const char *tpebs_name;
- /* Count of retire_latency values found in sample data */
- size_t count;
- /* Sum of all the retire_latency values in sample data */
- int sum;
- /* Average of retire_latency, val = sum / count */
- double val;
+ /** @evsel: The evsel that opened the retire_lat event. */
+ struct evsel *evsel;
+ /** @event: Event passed to perf record. */
+ char *event;
+ /** @stats: Recorded retirement latency stats. */
+ struct stats stats;
+ /** @last: Last retirement latency read. */
+ uint64_t last;
+ /* Has the event been sent to perf record? */
+ bool started;
};
-static int get_perf_record_args(const char **record_argv, char buf[],
- const char *cpumap_buf)
+static void tpebs_mtx_init(void)
+{
+ mutex_init(&tpebs_mtx);
+}
+
+static struct mutex *tpebs_mtx_get(void)
{
- struct tpebs_retire_lat *e;
- int i = 0;
+ static pthread_once_t tpebs_mtx_once = PTHREAD_ONCE_INIT;
+
+ pthread_once(&tpebs_mtx_once, tpebs_mtx_init);
+ return &tpebs_mtx;
+}
- pr_debug("tpebs: Prepare perf record for retire_latency\n");
+static struct tpebs_retire_lat *tpebs_retire_lat__find(struct evsel *evsel)
+ EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get());
+
+static int evsel__tpebs_start_perf_record(struct evsel *evsel)
+{
+ const char **record_argv;
+ int tpebs_event_size = 0, i = 0, ret;
+ char control_fd_buf[32];
+ char cpumap_buf[50];
+ struct tpebs_retire_lat *t;
+
+ list_for_each_entry(t, &tpebs_results, nd)
+ tpebs_event_size++;
+
+ record_argv = malloc((10 + 2 * tpebs_event_size) * sizeof(*record_argv));
+ if (!record_argv)
+ return -ENOMEM;
record_argv[i++] = "perf";
record_argv[i++] = "record";
record_argv[i++] = "-W";
record_argv[i++] = "--synth=no";
- record_argv[i++] = buf;
- if (!cpumap_buf) {
- pr_err("tpebs: Require cpumap list to run sampling\n");
- return -ECANCELED;
- }
- /* Use -C when cpumap_buf is not "-1" */
- if (strcmp(cpumap_buf, "-1")) {
+ scnprintf(control_fd_buf, sizeof(control_fd_buf), "--control=fd:%d,%d",
+ control_fd[0], ack_fd[1]);
+ record_argv[i++] = control_fd_buf;
+
+ record_argv[i++] = "-o";
+ record_argv[i++] = PERF_DATA;
+
+ if (!perf_cpu_map__is_any_cpu_or_is_empty(evsel->evlist->core.user_requested_cpus)) {
+ cpu_map__snprint(evsel->evlist->core.user_requested_cpus, cpumap_buf,
+ sizeof(cpumap_buf));
record_argv[i++] = "-C";
record_argv[i++] = cpumap_buf;
}
- list_for_each_entry(e, &tpebs_results, nd) {
+ list_for_each_entry(t, &tpebs_results, nd) {
record_argv[i++] = "-e";
- record_argv[i++] = e->name;
+ record_argv[i++] = t->event;
}
+ record_argv[i++] = NULL;
+ assert(i == 10 + 2 * tpebs_event_size || i == 8 + 2 * tpebs_event_size);
+ /* Note, no workload given so system wide is implied. */
+
+ assert(tpebs_cmd.pid == 0);
+ tpebs_cmd.argv = record_argv;
+ tpebs_cmd.out = -1;
+ ret = start_command(&tpebs_cmd);
+ zfree(&tpebs_cmd.argv);
+ list_for_each_entry(t, &tpebs_results, nd)
+ t->started = true;
- record_argv[i++] = "-o";
- record_argv[i++] = PERF_DATA;
-
- return 0;
+ return ret;
}
-static int prepare_run_command(const char **argv)
+static bool is_child_pid(pid_t parent, pid_t child)
{
- tpebs_cmd = zalloc(sizeof(struct child_process));
- if (!tpebs_cmd)
- return -ENOMEM;
- tpebs_cmd->argv = argv;
- tpebs_cmd->out = -1;
- return 0;
+ if (parent < 0 || child < 0)
+ return false;
+
+ while (true) {
+ char path[PATH_MAX];
+ char line[256];
+ FILE *fp;
+
+new_child:
+ if (parent == child)
+ return true;
+
+ if (child <= 0)
+ return false;
+
+ scnprintf(path, sizeof(path), "%s/%d/status", procfs__mountpoint(), child);
+ fp = fopen(path, "r");
+ if (!fp) {
+ /* Presumably the process went away. Assume not a child. */
+ return false;
+ }
+ while (fgets(line, sizeof(line), fp) != NULL) {
+ if (strncmp(line, "PPid:", 5) == 0) {
+ fclose(fp);
+ if (sscanf(line + 5, "%d", &child) != 1) {
+ /* Unexpected error parsing. */
+ return false;
+ }
+ goto new_child;
+ }
+ }
+ /* Unexpected EOF. */
+ fclose(fp);
+ return false;
+ }
}
-static int start_perf_record(int control_fd[], int ack_fd[],
- const char *cpumap_buf)
+static bool should_ignore_sample(const struct perf_sample *sample, const struct tpebs_retire_lat *t)
{
- const char **record_argv;
- int ret;
- char buf[32];
+ pid_t workload_pid, sample_pid = sample->pid;
- scnprintf(buf, sizeof(buf), "--control=fd:%d,%d", control_fd[0], ack_fd[1]);
+ /*
+ * During evlist__purge the evlist will be removed prior to the
+ * evsel__exit calling evsel__tpebs_close and taking the
+ * tpebs_mtx. Avoid a segfault by ignoring samples in this case.
+ */
+ if (t->evsel->evlist == NULL)
+ return true;
- record_argv = calloc(12 + 2 * tpebs_event_size, sizeof(char *));
- if (!record_argv)
- return -ENOMEM;
+ workload_pid = t->evsel->evlist->workload.pid;
+ if (workload_pid < 0 || workload_pid == sample_pid)
+ return false;
- ret = get_perf_record_args(record_argv, buf, cpumap_buf);
- if (ret)
- goto out;
+ if (!t->evsel->core.attr.inherit)
+ return true;
- ret = prepare_run_command(record_argv);
- if (ret)
- goto out;
- ret = start_command(tpebs_cmd);
-out:
- free(record_argv);
- return ret;
+ return !is_child_pid(workload_pid, sample_pid);
}
static int process_sample_event(const struct perf_tool *tool __maybe_unused,
@@ -124,27 +188,32 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
struct evsel *evsel,
struct machine *machine __maybe_unused)
{
- int ret = 0;
- const char *evname;
struct tpebs_retire_lat *t;
- evname = evsel__name(evsel);
-
+ mutex_lock(tpebs_mtx_get());
+ if (tpebs_cmd.pid == 0) {
+ /* Record has terminated. */
+ mutex_unlock(tpebs_mtx_get());
+ return 0;
+ }
+ t = tpebs_retire_lat__find(evsel);
+ if (!t) {
+ mutex_unlock(tpebs_mtx_get());
+ return -EINVAL;
+ }
+ if (should_ignore_sample(sample, t)) {
+ mutex_unlock(tpebs_mtx_get());
+ return 0;
+ }
/*
* Need to handle per core results? We are assuming average retire
* latency value will be used. Save the number of samples and the sum of
* retire latency value for each event.
*/
- list_for_each_entry(t, &tpebs_results, nd) {
- if (!strcmp(evname, t->name)) {
- t->count += 1;
- t->sum += sample->retire_lat;
- t->val = (double) t->sum / t->count;
- break;
- }
- }
-
- return ret;
+ t->last = sample->weight3;
+ update_stats(&t->stats, sample->weight3);
+ mutex_unlock(tpebs_mtx_get());
+ return 0;
}
static int process_feature_event(struct perf_session *session,
@@ -155,14 +224,13 @@ static int process_feature_event(struct perf_session *session,
return 0;
}
-static void *__sample_reader(void *arg)
+static void *__sample_reader(void *arg __maybe_unused)
{
- struct child_process *child = arg;
struct perf_session *session;
struct perf_data data = {
.mode = PERF_DATA_MODE_READ,
.path = PERF_DATA,
- .file.fd = child->out,
+ .file.fd = tpebs_cmd.out,
};
struct perf_tool tool;
@@ -180,94 +248,277 @@ static void *__sample_reader(void *arg)
return NULL;
}
+static int tpebs_send_record_cmd(const char *msg) EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get())
+{
+ struct pollfd pollfd = { .events = POLLIN, };
+ int ret, len, retries = 0;
+ char ack_buf[8];
+
+ /* Check if the command exited before the send, done with the lock held. */
+ if (tpebs_cmd.pid == 0)
+ return 0;
+
+ /*
+ * Let go of the lock while sending/receiving as blocking can starve the
+ * sample reading thread.
+ */
+ mutex_unlock(tpebs_mtx_get());
+
+ /* Send perf record command.*/
+ len = strlen(msg);
+ ret = write(control_fd[1], msg, len);
+ if (ret != len) {
+ pr_err("perf record control write control message '%s' failed\n", msg);
+ ret = -EPIPE;
+ goto out;
+ }
+
+ if (!strcmp(msg, EVLIST_CTL_CMD_STOP_TAG)) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Wait for an ack. */
+ pollfd.fd = ack_fd[0];
+
+ /*
+ * We need this poll to ensure the ack_fd PIPE will not hang
+ * when perf record failed for any reason. The timeout value
+ * 3000ms is an empirical selection.
+ */
+again:
+ if (!poll(&pollfd, 1, 500)) {
+ if (check_if_command_finished(&tpebs_cmd)) {
+ ret = 0;
+ goto out;
+ }
+
+ if (retries++ < 6)
+ goto again;
+ pr_err("tpebs failed: perf record ack timeout for '%s'\n", msg);
+ ret = -ETIMEDOUT;
+ goto out;
+ }
+
+ if (!(pollfd.revents & POLLIN)) {
+ if (check_if_command_finished(&tpebs_cmd)) {
+ ret = 0;
+ goto out;
+ }
+
+ pr_err("tpebs failed: did not received an ack for '%s'\n", msg);
+ ret = -EPIPE;
+ goto out;
+ }
+
+ ret = read(ack_fd[0], ack_buf, sizeof(ack_buf));
+ if (ret > 0)
+ ret = strcmp(ack_buf, EVLIST_CTL_CMD_ACK_TAG);
+ else
+ pr_err("tpebs: perf record control ack failed\n");
+out:
+ /* Re-take lock as expected by caller. */
+ mutex_lock(tpebs_mtx_get());
+ return ret;
+}
+
/*
* tpebs_stop - stop the sample data read thread and the perf record process.
*/
-static int tpebs_stop(void)
+static int tpebs_stop(void) EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get())
{
int ret = 0;
/* Like tpebs_start, we should only run tpebs_end once. */
- if (tpebs_pid != -1) {
- kill(tpebs_cmd->pid, SIGTERM);
- tpebs_pid = -1;
+ if (tpebs_cmd.pid != 0) {
+ tpebs_send_record_cmd(EVLIST_CTL_CMD_STOP_TAG);
+ tpebs_cmd.pid = 0;
+ mutex_unlock(tpebs_mtx_get());
pthread_join(tpebs_reader_thread, NULL);
- close(tpebs_cmd->out);
- ret = finish_command(tpebs_cmd);
+ mutex_lock(tpebs_mtx_get());
+ close(control_fd[0]);
+ close(control_fd[1]);
+ close(ack_fd[0]);
+ close(ack_fd[1]);
+ close(tpebs_cmd.out);
+ ret = finish_command(&tpebs_cmd);
+ tpebs_cmd.pid = 0;
if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL)
ret = 0;
}
return ret;
}
-/*
- * tpebs_start - start tpebs execution.
- * @evsel_list: retire_latency evsels in this list will be selected and sampled
- * to get the average retire_latency value.
- *
- * This function will be called from evlist level later when evlist__open() is
- * called consistently.
+/**
+ * evsel__tpebs_event() - Create string event encoding to pass to `perf record`.
*/
-int tpebs_start(struct evlist *evsel_list)
+static int evsel__tpebs_event(struct evsel *evsel, char **event)
{
- int ret = 0;
- struct evsel *evsel;
- char cpumap_buf[50];
+ char *name, *modifier;
+ int ret;
+
+ name = strdup(evsel->name);
+ if (!name)
+ return -ENOMEM;
+
+ modifier = strrchr(name, 'R');
+ if (!modifier) {
+ ret = -EINVAL;
+ goto out;
+ }
+ *modifier = 'p';
+ modifier = strchr(name, ':');
+ if (!modifier)
+ modifier = strrchr(name, '/');
+ if (!modifier) {
+ ret = -EINVAL;
+ goto out;
+ }
+ *modifier = '\0';
+ if (asprintf(event, "%s/name=tpebs_event_%p/%s", name, evsel, modifier + 1) > 0)
+ ret = 0;
+ else
+ ret = -ENOMEM;
+out:
+ if (ret)
+ pr_err("Tpebs event modifier broken '%s'\n", evsel->name);
+ free(name);
+ return ret;
+}
+
+static struct tpebs_retire_lat *tpebs_retire_lat__new(struct evsel *evsel)
+{
+ struct tpebs_retire_lat *result = zalloc(sizeof(*result));
+ int ret;
+
+ if (!result)
+ return NULL;
+
+ ret = evsel__tpebs_event(evsel, &result->event);
+ if (ret) {
+ free(result);
+ return NULL;
+ }
+ result->evsel = evsel;
+ return result;
+}
+
+static void tpebs_retire_lat__delete(struct tpebs_retire_lat *r)
+{
+ zfree(&r->event);
+ free(r);
+}
+
+static struct tpebs_retire_lat *tpebs_retire_lat__find(struct evsel *evsel)
+{
+ struct tpebs_retire_lat *t;
+ unsigned long num;
+ const char *evsel_name;
/*
- * We should only run tpebs_start when tpebs_recording is enabled.
- * And we should only run it once with all the required events.
+ * Evsels will match for evlist with the retirement latency event. The
+ * name with "tpebs_event_" prefix will be present on events being read
+ * from `perf record`.
*/
- if (tpebs_pid != -1 || !tpebs_recording)
+ if (evsel__is_retire_lat(evsel)) {
+ list_for_each_entry(t, &tpebs_results, nd) {
+ if (t->evsel == evsel)
+ return t;
+ }
+ return NULL;
+ }
+ evsel_name = strstr(evsel->name, "tpebs_event_");
+ if (!evsel_name) {
+ /* Unexpected that the perf record should have other events. */
+ return NULL;
+ }
+ errno = 0;
+ num = strtoull(evsel_name + 12, NULL, 16);
+ if (errno) {
+ pr_err("Bad evsel for tpebs find '%s'\n", evsel->name);
+ return NULL;
+ }
+ list_for_each_entry(t, &tpebs_results, nd) {
+ if ((unsigned long)t->evsel == num)
+ return t;
+ }
+ return NULL;
+}
+
+/**
+ * evsel__tpebs_prepare - create tpebs data structures ready for opening.
+ * @evsel: retire_latency evsel, all evsels on its list will be prepared.
+ */
+static int evsel__tpebs_prepare(struct evsel *evsel)
+{
+ struct evsel *pos;
+ struct tpebs_retire_lat *tpebs_event;
+
+ mutex_lock(tpebs_mtx_get());
+ tpebs_event = tpebs_retire_lat__find(evsel);
+ if (tpebs_event) {
+ /* evsel, or an identically named one, was already prepared. */
+ mutex_unlock(tpebs_mtx_get());
return 0;
+ }
+ tpebs_event = tpebs_retire_lat__new(evsel);
+ if (!tpebs_event) {
+ mutex_unlock(tpebs_mtx_get());
+ return -ENOMEM;
+ }
+ list_add_tail(&tpebs_event->nd, &tpebs_results);
+ mutex_unlock(tpebs_mtx_get());
- cpu_map__snprint(evsel_list->core.user_requested_cpus, cpumap_buf, sizeof(cpumap_buf));
/*
- * Prepare perf record for sampling event retire_latency before fork and
- * prepare workload
+ * Eagerly prepare all other evsels on the list to try to ensure that by
+ * open they are all known.
*/
- evlist__for_each_entry(evsel_list, evsel) {
- int i;
- char *name;
- struct tpebs_retire_lat *new;
+ evlist__for_each_entry(evsel->evlist, pos) {
+ int ret;
- if (!evsel->retire_lat)
+ if (pos == evsel || !pos->retire_lat)
continue;
- pr_debug("tpebs: Retire_latency of event %s is required\n", evsel->name);
- for (i = strlen(evsel->name) - 1; i > 0; i--) {
- if (evsel->name[i] == 'R')
- break;
- }
- if (i <= 0 || evsel->name[i] != 'R') {
- ret = -1;
- goto err;
- }
+ ret = evsel__tpebs_prepare(pos);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
- name = strdup(evsel->name);
- if (!name) {
- ret = -ENOMEM;
- goto err;
- }
- name[i] = 'p';
+/**
+ * evsel__tpebs_open - starts tpebs execution.
+ * @evsel: retire_latency evsel, all evsels on its list will be selected. Each
+ * evsel is sampled to get the average retire_latency value.
+ */
+int evsel__tpebs_open(struct evsel *evsel)
+{
+ int ret;
+ bool tpebs_empty;
- new = zalloc(sizeof(*new));
- if (!new) {
- ret = -1;
- zfree(&name);
- goto err;
- }
- new->name = name;
- new->tpebs_name = evsel->name;
- list_add_tail(&new->nd, &tpebs_results);
- tpebs_event_size += 1;
+ /* We should only run tpebs_start when tpebs_recording is enabled. */
+ if (!tpebs_recording)
+ return 0;
+ /* Only start the events once. */
+ if (tpebs_cmd.pid != 0) {
+ struct tpebs_retire_lat *t;
+ bool valid;
+
+ mutex_lock(tpebs_mtx_get());
+ t = tpebs_retire_lat__find(evsel);
+ valid = t && t->started;
+ mutex_unlock(tpebs_mtx_get());
+ /* May fail as the event wasn't started. */
+ return valid ? 0 : -EBUSY;
}
- if (tpebs_event_size > 0) {
- struct pollfd pollfd = { .events = POLLIN, };
- int control_fd[2], ack_fd[2], len;
- char ack_buf[8];
+ ret = evsel__tpebs_prepare(evsel);
+ if (ret)
+ return ret;
+ mutex_lock(tpebs_mtx_get());
+ tpebs_empty = list_empty(&tpebs_results);
+ if (!tpebs_empty) {
/*Create control and ack fd for --control*/
if (pipe(control_fd) < 0) {
pr_err("tpebs: Failed to create control fifo");
@@ -280,153 +531,131 @@ int tpebs_start(struct evlist *evsel_list)
goto out;
}
- ret = start_perf_record(control_fd, ack_fd, cpumap_buf);
+ ret = evsel__tpebs_start_perf_record(evsel);
if (ret)
goto out;
- tpebs_pid = tpebs_cmd->pid;
- if (pthread_create(&tpebs_reader_thread, NULL, __sample_reader, tpebs_cmd)) {
- kill(tpebs_cmd->pid, SIGTERM);
- close(tpebs_cmd->out);
- pr_err("Could not create thread to process sample data.\n");
- ret = -1;
- goto out;
- }
- /* Wait for perf record initialization.*/
- len = strlen(EVLIST_CTL_CMD_ENABLE_TAG);
- ret = write(control_fd[1], EVLIST_CTL_CMD_ENABLE_TAG, len);
- if (ret != len) {
- pr_err("perf record control write control message failed\n");
- goto out;
- }
-
- /* wait for an ack */
- pollfd.fd = ack_fd[0];
-
- /*
- * We need this poll to ensure the ack_fd PIPE will not hang
- * when perf record failed for any reason. The timeout value
- * 3000ms is an empirical selection.
- */
- if (!poll(&pollfd, 1, 3000)) {
- pr_err("tpebs failed: perf record ack timeout\n");
- ret = -1;
- goto out;
- }
- if (!(pollfd.revents & POLLIN)) {
- pr_err("tpebs failed: did not received an ack\n");
+ if (pthread_create(&tpebs_reader_thread, /*attr=*/NULL, __sample_reader,
+ /*arg=*/NULL)) {
+ kill(tpebs_cmd.pid, SIGTERM);
+ close(tpebs_cmd.out);
+ pr_err("Could not create thread to process sample data.\n");
ret = -1;
goto out;
}
-
- ret = read(ack_fd[0], ack_buf, sizeof(ack_buf));
- if (ret > 0)
- ret = strcmp(ack_buf, EVLIST_CTL_CMD_ACK_TAG);
- else {
- pr_err("tpebs: perf record control ack failed\n");
- goto out;
- }
+ ret = tpebs_send_record_cmd(EVLIST_CTL_CMD_ENABLE_TAG);
+ }
out:
- close(control_fd[0]);
- close(control_fd[1]);
- close(ack_fd[0]);
- close(ack_fd[1]);
+ if (ret) {
+ struct tpebs_retire_lat *t = tpebs_retire_lat__find(evsel);
+
+ list_del_init(&t->nd);
+ tpebs_retire_lat__delete(t);
}
-err:
- if (ret)
- tpebs_delete();
+ mutex_unlock(tpebs_mtx_get());
return ret;
}
-
-int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread)
+int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread)
{
- __u64 val;
- bool found = false;
+ struct perf_counts_values *count, *old_count = NULL;
struct tpebs_retire_lat *t;
- struct perf_counts_values *count;
+ uint64_t val;
+ int ret;
- /* Non reitre_latency evsel should never enter this function. */
- if (!evsel__is_retire_lat(evsel))
- return -1;
+ /* Only set retire_latency value to the first CPU and thread. */
+ if (cpu_map_idx != 0 || thread != 0)
+ return 0;
+
+ if (evsel->prev_raw_counts)
+ old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
- /*
- * Need to stop the forked record to ensure get sampled data from the
- * PIPE to process and get non-zero retire_lat value for hybrid.
- */
- tpebs_stop();
count = perf_counts(evsel->counts, cpu_map_idx, thread);
- list_for_each_entry(t, &tpebs_results, nd) {
- if (t->tpebs_name == evsel->name ||
- (evsel->metric_id && !strcmp(t->tpebs_name, evsel->metric_id))) {
- found = true;
+ mutex_lock(tpebs_mtx_get());
+ t = tpebs_retire_lat__find(evsel);
+ /*
+ * If reading the first tpebs result, send a ping to the record
+ * process. Allow the sample reader a chance to read by releasing and
+ * reacquiring the lock.
+ */
+ if (t && &t->nd == tpebs_results.next) {
+ ret = tpebs_send_record_cmd(EVLIST_CTL_CMD_PING_TAG);
+ mutex_unlock(tpebs_mtx_get());
+ if (ret)
+ return ret;
+ mutex_lock(tpebs_mtx_get());
+ }
+ if (t == NULL || t->stats.n == 0) {
+ /* No sample data, use default. */
+ if (tpebs_recording) {
+ pr_warning_once(
+ "Using precomputed retirement latency data as no samples\n");
+ }
+ val = 0;
+ switch (tpebs_mode) {
+ case TPEBS_MODE__MIN:
+ val = rint(evsel->retirement_latency.min);
+ break;
+ case TPEBS_MODE__MAX:
+ val = rint(evsel->retirement_latency.max);
+ break;
+ default:
+ case TPEBS_MODE__LAST:
+ case TPEBS_MODE__MEAN:
+ val = rint(evsel->retirement_latency.mean);
+ break;
+ }
+ } else {
+ switch (tpebs_mode) {
+ case TPEBS_MODE__MIN:
+ val = t->stats.min;
+ break;
+ case TPEBS_MODE__MAX:
+ val = t->stats.max;
+ break;
+ case TPEBS_MODE__LAST:
+ val = t->last;
+ break;
+ default:
+ case TPEBS_MODE__MEAN:
+ val = rint(t->stats.mean);
break;
}
}
-
- /* Set ena and run to non-zero */
- count->ena = count->run = 1;
- count->lost = 0;
-
- if (!found) {
- /*
- * Set default value or 0 when retire_latency for this event is
- * not found from sampling data (record_tpebs not set or 0
- * sample recorded).
- */
- count->val = 0;
- return 0;
+ mutex_unlock(tpebs_mtx_get());
+
+ if (old_count) {
+ count->val = old_count->val + val;
+ count->run = old_count->run + 1;
+ count->ena = old_count->ena + 1;
+ } else {
+ count->val = val;
+ count->run++;
+ count->ena++;
}
-
- /*
- * Only set retire_latency value to the first CPU and thread.
- */
- if (cpu_map_idx == 0 && thread == 0)
- val = rint(t->val);
- else
- val = 0;
-
- count->val = val;
return 0;
}
-static void tpebs_retire_lat__delete(struct tpebs_retire_lat *r)
-{
- zfree(&r->name);
- free(r);
-}
-
-
-/*
- * tpebs_delete - delete tpebs related data and stop the created thread and
- * process by calling tpebs_stop().
+/**
+ * evsel__tpebs_close() - delete tpebs related data. If the last event, stop the
+ * created thread and process by calling tpebs_stop().
*
- * This function is called from evlist_delete() and also from builtin-stat
- * stat_handle_error(). If tpebs_start() is called from places other then perf
- * stat, need to ensure tpebs_delete() is also called to safely free mem and
- * close the data read thread and the forked perf record process.
- *
- * This function is also called in evsel__close() to be symmetric with
- * tpebs_start() being called in evsel__open(). We will update this call site
- * when move tpebs_start() to evlist level.
+ * This function is called in evsel__close() to be symmetric with
+ * evsel__tpebs_open() being called in evsel__open().
*/
-void tpebs_delete(void)
+void evsel__tpebs_close(struct evsel *evsel)
{
- struct tpebs_retire_lat *r, *rtmp;
-
- if (tpebs_pid == -1)
- return;
-
- tpebs_stop();
+ struct tpebs_retire_lat *t;
- list_for_each_entry_safe(r, rtmp, &tpebs_results, nd) {
- list_del_init(&r->nd);
- tpebs_retire_lat__delete(r);
- }
+ mutex_lock(tpebs_mtx_get());
+ t = tpebs_retire_lat__find(evsel);
+ if (t) {
+ list_del_init(&t->nd);
+ tpebs_retire_lat__delete(t);
- if (tpebs_cmd) {
- free(tpebs_cmd);
- tpebs_cmd = NULL;
+ if (list_empty(&tpebs_results))
+ tpebs_stop();
}
+ mutex_unlock(tpebs_mtx_get());
}
diff --git a/tools/perf/util/intel-tpebs.h b/tools/perf/util/intel-tpebs.h
index 766b3fbd79f1..9475e2e6ea74 100644
--- a/tools/perf/util/intel-tpebs.h
+++ b/tools/perf/util/intel-tpebs.h
@@ -2,34 +2,24 @@
/*
* intel_tpebs.h: Intel TEPBS support
*/
-#ifndef INCLUDE__PERF_INTEL_TPEBS_H__
-#define INCLUDE__PERF_INTEL_TPEBS_H__
+#ifndef __INTEL_TPEBS_H
+#define __INTEL_TPEBS_H
-#include "stat.h"
-#include "evsel.h"
+struct evlist;
+struct evsel;
-#ifdef HAVE_ARCH_X86_64_SUPPORT
+enum tpebs_mode {
+ TPEBS_MODE__MEAN,
+ TPEBS_MODE__MIN,
+ TPEBS_MODE__MAX,
+ TPEBS_MODE__LAST,
+};
extern bool tpebs_recording;
-int tpebs_start(struct evlist *evsel_list);
-void tpebs_delete(void);
-int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread);
+extern enum tpebs_mode tpebs_mode;
-#else
+int evsel__tpebs_open(struct evsel *evsel);
+void evsel__tpebs_close(struct evsel *evsel);
+int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread);
-static inline int tpebs_start(struct evlist *evsel_list __maybe_unused)
-{
- return 0;
-}
-
-static inline void tpebs_delete(void) {};
-
-static inline int tpebs_set_evsel(struct evsel *evsel __maybe_unused,
- int cpu_map_idx __maybe_unused,
- int thread __maybe_unused)
-{
- return 0;
-}
-
-#endif
-#endif
+#endif /* __INTEL_TPEBS_H */
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 624964f01b5f..b062b1f234b6 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -14,9 +14,9 @@
#include <sys/mman.h>
#include <linux/stringify.h>
-#include "build-id.h"
#include "event.h"
#include "debug.h"
+#include "dso.h"
#include "evlist.h"
#include "namespaces.h"
#include "symbol.h"
@@ -531,9 +531,22 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
/*
* mark dso as use to generate buildid in the header
*/
- if (!ret)
- build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
-
+ if (!ret) {
+ struct dso_id dso_id = {
+ {
+ .maj = event->mmap2.maj,
+ .min = event->mmap2.min,
+ .ino = event->mmap2.ino,
+ .ino_generation = event->mmap2.ino_generation,
+ },
+ .mmap2_valid = true,
+ .mmap2_ino_generation_valid = true,
+ };
+ struct dso *dso = machine__findnew_dso_id(jd->machine, filename, &dso_id);
+
+ if (dso)
+ dso__set_hit(dso);
+ }
out:
perf_sample__exit(&sample);
free(event);
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index b5d916aa49df..59c94190b092 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -18,6 +18,12 @@ struct lock_filter {
char **slabs;
};
+struct lock_delay {
+ char *sym;
+ unsigned long addr;
+ unsigned long time;
+};
+
struct lock_stat {
struct hlist_node hash_entry;
struct rb_node rb; /* used for sorting */
@@ -140,14 +146,17 @@ struct lock_contention {
struct machine *machine;
struct hlist_head *result;
struct lock_filter *filters;
+ struct lock_delay *delays;
struct lock_contention_fails fails;
struct rb_root cgroups;
+ void *btf;
unsigned long map_nr_entries;
int max_stack;
int stack_skip;
int aggr_mode;
int owner;
int nr_filtered;
+ int nr_delays;
bool save_callstack;
};
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 2531b373f2cf..b5dd42588c91 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -20,6 +20,7 @@
#include "path.h"
#include "srcline.h"
#include "symbol.h"
+#include "synthetic-events.h"
#include "sort.h"
#include "strlist.h"
#include "target.h"
@@ -128,28 +129,62 @@ out:
return 0;
}
-struct machine *machine__new_host(void)
+static struct machine *__machine__new_host(struct perf_env *host_env, bool kernel_maps)
{
struct machine *machine = malloc(sizeof(*machine));
- if (machine != NULL) {
- machine__init(machine, "", HOST_KERNEL_ID);
+ if (!machine)
+ return NULL;
- if (machine__create_kernel_maps(machine) < 0)
- goto out_delete;
+ machine__init(machine, "", HOST_KERNEL_ID);
- machine->env = &perf_env;
+ if (kernel_maps && machine__create_kernel_maps(machine) < 0) {
+ free(machine);
+ return NULL;
}
+ machine->env = host_env;
+ return machine;
+}
+
+struct machine *machine__new_host(struct perf_env *host_env)
+{
+ return __machine__new_host(host_env, /*kernel_maps=*/true);
+}
+
+static int mmap_handler(const struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_mmap2_event(machine, event, sample);
+}
+
+static int machine__init_live(struct machine *machine, pid_t pid)
+{
+ union perf_event event;
+ memset(&event, 0, sizeof(event));
+ return perf_event__synthesize_mmap_events(NULL, &event, pid, pid,
+ mmap_handler, machine, true);
+}
+
+struct machine *machine__new_live(struct perf_env *host_env, bool kernel_maps, pid_t pid)
+{
+ struct machine *machine = __machine__new_host(host_env, kernel_maps);
+
+ if (!machine)
+ return NULL;
+
+ if (machine__init_live(machine, pid)) {
+ machine__delete(machine);
+ return NULL;
+ }
return machine;
-out_delete:
- free(machine);
- return NULL;
}
-struct machine *machine__new_kallsyms(void)
+struct machine *machine__new_kallsyms(struct perf_env *host_env)
{
- struct machine *machine = machine__new_host();
+ struct machine *machine = machine__new_host(host_env);
/*
* FIXME:
* 1) We should switch to machine__load_kallsyms(), i.e. not explicitly
@@ -1696,21 +1731,21 @@ int machine__process_mmap2_event(struct machine *machine,
{
struct thread *thread;
struct map *map;
- struct dso_id dso_id = {
- .maj = event->mmap2.maj,
- .min = event->mmap2.min,
- .ino = event->mmap2.ino,
- .ino_generation = event->mmap2.ino_generation,
- };
- struct build_id __bid, *bid = NULL;
+ struct dso_id dso_id = dso_id_empty;
int ret = 0;
if (dump_trace)
perf_event__fprintf_mmap2(event, stdout);
if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
- bid = &__bid;
- build_id__init(bid, event->mmap2.build_id, event->mmap2.build_id_size);
+ build_id__init(&dso_id.build_id, event->mmap2.build_id, event->mmap2.build_id_size);
+ } else {
+ dso_id.maj = event->mmap2.maj;
+ dso_id.min = event->mmap2.min;
+ dso_id.ino = event->mmap2.ino;
+ dso_id.ino_generation = event->mmap2.ino_generation;
+ dso_id.mmap2_valid = true;
+ dso_id.mmap2_ino_generation_valid = true;
}
if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
@@ -1722,7 +1757,7 @@ int machine__process_mmap2_event(struct machine *machine,
};
strlcpy(xm.name, event->mmap2.filename, KMAP_NAME_LEN);
- ret = machine__process_kernel_mmap_event(machine, &xm, bid);
+ ret = machine__process_kernel_mmap_event(machine, &xm, &dso_id.build_id);
if (ret < 0)
goto out_problem;
return 0;
@@ -1736,7 +1771,7 @@ int machine__process_mmap2_event(struct machine *machine,
map = map__new(machine, event->mmap2.start,
event->mmap2.len, event->mmap2.pgoff,
&dso_id, event->mmap2.prot,
- event->mmap2.flags, bid,
+ event->mmap2.flags,
event->mmap2.filename, thread);
if (map == NULL)
@@ -1794,8 +1829,8 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
prot = PROT_EXEC;
map = map__new(machine, event->mmap.start,
- event->mmap.len, event->mmap.pgoff,
- NULL, prot, 0, NULL, event->mmap.filename, thread);
+ event->mmap.len, event->mmap.pgoff,
+ &dso_id_empty, prot, /*flags=*/0, event->mmap.filename, thread);
if (map == NULL)
goto out_problem_map;
@@ -1976,7 +2011,7 @@ static void ip__resolve_ams(struct thread *thread,
* Thus, we have to try consecutively until we find a match
* or else, the symbol is unknown
*/
- thread__find_cpumode_addr_location(thread, ip, &al);
+ thread__find_cpumode_addr_location(thread, ip, /*symbols=*/true, &al);
ams->addr = ip;
ams->al_addr = al.addr;
@@ -2078,7 +2113,7 @@ static int add_callchain_ip(struct thread *thread,
al.sym = NULL;
al.srcline = NULL;
if (!cpumode) {
- thread__find_cpumode_addr_location(thread, ip, &al);
+ thread__find_cpumode_addr_location(thread, ip, symbols, &al);
} else {
if (ip >= PERF_CONTEXT_MAX) {
switch (ip) {
@@ -2106,6 +2141,8 @@ static int add_callchain_ip(struct thread *thread,
}
if (symbols)
thread__find_symbol(thread, *cpumode, ip, &al);
+ else
+ thread__find_map(thread, *cpumode, ip, &al);
}
if (al.sym != NULL) {
@@ -3155,7 +3192,7 @@ struct dso *machine__findnew_dso_id(struct machine *machine, const char *filenam
struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
{
- return machine__findnew_dso_id(machine, filename, NULL);
+ return machine__findnew_dso_id(machine, filename, &dso_id_empty);
}
char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index b56abec84fed..22a42c5825fa 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -169,8 +169,9 @@ struct thread *machine__findnew_guest_code(struct machine *machine, pid_t pid);
void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
void machines__set_comm_exec(struct machines *machines, bool comm_exec);
-struct machine *machine__new_host(void);
-struct machine *machine__new_kallsyms(void);
+struct machine *machine__new_host(struct perf_env *host_env);
+struct machine *machine__new_kallsyms(struct perf_env *host_env);
+struct machine *machine__new_live(struct perf_env *host_env, bool kernel_maps, pid_t pid);
int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
void machine__exit(struct machine *machine);
void machine__delete_threads(struct machine *machine);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index d729438b7d65..b46c68c24d1c 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -120,8 +120,8 @@ static void map__init(struct map *map, u64 start, u64 end, u64 pgoff,
}
struct map *map__new(struct machine *machine, u64 start, u64 len,
- u64 pgoff, struct dso_id *id,
- u32 prot, u32 flags, struct build_id *bid,
+ u64 pgoff, const struct dso_id *id,
+ u32 prot, u32 flags,
char *filename, struct thread *thread)
{
struct map *result;
@@ -132,7 +132,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
map = zalloc(sizeof(*map));
if (ADD_RC_CHK(result, map)) {
char newfilename[PATH_MAX];
- struct dso *dso, *header_bid_dso;
+ struct dso *dso;
int anon, no_dso, vdso, android;
android = is_android_lib(filename);
@@ -189,16 +189,15 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
dso__set_nsinfo(dso, nsi);
mutex_unlock(dso__lock(dso));
- if (build_id__is_defined(bid)) {
- dso__set_build_id(dso, bid);
- } else {
+ if (!build_id__is_defined(&id->build_id)) {
/*
* If the mmap event had no build ID, search for an existing dso from the
* build ID header by name. Otherwise only the dso loaded at the time of
* reading the header will have the build ID set and all future mmaps will
* have it missing.
*/
- header_bid_dso = dsos__find(&machine->dsos, filename, false);
+ struct dso *header_bid_dso = dsos__find(&machine->dsos, filename, false);
+
if (header_bid_dso && dso__header_build_id(header_bid_dso)) {
dso__set_build_id(dso, dso__bid(header_bid_dso));
dso__set_header_build_id(dso, 1);
@@ -354,7 +353,7 @@ int map__load(struct map *map)
if (dso__has_build_id(dso)) {
char sbuild_id[SBUILD_ID_SIZE];
- build_id__sprintf(dso__bid(dso), sbuild_id);
+ build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id));
pr_debug("%s with build id %s not found", name, sbuild_id);
} else
pr_debug("Failed to open %s", name);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 4262f5a143be..9cadf533a561 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -173,11 +173,10 @@ struct thread;
__map__for_each_symbol_by_name(map, sym_name, (pos), idx)
struct dso_id;
-struct build_id;
struct map *map__new(struct machine *machine, u64 start, u64 len,
- u64 pgoff, struct dso_id *id, u32 prot, u32 flags,
- struct build_id *bid, char *filename, struct thread *thread);
+ u64 pgoff, const struct dso_id *id, u32 prot, u32 flags,
+ char *filename, struct thread *thread);
struct map *map__new2(u64 start, struct dso *dso);
void map__delete(struct map *map);
struct map *map__clone(struct map *map);
diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c
index 0b40d901675e..85b2a93a59ac 100644
--- a/tools/perf/util/maps.c
+++ b/tools/perf/util/maps.c
@@ -1082,10 +1082,13 @@ struct map *maps__find(struct maps *maps, u64 ip)
while (!done) {
down_read(maps__lock(maps));
if (maps__maps_by_address_sorted(maps)) {
- struct map **mapp =
- bsearch(&ip, maps__maps_by_address(maps), maps__nr_maps(maps),
- sizeof(*mapp), map__addr_cmp);
+ struct map **mapp = NULL;
+ struct map **maps_by_address = maps__maps_by_address(maps);
+ unsigned int nr_maps = maps__nr_maps(maps);
+ if (maps_by_address && nr_maps)
+ mapp = bsearch(&ip, maps_by_address, nr_maps, sizeof(*mapp),
+ map__addr_cmp);
if (mapp)
result = map__get(*mapp);
done = true;
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 884d9aebce91..80b3069427bc 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -303,15 +303,12 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **eve
}
if (cpu_map) {
- struct perf_cpu_map *online = cpu_map__online();
-
- if (!perf_cpu_map__equal(cpu_map, online)) {
+ if (!perf_cpu_map__equal(cpu_map, cpu_map__online())) {
char buf[200];
cpu_map__snprint(cpu_map, buf, sizeof(buf));
pr_warning("Memory events are enabled on a subset of CPUs: %s\n", buf);
}
- perf_cpu_map__put(online);
perf_cpu_map__put(cpu_map);
}
@@ -680,7 +677,10 @@ do { \
if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
if (lvl & P(LVL, L2)) {
- stats->ld_l2hit++;
+ if (snoop & P(SNOOP, HITM))
+ HITM_INC(lcl_hitm);
+ else
+ stats->ld_l2hit++;
if (snoopx & P(SNOOPX, PEER))
PEER_INC(lcl_peer);
@@ -799,3 +799,181 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
stats->nomap += add->nomap;
stats->noparse += add->noparse;
}
+
+/*
+ * It returns an index in hist_entry->mem_stat array for the given val which
+ * represents a data-src based on the mem_stat_type.
+ */
+int mem_stat_index(const enum mem_stat_type mst, const u64 val)
+{
+ union perf_mem_data_src src = {
+ .val = val,
+ };
+
+ switch (mst) {
+ case PERF_MEM_STAT_OP:
+ switch (src.mem_op) {
+ case PERF_MEM_OP_LOAD:
+ return MEM_STAT_OP_LOAD;
+ case PERF_MEM_OP_STORE:
+ return MEM_STAT_OP_STORE;
+ case PERF_MEM_OP_LOAD | PERF_MEM_OP_STORE:
+ return MEM_STAT_OP_LDST;
+ default:
+ if (src.mem_op & PERF_MEM_OP_PFETCH)
+ return MEM_STAT_OP_PFETCH;
+ if (src.mem_op & PERF_MEM_OP_EXEC)
+ return MEM_STAT_OP_EXEC;
+ return MEM_STAT_OP_OTHER;
+ }
+ case PERF_MEM_STAT_CACHE:
+ switch (src.mem_lvl_num) {
+ case PERF_MEM_LVLNUM_L1:
+ return MEM_STAT_CACHE_L1;
+ case PERF_MEM_LVLNUM_L2:
+ return MEM_STAT_CACHE_L2;
+ case PERF_MEM_LVLNUM_L3:
+ return MEM_STAT_CACHE_L3;
+ case PERF_MEM_LVLNUM_L4:
+ return MEM_STAT_CACHE_L4;
+ case PERF_MEM_LVLNUM_LFB:
+ return MEM_STAT_CACHE_L1_BUF;
+ case PERF_MEM_LVLNUM_L2_MHB:
+ return MEM_STAT_CACHE_L2_BUF;
+ default:
+ return MEM_STAT_CACHE_OTHER;
+ }
+ case PERF_MEM_STAT_MEMORY:
+ switch (src.mem_lvl_num) {
+ case PERF_MEM_LVLNUM_MSC:
+ return MEM_STAT_MEMORY_MSC;
+ case PERF_MEM_LVLNUM_RAM:
+ return MEM_STAT_MEMORY_RAM;
+ case PERF_MEM_LVLNUM_UNC:
+ return MEM_STAT_MEMORY_UNC;
+ case PERF_MEM_LVLNUM_CXL:
+ return MEM_STAT_MEMORY_CXL;
+ case PERF_MEM_LVLNUM_IO:
+ return MEM_STAT_MEMORY_IO;
+ case PERF_MEM_LVLNUM_PMEM:
+ return MEM_STAT_MEMORY_PMEM;
+ default:
+ return MEM_STAT_MEMORY_OTHER;
+ }
+ case PERF_MEM_STAT_SNOOP:
+ switch (src.mem_snoop) {
+ case PERF_MEM_SNOOP_HIT:
+ return MEM_STAT_SNOOP_HIT;
+ case PERF_MEM_SNOOP_HITM:
+ return MEM_STAT_SNOOP_HITM;
+ case PERF_MEM_SNOOP_MISS:
+ return MEM_STAT_SNOOP_MISS;
+ default:
+ return MEM_STAT_SNOOP_OTHER;
+ }
+ case PERF_MEM_STAT_DTLB:
+ switch (src.mem_dtlb) {
+ case PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT:
+ return MEM_STAT_DTLB_L1_HIT;
+ case PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT:
+ return MEM_STAT_DTLB_L2_HIT;
+ case PERF_MEM_TLB_L1 | PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT:
+ return MEM_STAT_DTLB_ANY_HIT;
+ default:
+ if (src.mem_dtlb & PERF_MEM_TLB_MISS)
+ return MEM_STAT_DTLB_MISS;
+ return MEM_STAT_DTLB_OTHER;
+ }
+ default:
+ break;
+ }
+ return -1;
+}
+
+/* To align output, returned string should be shorter than MEM_STAT_PRINT_LEN */
+const char *mem_stat_name(const enum mem_stat_type mst, const int idx)
+{
+ switch (mst) {
+ case PERF_MEM_STAT_OP:
+ switch (idx) {
+ case MEM_STAT_OP_LOAD:
+ return "Load";
+ case MEM_STAT_OP_STORE:
+ return "Store";
+ case MEM_STAT_OP_LDST:
+ return "Ld+St";
+ case MEM_STAT_OP_PFETCH:
+ return "Pfetch";
+ case MEM_STAT_OP_EXEC:
+ return "Exec";
+ case MEM_STAT_OP_OTHER:
+ default:
+ return "Other";
+ }
+ case PERF_MEM_STAT_CACHE:
+ switch (idx) {
+ case MEM_STAT_CACHE_L1:
+ return "L1";
+ case MEM_STAT_CACHE_L2:
+ return "L2";
+ case MEM_STAT_CACHE_L3:
+ return "L3";
+ case MEM_STAT_CACHE_L4:
+ return "L4";
+ case MEM_STAT_CACHE_L1_BUF:
+ return "L1-buf";
+ case MEM_STAT_CACHE_L2_BUF:
+ return "L2-buf";
+ case MEM_STAT_CACHE_OTHER:
+ default:
+ return "Other";
+ }
+ case PERF_MEM_STAT_MEMORY:
+ switch (idx) {
+ case MEM_STAT_MEMORY_RAM:
+ return "RAM";
+ case MEM_STAT_MEMORY_MSC:
+ return "MSC";
+ case MEM_STAT_MEMORY_UNC:
+ return "Uncach";
+ case MEM_STAT_MEMORY_CXL:
+ return "CXL";
+ case MEM_STAT_MEMORY_IO:
+ return "IO";
+ case MEM_STAT_MEMORY_PMEM:
+ return "PMEM";
+ case MEM_STAT_MEMORY_OTHER:
+ default:
+ return "Other";
+ }
+ case PERF_MEM_STAT_SNOOP:
+ switch (idx) {
+ case MEM_STAT_SNOOP_HIT:
+ return "Hit";
+ case MEM_STAT_SNOOP_HITM:
+ return "HitM";
+ case MEM_STAT_SNOOP_MISS:
+ return "Miss";
+ case MEM_STAT_SNOOP_OTHER:
+ default:
+ return "Other";
+ }
+ case PERF_MEM_STAT_DTLB:
+ switch (idx) {
+ case MEM_STAT_DTLB_L1_HIT:
+ return "L1-Hit";
+ case MEM_STAT_DTLB_L2_HIT:
+ return "L2-Hit";
+ case MEM_STAT_DTLB_ANY_HIT:
+ return "L?-Hit";
+ case MEM_STAT_DTLB_MISS:
+ return "Miss";
+ case MEM_STAT_DTLB_OTHER:
+ default:
+ return "Other";
+ }
+ default:
+ break;
+ }
+ return "N/A";
+}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index a5c19d39ee37..5b98076904b0 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -89,4 +89,61 @@ struct hist_entry;
int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi);
void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add);
+enum mem_stat_type {
+ PERF_MEM_STAT_OP,
+ PERF_MEM_STAT_CACHE,
+ PERF_MEM_STAT_MEMORY,
+ PERF_MEM_STAT_SNOOP,
+ PERF_MEM_STAT_DTLB,
+};
+
+#define MEM_STAT_PRINT_LEN 7 /* 1 space + 5 digits + 1 percent sign */
+
+enum mem_stat_op {
+ MEM_STAT_OP_LOAD,
+ MEM_STAT_OP_STORE,
+ MEM_STAT_OP_LDST,
+ MEM_STAT_OP_PFETCH,
+ MEM_STAT_OP_EXEC,
+ MEM_STAT_OP_OTHER,
+};
+
+enum mem_stat_cache {
+ MEM_STAT_CACHE_L1,
+ MEM_STAT_CACHE_L2,
+ MEM_STAT_CACHE_L3,
+ MEM_STAT_CACHE_L4,
+ MEM_STAT_CACHE_L1_BUF,
+ MEM_STAT_CACHE_L2_BUF,
+ MEM_STAT_CACHE_OTHER,
+};
+
+enum mem_stat_memory {
+ MEM_STAT_MEMORY_RAM,
+ MEM_STAT_MEMORY_MSC,
+ MEM_STAT_MEMORY_UNC,
+ MEM_STAT_MEMORY_CXL,
+ MEM_STAT_MEMORY_IO,
+ MEM_STAT_MEMORY_PMEM,
+ MEM_STAT_MEMORY_OTHER,
+};
+
+enum mem_stat_snoop {
+ MEM_STAT_SNOOP_HIT,
+ MEM_STAT_SNOOP_HITM,
+ MEM_STAT_SNOOP_MISS,
+ MEM_STAT_SNOOP_OTHER,
+};
+
+enum mem_stat_dtlb {
+ MEM_STAT_DTLB_L1_HIT,
+ MEM_STAT_DTLB_L2_HIT,
+ MEM_STAT_DTLB_ANY_HIT,
+ MEM_STAT_DTLB_MISS,
+ MEM_STAT_DTLB_OTHER,
+};
+
+int mem_stat_index(const enum mem_stat_type mst, const u64 data_src);
+const char *mem_stat_name(const enum mem_stat_type mst, const int idx);
+
#endif /* __PERF_MEM_EVENTS_H */
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 46920ebadfd1..595b83142d2c 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -103,7 +103,7 @@ static void metric_event_delete(struct rblist *rblist __maybe_unused,
free(me);
}
-static void metricgroup__rblist_init(struct rblist *metric_events)
+void metricgroup__rblist_init(struct rblist *metric_events)
{
rblist__init(metric_events);
metric_events->node_cmp = metric_event_cmp;
@@ -179,7 +179,7 @@ static void metric__watchdog_constraint_hint(const char *name, bool foot)
" echo 1 > /proc/sys/kernel/nmi_watchdog\n");
}
-static bool metric__group_events(const struct pmu_metric *pm)
+static bool metric__group_events(const struct pmu_metric *pm, bool metric_no_threshold)
{
switch (pm->event_grouping) {
case MetricNoGroupEvents:
@@ -191,6 +191,13 @@ static bool metric__group_events(const struct pmu_metric *pm)
return false;
case MetricNoGroupEventsSmt:
return !smt_on();
+ case MetricNoGroupEventsThresholdAndNmi:
+ if (metric_no_threshold)
+ return true;
+ if (!sysctl__nmi_watchdog_enabled())
+ return true;
+ metric__watchdog_constraint_hint(pm->metric_name, /*foot=*/false);
+ return false;
case MetricGroupEvents:
default:
return true;
@@ -212,6 +219,7 @@ static void metric__free(struct metric *m)
static struct metric *metric__new(const struct pmu_metric *pm,
const char *modifier,
bool metric_no_group,
+ bool metric_no_threshold,
int runtime,
const char *user_requested_cpu_list,
bool system_wide)
@@ -246,7 +254,7 @@ static struct metric *metric__new(const struct pmu_metric *pm,
}
m->pctx->sctx.runtime = runtime;
m->pctx->sctx.system_wide = system_wide;
- m->group_events = !metric_no_group && metric__group_events(pm);
+ m->group_events = !metric_no_group && metric__group_events(pm, metric_no_threshold);
m->metric_refs = NULL;
m->evlist = NULL;
@@ -353,7 +361,7 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids,
return 0;
}
-static bool match_metric(const char *metric_or_groups, const char *sought)
+static bool match_metric_or_groups(const char *metric_or_groups, const char *sought)
{
int len;
char *m;
@@ -369,117 +377,19 @@ static bool match_metric(const char *metric_or_groups, const char *sought)
(metric_or_groups[len] == 0 || metric_or_groups[len] == ';'))
return true;
m = strchr(metric_or_groups, ';');
- return m && match_metric(m + 1, sought);
+ return m && match_metric_or_groups(m + 1, sought);
}
-static bool match_pm_metric(const struct pmu_metric *pm, const char *pmu, const char *metric)
+static bool match_pm_metric_or_groups(const struct pmu_metric *pm, const char *pmu,
+ const char *metric_or_groups)
{
const char *pm_pmu = pm->pmu ?: "cpu";
if (strcmp(pmu, "all") && strcmp(pm_pmu, pmu))
return false;
- return match_metric(pm->metric_group, metric) ||
- match_metric(pm->metric_name, metric);
-}
-
-/** struct mep - RB-tree node for building printing information. */
-struct mep {
- /** nd - RB-tree element. */
- struct rb_node nd;
- /** @metric_group: Owned metric group name, separated others with ';'. */
- char *metric_group;
- const char *metric_name;
- const char *metric_desc;
- const char *metric_long_desc;
- const char *metric_expr;
- const char *metric_threshold;
- const char *metric_unit;
-};
-
-static int mep_cmp(struct rb_node *rb_node, const void *entry)
-{
- struct mep *a = container_of(rb_node, struct mep, nd);
- struct mep *b = (struct mep *)entry;
- int ret;
-
- ret = strcmp(a->metric_group, b->metric_group);
- if (ret)
- return ret;
-
- return strcmp(a->metric_name, b->metric_name);
-}
-
-static struct rb_node *mep_new(struct rblist *rl __maybe_unused, const void *entry)
-{
- struct mep *me = malloc(sizeof(struct mep));
-
- if (!me)
- return NULL;
-
- memcpy(me, entry, sizeof(struct mep));
- return &me->nd;
-}
-
-static void mep_delete(struct rblist *rl __maybe_unused,
- struct rb_node *nd)
-{
- struct mep *me = container_of(nd, struct mep, nd);
-
- zfree(&me->metric_group);
- free(me);
-}
-
-static struct mep *mep_lookup(struct rblist *groups, const char *metric_group,
- const char *metric_name)
-{
- struct rb_node *nd;
- struct mep me = {
- .metric_group = strdup(metric_group),
- .metric_name = metric_name,
- };
- nd = rblist__find(groups, &me);
- if (nd) {
- free(me.metric_group);
- return container_of(nd, struct mep, nd);
- }
- rblist__add_node(groups, &me);
- nd = rblist__find(groups, &me);
- if (nd)
- return container_of(nd, struct mep, nd);
- return NULL;
-}
-
-static int metricgroup__add_to_mep_groups(const struct pmu_metric *pm,
- struct rblist *groups)
-{
- const char *g;
- char *omg, *mg;
-
- mg = strdup(pm->metric_group ?: pm->metric_name);
- if (!mg)
- return -ENOMEM;
- omg = mg;
- while ((g = strsep(&mg, ";")) != NULL) {
- struct mep *me;
-
- g = skip_spaces(g);
- if (strlen(g))
- me = mep_lookup(groups, g, pm->metric_name);
- else
- me = mep_lookup(groups, pm->metric_name, pm->metric_name);
-
- if (me) {
- me->metric_desc = pm->desc;
- me->metric_long_desc = pm->long_desc;
- me->metric_expr = pm->metric_expr;
- me->metric_threshold = pm->metric_threshold;
- me->metric_unit = pm->unit;
- }
- }
- free(omg);
-
- return 0;
+ return match_metric_or_groups(pm->metric_group, metric_or_groups) ||
+ match_metric_or_groups(pm->metric_name, metric_or_groups);
}
struct metricgroup_iter_data {
@@ -507,53 +417,22 @@ static int metricgroup__sys_event_iter(const struct pmu_metric *pm,
return 0;
}
-static int metricgroup__add_to_mep_groups_callback(const struct pmu_metric *pm,
- const struct pmu_metrics_table *table __maybe_unused,
- void *vdata)
+int metricgroup__for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn,
+ void *data)
{
- struct rblist *groups = vdata;
-
- return metricgroup__add_to_mep_groups(pm, groups);
-}
-
-void metricgroup__print(const struct print_callbacks *print_cb, void *print_state)
-{
- struct rblist groups;
- const struct pmu_metrics_table *table;
- struct rb_node *node, *next;
+ struct metricgroup_iter_data sys_data = {
+ .fn = fn,
+ .data = data,
+ };
- rblist__init(&groups);
- groups.node_new = mep_new;
- groups.node_cmp = mep_cmp;
- groups.node_delete = mep_delete;
- table = pmu_metrics_table__find();
if (table) {
- pmu_metrics_table__for_each_metric(table,
- metricgroup__add_to_mep_groups_callback,
- &groups);
- }
- {
- struct metricgroup_iter_data data = {
- .fn = metricgroup__add_to_mep_groups_callback,
- .data = &groups,
- };
- pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data);
- }
+ int ret = pmu_metrics_table__for_each_metric(table, fn, data);
- for (node = rb_first_cached(&groups.entries); node; node = next) {
- struct mep *me = container_of(node, struct mep, nd);
-
- print_cb->print_metric(print_state,
- me->metric_group,
- me->metric_name,
- me->metric_desc,
- me->metric_long_desc,
- me->metric_expr,
- me->metric_threshold,
- me->metric_unit);
- next = rb_next(node);
- rblist__remove_node(&groups, node);
+ if (ret)
+ return ret;
}
+
+ return pmu_for_each_sys_metric(metricgroup__sys_event_iter, &sys_data);
}
static const char *code_characters = ",-=@";
@@ -802,11 +681,6 @@ struct metricgroup_add_iter_data {
const struct pmu_metrics_table *table;
};
-static bool metricgroup__find_metric(const char *pmu,
- const char *metric,
- const struct pmu_metrics_table *table,
- struct pmu_metric *pm);
-
static int add_metric(struct list_head *metric_list,
const struct pmu_metric *pm,
const char *modifier,
@@ -818,6 +692,16 @@ static int add_metric(struct list_head *metric_list,
const struct visited_metric *visited,
const struct pmu_metrics_table *table);
+static int metricgroup__find_metric_callback(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table __maybe_unused,
+ void *vdata)
+{
+ struct pmu_metric *copied_pm = vdata;
+
+ memcpy(copied_pm, pm, sizeof(*pm));
+ return 0;
+}
+
/**
* resolve_metric - Locate metrics within the root metric and recursively add
* references to them.
@@ -838,7 +722,7 @@ static int add_metric(struct list_head *metric_list,
* architecture perf is running upon.
*/
static int resolve_metric(struct list_head *metric_list,
- const char *pmu,
+ struct perf_pmu *pmu,
const char *modifier,
bool metric_no_group,
bool metric_no_threshold,
@@ -868,7 +752,9 @@ static int resolve_metric(struct list_head *metric_list,
hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) {
struct pmu_metric pm;
- if (metricgroup__find_metric(pmu, cur->pkey, table, &pm)) {
+ if (pmu_metrics_table__find_metric(table, pmu, cur->pkey,
+ metricgroup__find_metric_callback,
+ &pm) != PMU_METRICS__NOT_FOUND) {
pending = realloc(pending,
(pending_cnt + 1) * sizeof(struct to_resolve));
if (!pending)
@@ -953,8 +839,8 @@ static int __add_metric(struct list_head *metric_list,
* This metric is the root of a tree and may reference other
* metrics that are added recursively.
*/
- root_metric = metric__new(pm, modifier, metric_no_group, runtime,
- user_requested_cpu_list, system_wide);
+ root_metric = metric__new(pm, modifier, metric_no_group, metric_no_threshold,
+ runtime, user_requested_cpu_list, system_wide);
if (!root_metric)
return -ENOMEM;
@@ -1019,7 +905,12 @@ static int __add_metric(struct list_head *metric_list,
}
if (!ret) {
/* Resolve referenced metrics. */
- const char *pmu = pm->pmu ?: "cpu";
+ struct perf_pmu *pmu;
+
+ if (pm->pmu && pm->pmu[0] != '\0')
+ pmu = perf_pmus__find(pm->pmu);
+ else
+ pmu = perf_pmus__scan_core(/*pmu=*/ NULL);
ret = resolve_metric(metric_list, pmu, modifier, metric_no_group,
metric_no_threshold, user_requested_cpu_list,
@@ -1036,44 +927,6 @@ static int __add_metric(struct list_head *metric_list,
return ret;
}
-struct metricgroup__find_metric_data {
- const char *pmu;
- const char *metric;
- struct pmu_metric *pm;
-};
-
-static int metricgroup__find_metric_callback(const struct pmu_metric *pm,
- const struct pmu_metrics_table *table __maybe_unused,
- void *vdata)
-{
- struct metricgroup__find_metric_data *data = vdata;
- const char *pm_pmu = pm->pmu ?: "cpu";
-
- if (strcmp(data->pmu, "all") && strcmp(pm_pmu, data->pmu))
- return 0;
-
- if (!match_metric(pm->metric_name, data->metric))
- return 0;
-
- memcpy(data->pm, pm, sizeof(*pm));
- return 1;
-}
-
-static bool metricgroup__find_metric(const char *pmu,
- const char *metric,
- const struct pmu_metrics_table *table,
- struct pmu_metric *pm)
-{
- struct metricgroup__find_metric_data data = {
- .pmu = pmu,
- .metric = metric,
- .pm = pm,
- };
-
- return pmu_metrics_table__for_each_metric(table, metricgroup__find_metric_callback, &data)
- ? true : false;
-}
-
static int add_metric(struct list_head *metric_list,
const struct pmu_metric *pm,
const char *modifier,
@@ -1112,29 +965,6 @@ static int add_metric(struct list_head *metric_list,
return ret;
}
-static int metricgroup__add_metric_sys_event_iter(const struct pmu_metric *pm,
- const struct pmu_metrics_table *table __maybe_unused,
- void *data)
-{
- struct metricgroup_add_iter_data *d = data;
- int ret;
-
- if (!match_pm_metric(pm, d->pmu, d->metric_name))
- return 0;
-
- ret = add_metric(d->metric_list, pm, d->modifier, d->metric_no_group,
- d->metric_no_threshold, d->user_requested_cpu_list,
- d->system_wide, d->root_metric, d->visited, d->table);
- if (ret)
- goto out;
-
- *(d->has_match) = true;
-
-out:
- *(d->ret) = ret;
- return ret;
-}
-
/**
* metric_list_cmp - list_sort comparator that sorts metrics with more events to
* the front. tool events are excluded from the count.
@@ -1200,9 +1030,9 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm,
struct metricgroup__add_metric_data *data = vdata;
int ret = 0;
- if (pm->metric_expr && match_pm_metric(pm, data->pmu, data->metric_name)) {
+ if (pm->metric_expr && match_pm_metric_or_groups(pm, data->pmu, data->metric_name)) {
bool metric_no_group = data->metric_no_group ||
- match_metric(pm->metricgroup_no_group, data->metric_name);
+ match_metric_or_groups(pm->metricgroup_no_group, data->metric_name);
data->has_match = true;
ret = add_metric(data->list, pm, data->modifier, metric_no_group,
@@ -1238,55 +1068,26 @@ static int metricgroup__add_metric(const char *pmu, const char *metric_name, con
{
LIST_HEAD(list);
int ret;
- bool has_match = false;
-
- {
- struct metricgroup__add_metric_data data = {
- .list = &list,
- .pmu = pmu,
- .metric_name = metric_name,
- .modifier = modifier,
- .metric_no_group = metric_no_group,
- .metric_no_threshold = metric_no_threshold,
- .user_requested_cpu_list = user_requested_cpu_list,
- .system_wide = system_wide,
- .has_match = false,
- };
- /*
- * Iterate over all metrics seeing if metric matches either the
- * name or group. When it does add the metric to the list.
- */
- ret = pmu_metrics_table__for_each_metric(table, metricgroup__add_metric_callback,
- &data);
- if (ret)
- goto out;
+ struct metricgroup__add_metric_data data = {
+ .list = &list,
+ .pmu = pmu,
+ .metric_name = metric_name,
+ .modifier = modifier,
+ .metric_no_group = metric_no_group,
+ .metric_no_threshold = metric_no_threshold,
+ .user_requested_cpu_list = user_requested_cpu_list,
+ .system_wide = system_wide,
+ .has_match = false,
+ };
- has_match = data.has_match;
- }
- {
- struct metricgroup_iter_data data = {
- .fn = metricgroup__add_metric_sys_event_iter,
- .data = (void *) &(struct metricgroup_add_iter_data) {
- .metric_list = &list,
- .pmu = pmu,
- .metric_name = metric_name,
- .modifier = modifier,
- .metric_no_group = metric_no_group,
- .user_requested_cpu_list = user_requested_cpu_list,
- .system_wide = system_wide,
- .has_match = &has_match,
- .ret = &ret,
- .table = table,
- },
- };
-
- pmu_for_each_sys_metric(metricgroup__sys_event_iter, &data);
- }
- /* End of pmu events. */
- if (!has_match)
+ /*
+ * Iterate over all metrics seeing if metric matches either the
+ * name or group. When it does add the metric to the list.
+ */
+ ret = metricgroup__for_each_metric(table, metricgroup__add_metric_callback, &data);
+ if (!ret && !data.has_match)
ret = -EINVAL;
-out:
/*
* add to metric_list so that they can be released
* even if it's failed
@@ -1530,7 +1331,6 @@ static int parse_groups(struct evlist *perf_evlist,
const char *user_requested_cpu_list,
bool system_wide,
bool fake_pmu,
- struct rblist *metric_events_list,
const struct pmu_metrics_table *table)
{
struct evlist *combined_evlist = NULL;
@@ -1540,8 +1340,6 @@ static int parse_groups(struct evlist *perf_evlist,
bool is_default = !strcmp(str, "Default");
int ret;
- if (metric_events_list->nr_entries == 0)
- metricgroup__rblist_init(metric_events_list);
ret = metricgroup__add_metric_list(pmu, str, metric_no_group, metric_no_threshold,
user_requested_cpu_list,
system_wide, &metric_list, table);
@@ -1632,7 +1430,8 @@ static int parse_groups(struct evlist *perf_evlist,
goto out;
}
- me = metricgroup__lookup(metric_events_list, metric_events[0], true);
+ me = metricgroup__lookup(&perf_evlist->metric_events, metric_events[0],
+ /*create=*/true);
expr = malloc(sizeof(struct metric_expr));
if (!expr) {
@@ -1692,8 +1491,7 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
bool metric_no_threshold,
const char *user_requested_cpu_list,
bool system_wide,
- bool hardware_aware_grouping,
- struct rblist *metric_events)
+ bool hardware_aware_grouping)
{
const struct pmu_metrics_table *table = pmu_metrics_table__find();
@@ -1704,13 +1502,12 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
return parse_groups(perf_evlist, pmu, str, metric_no_group, metric_no_merge,
metric_no_threshold, user_requested_cpu_list, system_wide,
- /*fake_pmu=*/false, metric_events, table);
+ /*fake_pmu=*/false, table);
}
int metricgroup__parse_groups_test(struct evlist *evlist,
const struct pmu_metrics_table *table,
- const char *str,
- struct rblist *metric_events)
+ const char *str)
{
return parse_groups(evlist, "all", str,
/*metric_no_group=*/false,
@@ -1718,34 +1515,37 @@ int metricgroup__parse_groups_test(struct evlist *evlist,
/*metric_no_threshold=*/false,
/*user_requested_cpu_list=*/NULL,
/*system_wide=*/false,
- /*fake_pmu=*/true, metric_events, table);
+ /*fake_pmu=*/true, table);
}
struct metricgroup__has_metric_data {
const char *pmu;
- const char *metric;
+ const char *metric_or_groups;
};
-static int metricgroup__has_metric_callback(const struct pmu_metric *pm,
- const struct pmu_metrics_table *table __maybe_unused,
- void *vdata)
+static int metricgroup__has_metric_or_groups_callback(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table
+ __maybe_unused,
+ void *vdata)
{
struct metricgroup__has_metric_data *data = vdata;
- return match_pm_metric(pm, data->pmu, data->metric) ? 1 : 0;
+ return match_pm_metric_or_groups(pm, data->pmu, data->metric_or_groups) ? 1 : 0;
}
-bool metricgroup__has_metric(const char *pmu, const char *metric)
+bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_groups)
{
const struct pmu_metrics_table *table = pmu_metrics_table__find();
struct metricgroup__has_metric_data data = {
.pmu = pmu,
- .metric = metric,
+ .metric_or_groups = metric_or_groups,
};
if (!table)
return false;
- return pmu_metrics_table__for_each_metric(table, metricgroup__has_metric_callback, &data)
+ return pmu_metrics_table__for_each_metric(table,
+ metricgroup__has_metric_or_groups_callback,
+ &data)
? true : false;
}
@@ -1800,7 +1600,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
evsel = evlist__find_evsel(evlist, old_me->evsel->core.idx);
if (!evsel)
return -EINVAL;
- new_me = metricgroup__lookup(new_metric_events, evsel, true);
+ new_me = metricgroup__lookup(new_metric_events, evsel, /*create=*/true);
if (!new_me)
return -ENOMEM;
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 779f6ede1b51..324880b2ed8f 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -77,17 +77,17 @@ int metricgroup__parse_groups(struct evlist *perf_evlist,
bool metric_no_threshold,
const char *user_requested_cpu_list,
bool system_wide,
- bool hardware_aware_grouping,
- struct rblist *metric_events);
+ bool hardware_aware_grouping);
int metricgroup__parse_groups_test(struct evlist *evlist,
const struct pmu_metrics_table *table,
- const char *str,
- struct rblist *metric_events);
+ const char *str);
-void metricgroup__print(const struct print_callbacks *print_cb, void *print_state);
-bool metricgroup__has_metric(const char *pmu, const char *metric);
+int metricgroup__for_each_metric(const struct pmu_metrics_table *table, pmu_metric_iter_fn fn,
+ void *data);
+bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_groups);
unsigned int metricgroups__topdown_max_level(void);
int arch_get_runtimeparam(const struct pmu_metric *pm);
+void metricgroup__rblist_init(struct rblist *metric_events);
void metricgroup__rblist_exit(struct rblist *metric_events);
int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,
diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h
index 62d258c71ded..38458f00846f 100644
--- a/tools/perf/util/mutex.h
+++ b/tools/perf/util/mutex.h
@@ -43,6 +43,12 @@
#define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__((exclusive_lock_function(__VA_ARGS__)))
/*
+ * Documents functions that acquire a shared (reader) lock in the body of a
+ * function, and do not release it.
+ */
+#define SHARED_LOCK_FUNCTION(...) __attribute__((shared_lock_function(__VA_ARGS__)))
+
+/*
* Documents functions that expect a lock to be held on entry to the function,
* and release it in the body of the function.
*/
@@ -55,6 +61,9 @@
/* Documents a function that expects a mutex to be held prior to entry. */
#define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__((exclusive_locks_required(__VA_ARGS__)))
+/* Documents a function that expects a shared (reader) lock to be held prior to entry. */
+#define SHARED_LOCKS_REQUIRED(...) __attribute__((shared_locks_required(__VA_ARGS__)))
+
/* Turns off thread safety checking within the body of a particular function. */
#define NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis))
@@ -66,9 +75,11 @@
#define LOCKS_EXCLUDED(...)
#define LOCK_RETURNED(x)
#define EXCLUSIVE_LOCK_FUNCTION(...)
+#define SHARED_LOCK_FUNCTION(...)
#define UNLOCK_FUNCTION(...)
#define EXCLUSIVE_TRYLOCK_FUNCTION(...)
#define EXCLUSIVE_LOCKS_REQUIRED(...)
+#define SHARED_LOCKS_REQUIRED(...)
#define NO_THREAD_SAFETY_ANALYSIS
#endif
diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h
index 2dd67c60f211..64bf763ddf50 100644
--- a/tools/perf/util/off_cpu.h
+++ b/tools/perf/util/off_cpu.h
@@ -13,9 +13,10 @@ struct record_opts;
#define OFFCPU_SAMPLE_TYPES (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | \
PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \
PERF_SAMPLE_ID | PERF_SAMPLE_CPU | \
- PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN | \
+ PERF_SAMPLE_PERIOD | PERF_SAMPLE_RAW | \
PERF_SAMPLE_CGROUP)
+#define OFFCPU_THRESH 500000000ULL
#ifdef HAVE_BPF_SKEL
int off_cpu_prepare(struct evlist *evlist, struct target *target,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 5152fd5a6ead..8282ddf68b98 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -7,6 +7,7 @@
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/param.h>
+#include "cpumap.h"
#include "term.h"
#include "env.h"
#include "evlist.h"
@@ -16,20 +17,22 @@
#include "string2.h"
#include "strbuf.h"
#include "debug.h"
-#include <api/fs/tracing_path.h>
-#include <api/io_dir.h>
#include <perf/cpumap.h>
#include <util/parse-events-bison.h>
#include <util/parse-events-flex.h>
#include "pmu.h"
#include "pmus.h"
+#include "tp_pmu.h"
#include "asm/bug.h"
+#include "ui/ui.h"
#include "util/parse-branch-options.h"
#include "util/evsel_config.h"
#include "util/event.h"
#include "util/bpf-filter.h"
+#include "util/stat.h"
#include "util/util.h"
#include "tracepoint.h"
+#include <api/fs/tracing_path.h>
#define MAX_NAME_LEN 100
@@ -81,77 +84,21 @@ const struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = {
},
};
-const struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
- [PERF_COUNT_SW_CPU_CLOCK] = {
- .symbol = "cpu-clock",
- .alias = "",
- },
- [PERF_COUNT_SW_TASK_CLOCK] = {
- .symbol = "task-clock",
- .alias = "",
- },
- [PERF_COUNT_SW_PAGE_FAULTS] = {
- .symbol = "page-faults",
- .alias = "faults",
- },
- [PERF_COUNT_SW_CONTEXT_SWITCHES] = {
- .symbol = "context-switches",
- .alias = "cs",
- },
- [PERF_COUNT_SW_CPU_MIGRATIONS] = {
- .symbol = "cpu-migrations",
- .alias = "migrations",
- },
- [PERF_COUNT_SW_PAGE_FAULTS_MIN] = {
- .symbol = "minor-faults",
- .alias = "",
- },
- [PERF_COUNT_SW_PAGE_FAULTS_MAJ] = {
- .symbol = "major-faults",
- .alias = "",
- },
- [PERF_COUNT_SW_ALIGNMENT_FAULTS] = {
- .symbol = "alignment-faults",
- .alias = "",
- },
- [PERF_COUNT_SW_EMULATION_FAULTS] = {
- .symbol = "emulation-faults",
- .alias = "",
- },
- [PERF_COUNT_SW_DUMMY] = {
- .symbol = "dummy",
- .alias = "",
- },
- [PERF_COUNT_SW_BPF_OUTPUT] = {
- .symbol = "bpf-output",
- .alias = "",
- },
- [PERF_COUNT_SW_CGROUP_SWITCHES] = {
- .symbol = "cgroup-switches",
- .alias = "",
- },
+static const char *const event_types[] = {
+ [PERF_TYPE_HARDWARE] = "hardware",
+ [PERF_TYPE_SOFTWARE] = "software",
+ [PERF_TYPE_TRACEPOINT] = "tracepoint",
+ [PERF_TYPE_HW_CACHE] = "hardware-cache",
+ [PERF_TYPE_RAW] = "raw",
+ [PERF_TYPE_BREAKPOINT] = "breakpoint",
};
-const char *event_type(int type)
+const char *event_type(size_t type)
{
- switch (type) {
- case PERF_TYPE_HARDWARE:
- return "hardware";
+ if (type >= PERF_TYPE_MAX)
+ return "unknown";
- case PERF_TYPE_SOFTWARE:
- return "software";
-
- case PERF_TYPE_TRACEPOINT:
- return "tracepoint";
-
- case PERF_TYPE_HW_CACHE:
- return "hardware-cache";
-
- default:
- break;
- }
-
- return "unknown";
+ return event_types[type];
}
static char *get_config_str(const struct parse_events_terms *head_terms,
@@ -179,6 +126,38 @@ static char *get_config_name(const struct parse_events_terms *head_terms)
return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME);
}
+static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head_terms)
+{
+ struct parse_events_term *term;
+ struct perf_cpu_map *cpus = NULL;
+
+ if (!head_terms)
+ return NULL;
+
+ list_for_each_entry(term, &head_terms->terms, list) {
+ if (term->type_term == PARSE_EVENTS__TERM_TYPE_CPU) {
+ struct perf_cpu_map *term_cpus;
+
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+ term_cpus = perf_cpu_map__new_int(term->val.num);
+ } else {
+ struct perf_pmu *pmu = perf_pmus__find(term->val.str);
+
+ if (pmu && perf_cpu_map__is_empty(pmu->cpus))
+ term_cpus = pmu->is_core ? cpu_map__online() : NULL;
+ else if (pmu)
+ term_cpus = perf_cpu_map__get(pmu->cpus);
+ else
+ term_cpus = perf_cpu_map__new(term->val.str);
+ }
+ perf_cpu_map__merge(&cpus, term_cpus);
+ perf_cpu_map__put(term_cpus);
+ }
+ }
+
+ return cpus;
+}
+
/**
* fix_raw - For each raw term see if there is an event (aka alias) in pmu that
* matches the raw's string value. If the string value matches an
@@ -228,49 +207,98 @@ __add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr,
bool init_attr,
const char *name, const char *metric_id, struct perf_pmu *pmu,
- struct list_head *config_terms, bool auto_merge_stats,
- struct perf_cpu_map *cpu_list, u64 alternate_hw_config)
+ struct list_head *config_terms, struct evsel *first_wildcard_match,
+ struct perf_cpu_map *user_cpus, u64 alternate_hw_config)
{
struct evsel *evsel;
- struct perf_cpu_map *cpus = perf_cpu_map__is_empty(cpu_list) && pmu ? pmu->cpus : cpu_list;
+ bool is_pmu_core;
+ struct perf_cpu_map *cpus, *pmu_cpus;
+ bool has_user_cpus = !perf_cpu_map__is_empty(user_cpus);
- cpus = perf_cpu_map__get(cpus);
- if (pmu)
- perf_pmu__warn_invalid_formats(pmu);
+ /*
+ * Ensure the first_wildcard_match's PMU matches that of the new event
+ * being added. Otherwise try to match with another event further down
+ * the evlist.
+ */
+ if (first_wildcard_match) {
+ struct evsel *pos = list_prev_entry(first_wildcard_match, core.node);
+
+ first_wildcard_match = NULL;
+ list_for_each_entry_continue(pos, list, core.node) {
+ if (perf_pmu__name_no_suffix_match(pos->pmu, pmu->name)) {
+ first_wildcard_match = pos;
+ break;
+ }
+ if (pos->pmu->is_core && (!pmu || pmu->is_core)) {
+ first_wildcard_match = pos;
+ break;
+ }
+ }
+ }
- if (pmu && (attr->type == PERF_TYPE_RAW || attr->type >= PERF_TYPE_MAX)) {
- perf_pmu__warn_invalid_config(pmu, attr->config, name,
- PERF_PMU_FORMAT_VALUE_CONFIG, "config");
- perf_pmu__warn_invalid_config(pmu, attr->config1, name,
- PERF_PMU_FORMAT_VALUE_CONFIG1, "config1");
- perf_pmu__warn_invalid_config(pmu, attr->config2, name,
- PERF_PMU_FORMAT_VALUE_CONFIG2, "config2");
- perf_pmu__warn_invalid_config(pmu, attr->config3, name,
- PERF_PMU_FORMAT_VALUE_CONFIG3, "config3");
+ if (pmu) {
+ perf_pmu__warn_invalid_formats(pmu);
+ if (attr->type == PERF_TYPE_RAW || attr->type >= PERF_TYPE_MAX) {
+ perf_pmu__warn_invalid_config(pmu, attr->config, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG, "config");
+ perf_pmu__warn_invalid_config(pmu, attr->config1, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG1, "config1");
+ perf_pmu__warn_invalid_config(pmu, attr->config2, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG2, "config2");
+ perf_pmu__warn_invalid_config(pmu, attr->config3, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG3, "config3");
+ }
+ }
+ /*
+ * If a PMU wasn't given, such as for legacy events, find now that
+ * warnings won't be generated.
+ */
+ if (!pmu)
+ pmu = perf_pmus__find_by_attr(attr);
+
+ if (pmu) {
+ is_pmu_core = pmu->is_core;
+ pmu_cpus = perf_cpu_map__get(pmu->cpus);
+ if (perf_cpu_map__is_empty(pmu_cpus))
+ pmu_cpus = cpu_map__online();
+ } else {
+ is_pmu_core = (attr->type == PERF_TYPE_HARDWARE ||
+ attr->type == PERF_TYPE_HW_CACHE);
+ pmu_cpus = is_pmu_core ? cpu_map__online() : NULL;
}
+
+ if (has_user_cpus)
+ cpus = perf_cpu_map__get(user_cpus);
+ else
+ cpus = perf_cpu_map__get(pmu_cpus);
+
if (init_attr)
event_attr_init(attr);
evsel = evsel__new_idx(attr, *idx);
- if (!evsel) {
- perf_cpu_map__put(cpus);
- return NULL;
+ if (!evsel)
+ goto out_err;
+
+ if (name) {
+ evsel->name = strdup(name);
+ if (!evsel->name)
+ goto out_err;
+ }
+
+ if (metric_id) {
+ evsel->metric_id = strdup(metric_id);
+ if (!evsel->metric_id)
+ goto out_err;
}
(*idx)++;
evsel->core.cpus = cpus;
- evsel->core.own_cpus = perf_cpu_map__get(cpus);
+ evsel->core.pmu_cpus = pmu_cpus;
evsel->core.requires_cpu = pmu ? pmu->is_uncore : false;
- evsel->core.is_pmu_core = pmu ? pmu->is_core : false;
- evsel->auto_merge_stats = auto_merge_stats;
+ evsel->core.is_pmu_core = is_pmu_core;
evsel->pmu = pmu;
evsel->alternate_hw_config = alternate_hw_config;
-
- if (name)
- evsel->name = strdup(name);
-
- if (metric_id)
- evsel->metric_id = strdup(metric_id);
+ evsel->first_wildcard_match = first_wildcard_match;
if (config_terms)
list_splice_init(config_terms, &evsel->config_terms);
@@ -278,7 +306,17 @@ __add_event(struct list_head *list, int *idx,
if (list)
list_add_tail(&evsel->core.node, list);
+ if (has_user_cpus)
+ evsel__warn_user_requested_cpus(evsel, user_cpus);
+
return evsel;
+out_err:
+ perf_cpu_map__put(cpus);
+ perf_cpu_map__put(pmu_cpus);
+ zfree(&evsel->name);
+ zfree(&evsel->metric_id);
+ free(evsel);
+ return NULL;
}
struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
@@ -287,7 +325,7 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
{
return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name,
metric_id, pmu, /*config_terms=*/NULL,
- /*auto_merge_stats=*/false, /*cpu_list=*/NULL,
+ /*first_wildcard_match=*/NULL, /*cpu_list=*/NULL,
/*alternate_hw_config=*/PERF_COUNT_HW_MAX);
}
@@ -298,7 +336,7 @@ static int add_event(struct list_head *list, int *idx,
{
return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id,
/*pmu=*/NULL, config_terms,
- /*auto_merge_stats=*/false, /*cpu_list=*/NULL,
+ /*first_wildcard_match=*/NULL, /*cpu_list=*/NULL,
alternate_hw_config) ? 0 : -ENOMEM;
}
@@ -423,7 +461,7 @@ bool parse_events__filter_pmu(const struct parse_events_state *parse_state,
static int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, struct perf_pmu *pmu,
const struct parse_events_terms *const_parsed_terms,
- bool auto_merge_stats, u64 alternate_hw_config);
+ struct evsel *first_wildcard_match, u64 alternate_hw_config);
int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
struct parse_events_state *parse_state,
@@ -433,11 +471,13 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
bool found_supported = false;
const char *config_name = get_config_name(parsed_terms);
const char *metric_id = get_config_metric_id(parsed_terms);
+ struct perf_cpu_map *cpus = get_config_cpu(parsed_terms);
+ int ret = 0;
+ struct evsel *first_wildcard_match = NULL;
- while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+ while ((pmu = perf_pmus__scan_for_event(pmu, name)) != NULL) {
LIST_HEAD(config_terms);
struct perf_event_attr attr;
- int ret;
if (parse_events__filter_pmu(parse_state, pmu))
continue;
@@ -449,10 +489,13 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
*/
ret = parse_events_add_pmu(parse_state, list, pmu,
parsed_terms,
- perf_pmu__auto_merge_stats(pmu),
+ first_wildcard_match,
/*alternate_hw_config=*/PERF_COUNT_HW_MAX);
if (ret)
- return ret;
+ goto out_err;
+ if (first_wildcard_match == NULL)
+ first_wildcard_match =
+ container_of(list->prev, struct evsel, core.node);
continue;
}
@@ -472,21 +515,29 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
if (parsed_terms) {
if (config_attr(&attr, parsed_terms, parse_state->error,
- config_term_common))
- return -EINVAL;
-
- if (get_config_terms(parsed_terms, &config_terms))
- return -ENOMEM;
+ config_term_common)) {
+ ret = -EINVAL;
+ goto out_err;
+ }
+ if (get_config_terms(parsed_terms, &config_terms)) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
}
if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name,
- metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
- /*cpu_list=*/NULL,
- /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL)
- return -ENOMEM;
+ metric_id, pmu, &config_terms, first_wildcard_match,
+ cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL)
+ ret = -ENOMEM;
+ if (first_wildcard_match == NULL)
+ first_wildcard_match = container_of(list->prev, struct evsel, core.node);
free_config_terms(&config_terms);
+ if (ret)
+ goto out_err;
}
+out_err:
+ perf_cpu_map__put(cpus);
return found_supported ? 0 : -EINVAL;
}
@@ -548,105 +599,82 @@ static int add_tracepoint(struct parse_events_state *parse_state,
return 0;
}
-static int add_tracepoint_multi_event(struct parse_events_state *parse_state,
- struct list_head *list,
- const char *sys_name, const char *evt_name,
- struct parse_events_error *err,
- struct parse_events_terms *head_config, YYLTYPE *loc)
-{
- char *evt_path;
- struct io_dirent64 *evt_ent;
- struct io_dir evt_dir;
- int ret = 0, found = 0;
-
- evt_path = get_events_file(sys_name);
- if (!evt_path) {
- tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
- return -1;
- }
- io_dir__init(&evt_dir, open(evt_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
- if (evt_dir.dirfd < 0) {
- put_events_file(evt_path);
- tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
- return -1;
- }
+struct add_tracepoint_multi_args {
+ struct parse_events_state *parse_state;
+ struct list_head *list;
+ const char *sys_glob;
+ const char *evt_glob;
+ struct parse_events_error *err;
+ struct parse_events_terms *head_config;
+ YYLTYPE *loc;
+ int found;
+};
- while (!ret && (evt_ent = io_dir__readdir(&evt_dir))) {
- if (!strcmp(evt_ent->d_name, ".")
- || !strcmp(evt_ent->d_name, "..")
- || !strcmp(evt_ent->d_name, "enable")
- || !strcmp(evt_ent->d_name, "filter"))
- continue;
+static int add_tracepoint_multi_event_cb(void *state, const char *sys_name, const char *evt_name)
+{
+ struct add_tracepoint_multi_args *args = state;
+ int ret;
- if (!strglobmatch(evt_ent->d_name, evt_name))
- continue;
+ if (!strglobmatch(evt_name, args->evt_glob))
+ return 0;
- found++;
+ args->found++;
+ ret = add_tracepoint(args->parse_state, args->list, sys_name, evt_name,
+ args->err, args->head_config, args->loc);
- ret = add_tracepoint(parse_state, list, sys_name, evt_ent->d_name,
- err, head_config, loc);
- }
+ return ret;
+}
- if (!found) {
- tracepoint_error(err, ENOENT, sys_name, evt_name, loc->first_column);
- ret = -1;
+static int add_tracepoint_multi_event(struct add_tracepoint_multi_args *args, const char *sys_name)
+{
+ if (strpbrk(args->evt_glob, "*?") == NULL) {
+ /* Not a glob. */
+ args->found++;
+ return add_tracepoint(args->parse_state, args->list, sys_name, args->evt_glob,
+ args->err, args->head_config, args->loc);
}
- put_events_file(evt_path);
- close(evt_dir.dirfd);
- return ret;
+ return tp_pmu__for_each_tp_event(sys_name, args, add_tracepoint_multi_event_cb);
}
-static int add_tracepoint_event(struct parse_events_state *parse_state,
- struct list_head *list,
- const char *sys_name, const char *evt_name,
- struct parse_events_error *err,
- struct parse_events_terms *head_config, YYLTYPE *loc)
+static int add_tracepoint_multi_sys_cb(void *state, const char *sys_name)
{
- return strpbrk(evt_name, "*?") ?
- add_tracepoint_multi_event(parse_state, list, sys_name, evt_name,
- err, head_config, loc) :
- add_tracepoint(parse_state, list, sys_name, evt_name,
- err, head_config, loc);
+ struct add_tracepoint_multi_args *args = state;
+
+ if (!strglobmatch(sys_name, args->sys_glob))
+ return 0;
+
+ return add_tracepoint_multi_event(args, sys_name);
}
static int add_tracepoint_multi_sys(struct parse_events_state *parse_state,
struct list_head *list,
- const char *sys_name, const char *evt_name,
+ const char *sys_glob, const char *evt_glob,
struct parse_events_error *err,
struct parse_events_terms *head_config, YYLTYPE *loc)
{
- struct io_dirent64 *events_ent;
- struct io_dir events_dir;
- int ret = 0;
- char *events_dir_path = get_tracing_file("events");
+ struct add_tracepoint_multi_args args = {
+ .parse_state = parse_state,
+ .list = list,
+ .sys_glob = sys_glob,
+ .evt_glob = evt_glob,
+ .err = err,
+ .head_config = head_config,
+ .loc = loc,
+ .found = 0,
+ };
+ int ret;
- if (!events_dir_path) {
- tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
- return -1;
+ if (strpbrk(sys_glob, "*?") == NULL) {
+ /* Not a glob. */
+ ret = add_tracepoint_multi_event(&args, sys_glob);
+ } else {
+ ret = tp_pmu__for_each_tp_sys(&args, add_tracepoint_multi_sys_cb);
}
- io_dir__init(&events_dir, open(events_dir_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
- put_events_file(events_dir_path);
- if (events_dir.dirfd < 0) {
- tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
- return -1;
+ if (args.found == 0) {
+ tracepoint_error(err, ENOENT, sys_glob, evt_glob, loc->first_column);
+ return -ENOENT;
}
-
- while (!ret && (events_ent = io_dir__readdir(&events_dir))) {
- if (!strcmp(events_ent->d_name, ".")
- || !strcmp(events_ent->d_name, "..")
- || !strcmp(events_ent->d_name, "enable")
- || !strcmp(events_ent->d_name, "header_event")
- || !strcmp(events_ent->d_name, "header_page"))
- continue;
-
- if (!strglobmatch(events_ent->d_name, sys_name))
- continue;
-
- ret = add_tracepoint_event(parse_state, list, events_ent->d_name,
- evt_name, err, head_config, loc);
- }
- close(events_dir.dirfd);
return ret;
}
@@ -805,6 +833,7 @@ const char *parse_events__term_type_str(enum parse_events__term_type term_type)
[PARSE_EVENTS__TERM_TYPE_RAW] = "raw",
[PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache",
[PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware",
+ [PARSE_EVENTS__TERM_TYPE_CPU] = "cpu",
};
if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR)
return "unknown term";
@@ -834,6 +863,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er
case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
case PARSE_EVENTS__TERM_TYPE_PERCORE:
+ case PARSE_EVENTS__TERM_TYPE_CPU:
return true;
case PARSE_EVENTS__TERM_TYPE_USER:
case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
@@ -981,6 +1011,32 @@ do { \
return -EINVAL;
}
break;
+ case PARSE_EVENTS__TERM_TYPE_CPU: {
+ struct perf_cpu_map *map;
+
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+ if (term->val.num >= (u64)cpu__max_present_cpu().cpu) {
+ parse_events_error__handle(err, term->err_val,
+ strdup("too big"),
+ /*help=*/NULL);
+ return -EINVAL;
+ }
+ break;
+ }
+ assert(term->type_val == PARSE_EVENTS__TERM_TYPE_STR);
+ if (perf_pmus__find(term->val.str) != NULL)
+ break;
+
+ map = perf_cpu_map__new(term->val.str);
+ if (!map) {
+ parse_events_error__handle(err, term->err_val,
+ strdup("not a valid PMU or CPU number"),
+ /*help=*/NULL);
+ return -EINVAL;
+ }
+ perf_cpu_map__put(map);
+ break;
+ }
case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
case PARSE_EVENTS__TERM_TYPE_USER:
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
@@ -1108,6 +1164,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
case PARSE_EVENTS__TERM_TYPE_RAW:
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ case PARSE_EVENTS__TERM_TYPE_CPU:
default:
if (err) {
parse_events_error__handle(err, term->err_term,
@@ -1242,6 +1299,7 @@ do { \
case PARSE_EVENTS__TERM_TYPE_RAW:
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ case PARSE_EVENTS__TERM_TYPE_CPU:
default:
break;
}
@@ -1296,6 +1354,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head
case PARSE_EVENTS__TERM_TYPE_RAW:
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ case PARSE_EVENTS__TERM_TYPE_CPU:
default:
break;
}
@@ -1324,22 +1383,20 @@ int parse_events_add_tracepoint(struct parse_events_state *parse_state,
return -EINVAL;
}
- if (strpbrk(sys, "*?"))
- return add_tracepoint_multi_sys(parse_state, list, sys, event,
- err, head_config, loc);
- else
- return add_tracepoint_event(parse_state, list, sys, event,
- err, head_config, loc);
+ return add_tracepoint_multi_sys(parse_state, list, sys, event,
+ err, head_config, loc);
}
static int __parse_events_add_numeric(struct parse_events_state *parse_state,
struct list_head *list,
struct perf_pmu *pmu, u32 type, u32 extended_type,
- u64 config, const struct parse_events_terms *head_config)
+ u64 config, const struct parse_events_terms *head_config,
+ struct evsel *first_wildcard_match)
{
struct perf_event_attr attr;
LIST_HEAD(config_terms);
const char *name, *metric_id;
+ struct perf_cpu_map *cpus;
int ret;
memset(&attr, 0, sizeof(attr));
@@ -1361,10 +1418,11 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state,
name = get_config_name(head_config);
metric_id = get_config_metric_id(head_config);
+ cpus = get_config_cpu(head_config);
ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name,
- metric_id, pmu, &config_terms, /*auto_merge_stats=*/false,
- /*cpu_list=*/NULL, /*alternate_hw_config=*/PERF_COUNT_HW_MAX
- ) == NULL ? -ENOMEM : 0;
+ metric_id, pmu, &config_terms, first_wildcard_match,
+ cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) ? 0 : -ENOMEM;
+ perf_cpu_map__put(cpus);
free_config_terms(&config_terms);
return ret;
}
@@ -1380,6 +1438,7 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
/* Wildcards on numeric values are only supported by core PMUs. */
if (wildcard && perf_pmus__supports_extended_type()) {
+ struct evsel *first_wildcard_match = NULL;
while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
int ret;
@@ -1389,15 +1448,20 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
ret = __parse_events_add_numeric(parse_state, list, pmu,
type, pmu->type,
- config, head_config);
+ config, head_config,
+ first_wildcard_match);
if (ret)
return ret;
+ if (first_wildcard_match == NULL)
+ first_wildcard_match =
+ container_of(list->prev, struct evsel, core.node);
}
if (found_supported)
return 0;
}
return __parse_events_add_numeric(parse_state, list, perf_pmus__find_by_type(type),
- type, /*extended_type=*/0, config, head_config);
+ type, /*extended_type=*/0, config, head_config,
+ /*first_wildcard_match=*/NULL);
}
static bool config_term_percore(struct list_head *config_terms)
@@ -1415,7 +1479,7 @@ static bool config_term_percore(struct list_head *config_terms)
static int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, struct perf_pmu *pmu,
const struct parse_events_terms *const_parsed_terms,
- bool auto_merge_stats, u64 alternate_hw_config)
+ struct evsel *first_wildcard_match, u64 alternate_hw_config)
{
struct perf_event_attr attr;
struct perf_pmu_info info;
@@ -1424,6 +1488,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
LIST_HEAD(config_terms);
struct parse_events_terms parsed_terms;
bool alias_rewrote_terms = false;
+ struct perf_cpu_map *term_cpu = NULL;
if (verbose > 1) {
struct strbuf sb;
@@ -1451,7 +1516,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel = __add_event(list, &parse_state->idx, &attr,
/*init_attr=*/true, /*name=*/NULL,
/*metric_id=*/NULL, pmu,
- /*config_terms=*/NULL, auto_merge_stats,
+ /*config_terms=*/NULL, first_wildcard_match,
/*cpu_list=*/NULL, alternate_hw_config);
return evsel ? 0 : -ENOMEM;
}
@@ -1518,11 +1583,12 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
return -EINVAL;
}
+ term_cpu = get_config_cpu(&parsed_terms);
evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true,
get_config_name(&parsed_terms),
get_config_metric_id(&parsed_terms), pmu,
- &config_terms, auto_merge_stats, /*cpu_list=*/NULL,
- alternate_hw_config);
+ &config_terms, first_wildcard_match, term_cpu, alternate_hw_config);
+ perf_cpu_map__put(term_cpu);
if (!evsel) {
parse_events_terms__exit(&parsed_terms);
return -ENOMEM;
@@ -1539,6 +1605,10 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel->scale = info.scale;
evsel->per_pkg = info.per_pkg;
evsel->snapshot = info.snapshot;
+ evsel->retirement_latency.mean = info.retirement_latency_mean;
+ evsel->retirement_latency.min = info.retirement_latency_min;
+ evsel->retirement_latency.max = info.retirement_latency_max;
+
return 0;
}
@@ -1554,6 +1624,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
int ok = 0;
const char *config;
struct parse_events_terms parsed_terms;
+ struct evsel *first_wildcard_match = NULL;
*listp = NULL;
@@ -1585,8 +1656,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
INIT_LIST_HEAD(list);
- while ((pmu = perf_pmus__scan(pmu)) != NULL) {
- bool auto_merge_stats;
+ while ((pmu = perf_pmus__scan_for_event(pmu, event_name)) != NULL) {
if (parse_events__filter_pmu(parse_state, pmu))
continue;
@@ -1594,9 +1664,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
if (!perf_pmu__have_event(pmu, event_name))
continue;
- auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
if (!parse_events_add_pmu(parse_state, list, pmu,
- &parsed_terms, auto_merge_stats, hw_config)) {
+ &parsed_terms, first_wildcard_match, hw_config)) {
struct strbuf sb;
strbuf_init(&sb, /*hint=*/ 0);
@@ -1605,11 +1674,13 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
strbuf_release(&sb);
ok++;
}
+ if (first_wildcard_match == NULL)
+ first_wildcard_match = container_of(list->prev, struct evsel, core.node);
}
if (parse_state->fake_pmu) {
if (!parse_events_add_pmu(parse_state, list, perf_pmus__fake_pmu(), &parsed_terms,
- /*auto_merge_stats=*/true, hw_config)) {
+ first_wildcard_match, hw_config)) {
struct strbuf sb;
strbuf_init(&sb, /*hint=*/ 0);
@@ -1640,6 +1711,7 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state
struct perf_pmu *pmu;
int ok = 0;
char *help;
+ struct evsel *first_wildcard_match = NULL;
*listp = malloc(sizeof(**listp));
if (!*listp)
@@ -1650,32 +1722,35 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state
/* Attempt to add to list assuming event_or_pmu is a PMU name. */
pmu = perf_pmus__find(event_or_pmu);
if (pmu && !parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms,
- /*auto_merge_stats=*/false,
+ first_wildcard_match,
/*alternate_hw_config=*/PERF_COUNT_HW_MAX))
return 0;
if (parse_state->fake_pmu) {
if (!parse_events_add_pmu(parse_state, *listp, perf_pmus__fake_pmu(),
const_parsed_terms,
- /*auto_merge_stats=*/false,
+ first_wildcard_match,
/*alternate_hw_config=*/PERF_COUNT_HW_MAX))
return 0;
}
pmu = NULL;
/* Failed to add, try wildcard expansion of event_or_pmu as a PMU name. */
- while ((pmu = perf_pmus__scan(pmu)) != NULL) {
- if (!parse_events__filter_pmu(parse_state, pmu) &&
- perf_pmu__wildcard_match(pmu, event_or_pmu)) {
- bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
-
- if (!parse_events_add_pmu(parse_state, *listp, pmu,
- const_parsed_terms,
- auto_merge_stats,
- /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) {
- ok++;
- parse_state->wild_card_pmus = true;
- }
+ while ((pmu = perf_pmus__scan_matching_wildcard(pmu, event_or_pmu)) != NULL) {
+
+ if (parse_events__filter_pmu(parse_state, pmu))
+ continue;
+
+ if (!parse_events_add_pmu(parse_state, *listp, pmu,
+ const_parsed_terms,
+ first_wildcard_match,
+ /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) {
+ ok++;
+ parse_state->wild_card_pmus = true;
+ }
+ if (first_wildcard_match == NULL) {
+ first_wildcard_match =
+ container_of((*listp)->prev, struct evsel, core.node);
}
}
if (ok)
@@ -1733,13 +1808,11 @@ static int parse_events__modifier_list(struct parse_events_state *parse_state,
int eH = group ? evsel->core.attr.exclude_host : 0;
int eG = group ? evsel->core.attr.exclude_guest : 0;
int exclude = eu | ek | eh;
- int exclude_GH = group ? evsel->exclude_GH : 0;
+ int exclude_GH = eG | eH;
if (mod.user) {
if (!exclude)
exclude = eu = ek = eh = 1;
- if (!exclude_GH && !perf_guest && exclude_GH_default)
- eG = 1;
eu = 0;
}
if (mod.kernel) {
@@ -1762,6 +1835,13 @@ static int parse_events__modifier_list(struct parse_events_state *parse_state,
exclude_GH = eG = eH = 1;
eH = 0;
}
+ if (!exclude_GH && exclude_GH_default) {
+ if (perf_host)
+ eG = 1;
+ else if (perf_guest)
+ eH = 1;
+ }
+
evsel->core.attr.exclude_user = eu;
evsel->core.attr.exclude_kernel = ek;
evsel->core.attr.exclude_hv = eh;
@@ -2032,6 +2112,11 @@ static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct li
return arch_evlist__cmp(lhs, rhs);
}
+int __weak arch_evlist__add_required_events(struct list_head *list __always_unused)
+{
+ return 0;
+}
+
static int parse_events__sort_events_and_fix_groups(struct list_head *list)
{
int idx = 0, force_grouped_idx = -1;
@@ -2043,6 +2128,11 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
struct evsel *force_grouped_leader = NULL;
bool last_event_was_forced_leader = false;
+ /* On x86 topdown metrics events require a slots event. */
+ ret = arch_evlist__add_required_events(list);
+ if (ret)
+ return ret;
+
/*
* Compute index to insert ungrouped events at. Place them where the
* first ungrouped event appears.
@@ -2196,14 +2286,23 @@ int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filte
if (ret2 < 0)
return ret;
- if (ret2 && warn_if_reordered && !parse_state.wild_card_pmus)
- pr_warning("WARNING: events were regrouped to match PMUs\n");
-
/*
* Add list to the evlist even with errors to allow callers to clean up.
*/
evlist__splice_list_tail(evlist, &parse_state.list);
+ if (ret2 && warn_if_reordered && !parse_state.wild_card_pmus) {
+ pr_warning("WARNING: events were regrouped to match PMUs\n");
+
+ if (verbose > 0) {
+ struct strbuf sb = STRBUF_INIT;
+
+ evlist__uniquify_evsel_names(evlist, &stat_config);
+ evlist__format_evsels(evlist, &sb, 2048);
+ pr_debug("evlist after sorting/fixing: '%s'\n", sb.buf);
+ strbuf_release(&sb);
+ }
+ }
if (!ret) {
struct evsel *last;
@@ -2456,12 +2555,17 @@ foreach_evsel_in_last_glob(struct evlist *evlist,
return 0;
}
+/* Will a tracepoint filter work for str or should a BPF filter be used? */
+static bool is_possible_tp_filter(const char *str)
+{
+ return strstr(str, "uid") == NULL;
+}
+
static int set_filter(struct evsel *evsel, const void *arg)
{
const char *str = arg;
- bool found = false;
int nr_addr_filters = 0;
- struct perf_pmu *pmu = NULL;
+ struct perf_pmu *pmu;
if (evsel == NULL) {
fprintf(stderr,
@@ -2469,7 +2573,7 @@ static int set_filter(struct evsel *evsel, const void *arg)
return -1;
}
- if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) {
+ if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT && is_possible_tp_filter(str)) {
if (evsel__append_tp_filter(evsel, str) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
@@ -2479,16 +2583,11 @@ static int set_filter(struct evsel *evsel, const void *arg)
return 0;
}
- while ((pmu = perf_pmus__scan(pmu)) != NULL)
- if (pmu->type == evsel->core.attr.type) {
- found = true;
- break;
- }
-
- if (found)
+ pmu = evsel__find_pmu(evsel);
+ if (pmu) {
perf_pmu__scan_file(pmu, "nr_addr_filters",
"%d", &nr_addr_filters);
-
+ }
if (!nr_addr_filters)
return perf_bpf_filter__parse(&evsel->bpf_filters, str);
@@ -2510,6 +2609,30 @@ int parse_filter(const struct option *opt, const char *str,
(const void *)str);
}
+int parse_uid_filter(struct evlist *evlist, uid_t uid)
+{
+ struct option opt = {
+ .value = &evlist,
+ };
+ char buf[128];
+ int ret;
+
+ snprintf(buf, sizeof(buf), "uid == %d", uid);
+ ret = parse_filter(&opt, buf, /*unset=*/0);
+ if (ret) {
+ if (use_browser >= 1) {
+ /*
+ * Use ui__warning so a pop up appears above the
+ * underlying BPF error message.
+ */
+ ui__warning("Failed to add UID filtering that uses BPF filtering.\n");
+ } else {
+ fprintf(stderr, "Failed to add UID filtering that uses BPF filtering.\n");
+ }
+ }
+ return ret;
+}
+
static int add_exclude_perf_filter(struct evsel *evsel,
const void *arg __maybe_unused)
{
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index e176a34ab088..62dc7202e3ba 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -11,6 +11,7 @@
#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
+#include <sys/types.h>
struct evsel;
struct evlist;
@@ -20,7 +21,7 @@ struct option;
struct perf_pmu;
struct strbuf;
-const char *event_type(int type);
+const char *event_type(size_t type);
/* Arguments encoded in opt->value. */
struct parse_events_option_args {
@@ -45,6 +46,7 @@ static inline int parse_events(struct evlist *evlist, const char *str,
int parse_event(struct evlist *evlist, const char *str);
int parse_filter(const struct option *opt, const char *str, int unset);
+int parse_uid_filter(struct evlist *evlist, uid_t uid);
int exclude_perf(const struct option *opt, const char *arg, int unset);
enum parse_events__term_val_type {
@@ -80,7 +82,8 @@ enum parse_events__term_type {
PARSE_EVENTS__TERM_TYPE_RAW,
PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
PARSE_EVENTS__TERM_TYPE_HARDWARE,
-#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_HARDWARE + 1)
+ PARSE_EVENTS__TERM_TYPE_CPU,
+#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_CPU + 1)
};
struct parse_events_term {
@@ -261,7 +264,6 @@ struct event_symbol {
const char *alias;
};
extern const struct event_symbol event_symbols_hw[];
-extern const struct event_symbol event_symbols_sw[];
char *parse_events_formats_error_string(char *additional_terms);
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 7ed86e3e34e3..2034590eb789 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -117,12 +117,12 @@ do { \
yyless(0); \
} while (0)
-static int sym(yyscan_t scanner, int type, int config)
+static int sym(yyscan_t scanner, int config)
{
YYSTYPE *yylval = parse_events_get_lval(scanner);
- yylval->num = (type << 16) + config;
- return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW;
+ yylval->num = config;
+ return PE_VALUE_SYM_HW;
}
static int term(yyscan_t scanner, enum parse_events__term_type type)
@@ -335,6 +335,7 @@ aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
aux-action { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); }
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); }
+cpu { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CPU); }
cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
@@ -390,28 +391,16 @@ r0x{num_raw_hex} { return str(yyscanner, PE_RAW); }
<<EOF>> { BEGIN(INITIAL); }
}
-cpu-cycles|cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); }
-stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
-stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
-instructions { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); }
-cache-references { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); }
-cache-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); }
-branch-instructions|branches { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
-branch-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); }
-bus-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); }
-ref-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); }
-cpu-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); }
-task-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); }
-page-faults|faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); }
-minor-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); }
-major-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); }
-context-switches|cs { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); }
-cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); }
-alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
-emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
-dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
-bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
-cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
+cpu-cycles|cycles { return sym(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
+stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
+stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
+instructions { return sym(yyscanner, PERF_COUNT_HW_INSTRUCTIONS); }
+cache-references { return sym(yyscanner, PERF_COUNT_HW_CACHE_REFERENCES); }
+cache-misses { return sym(yyscanner, PERF_COUNT_HW_CACHE_MISSES); }
+branch-instructions|branches { return sym(yyscanner, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
+branch-misses { return sym(yyscanner, PERF_COUNT_HW_BRANCH_MISSES); }
+bus-cycles { return sym(yyscanner, PERF_COUNT_HW_BUS_CYCLES); }
+ref-cycles { return sym(yyscanner, PERF_COUNT_HW_REF_CPU_CYCLES); }
{lc_type} { return str(yyscanner, PE_LEGACY_CACHE); }
{lc_type}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index f888cbb076d6..a2361c0040d7 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -55,7 +55,7 @@ static void free_list_evsel(struct list_head* list_evsel)
%}
%token PE_START_EVENTS PE_START_TERMS
-%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_TERM
+%token PE_VALUE PE_VALUE_SYM_HW PE_TERM
%token PE_EVENT_NAME
%token PE_RAW PE_NAME
%token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH
@@ -66,10 +66,8 @@ static void free_list_evsel(struct list_head* list_evsel)
%token PE_TERM_HW
%type <num> PE_VALUE
%type <num> PE_VALUE_SYM_HW
-%type <num> PE_VALUE_SYM_SW
%type <mod> PE_MODIFIER_EVENT
%type <term_type> PE_TERM
-%type <num> value_sym
%type <str> PE_RAW
%type <str> PE_NAME
%type <str> PE_LEGACY_CACHE
@@ -306,24 +304,19 @@ PE_NAME sep_dc
$$ = list;
}
-value_sym:
-PE_VALUE_SYM_HW
-|
-PE_VALUE_SYM_SW
-
event_legacy_symbol:
-value_sym '/' event_config '/'
+PE_VALUE_SYM_HW '/' event_config '/'
{
struct list_head *list;
- int type = $1 >> 16;
- int config = $1 & 255;
int err;
- bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE);
list = alloc_list();
if (!list)
YYNOMEM;
- err = parse_events_add_numeric(_parse_state, list, type, config, $3, wildcard);
+ err = parse_events_add_numeric(_parse_state, list,
+ PERF_TYPE_HARDWARE, $1,
+ $3,
+ /*wildcard=*/true);
parse_events_terms__delete($3);
if (err) {
free_list_evsel(list);
@@ -332,18 +325,18 @@ value_sym '/' event_config '/'
$$ = list;
}
|
-value_sym sep_slash_slash_dc
+PE_VALUE_SYM_HW sep_slash_slash_dc
{
struct list_head *list;
- int type = $1 >> 16;
- int config = $1 & 255;
- bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE);
int err;
list = alloc_list();
if (!list)
YYNOMEM;
- err = parse_events_add_numeric(_parse_state, list, type, config, /*head_config=*/NULL, wildcard);
+ err = parse_events_add_numeric(_parse_state, list,
+ PERF_TYPE_HARDWARE, $1,
+ /*head_config=*/NULL,
+ /*wildcard=*/true);
if (err)
PE_ABORT(err);
$$ = list;
diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
index 0dacc133ed39..e5b3a2a5ddef 100644
--- a/tools/perf/util/pfm.c
+++ b/tools/perf/util/pfm.c
@@ -47,10 +47,6 @@ int parse_libpfm_events_option(const struct option *opt, const char *str,
p_orig = p = strdup(str);
if (!p)
return -1;
- /*
- * force loading of the PMU list
- */
- perf_pmus__scan(NULL);
for (q = p; strsep(&p, ",{}"); q = p) {
sep = p ? str + (p - p_orig - 1) : "";
@@ -234,6 +230,7 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state,
if (is_libpfm_event_supported(name, cpus, threads)) {
print_cb->print_event(print_state, topic, pinfo->name,
+ /*pmu_type=*/PERF_TYPE_RAW,
name, info->equiv,
/*scale_unit=*/NULL,
/*deprecated=*/NULL, "PFM event",
@@ -269,6 +266,7 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state,
print_cb->print_event(print_state,
topic,
pinfo->name,
+ /*pmu_type=*/PERF_TYPE_RAW,
name, /*alias=*/NULL,
/*scale_unit=*/NULL,
/*deprecated=*/NULL, "PFM event",
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index b7ebac5ab1d1..5a291f1380ed 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -20,13 +20,16 @@
#include "debug.h"
#include "evsel.h"
#include "pmu.h"
+#include "drm_pmu.h"
#include "hwmon_pmu.h"
#include "pmus.h"
#include "tool_pmu.h"
+#include "tp_pmu.h"
#include <util/pmu-bison.h>
#include <util/pmu-flex.h>
#include "parse-events.h"
#include "print-events.h"
+#include "hashmap.h"
#include "header.h"
#include "string2.h"
#include "strbuf.h"
@@ -66,8 +69,6 @@ struct perf_pmu_alias {
char *topic;
/** @terms: Owned list of the original parsed parameters. */
struct parse_events_terms terms;
- /** @list: List element of struct perf_pmu aliases. */
- struct list_head list;
/**
* @pmu_name: The name copied from the json struct pmu_event. This can
* differ from the PMU name as it won't have suffixes.
@@ -77,6 +78,12 @@ struct perf_pmu_alias {
char unit[UNIT_MAX_LEN+1];
/** @scale: Value to scale read counter values by. */
double scale;
+ /** @retirement_latency_mean: Value to be given for unsampled retirement latency mean. */
+ double retirement_latency_mean;
+ /** @retirement_latency_min: Value to be given for unsampled retirement latency min. */
+ double retirement_latency_min;
+ /** @retirement_latency_max: Value to be given for unsampled retirement latency max. */
+ double retirement_latency_max;
/**
* @per_pkg: Does the file
* <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.per-pkg or
@@ -257,7 +264,7 @@ static int pmu_format(struct perf_pmu *pmu, int dirfd, const char *name, bool ea
return 0;
}
-int perf_pmu__convert_scale(const char *scale, char **end, double *sval)
+static int parse_double(const char *scale, char **end, double *sval)
{
char *lc;
int ret = 0;
@@ -294,6 +301,11 @@ out:
return ret;
}
+int perf_pmu__convert_scale(const char *scale, char **end, double *sval)
+{
+ return parse_double(scale, end, sval);
+}
+
static int perf_pmu__parse_scale(struct perf_pmu *pmu, struct perf_pmu_alias *alias)
{
struct stat st;
@@ -407,25 +419,33 @@ static void perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias
}
/* Delete an alias entry. */
-static void perf_pmu_free_alias(struct perf_pmu_alias *newalias)
+static void perf_pmu_free_alias(struct perf_pmu_alias *alias)
{
- zfree(&newalias->name);
- zfree(&newalias->desc);
- zfree(&newalias->long_desc);
- zfree(&newalias->topic);
- zfree(&newalias->pmu_name);
- parse_events_terms__exit(&newalias->terms);
- free(newalias);
+ if (!alias)
+ return;
+
+ zfree(&alias->name);
+ zfree(&alias->desc);
+ zfree(&alias->long_desc);
+ zfree(&alias->topic);
+ zfree(&alias->pmu_name);
+ parse_events_terms__exit(&alias->terms);
+ free(alias);
}
static void perf_pmu__del_aliases(struct perf_pmu *pmu)
{
- struct perf_pmu_alias *alias, *tmp;
+ struct hashmap_entry *entry;
+ size_t bkt;
- list_for_each_entry_safe(alias, tmp, &pmu->aliases, list) {
- list_del(&alias->list);
- perf_pmu_free_alias(alias);
- }
+ if (!pmu->aliases)
+ return;
+
+ hashmap__for_each_entry(pmu->aliases, entry, bkt)
+ perf_pmu_free_alias(entry->pvalue);
+
+ hashmap__free(pmu->aliases);
+ pmu->aliases = NULL;
}
static struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu,
@@ -433,35 +453,37 @@ static struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu,
bool load)
{
struct perf_pmu_alias *alias;
+ bool has_sysfs_event;
+ char event_file_name[NAME_MAX + 8];
- if (load && !pmu->sysfs_aliases_loaded) {
- bool has_sysfs_event;
- char event_file_name[FILENAME_MAX + 8];
+ if (hashmap__find(pmu->aliases, name, &alias))
+ return alias;
- /*
- * Test if alias/event 'name' exists in the PMU's sysfs/events
- * directory. If not skip parsing the sysfs aliases. Sysfs event
- * name must be all lower or all upper case.
- */
- scnprintf(event_file_name, sizeof(event_file_name), "events/%s", name);
- for (size_t i = 7, n = 7 + strlen(name); i < n; i++)
- event_file_name[i] = tolower(event_file_name[i]);
+ if (!load || pmu->sysfs_aliases_loaded)
+ return NULL;
- has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name);
- if (!has_sysfs_event) {
- for (size_t i = 7, n = 7 + strlen(name); i < n; i++)
- event_file_name[i] = toupper(event_file_name[i]);
+ /*
+ * Test if alias/event 'name' exists in the PMU's sysfs/events
+ * directory. If not skip parsing the sysfs aliases. Sysfs event
+ * name must be all lower or all upper case.
+ */
+ scnprintf(event_file_name, sizeof(event_file_name), "events/%s", name);
+ for (size_t i = 7, n = 7 + strlen(name); i < n; i++)
+ event_file_name[i] = tolower(event_file_name[i]);
- has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name);
- }
- if (has_sysfs_event)
- pmu_aliases_parse(pmu);
+ has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name);
+ if (!has_sysfs_event) {
+ for (size_t i = 7, n = 7 + strlen(name); i < n; i++)
+ event_file_name[i] = toupper(event_file_name[i]);
+ has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name);
}
- list_for_each_entry(alias, &pmu->aliases, list) {
- if (!strcasecmp(alias->name, name))
+ if (has_sysfs_event) {
+ pmu_aliases_parse(pmu);
+ if (hashmap__find(pmu->aliases, name, &alias))
return alias;
}
+
return NULL;
}
@@ -525,6 +547,18 @@ static int update_alias(const struct pmu_event *pe,
if (!ret)
snprintf(data->alias->unit, sizeof(data->alias->unit), "%s", unit);
}
+ if (!ret && pe->retirement_latency_mean) {
+ ret = parse_double(pe->retirement_latency_mean, NULL,
+ &data->alias->retirement_latency_mean);
+ }
+ if (!ret && pe->retirement_latency_min) {
+ ret = parse_double(pe->retirement_latency_min, NULL,
+ &data->alias->retirement_latency_min);
+ }
+ if (!ret && pe->retirement_latency_max) {
+ ret = parse_double(pe->retirement_latency_max, NULL,
+ &data->alias->retirement_latency_max);
+ }
return ret;
}
@@ -532,8 +566,8 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
const char *desc, const char *val, FILE *val_fd,
const struct pmu_event *pe, enum event_source src)
{
- struct perf_pmu_alias *alias;
- int ret;
+ struct perf_pmu_alias *alias, *old_alias;
+ int ret = 0;
const char *long_desc = NULL, *topic = NULL, *unit = NULL, *pmu_name = NULL;
bool deprecated = false, perpkg = false;
@@ -562,6 +596,24 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
alias->per_pkg = perpkg;
alias->snapshot = false;
alias->deprecated = deprecated;
+ alias->retirement_latency_mean = 0.0;
+ alias->retirement_latency_min = 0.0;
+ alias->retirement_latency_max = 0.0;
+
+ if (!ret && pe && pe->retirement_latency_mean) {
+ ret = parse_double(pe->retirement_latency_mean, NULL,
+ &alias->retirement_latency_mean);
+ }
+ if (!ret && pe && pe->retirement_latency_min) {
+ ret = parse_double(pe->retirement_latency_min, NULL,
+ &alias->retirement_latency_min);
+ }
+ if (!ret && pe && pe->retirement_latency_max) {
+ ret = parse_double(pe->retirement_latency_max, NULL,
+ &alias->retirement_latency_max);
+ }
+ if (ret)
+ return ret;
ret = parse_events_terms(&alias->terms, val, val_fd);
if (ret) {
@@ -572,8 +624,7 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
alias->name = strdup(name);
alias->desc = desc ? strdup(desc) : NULL;
- alias->long_desc = long_desc ? strdup(long_desc) :
- desc ? strdup(desc) : NULL;
+ alias->long_desc = long_desc ? strdup(long_desc) : NULL;
alias->topic = topic ? strdup(topic) : NULL;
alias->pmu_name = pmu_name ? strdup(pmu_name) : NULL;
if (unit) {
@@ -607,7 +658,8 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name,
break;
}
- list_add_tail(&alias->list, &pmu->aliases);
+ hashmap__set(pmu->aliases, alias->name, alias, /*old_key=*/ NULL, &old_alias);
+ perf_pmu_free_alias(old_alias);
return 0;
}
@@ -701,7 +753,7 @@ static int pmu_aliases_parse(struct perf_pmu *pmu)
static int pmu_aliases_parse_eager(struct perf_pmu *pmu, int sysfs_fd)
{
- char path[FILENAME_MAX + 7];
+ char path[NAME_MAX + 8];
int ret, events_dir_fd;
scnprintf(path, sizeof(path), "%s/events", pmu->name);
@@ -1095,43 +1147,107 @@ perf_pmu__arch_init(struct perf_pmu *pmu)
pmu->mem_events = perf_mem_events;
}
+/* Variant of str_hash that does tolower on each character. */
+static size_t aliases__hash(long key, void *ctx __maybe_unused)
+{
+ const char *s = (const char *)key;
+ size_t h = 0;
+
+ while (*s) {
+ h = h * 31 + tolower(*s);
+ s++;
+ }
+ return h;
+}
+
+static bool aliases__equal(long key1, long key2, void *ctx __maybe_unused)
+{
+ return strcasecmp((const char *)key1, (const char *)key2) == 0;
+}
+
+int perf_pmu__init(struct perf_pmu *pmu, __u32 type, const char *name)
+{
+ pmu->type = type;
+ INIT_LIST_HEAD(&pmu->format);
+ INIT_LIST_HEAD(&pmu->caps);
+
+ pmu->name = strdup(name);
+ if (!pmu->name)
+ return -ENOMEM;
+
+ pmu->aliases = hashmap__new(aliases__hash, aliases__equal, /*ctx=*/ NULL);
+ if (!pmu->aliases)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static __u32 wellknown_pmu_type(const char *pmu_name)
+{
+ struct {
+ const char *pmu_name;
+ __u32 type;
+ } wellknown_pmus[] = {
+ {
+ "software",
+ PERF_TYPE_SOFTWARE
+ },
+ {
+ "tracepoint",
+ PERF_TYPE_TRACEPOINT
+ },
+ {
+ "breakpoint",
+ PERF_TYPE_BREAKPOINT
+ },
+ };
+ for (size_t i = 0; i < ARRAY_SIZE(wellknown_pmus); i++) {
+ if (!strcmp(wellknown_pmus[i].pmu_name, pmu_name))
+ return wellknown_pmus[i].type;
+ }
+ return PERF_TYPE_MAX;
+}
+
struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *name,
bool eager_load)
{
struct perf_pmu *pmu;
- __u32 type;
pmu = zalloc(sizeof(*pmu));
if (!pmu)
return NULL;
- pmu->name = strdup(name);
- if (!pmu->name)
- goto err;
+ if (perf_pmu__init(pmu, PERF_PMU_TYPE_FAKE, name) != 0) {
+ perf_pmu__delete(pmu);
+ return NULL;
+ }
/*
* Read type early to fail fast if a lookup name isn't a PMU. Ensure
* that type value is successfully assigned (return 1).
*/
- if (perf_pmu__scan_file_at(pmu, dirfd, "type", "%u", &type) != 1)
- goto err;
-
- INIT_LIST_HEAD(&pmu->format);
- INIT_LIST_HEAD(&pmu->aliases);
- INIT_LIST_HEAD(&pmu->caps);
+ if (perf_pmu__scan_file_at(pmu, dirfd, "type", "%u", &pmu->type) != 1) {
+ /* Double check the PMU's name isn't wellknown. */
+ pmu->type = wellknown_pmu_type(name);
+ if (pmu->type == PERF_TYPE_MAX) {
+ perf_pmu__delete(pmu);
+ return NULL;
+ }
+ }
/*
* The pmu data we store & need consists of the pmu
* type value and format definitions. Load both right
* now.
*/
- if (pmu_format(pmu, dirfd, name, eager_load))
- goto err;
+ if (pmu_format(pmu, dirfd, name, eager_load)) {
+ perf_pmu__delete(pmu);
+ return NULL;
+ }
pmu->is_core = is_pmu_core(name);
pmu->cpus = pmu_cpumask(dirfd, name, pmu->is_core);
- pmu->type = type;
pmu->is_uncore = pmu_is_uncore(dirfd, name);
if (pmu->is_uncore)
pmu->id = pmu_id(name);
@@ -1153,10 +1269,6 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char
pmu_aliases_parse_eager(pmu, dirfd);
return pmu;
-err:
- zfree(&pmu->name);
- free(pmu);
- return NULL;
}
/* Creates the PMU when sysfs scanning fails. */
@@ -1178,7 +1290,7 @@ struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pm
pmu->cpus = cpu_map__online();
INIT_LIST_HEAD(&pmu->format);
- INIT_LIST_HEAD(&pmu->aliases);
+ pmu->aliases = hashmap__new(aliases__hash, aliases__equal, /*ctx=*/ NULL);
INIT_LIST_HEAD(&pmu->caps);
list_add_tail(&pmu->list, core_pmus);
return pmu;
@@ -1429,7 +1541,7 @@ static int pmu_config_term(const struct perf_pmu *pmu,
break;
case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */
return -EINVAL;
- case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_HARDWARE:
+ case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_CPU:
/* Skip non-config terms. */
break;
default:
@@ -1546,6 +1658,8 @@ int perf_pmu__config_terms(const struct perf_pmu *pmu,
if (perf_pmu__is_hwmon(pmu))
return hwmon_pmu__config_terms(pmu, attr, terms, err);
+ if (perf_pmu__is_drm(pmu))
+ return drm_pmu__config_terms(pmu, attr, terms, err);
list_for_each_entry(term, &terms->terms, list) {
if (pmu_config_term(pmu, attr, term, terms, zero, apply_hardcoded, err))
@@ -1678,11 +1792,18 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_
info->unit = NULL;
info->scale = 0.0;
info->snapshot = false;
+ info->retirement_latency_mean = 0.0;
+ info->retirement_latency_min = 0.0;
+ info->retirement_latency_max = 0.0;
if (perf_pmu__is_hwmon(pmu)) {
ret = hwmon_pmu__check_alias(head_terms, info, err);
goto out;
}
+ if (perf_pmu__is_drm(pmu)) {
+ ret = drm_pmu__check_alias(pmu, head_terms, info, err);
+ goto out;
+ }
/* Fake PMU doesn't rewrite terms. */
if (perf_pmu__is_fake(pmu))
@@ -1711,6 +1832,10 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_
if (term->alternate_hw_config)
*alternate_hw_config = term->val.num;
+ info->retirement_latency_mean = alias->retirement_latency_mean;
+ info->retirement_latency_min = alias->retirement_latency_min;
+ info->retirement_latency_max = alias->retirement_latency_max;
+
list_del_init(&term->list);
parse_events_term__delete(term);
}
@@ -1804,6 +1929,7 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call
"aux-output",
"aux-action=(pause|resume|start-paused)",
"aux-sample-size=number",
+ "cpu=number",
};
struct perf_pmu_format *format;
int ret;
@@ -1858,8 +1984,12 @@ bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name)
return false;
if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(name))
return false;
+ if (perf_pmu__is_tracepoint(pmu))
+ return tp_pmu__have_event(pmu, name);
if (perf_pmu__is_hwmon(pmu))
return hwmon_pmu__have_event(pmu, name);
+ if (perf_pmu__is_drm(pmu))
+ return drm_pmu__have_event(pmu, name);
if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL)
return true;
if (pmu->cpu_aliases_added || !pmu->events_table)
@@ -1871,8 +2001,12 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu)
{
size_t nr;
+ if (perf_pmu__is_tracepoint(pmu))
+ return tp_pmu__num_events(pmu);
if (perf_pmu__is_hwmon(pmu))
return hwmon_pmu__num_events(pmu);
+ if (perf_pmu__is_drm(pmu))
+ return drm_pmu__num_events(pmu);
pmu_aliases_parse(pmu);
nr = pmu->sysfs_aliases + pmu->sys_json_aliases;
@@ -1930,21 +2064,27 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus,
void *state, pmu_event_callback cb)
{
char buf[1024];
- struct perf_pmu_alias *event;
struct pmu_event_info info = {
.pmu = pmu,
.event_type_desc = "Kernel PMU event",
};
int ret = 0;
struct strbuf sb;
+ struct hashmap_entry *entry;
+ size_t bkt;
+ if (perf_pmu__is_tracepoint(pmu))
+ return tp_pmu__for_each_event(pmu, state, cb);
if (perf_pmu__is_hwmon(pmu))
return hwmon_pmu__for_each_event(pmu, state, cb);
+ if (perf_pmu__is_drm(pmu))
+ return drm_pmu__for_each_event(pmu, state, cb);
strbuf_init(&sb, /*hint=*/ 0);
pmu_aliases_parse(pmu);
pmu_add_cpu_aliases(pmu);
- list_for_each_entry(event, &pmu->aliases, list) {
+ hashmap__for_each_entry(pmu->aliases, entry, bkt) {
+ struct perf_pmu_alias *event = entry->pvalue;
size_t buf_used, pmu_name_len;
if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(event->name))
@@ -2052,6 +2192,9 @@ static bool perf_pmu___name_match(const struct perf_pmu *pmu, const char *to_mat
for (size_t i = 0; i < ARRAY_SIZE(names); i++) {
const char *name = names[i];
+ if (!name)
+ continue;
+
if (wildcard && perf_pmu__match_wildcard_uncore(name, to_match))
return true;
if (!wildcard && perf_pmu__match_ignoring_suffix_uncore(name, to_match))
@@ -2211,6 +2354,17 @@ static void perf_pmu__del_caps(struct perf_pmu *pmu)
}
}
+struct perf_pmu_caps *perf_pmu__get_cap(struct perf_pmu *pmu, const char *name)
+{
+ struct perf_pmu_caps *caps;
+
+ list_for_each_entry(caps, &pmu->caps, list) {
+ if (!strcmp(caps->name, name))
+ return caps;
+ }
+ return NULL;
+}
+
/*
* Reading/parsing the given pmu capabilities, which should be located at:
* /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes.
@@ -2401,8 +2555,13 @@ int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename,
void perf_pmu__delete(struct perf_pmu *pmu)
{
+ if (!pmu)
+ return;
+
if (perf_pmu__is_hwmon(pmu))
hwmon_pmu__exit(pmu);
+ else if (perf_pmu__is_drm(pmu))
+ drm_pmu__exit(pmu);
perf_pmu__del_formats(&pmu->format);
perf_pmu__del_aliases(pmu);
@@ -2418,14 +2577,16 @@ void perf_pmu__delete(struct perf_pmu *pmu)
const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config)
{
- struct perf_pmu_alias *event;
+ struct hashmap_entry *entry;
+ size_t bkt;
if (!pmu)
return NULL;
pmu_aliases_parse(pmu);
pmu_add_cpu_aliases(pmu);
- list_for_each_entry(event, &pmu->aliases, list) {
+ hashmap__for_each_entry(pmu->aliases, entry, bkt) {
+ struct perf_pmu_alias *event = entry->pvalue;
struct perf_event_attr attr = {.config = 0,};
int ret = perf_pmu__config(pmu, &attr, &event->terms, /*apply_hardcoded=*/true,
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index b93014cc3670..1ebcf0242af8 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -14,6 +14,7 @@
#include "mem-events.h"
struct evsel_config_term;
+struct hashmap;
struct perf_cpu_map;
struct print_callbacks;
@@ -38,7 +39,9 @@ struct perf_pmu_caps {
enum {
PERF_PMU_TYPE_PE_START = 0,
- PERF_PMU_TYPE_PE_END = 0xFFFEFFFF,
+ PERF_PMU_TYPE_PE_END = 0xFFFDFFFF,
+ PERF_PMU_TYPE_DRM_START = 0xFFFE0000,
+ PERF_PMU_TYPE_DRM_END = 0xFFFEFFFF,
PERF_PMU_TYPE_HWMON_START = 0xFFFF0000,
PERF_PMU_TYPE_HWMON_END = 0xFFFFFFFD,
PERF_PMU_TYPE_TOOL = 0xFFFFFFFE,
@@ -125,7 +128,7 @@ struct perf_pmu {
* event read from <sysfs>/bus/event_source/devices/<name>/events/ or
* from json events in pmu-events.c.
*/
- struct list_head aliases;
+ struct hashmap *aliases;
/**
* @events_table: The events table for json events in pmu-events.c.
*/
@@ -194,6 +197,9 @@ struct perf_pmu {
struct perf_pmu_info {
const char *unit;
double scale;
+ double retirement_latency_mean;
+ double retirement_latency_min;
+ double retirement_latency_max;
bool per_pkg;
bool snapshot;
};
@@ -274,6 +280,8 @@ bool pmu_uncore_identifier_match(const char *compat, const char *id);
int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
+struct perf_pmu_caps *perf_pmu__get_cap(struct perf_pmu *pmu, const char *name);
+
int perf_pmu__caps_parse(struct perf_pmu *pmu);
void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
@@ -289,11 +297,11 @@ int perf_pmu__pathname_scnprintf(char *buf, size_t size,
int perf_pmu__event_source_devices_fd(void);
int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, int flags);
+int perf_pmu__init(struct perf_pmu *pmu, __u32 type, const char *name);
struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name,
bool eager_load);
struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus);
void perf_pmu__delete(struct perf_pmu *pmu);
-struct perf_pmu *perf_pmus__find_core_pmu(void);
const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config);
bool perf_pmu__is_fake(const struct perf_pmu *pmu);
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
index b99292de7669..98be2eb8f1f0 100644
--- a/tools/perf/util/pmus.c
+++ b/tools/perf/util/pmus.c
@@ -12,6 +12,7 @@
#include <unistd.h>
#include "cpumap.h"
#include "debug.h"
+#include "drm_pmu.h"
#include "evsel.h"
#include "pmus.h"
#include "pmu.h"
@@ -19,6 +20,7 @@
#include "tool_pmu.h"
#include "print-events.h"
#include "strbuf.h"
+#include "string2.h"
/*
* core_pmus: A PMU belongs to core_pmus if it's name is "cpu" or it's sysfs
@@ -42,16 +44,19 @@ enum perf_tool_pmu_type {
PERF_TOOL_PMU_TYPE_PE_OTHER,
PERF_TOOL_PMU_TYPE_TOOL,
PERF_TOOL_PMU_TYPE_HWMON,
+ PERF_TOOL_PMU_TYPE_DRM,
#define PERF_TOOL_PMU_TYPE_PE_CORE_MASK (1 << PERF_TOOL_PMU_TYPE_PE_CORE)
#define PERF_TOOL_PMU_TYPE_PE_OTHER_MASK (1 << PERF_TOOL_PMU_TYPE_PE_OTHER)
#define PERF_TOOL_PMU_TYPE_TOOL_MASK (1 << PERF_TOOL_PMU_TYPE_TOOL)
#define PERF_TOOL_PMU_TYPE_HWMON_MASK (1 << PERF_TOOL_PMU_TYPE_HWMON)
+#define PERF_TOOL_PMU_TYPE_DRM_MASK (1 << PERF_TOOL_PMU_TYPE_DRM)
#define PERF_TOOL_PMU_TYPE_ALL_MASK (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | \
PERF_TOOL_PMU_TYPE_PE_OTHER_MASK | \
PERF_TOOL_PMU_TYPE_TOOL_MASK | \
- PERF_TOOL_PMU_TYPE_HWMON_MASK)
+ PERF_TOOL_PMU_TYPE_HWMON_MASK | \
+ PERF_TOOL_PMU_TYPE_DRM_MASK)
};
static unsigned int read_pmu_types;
@@ -172,6 +177,8 @@ struct perf_pmu *perf_pmus__find(const char *name)
/* Looking up an individual perf event PMU failed, check if a tool PMU should be read. */
if (!strncmp(name, "hwmon_", 6))
to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK;
+ else if (!strncmp(name, "drm_", 4))
+ to_read_pmus |= PERF_TOOL_PMU_TYPE_DRM_MASK;
else if (!strcmp(name, "tool"))
to_read_pmus |= PERF_TOOL_PMU_TYPE_TOOL_MASK;
@@ -272,6 +279,10 @@ skip_pe_pmus:
(read_pmu_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) == 0)
perf_pmus__read_hwmon_pmus(&other_pmus);
+ if ((to_read_types & PERF_TOOL_PMU_TYPE_DRM_MASK) != 0 &&
+ (read_pmu_types & PERF_TOOL_PMU_TYPE_DRM_MASK) == 0)
+ perf_pmus__read_drm_pmus(&other_pmus);
+
list_sort(NULL, &other_pmus, pmus_cmp);
read_pmu_types |= to_read_types;
@@ -304,6 +315,8 @@ struct perf_pmu *perf_pmus__find_by_type(unsigned int type)
if (type >= PERF_PMU_TYPE_PE_START && type <= PERF_PMU_TYPE_PE_END) {
to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK |
PERF_TOOL_PMU_TYPE_PE_OTHER_MASK;
+ } else if (type >= PERF_PMU_TYPE_DRM_START && type <= PERF_PMU_TYPE_DRM_END) {
+ to_read_pmus = PERF_TOOL_PMU_TYPE_DRM_MASK;
} else if (type >= PERF_PMU_TYPE_HWMON_START && type <= PERF_PMU_TYPE_HWMON_END) {
to_read_pmus = PERF_TOOL_PMU_TYPE_HWMON_MASK;
} else {
@@ -350,6 +363,92 @@ struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu)
return NULL;
}
+struct perf_pmu *perf_pmus__scan_for_event(struct perf_pmu *pmu, const char *event)
+{
+ bool use_core_pmus = !pmu || pmu->is_core;
+
+ if (!pmu) {
+ /* Hwmon filename values that aren't used. */
+ enum hwmon_type type;
+ int number;
+ /*
+ * Core PMUs, other sysfs PMUs and tool PMU can take all event
+ * types or aren't wother optimizing for.
+ */
+ unsigned int to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK |
+ PERF_TOOL_PMU_TYPE_PE_OTHER_MASK |
+ PERF_TOOL_PMU_TYPE_TOOL_MASK;
+
+ /* Could the event be a hwmon event? */
+ if (parse_hwmon_filename(event, &type, &number, /*item=*/NULL, /*alarm=*/NULL))
+ to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK;
+
+ /* Could the event be a DRM event? */
+ if (strlen(event) > 4 && strncmp("drm-", event, 4) == 0)
+ to_read_pmus |= PERF_TOOL_PMU_TYPE_DRM_MASK;
+
+ pmu_read_sysfs(to_read_pmus);
+ pmu = list_prepare_entry(pmu, &core_pmus, list);
+ }
+ if (use_core_pmus) {
+ list_for_each_entry_continue(pmu, &core_pmus, list)
+ return pmu;
+
+ pmu = NULL;
+ pmu = list_prepare_entry(pmu, &other_pmus, list);
+ }
+ list_for_each_entry_continue(pmu, &other_pmus, list)
+ return pmu;
+ return NULL;
+}
+
+struct perf_pmu *perf_pmus__scan_matching_wildcard(struct perf_pmu *pmu, const char *wildcard)
+{
+ bool use_core_pmus = !pmu || pmu->is_core;
+
+ if (!pmu) {
+ /*
+ * Core PMUs, other sysfs PMUs and tool PMU can have any name or
+ * aren't wother optimizing for.
+ */
+ unsigned int to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK |
+ PERF_TOOL_PMU_TYPE_PE_OTHER_MASK |
+ PERF_TOOL_PMU_TYPE_TOOL_MASK;
+
+ /*
+ * Hwmon PMUs have an alias from a sysfs name like hwmon0,
+ * hwmon1, etc. or have a name of hwmon_<name>. They therefore
+ * can only have a wildcard match if the wildcard begins with
+ * "hwmon". Similarly drm PMUs must start "drm_", avoid reading
+ * such events unless the PMU could match.
+ */
+ if (strisglob(wildcard)) {
+ to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK |
+ PERF_TOOL_PMU_TYPE_DRM_MASK;
+ } else if (strlen(wildcard) >= 4 && strncmp("drm_", wildcard, 4) == 0) {
+ to_read_pmus |= PERF_TOOL_PMU_TYPE_DRM_MASK;
+ } else if (strlen(wildcard) >= 5 && strncmp("hwmon", wildcard, 5) == 0) {
+ to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK;
+ }
+
+ pmu_read_sysfs(to_read_pmus);
+ pmu = list_prepare_entry(pmu, &core_pmus, list);
+ }
+ if (use_core_pmus) {
+ list_for_each_entry_continue(pmu, &core_pmus, list) {
+ if (perf_pmu__wildcard_match(pmu, wildcard))
+ return pmu;
+ }
+ pmu = NULL;
+ pmu = list_prepare_entry(pmu, &other_pmus, list);
+ }
+ list_for_each_entry_continue(pmu, &other_pmus, list) {
+ if (perf_pmu__wildcard_match(pmu, wildcard))
+ return pmu;
+ }
+ return NULL;
+}
+
static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu)
{
bool use_core_pmus = !pmu || pmu->is_core;
@@ -546,6 +645,7 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p
print_cb->print_event(print_state,
aliases[j].topic,
aliases[j].pmu_name,
+ aliases[j].pmu->type,
aliases[j].name,
aliases[j].alias,
aliases[j].scale_unit,
@@ -650,6 +750,7 @@ void perf_pmus__print_raw_pmu_events(const struct print_callbacks *print_cb, voi
print_cb->print_event(print_state,
/*topic=*/NULL,
/*pmu_name=*/NULL,
+ pmu->type,
format_args.short_string.buf,
/*event_alias=*/NULL,
/*scale_unit=*/NULL,
@@ -715,27 +816,39 @@ bool perf_pmus__supports_extended_type(void)
return perf_pmus__do_support_extended_type;
}
+struct perf_pmu *perf_pmus__find_by_attr(const struct perf_event_attr *attr)
+{
+ struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
+ u32 type = attr->type;
+ bool legacy_core_type = type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE;
+
+ if (!pmu && legacy_core_type && perf_pmus__supports_extended_type()) {
+ type = attr->config >> PERF_PMU_TYPE_SHIFT;
+
+ pmu = perf_pmus__find_by_type(type);
+ }
+ if (!pmu && (legacy_core_type || type == PERF_TYPE_RAW)) {
+ /*
+ * For legacy events, if there was no extended type info then
+ * assume the PMU is the first core PMU.
+ *
+ * On architectures like ARM there is no sysfs PMU with type
+ * PERF_TYPE_RAW, assume the RAW events are going to be handled
+ * by the first core PMU.
+ */
+ pmu = perf_pmus__find_core_pmu();
+ }
+ return pmu;
+}
+
struct perf_pmu *evsel__find_pmu(const struct evsel *evsel)
{
struct perf_pmu *pmu = evsel->pmu;
- bool legacy_core_type;
if (pmu)
return pmu;
- pmu = perf_pmus__find_by_type(evsel->core.attr.type);
- legacy_core_type =
- evsel->core.attr.type == PERF_TYPE_HARDWARE ||
- evsel->core.attr.type == PERF_TYPE_HW_CACHE;
- if (!pmu && legacy_core_type) {
- if (perf_pmus__supports_extended_type()) {
- u32 type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT;
-
- pmu = perf_pmus__find_by_type(type);
- } else {
- pmu = perf_pmus__find_core_pmu();
- }
- }
+ pmu = perf_pmus__find_by_attr(&evsel->core.attr);
((struct evsel *)evsel)->pmu = pmu;
return pmu;
}
@@ -755,7 +868,7 @@ struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name)
return perf_pmu__lookup(&other_pmus, test_sysfs_dirfd, name, /*eager_load=*/true);
}
-struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir,
+struct perf_pmu *perf_pmus__add_test_hwmon_pmu(const char *hwmon_dir,
const char *sysfs_name,
const char *name)
{
diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h
index 8def20e615ad..7cb36863711a 100644
--- a/tools/perf/util/pmus.h
+++ b/tools/perf/util/pmus.h
@@ -5,6 +5,7 @@
#include <stdbool.h>
#include <stddef.h>
+struct perf_event_attr;
struct perf_pmu;
struct print_callbacks;
@@ -16,9 +17,12 @@ void perf_pmus__destroy(void);
struct perf_pmu *perf_pmus__find(const char *name);
struct perf_pmu *perf_pmus__find_by_type(unsigned int type);
+struct perf_pmu *perf_pmus__find_by_attr(const struct perf_event_attr *attr);
struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu);
struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu);
+struct perf_pmu *perf_pmus__scan_for_event(struct perf_pmu *pmu, const char *event);
+struct perf_pmu *perf_pmus__scan_matching_wildcard(struct perf_pmu *pmu, const char *wildcard);
const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str);
@@ -29,9 +33,10 @@ int perf_pmus__num_core_pmus(void);
bool perf_pmus__supports_extended_type(void);
struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name);
-struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir,
+struct perf_pmu *perf_pmus__add_test_hwmon_pmu(const char *hwmon_dir,
const char *sysfs_name,
const char *name);
struct perf_pmu *perf_pmus__fake_pmu(void);
+struct perf_pmu *perf_pmus__find_core_pmu(void);
#endif /* __PMUS_H */
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index a786cbfb0ff5..4153124a9948 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -44,97 +44,6 @@ static const char * const event_type_descriptors[] = {
"Hardware breakpoint",
};
-/*
- * Print the events from <debugfs_mount_point>/tracing/events
- */
-void print_tracepoint_events(const struct print_callbacks *print_cb __maybe_unused, void *print_state __maybe_unused)
-{
- char *events_path = get_tracing_file("events");
- int events_fd = open(events_path, O_PATH);
- struct dirent **sys_namelist = NULL;
- int sys_items;
-
- if (events_fd < 0) {
- pr_err("Error: failed to open tracing events directory\n");
- pr_err("%s: %s\n", events_path, strerror(errno));
- return;
- }
- put_tracing_file(events_path);
-
- sys_items = tracing_events__scandir_alphasort(&sys_namelist);
-
- for (int i = 0; i < sys_items; i++) {
- struct dirent *sys_dirent = sys_namelist[i];
- struct dirent **evt_namelist = NULL;
- int dir_fd;
- int evt_items;
-
- if (sys_dirent->d_type != DT_DIR ||
- !strcmp(sys_dirent->d_name, ".") ||
- !strcmp(sys_dirent->d_name, ".."))
- goto next_sys;
-
- dir_fd = openat(events_fd, sys_dirent->d_name, O_PATH);
- if (dir_fd < 0)
- goto next_sys;
-
- evt_items = scandirat(events_fd, sys_dirent->d_name, &evt_namelist, NULL, alphasort);
- for (int j = 0; j < evt_items; j++) {
- /*
- * Buffer sized at twice the max filename length + 1
- * separator + 1 \0 terminator.
- */
- char buf[NAME_MAX * 2 + 2];
- /* 16 possible hex digits and 22 other characters and \0. */
- char encoding[16 + 22];
- struct dirent *evt_dirent = evt_namelist[j];
- struct io id;
- __u64 config;
-
- if (evt_dirent->d_type != DT_DIR ||
- !strcmp(evt_dirent->d_name, ".") ||
- !strcmp(evt_dirent->d_name, ".."))
- goto next_evt;
-
- snprintf(buf, sizeof(buf), "%s/id", evt_dirent->d_name);
- io__init(&id, openat(dir_fd, buf, O_RDONLY), buf, sizeof(buf));
-
- if (id.fd < 0)
- goto next_evt;
-
- if (io__get_dec(&id, &config) < 0) {
- close(id.fd);
- goto next_evt;
- }
- close(id.fd);
-
- snprintf(buf, sizeof(buf), "%s:%s",
- sys_dirent->d_name, evt_dirent->d_name);
- snprintf(encoding, sizeof(encoding), "tracepoint/config=0x%llx/", config);
- print_cb->print_event(print_state,
- /*topic=*/NULL,
- /*pmu_name=*/NULL, /* really "tracepoint" */
- /*event_name=*/buf,
- /*event_alias=*/NULL,
- /*scale_unit=*/NULL,
- /*deprecated=*/false,
- "Tracepoint event",
- /*desc=*/NULL,
- /*long_desc=*/NULL,
- encoding);
-next_evt:
- free(evt_namelist[j]);
- }
- close(dir_fd);
- free(evt_namelist);
-next_sys:
- free(sys_namelist[i]);
- }
-
- free(sys_namelist);
- close(events_fd);
-}
-
void print_sdt_events(const struct print_callbacks *print_cb, void *print_state)
{
struct strlist *bidlist, *sdtlist;
@@ -212,6 +121,7 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state)
print_cb->print_event(print_state,
/*topic=*/NULL,
/*pmu_name=*/NULL,
+ PERF_TYPE_TRACEPOINT,
evt_name ?: sdt_name->s,
/*event_alias=*/NULL,
/*deprecated=*/false,
@@ -268,6 +178,7 @@ bool is_event_supported(u8 type, u64 config)
ret = evsel__open(evsel, NULL, tmap) >= 0;
}
+ evsel__close(evsel);
evsel__delete(evsel);
}
@@ -312,6 +223,7 @@ int print_hwcache_events(const struct print_callbacks *print_cb, void *print_sta
print_cb->print_event(print_state,
"cache",
pmu->name,
+ pmu->type,
name,
alias_name,
/*scale_unit=*/NULL,
@@ -368,6 +280,7 @@ void print_symbol_events(const struct print_callbacks *print_cb, void *print_sta
print_cb->print_event(print_state,
/*topic=*/NULL,
/*pmu_name=*/NULL,
+ type,
nd->s,
alias,
/*scale_unit=*/NULL,
@@ -380,6 +293,139 @@ void print_symbol_events(const struct print_callbacks *print_cb, void *print_sta
strlist__delete(evt_name_list);
}
+/** struct mep - RB-tree node for building printing information. */
+struct mep {
+ /** nd - RB-tree element. */
+ struct rb_node nd;
+ /** @metric_group: Owned metric group name, separated others with ';'. */
+ char *metric_group;
+ const char *metric_name;
+ const char *metric_desc;
+ const char *metric_long_desc;
+ const char *metric_expr;
+ const char *metric_threshold;
+ const char *metric_unit;
+ const char *pmu_name;
+};
+
+static int mep_cmp(struct rb_node *rb_node, const void *entry)
+{
+ struct mep *a = container_of(rb_node, struct mep, nd);
+ struct mep *b = (struct mep *)entry;
+ int ret;
+
+ ret = strcmp(a->metric_group, b->metric_group);
+ if (ret)
+ return ret;
+
+ return strcmp(a->metric_name, b->metric_name);
+}
+
+static struct rb_node *mep_new(struct rblist *rl __maybe_unused, const void *entry)
+{
+ struct mep *me = malloc(sizeof(struct mep));
+
+ if (!me)
+ return NULL;
+
+ memcpy(me, entry, sizeof(struct mep));
+ return &me->nd;
+}
+
+static void mep_delete(struct rblist *rl __maybe_unused,
+ struct rb_node *nd)
+{
+ struct mep *me = container_of(nd, struct mep, nd);
+
+ zfree(&me->metric_group);
+ free(me);
+}
+
+static struct mep *mep_lookup(struct rblist *groups, const char *metric_group,
+ const char *metric_name)
+{
+ struct rb_node *nd;
+ struct mep me = {
+ .metric_group = strdup(metric_group),
+ .metric_name = metric_name,
+ };
+ nd = rblist__find(groups, &me);
+ if (nd) {
+ free(me.metric_group);
+ return container_of(nd, struct mep, nd);
+ }
+ rblist__add_node(groups, &me);
+ nd = rblist__find(groups, &me);
+ if (nd)
+ return container_of(nd, struct mep, nd);
+ return NULL;
+}
+
+static int metricgroup__add_to_mep_groups_callback(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table __maybe_unused,
+ void *vdata)
+{
+ struct rblist *groups = vdata;
+ const char *g;
+ char *omg, *mg;
+
+ mg = strdup(pm->metric_group ?: pm->metric_name);
+ if (!mg)
+ return -ENOMEM;
+ omg = mg;
+ while ((g = strsep(&mg, ";")) != NULL) {
+ struct mep *me;
+
+ g = skip_spaces(g);
+ if (strlen(g))
+ me = mep_lookup(groups, g, pm->metric_name);
+ else
+ me = mep_lookup(groups, pm->metric_name, pm->metric_name);
+
+ if (me) {
+ me->metric_desc = pm->desc;
+ me->metric_long_desc = pm->long_desc;
+ me->metric_expr = pm->metric_expr;
+ me->metric_threshold = pm->metric_threshold;
+ me->metric_unit = pm->unit;
+ me->pmu_name = pm->pmu;
+ }
+ }
+ free(omg);
+
+ return 0;
+}
+
+void metricgroup__print(const struct print_callbacks *print_cb, void *print_state)
+{
+ struct rblist groups;
+ struct rb_node *node, *next;
+ const struct pmu_metrics_table *table = pmu_metrics_table__find();
+
+ rblist__init(&groups);
+ groups.node_new = mep_new;
+ groups.node_cmp = mep_cmp;
+ groups.node_delete = mep_delete;
+
+ metricgroup__for_each_metric(table, metricgroup__add_to_mep_groups_callback, &groups);
+
+ for (node = rb_first_cached(&groups.entries); node; node = next) {
+ struct mep *me = container_of(node, struct mep, nd);
+
+ print_cb->print_metric(print_state,
+ me->metric_group,
+ me->metric_name,
+ me->metric_desc,
+ me->metric_long_desc,
+ me->metric_expr,
+ me->metric_threshold,
+ me->metric_unit,
+ me->pmu_name);
+ next = rb_next(node);
+ rblist__remove_node(&groups, node);
+ }
+}
+
/*
* Print the help text for the event symbols:
*/
@@ -387,8 +433,6 @@ void print_events(const struct print_callbacks *print_cb, void *print_state)
{
print_symbol_events(print_cb, print_state, PERF_TYPE_HARDWARE,
event_symbols_hw, PERF_COUNT_HW_MAX);
- print_symbol_events(print_cb, print_state, PERF_TYPE_SOFTWARE,
- event_symbols_sw, PERF_COUNT_SW_MAX);
print_hwcache_events(print_cb, print_state);
@@ -397,6 +441,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state)
print_cb->print_event(print_state,
/*topic=*/NULL,
/*pmu_name=*/NULL,
+ PERF_TYPE_RAW,
"rNNN",
/*event_alias=*/NULL,
/*scale_unit=*/NULL,
@@ -411,6 +456,7 @@ void print_events(const struct print_callbacks *print_cb, void *print_state)
print_cb->print_event(print_state,
/*topic=*/NULL,
/*pmu_name=*/NULL,
+ PERF_TYPE_BREAKPOINT,
"mem:<addr>[/len][:access]",
/*scale_unit=*/NULL,
/*event_alias=*/NULL,
@@ -420,8 +466,6 @@ void print_events(const struct print_callbacks *print_cb, void *print_state)
/*long_desc=*/NULL,
/*encoding_desc=*/NULL);
- print_tracepoint_events(print_cb, print_state);
-
print_sdt_events(print_cb, print_state);
metricgroup__print(print_cb, print_state);
diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
index 445efa1636c1..d6ba384f0c66 100644
--- a/tools/perf/util/print-events.h
+++ b/tools/perf/util/print-events.h
@@ -12,7 +12,7 @@ struct print_callbacks {
void (*print_start)(void *print_state);
void (*print_end)(void *print_state);
void (*print_event)(void *print_state, const char *topic,
- const char *pmu_name,
+ const char *pmu_name, u32 pmu_type,
const char *event_name, const char *event_alias,
const char *scale_unit,
bool deprecated, const char *event_type_desc,
@@ -25,7 +25,8 @@ struct print_callbacks {
const char *long_desc,
const char *expr,
const char *threshold,
- const char *unit);
+ const char *unit,
+ const char *pmu_name);
bool (*skip_duplicate_pmus)(void *print_state);
};
@@ -36,7 +37,7 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state)
void print_symbol_events(const struct print_callbacks *print_cb, void *print_state,
unsigned int type, const struct event_symbol *syms,
unsigned int max);
-void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state);
+void metricgroup__print(const struct print_callbacks *print_cb, void *print_state);
bool is_event_supported(u8 type, u64 config);
#endif /* __PERF_PRINT_EVENTS_H */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 307ad6242a4e..6ab2eb551b6c 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -75,12 +75,14 @@ int e_snprintf(char *str, size_t size, const char *format, ...)
}
static struct machine *host_machine;
+static struct perf_env host_env;
/* Initialize symbol maps and path of vmlinux/modules */
int init_probe_symbol_maps(bool user_only)
{
int ret;
+ perf_env__init(&host_env);
symbol_conf.allow_aliases = true;
ret = symbol__init(NULL);
if (ret < 0) {
@@ -94,7 +96,7 @@ int init_probe_symbol_maps(bool user_only)
if (symbol_conf.vmlinux_name)
pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
- host_machine = machine__new_host();
+ host_machine = machine__new_host(&host_env);
if (!host_machine) {
pr_debug("machine__new_host() failed.\n");
symbol__exit();
@@ -111,6 +113,7 @@ void exit_probe_symbol_maps(void)
machine__delete(host_machine);
host_machine = NULL;
symbol__exit();
+ perf_env__exit(&host_env);
}
static struct ref_reloc_sym *kernel_get_ref_reloc_sym(struct map **pmap)
@@ -502,7 +505,7 @@ static struct debuginfo *open_from_debuginfod(struct dso *dso, struct nsinfo *ns
if (!c)
return NULL;
- build_id__sprintf(dso__bid(dso), sbuild_id);
+ build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id));
fd = debuginfod_find_debuginfo(c, (const unsigned char *)sbuild_id,
0, &path);
if (fd >= 0)
@@ -1063,7 +1066,6 @@ static int sprint_line_description(char *sbuf, size_t size, struct line_range *l
static int __show_line_range(struct line_range *lr, const char *module,
bool user)
{
- struct build_id bid;
int l = 1;
struct int_node *ln;
struct debuginfo *dinfo;
@@ -1088,8 +1090,10 @@ static int __show_line_range(struct line_range *lr, const char *module,
ret = -ENOENT;
}
if (dinfo->build_id) {
+ struct build_id bid;
+
build_id__init(&bid, dinfo->build_id, BUILD_ID_SIZE);
- build_id__sprintf(&bid, sbuild_id);
+ build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id));
}
debuginfo__delete(dinfo);
if (ret == 0 || ret == -ENOENT) {
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index ec8ac242fedb..5069fb61f48c 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -448,10 +448,10 @@ static int probe_cache__open(struct probe_cache *pcache, const char *target,
if (!target || !strcmp(target, DSO__NAME_KALLSYMS)) {
target = DSO__NAME_KALLSYMS;
is_kallsyms = true;
- ret = sysfs__sprintf_build_id("/", sbuildid);
+ ret = sysfs__snprintf_build_id("/", sbuildid, sizeof(sbuildid));
} else {
nsinfo__mountns_enter(nsi, &nsc);
- ret = filename__sprintf_build_id(target, sbuildid);
+ ret = filename__snprintf_build_id(target, sbuildid, sizeof(sbuildid));
nsinfo__mountns_exit(&nsc);
}
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 3cc7c40f5097..5ffd97ee4898 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -848,7 +848,6 @@ static int probe_point_lazy_walker(const char *fname, int lineno,
/* Find probe points from lazy pattern */
static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
{
- struct build_id bid;
char sbuild_id[SBUILD_ID_SIZE] = "";
int ret = 0;
char *fpath;
@@ -858,8 +857,10 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
comp_dir = cu_get_comp_dir(&pf->cu_die);
if (pf->dbg->build_id) {
+ struct build_id bid;
+
build_id__init(&bid, pf->dbg->build_id, BUILD_ID_SIZE);
- build_id__sprintf(&bid, sbuild_id);
+ build_id__snprintf(&bid, sbuild_id, sizeof(sbuild_id));
}
ret = find_source_path(pf->fname, sbuild_id, comp_dir, &fpath);
if (ret < 0) {
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index f3c05da25b4a..ea77bea0306f 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -10,6 +10,7 @@
#endif
#include <perf/mmap.h>
#include "callchain.h"
+#include "counts.h"
#include "evlist.h"
#include "evsel.h"
#include "event.h"
@@ -18,6 +19,7 @@
#include "strbuf.h"
#include "thread_map.h"
#include "trace-event.h"
+#include "metricgroup.h"
#include "mmap.h"
#include "util/sample.h"
#include <internal/lib.h>
@@ -335,7 +337,6 @@ tracepoint_field(const struct pyrf_event *pe, struct tep_format_field *field)
static PyObject*
get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name)
{
- const char *str = _PyUnicode_AsString(PyObject_Str(attr_name));
struct evsel *evsel = pevent->evsel;
struct tep_event *tp_format = evsel__tp_format(evsel);
struct tep_format_field *field;
@@ -343,7 +344,18 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name)
if (IS_ERR_OR_NULL(tp_format))
return NULL;
+ PyObject *obj = PyObject_Str(attr_name);
+ if (obj == NULL)
+ return NULL;
+
+ const char *str = PyUnicode_AsUTF8(obj);
+ if (str == NULL) {
+ Py_DECREF(obj);
+ return NULL;
+ }
+
field = tep_find_any_field(tp_format, str);
+ Py_DECREF(obj);
return field ? tracepoint_field(pevent, field) : NULL;
}
#endif /* HAVE_LIBTRACEEVENT */
@@ -527,8 +539,10 @@ static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i)
{
struct pyrf_cpu_map *pcpus = (void *)obj;
- if (i >= perf_cpu_map__nr(pcpus->cpus))
+ if (i >= perf_cpu_map__nr(pcpus->cpus)) {
+ PyErr_SetString(PyExc_IndexError, "Index out of range");
return NULL;
+ }
return Py_BuildValue("i", perf_cpu_map__cpu(pcpus->cpus, i).cpu);
}
@@ -566,14 +580,14 @@ struct pyrf_thread_map {
static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
PyObject *args, PyObject *kwargs)
{
- static char *kwlist[] = { "pid", "tid", "uid", NULL };
- int pid = -1, tid = -1, uid = UINT_MAX;
+ static char *kwlist[] = { "pid", "tid", NULL };
+ int pid = -1, tid = -1;
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii",
- kwlist, &pid, &tid, &uid))
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii",
+ kwlist, &pid, &tid))
return -1;
- pthreads->threads = thread_map__new(pid, tid, uid);
+ pthreads->threads = thread_map__new(pid, tid);
if (pthreads->threads == NULL)
return -1;
return 0;
@@ -596,8 +610,10 @@ static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i)
{
struct pyrf_thread_map *pthreads = (void *)obj;
- if (i >= perf_thread_map__nr(pthreads->threads))
+ if (i >= perf_thread_map__nr(pthreads->threads)) {
+ PyErr_SetString(PyExc_IndexError, "Index out of range");
return NULL;
+ }
return Py_BuildValue("i", perf_thread_map__pid(pthreads->threads, i));
}
@@ -626,6 +642,92 @@ static int pyrf_thread_map__setup_types(void)
return PyType_Ready(&pyrf_thread_map__type);
}
+struct pyrf_counts_values {
+ PyObject_HEAD
+
+ struct perf_counts_values values;
+};
+
+static const char pyrf_counts_values__doc[] = PyDoc_STR("perf counts values object.");
+
+static void pyrf_counts_values__delete(struct pyrf_counts_values *pcounts_values)
+{
+ Py_TYPE(pcounts_values)->tp_free((PyObject *)pcounts_values);
+}
+
+#define counts_values_member_def(member, ptype, help) \
+ { #member, ptype, \
+ offsetof(struct pyrf_counts_values, values.member), \
+ 0, help }
+
+static PyMemberDef pyrf_counts_values_members[] = {
+ counts_values_member_def(val, T_ULONG, "Value of event"),
+ counts_values_member_def(ena, T_ULONG, "Time for which enabled"),
+ counts_values_member_def(run, T_ULONG, "Time for which running"),
+ counts_values_member_def(id, T_ULONG, "Unique ID for an event"),
+ counts_values_member_def(lost, T_ULONG, "Num of lost samples"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_counts_values_get_values(struct pyrf_counts_values *self, void *closure)
+{
+ PyObject *vals = PyList_New(5);
+
+ if (!vals)
+ return NULL;
+ for (int i = 0; i < 5; i++)
+ PyList_SetItem(vals, i, PyLong_FromLong(self->values.values[i]));
+
+ return vals;
+}
+
+static int pyrf_counts_values_set_values(struct pyrf_counts_values *self, PyObject *list,
+ void *closure)
+{
+ Py_ssize_t size;
+ PyObject *item = NULL;
+
+ if (!PyList_Check(list)) {
+ PyErr_SetString(PyExc_TypeError, "Value assigned must be a list");
+ return -1;
+ }
+
+ size = PyList_Size(list);
+ for (Py_ssize_t i = 0; i < size; i++) {
+ item = PyList_GetItem(list, i);
+ if (!PyLong_Check(item)) {
+ PyErr_SetString(PyExc_TypeError, "List members should be numbers");
+ return -1;
+ }
+ self->values.values[i] = PyLong_AsLong(item);
+ }
+
+ return 0;
+}
+
+static PyGetSetDef pyrf_counts_values_getset[] = {
+ {"values", (getter)pyrf_counts_values_get_values, (setter)pyrf_counts_values_set_values,
+ "Name field", NULL},
+ { .name = NULL, },
+};
+
+static PyTypeObject pyrf_counts_values__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.counts_values",
+ .tp_basicsize = sizeof(struct pyrf_counts_values),
+ .tp_dealloc = (destructor)pyrf_counts_values__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_counts_values__doc,
+ .tp_members = pyrf_counts_values_members,
+ .tp_getset = pyrf_counts_values_getset,
+};
+
+static int pyrf_counts_values__setup_types(void)
+{
+ pyrf_counts_values__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_counts_values__type);
+}
+
struct pyrf_evsel {
PyObject_HEAD
@@ -781,15 +883,104 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
return Py_None;
}
+static PyObject *pyrf_evsel__cpus(struct pyrf_evsel *pevsel)
+{
+ struct pyrf_cpu_map *pcpu_map = PyObject_New(struct pyrf_cpu_map, &pyrf_cpu_map__type);
+
+ if (pcpu_map)
+ pcpu_map->cpus = perf_cpu_map__get(pevsel->evsel.core.cpus);
+
+ return (PyObject *)pcpu_map;
+}
+
+static PyObject *pyrf_evsel__threads(struct pyrf_evsel *pevsel)
+{
+ struct pyrf_thread_map *pthread_map =
+ PyObject_New(struct pyrf_thread_map, &pyrf_thread_map__type);
+
+ if (pthread_map)
+ pthread_map->threads = perf_thread_map__get(pevsel->evsel.core.threads);
+
+ return (PyObject *)pthread_map;
+}
+
+/*
+ * Ensure evsel's counts and prev_raw_counts are allocated, the latter
+ * used by tool PMUs to compute the cumulative count as expected by
+ * stat's process_counter_values.
+ */
+static int evsel__ensure_counts(struct evsel *evsel)
+{
+ int nthreads, ncpus;
+
+ if (evsel->counts != NULL)
+ return 0;
+
+ nthreads = perf_thread_map__nr(evsel->core.threads);
+ ncpus = perf_cpu_map__nr(evsel->core.cpus);
+
+ evsel->counts = perf_counts__new(ncpus, nthreads);
+ if (evsel->counts == NULL)
+ return -ENOMEM;
+
+ evsel->prev_raw_counts = perf_counts__new(ncpus, nthreads);
+ if (evsel->prev_raw_counts == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static PyObject *pyrf_evsel__read(struct pyrf_evsel *pevsel,
+ PyObject *args, PyObject *kwargs)
+{
+ struct evsel *evsel = &pevsel->evsel;
+ int cpu = 0, cpu_idx, thread = 0, thread_idx;
+ struct perf_counts_values *old_count, *new_count;
+ struct pyrf_counts_values *count_values = PyObject_New(struct pyrf_counts_values,
+ &pyrf_counts_values__type);
+
+ if (!count_values)
+ return NULL;
+
+ if (!PyArg_ParseTuple(args, "ii", &cpu, &thread))
+ return NULL;
+
+ cpu_idx = perf_cpu_map__idx(evsel->core.cpus, (struct perf_cpu){.cpu = cpu});
+ if (cpu_idx < 0) {
+ PyErr_Format(PyExc_TypeError, "CPU %d is not part of evsel's CPUs", cpu);
+ return NULL;
+ }
+ thread_idx = perf_thread_map__idx(evsel->core.threads, thread);
+ if (thread_idx < 0) {
+ PyErr_Format(PyExc_TypeError, "Thread %d is not part of evsel's threads",
+ thread);
+ return NULL;
+ }
+
+ if (evsel__ensure_counts(evsel))
+ return PyErr_NoMemory();
+
+ /* Set up pointers to the old and newly read counter values. */
+ old_count = perf_counts(evsel->prev_raw_counts, cpu_idx, thread_idx);
+ new_count = perf_counts(evsel->counts, cpu_idx, thread_idx);
+ /* Update the value in evsel->counts. */
+ evsel__read_counter(evsel, cpu_idx, thread_idx);
+ /* Copy the value and turn it into the delta from old_count. */
+ count_values->values = *new_count;
+ count_values->values.val -= old_count->val;
+ count_values->values.ena -= old_count->ena;
+ count_values->values.run -= old_count->run;
+ /* Save the new count over the old_count for the next read. */
+ *old_count = *new_count;
+ return (PyObject *)count_values;
+}
+
static PyObject *pyrf_evsel__str(PyObject *self)
{
struct pyrf_evsel *pevsel = (void *)self;
struct evsel *evsel = &pevsel->evsel;
- if (!evsel->pmu)
- return PyUnicode_FromFormat("evsel(%s)", evsel__name(evsel));
-
- return PyUnicode_FromFormat("evsel(%s/%s/)", evsel->pmu->name, evsel__name(evsel));
+ return PyUnicode_FromFormat("evsel(%s/%s/)", evsel__pmu_name(evsel), evsel__name(evsel));
}
static PyMethodDef pyrf_evsel__methods[] = {
@@ -799,6 +990,24 @@ static PyMethodDef pyrf_evsel__methods[] = {
.ml_flags = METH_VARARGS | METH_KEYWORDS,
.ml_doc = PyDoc_STR("open the event selector file descriptor table.")
},
+ {
+ .ml_name = "cpus",
+ .ml_meth = (PyCFunction)pyrf_evsel__cpus,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("CPUs the event is to be used with.")
+ },
+ {
+ .ml_name = "threads",
+ .ml_meth = (PyCFunction)pyrf_evsel__threads,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("threads the event is to be used with.")
+ },
+ {
+ .ml_name = "read",
+ .ml_meth = (PyCFunction)pyrf_evsel__read,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("read counters")
+ },
{ .ml_name = NULL, }
};
@@ -1054,6 +1263,16 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist,
return Py_None;
}
+static PyObject *pyrf_evlist__close(struct pyrf_evlist *pevlist)
+{
+ struct evlist *evlist = &pevlist->evlist;
+
+ evlist__close(evlist);
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
static PyObject *pyrf_evlist__config(struct pyrf_evlist *pevlist)
{
struct record_opts opts = {
@@ -1113,6 +1332,12 @@ static PyMethodDef pyrf_evlist__methods[] = {
.ml_doc = PyDoc_STR("open the file descriptors.")
},
{
+ .ml_name = "close",
+ .ml_meth = (PyCFunction)pyrf_evlist__close,
+ .ml_flags = METH_NOARGS,
+ .ml_doc = PyDoc_STR("close the file descriptors.")
+ },
+ {
.ml_name = "poll",
.ml_meth = (PyCFunction)pyrf_evlist__poll,
.ml_flags = METH_VARARGS | METH_KEYWORDS,
@@ -1357,10 +1582,37 @@ static PyObject *pyrf_evsel__from_evsel(struct evsel *evsel)
return (PyObject *)pevsel;
}
+static int evlist__pos(struct evlist *evlist, struct evsel *evsel)
+{
+ struct evsel *pos;
+ int idx = 0;
+
+ evlist__for_each_entry(evlist, pos) {
+ if (evsel == pos)
+ return idx;
+ idx++;
+ }
+ return -1;
+}
+
+static struct evsel *evlist__at(struct evlist *evlist, int idx)
+{
+ struct evsel *pos;
+ int idx2 = 0;
+
+ evlist__for_each_entry(evlist, pos) {
+ if (idx == idx2)
+ return pos;
+ idx2++;
+ }
+ return NULL;
+}
+
static PyObject *pyrf_evlist__from_evlist(struct evlist *evlist)
{
struct pyrf_evlist *pevlist = PyObject_New(struct pyrf_evlist, &pyrf_evlist__type);
struct evsel *pos;
+ struct rb_node *node;
if (!pevlist)
return NULL;
@@ -1372,6 +1624,39 @@ static PyObject *pyrf_evlist__from_evlist(struct evlist *evlist)
evlist__add(&pevlist->evlist, &pevsel->evsel);
}
+ evlist__for_each_entry(&pevlist->evlist, pos) {
+ struct evsel *leader = evsel__leader(pos);
+
+ if (pos != leader) {
+ int idx = evlist__pos(evlist, leader);
+
+ if (idx >= 0)
+ evsel__set_leader(pos, evlist__at(&pevlist->evlist, idx));
+ else if (leader == NULL)
+ evsel__set_leader(pos, pos);
+ }
+ }
+ metricgroup__copy_metric_events(&pevlist->evlist, /*cgrp=*/NULL,
+ &pevlist->evlist.metric_events,
+ &evlist->metric_events);
+ for (node = rb_first_cached(&pevlist->evlist.metric_events.entries); node;
+ node = rb_next(node)) {
+ struct metric_event *me = container_of(node, struct metric_event, nd);
+ struct list_head *mpos;
+ int idx = evlist__pos(evlist, me->evsel);
+
+ if (idx >= 0)
+ me->evsel = evlist__at(&pevlist->evlist, idx);
+ list_for_each(mpos, &me->head) {
+ struct metric_expr *e = container_of(mpos, struct metric_expr, nd);
+
+ for (int j = 0; e->metric_events[j]; j++) {
+ idx = evlist__pos(evlist, e->metric_events[j]);
+ if (idx >= 0)
+ e->metric_events[j] = evlist__at(&pevlist->evlist, idx);
+ }
+ }
+ }
return (PyObject *)pevlist;
}
@@ -1442,7 +1727,8 @@ PyMODINIT_FUNC PyInit_perf(void)
pyrf_evlist__setup_types() < 0 ||
pyrf_evsel__setup_types() < 0 ||
pyrf_thread_map__setup_types() < 0 ||
- pyrf_cpu_map__setup_types() < 0)
+ pyrf_cpu_map__setup_types() < 0 ||
+ pyrf_counts_values__setup_types() < 0)
return module;
/* The page_size is placed in util object. */
@@ -1487,6 +1773,9 @@ PyMODINIT_FUNC PyInit_perf(void)
Py_INCREF(&pyrf_cpu_map__type);
PyModule_AddObject(module, "cpu_map", (PyObject*)&pyrf_cpu_map__type);
+ Py_INCREF(&pyrf_counts_values__type);
+ PyModule_AddObject(module, "counts_values", (PyObject *)&pyrf_counts_values__type);
+
dict = PyModule_GetDict(module);
if (dict == NULL)
goto error;
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index a6566134e09e..ea3a6c4657ee 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -28,6 +28,7 @@ struct record_opts {
bool sample_time_set;
bool sample_cpu;
bool sample_identifier;
+ bool sample_data_src;
bool period;
bool period_set;
bool running_time;
@@ -79,6 +80,7 @@ struct record_opts {
int synth;
int threads_spec;
const char *threads_user_spec;
+ u64 off_cpu_thresh_ns;
};
extern const char * const *record_usage;
diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c
index 5109167f27f7..9d26832398db 100644
--- a/tools/perf/util/rwsem.c
+++ b/tools/perf/util/rwsem.c
@@ -27,6 +27,7 @@ int exit_rwsem(struct rw_semaphore *sem)
}
int down_read(struct rw_semaphore *sem)
+ NO_THREAD_SAFETY_ANALYSIS
{
#if RWS_ERRORCHECK
mutex_lock(&sem->mtx);
@@ -37,6 +38,7 @@ int down_read(struct rw_semaphore *sem)
}
int up_read(struct rw_semaphore *sem)
+ NO_THREAD_SAFETY_ANALYSIS
{
#if RWS_ERRORCHECK
mutex_unlock(&sem->mtx);
@@ -47,6 +49,7 @@ int up_read(struct rw_semaphore *sem)
}
int down_write(struct rw_semaphore *sem)
+ NO_THREAD_SAFETY_ANALYSIS
{
#if RWS_ERRORCHECK
mutex_lock(&sem->mtx);
@@ -57,6 +60,7 @@ int down_write(struct rw_semaphore *sem)
}
int up_write(struct rw_semaphore *sem)
+ NO_THREAD_SAFETY_ANALYSIS
{
#if RWS_ERRORCHECK
mutex_unlock(&sem->mtx);
diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h
index ef5cbc31d967..b102d8143181 100644
--- a/tools/perf/util/rwsem.h
+++ b/tools/perf/util/rwsem.h
@@ -10,7 +10,7 @@
*/
#define RWS_ERRORCHECK 0
-struct rw_semaphore {
+struct LOCKABLE rw_semaphore {
#if RWS_ERRORCHECK
struct mutex mtx;
#else
@@ -21,10 +21,10 @@ struct rw_semaphore {
int init_rwsem(struct rw_semaphore *sem);
int exit_rwsem(struct rw_semaphore *sem);
-int down_read(struct rw_semaphore *sem);
-int up_read(struct rw_semaphore *sem);
+int down_read(struct rw_semaphore *sem) SHARED_LOCK_FUNCTION(sem);
+int up_read(struct rw_semaphore *sem) UNLOCK_FUNCTION(sem);
-int down_write(struct rw_semaphore *sem);
-int up_write(struct rw_semaphore *sem);
+int down_write(struct rw_semaphore *sem) EXCLUSIVE_LOCK_FUNCTION(sem);
+int up_write(struct rw_semaphore *sem) UNLOCK_FUNCTION(sem);
#endif /* _PERF_RWSEM_H */
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index 0ce52f0280b8..c17dbe232c54 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -1142,7 +1142,7 @@ int s390_cpumsf_process_auxtrace_info(union perf_event *event,
sf->machine = &session->machines.host; /* No kvm support */
sf->auxtrace_type = auxtrace_info->type;
sf->pmu_type = PERF_TYPE_RAW;
- sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid);
+ sf->machine_type = s390_cpumsf_get_type(perf_session__env(session)->cpuid);
sf->auxtrace.process_event = s390_cpumsf_process_event;
sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event;
diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c
index f3f6bd9d290e..bcf442574d6e 100644
--- a/tools/perf/util/sample-raw.c
+++ b/tools/perf/util/sample-raw.c
@@ -6,15 +6,16 @@
#include "env.h"
#include "header.h"
#include "sample-raw.h"
+#include "session.h"
/*
* Check platform the perf data file was created on and perform platform
* specific interpretation.
*/
-void evlist__init_trace_event_sample_raw(struct evlist *evlist)
+void evlist__init_trace_event_sample_raw(struct evlist *evlist, struct perf_env *env)
{
- const char *arch_pf = perf_env__arch(evlist->env);
- const char *cpuid = perf_env__cpuid(evlist->env);
+ const char *arch_pf = perf_env__arch(env);
+ const char *cpuid = perf_env__cpuid(env);
if (arch_pf && !strcmp("s390", arch_pf))
evlist->trace_event_sample_raw = evlist__s390_sample_raw;
diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h
index ea01c5811503..896e9a87e373 100644
--- a/tools/perf/util/sample-raw.h
+++ b/tools/perf/util/sample-raw.h
@@ -11,5 +11,5 @@ void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event,
bool evlist__has_amd_ibs(struct evlist *evlist);
void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event,
struct perf_sample *sample);
-void evlist__init_trace_event_sample_raw(struct evlist *evlist);
+void evlist__init_trace_event_sample_raw(struct evlist *evlist, struct perf_env *env);
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index 0e96240052e9..fae834144ef4 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -104,10 +104,8 @@ struct perf_sample {
u8 cpumode;
u16 misc;
u16 ins_lat;
- union {
- u16 p_stage_cyc;
- u16 retire_lat;
- };
+ /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */
+ u16 weight3;
bool no_hw_idx; /* No hw_idx collected in branch_stack */
char insn[MAX_INSN];
void *raw_data;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 520729e78965..6655c0bbe0d8 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -780,14 +780,13 @@ static void set_sym_in_dict(PyObject *dict, struct addr_location *al,
const char *sym_field, const char *symoff_field,
const char *map_pgoff)
{
- char sbuild_id[SBUILD_ID_SIZE];
-
if (al->map) {
+ char sbuild_id[SBUILD_ID_SIZE];
struct dso *dso = map__dso(al->map);
pydict_set_item_string_decref(dict, dso_field,
_PyUnicode_FromString(dso__name(dso)));
- build_id__sprintf(dso__bid(dso), sbuild_id);
+ build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id));
pydict_set_item_string_decref(dict, dso_bid_field,
_PyUnicode_FromString(sbuild_id));
pydict_set_item_string_decref(dict, dso_map_start,
@@ -1238,7 +1237,7 @@ static int python_export_dso(struct db_export *dbe, struct dso *dso,
char sbuild_id[SBUILD_ID_SIZE];
PyObject *t;
- build_id__sprintf(dso__bid(dso), sbuild_id);
+ build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id));
t = tuple_new(5);
@@ -1306,7 +1305,7 @@ static void python_export_sample_table(struct db_export *dbe,
tuple_set_d64(t, 0, es->db_id);
tuple_set_d64(t, 1, es->evsel->db_id);
- tuple_set_d64(t, 2, maps__machine(es->al->maps)->db_id);
+ tuple_set_d64(t, 2, maps__machine(thread__maps(es->al->thread))->db_id);
tuple_set_d64(t, 3, thread__db_id(es->al->thread));
tuple_set_d64(t, 4, es->comm_db_id);
tuple_set_d64(t, 5, es->dso_db_id);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 60fb9997ea0d..26ae078278cd 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -12,6 +12,7 @@
#include <sys/types.h>
#include <sys/mman.h>
#include <perf/cpumap.h>
+#include <perf/event.h>
#include "map_symbol.h"
#include "branch.h"
@@ -137,7 +138,8 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
struct perf_session *__perf_session__new(struct perf_data *data,
struct perf_tool *tool,
- bool trace_event_repipe)
+ bool trace_event_repipe,
+ struct perf_env *host_env)
{
int ret = -ENOMEM;
struct perf_session *session = zalloc(sizeof(*session));
@@ -176,7 +178,7 @@ struct perf_session *__perf_session__new(struct perf_data *data,
perf_session__set_comm_exec(session);
}
- evlist__init_trace_event_sample_raw(session->evlist);
+ evlist__init_trace_event_sample_raw(session->evlist, &session->header.env);
/* Open the directory data. */
if (data->is_dir) {
@@ -190,8 +192,11 @@ struct perf_session *__perf_session__new(struct perf_data *data,
symbol_conf.kallsyms_name = perf_data__kallsyms_name(data);
}
} else {
- session->machines.host.env = &perf_env;
+ assert(host_env != NULL);
+ session->machines.host.env = host_env;
}
+ if (session->evlist)
+ session->evlist->session = session;
session->machines.host.single_address_space =
perf_env__single_address_space(session->machines.host.env);
@@ -1094,7 +1099,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
printf("... weight: %" PRIu64 "", sample->weight);
if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
printf(",0x%"PRIx16"", sample->ins_lat);
- printf(",0x%"PRIx16"", sample->p_stage_cyc);
+ printf(",0x%"PRIx16"", sample->weight3);
}
printf("\n");
}
@@ -1400,7 +1405,9 @@ static s64 perf_session__process_user_event(struct perf_session *session,
int err;
perf_sample__init(&sample, /*all=*/true);
- if (event->header.type != PERF_RECORD_COMPRESSED || perf_tool__compressed_is_stub(tool))
+ if ((event->header.type != PERF_RECORD_COMPRESSED &&
+ event->header.type != PERF_RECORD_COMPRESSED2) ||
+ perf_tool__compressed_is_stub(tool))
dump_event(session->evlist, event, file_offset, &sample, file_path);
/* These events are processed right away */
@@ -1481,6 +1488,7 @@ static s64 perf_session__process_user_event(struct perf_session *session,
err = tool->feature(session, event);
break;
case PERF_RECORD_COMPRESSED:
+ case PERF_RECORD_COMPRESSED2:
err = tool->compressed(session, event, file_offset, file_path);
if (err)
dump_event(session->evlist, event, file_offset, &sample, file_path);
@@ -1488,6 +1496,9 @@ static s64 perf_session__process_user_event(struct perf_session *session,
case PERF_RECORD_FINISHED_INIT:
err = tool->finished_init(session, event);
break;
+ case PERF_RECORD_BPF_METADATA:
+ err = tool->bpf_metadata(session, event);
+ break;
default:
err = -EINVAL;
break;
@@ -1639,8 +1650,17 @@ static s64 perf_session__process_event(struct perf_session *session,
if (session->header.needs_swap)
event_swap(event, evlist__sample_id_all(evlist));
- if (event->header.type >= PERF_RECORD_HEADER_MAX)
- return -EINVAL;
+ if (event->header.type >= PERF_RECORD_HEADER_MAX) {
+ /* perf should not support unaligned event, stop here. */
+ if (event->header.size % sizeof(u64))
+ return -EINVAL;
+
+ /* This perf is outdated and does not support the latest event type. */
+ ui__warning("Unsupported header type %u, please consider updating perf.\n",
+ event->header.type);
+ /* Skip unsupported event by returning its size. */
+ return event->header.size;
+ }
events_stats__inc(&evlist->stats, event->header.type);
@@ -2542,7 +2562,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
{
int i, err = -1;
struct perf_cpu_map *map;
- int nr_cpus = min(session->header.env.nr_cpus_avail, MAX_NR_CPUS);
+ int nr_cpus = min(perf_session__env(session)->nr_cpus_avail, MAX_NR_CPUS);
struct perf_cpu cpu;
for (i = 0; i < PERF_TYPE_MAX; ++i) {
@@ -2731,3 +2751,8 @@ int perf_session__dsos_hit_all(struct perf_session *session)
return 0;
}
+
+struct perf_env *perf_session__env(struct perf_session *session)
+{
+ return &session->header.env;
+}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index db1c120a9e67..cf88d65a25cb 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -107,12 +107,13 @@ struct perf_tool;
struct perf_session *__perf_session__new(struct perf_data *data,
struct perf_tool *tool,
- bool trace_event_repipe);
+ bool trace_event_repipe,
+ struct perf_env *host_env);
static inline struct perf_session *perf_session__new(struct perf_data *data,
struct perf_tool *tool)
{
- return __perf_session__new(data, tool, /*trace_event_repipe=*/false);
+ return __perf_session__new(data, tool, /*trace_event_repipe=*/false, /*host_env=*/NULL);
}
void perf_session__delete(struct perf_session *session);
@@ -208,4 +209,6 @@ int perf_event__process_finished_round(const struct perf_tool *tool,
union perf_event *event,
struct ordered_events *oe);
+struct perf_env *perf_session__env(struct perf_session *session);
+
#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/sha1.c b/tools/perf/util/sha1.c
new file mode 100644
index 000000000000..7032fa4ff3fd
--- /dev/null
+++ b/tools/perf/util/sha1.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * SHA-1 message digest algorithm
+ *
+ * Copyright 2025 Google LLC
+ */
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/unaligned.h>
+#include <string.h>
+
+#include "sha1.h"
+
+#define SHA1_BLOCK_SIZE 64
+
+static const u32 sha1_K[4] = { 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6 };
+
+#define SHA1_ROUND(i, a, b, c, d, e) \
+ do { \
+ if ((i) >= 16) \
+ w[i] = rol32(w[(i) - 16] ^ w[(i) - 14] ^ w[(i) - 8] ^ \
+ w[(i) - 3], \
+ 1); \
+ e += w[i] + rol32(a, 5) + sha1_K[(i) / 20]; \
+ if ((i) < 20) \
+ e += (b & (c ^ d)) ^ d; \
+ else if ((i) < 40 || (i) >= 60) \
+ e += b ^ c ^ d; \
+ else \
+ e += (c & d) ^ (b & (c ^ d)); \
+ b = rol32(b, 30); \
+ /* The new (a, b, c, d, e) is the old (e, a, b, c, d). */ \
+ } while (0)
+
+#define SHA1_5ROUNDS(i) \
+ do { \
+ SHA1_ROUND((i) + 0, a, b, c, d, e); \
+ SHA1_ROUND((i) + 1, e, a, b, c, d); \
+ SHA1_ROUND((i) + 2, d, e, a, b, c); \
+ SHA1_ROUND((i) + 3, c, d, e, a, b); \
+ SHA1_ROUND((i) + 4, b, c, d, e, a); \
+ } while (0)
+
+#define SHA1_20ROUNDS(i) \
+ do { \
+ SHA1_5ROUNDS((i) + 0); \
+ SHA1_5ROUNDS((i) + 5); \
+ SHA1_5ROUNDS((i) + 10); \
+ SHA1_5ROUNDS((i) + 15); \
+ } while (0)
+
+static void sha1_blocks(u32 h[5], const u8 *data, size_t nblocks)
+{
+ while (nblocks--) {
+ u32 a = h[0];
+ u32 b = h[1];
+ u32 c = h[2];
+ u32 d = h[3];
+ u32 e = h[4];
+ u32 w[80];
+
+ for (int i = 0; i < 16; i++)
+ w[i] = get_unaligned_be32(&data[i * 4]);
+ SHA1_20ROUNDS(0);
+ SHA1_20ROUNDS(20);
+ SHA1_20ROUNDS(40);
+ SHA1_20ROUNDS(60);
+
+ h[0] += a;
+ h[1] += b;
+ h[2] += c;
+ h[3] += d;
+ h[4] += e;
+ data += SHA1_BLOCK_SIZE;
+ }
+}
+
+/* Calculate the SHA-1 message digest of the given data. */
+void sha1(const void *data, size_t len, u8 out[SHA1_DIGEST_SIZE])
+{
+ u32 h[5] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476,
+ 0xC3D2E1F0 };
+ u8 final_data[2 * SHA1_BLOCK_SIZE] = { 0 };
+ size_t final_len = len % SHA1_BLOCK_SIZE;
+
+ sha1_blocks(h, data, len / SHA1_BLOCK_SIZE);
+
+ memcpy(final_data, data + len - final_len, final_len);
+ final_data[final_len] = 0x80;
+ final_len = round_up(final_len + 9, SHA1_BLOCK_SIZE);
+ put_unaligned_be64((u64)len * 8, &final_data[final_len - 8]);
+
+ sha1_blocks(h, final_data, final_len / SHA1_BLOCK_SIZE);
+
+ for (int i = 0; i < 5; i++)
+ put_unaligned_be32(h[i], &out[i * 4]);
+}
diff --git a/tools/perf/util/sha1.h b/tools/perf/util/sha1.h
new file mode 100644
index 000000000000..e92c9966e1d5
--- /dev/null
+++ b/tools/perf/util/sha1.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <linux/types.h>
+
+#define SHA1_DIGEST_SIZE 20
+
+void sha1(const void *data, size_t len, u8 out[SHA1_DIGEST_SIZE]);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index c51049087e4e..f3a565b0e230 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -141,6 +141,43 @@ struct sort_entry sort_thread = {
.se_width_idx = HISTC_THREAD,
};
+/* --sort tgid */
+
+static int64_t
+sort__tgid_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return thread__pid(right->thread) - thread__pid(left->thread);
+}
+
+static int hist_entry__tgid_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ int tgid = thread__pid(he->thread);
+ const char *comm = NULL;
+
+ /* display comm of the thread-group leader */
+ if (thread__pid(he->thread) == thread__tid(he->thread)) {
+ comm = thread__comm_str(he->thread);
+ } else {
+ struct maps *maps = thread__maps(he->thread);
+ struct thread *leader = machine__find_thread(maps__machine(maps),
+ tgid, tgid);
+ if (leader) {
+ comm = thread__comm_str(leader);
+ thread__put(leader);
+ }
+ }
+ width = max(7U, width) - 8;
+ return repsep_snprintf(bf, size, "%7d:%-*.*s", tgid, width, width, comm ?: "");
+}
+
+struct sort_entry sort_tgid = {
+ .se_header = " Tgid:Command",
+ .se_cmp = sort__tgid_cmp,
+ .se_snprintf = hist_entry__tgid_snprintf,
+ .se_width_idx = HISTC_TGID,
+};
+
/* --sort simd */
static int64_t
@@ -1709,22 +1746,27 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
if (rc)
return rc;
/*
- * Addresses with no major/minor numbers are assumed to be
+ * Addresses with no major/minor numbers or build ID are assumed to be
* anonymous in userspace. Sort those on pid then address.
*
* The kernel and non-zero major/minor mapped areas are
* assumed to be unity mapped. Sort those on address.
*/
+ if (left->cpumode != PERF_RECORD_MISC_KERNEL && (map__flags(l_map) & MAP_SHARED) == 0) {
+ const struct dso_id *dso_id = dso__id_const(l_dso);
- if ((left->cpumode != PERF_RECORD_MISC_KERNEL) &&
- (!(map__flags(l_map) & MAP_SHARED)) && !dso__id(l_dso)->maj && !dso__id(l_dso)->min &&
- !dso__id(l_dso)->ino && !dso__id(l_dso)->ino_generation) {
- /* userspace anonymous */
+ if (!dso_id->mmap2_valid)
+ dso_id = dso__id_const(r_dso);
- if (thread__pid(left->thread) > thread__pid(right->thread))
- return -1;
- if (thread__pid(left->thread) < thread__pid(right->thread))
- return 1;
+ if (!build_id__is_defined(&dso_id->build_id) &&
+ (!dso_id->mmap2_valid || (dso_id->maj == 0 && dso_id->min == 0))) {
+ /* userspace anonymous */
+
+ if (thread__pid(left->thread) > thread__pid(right->thread))
+ return -1;
+ if (thread__pid(left->thread) < thread__pid(right->thread))
+ return 1;
+ }
}
addr:
@@ -1749,6 +1791,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
if (he->mem_info) {
struct map *map = mem_info__daddr(he->mem_info)->ms.map;
struct dso *dso = map ? map__dso(map) : NULL;
+ const struct dso_id *dso_id = dso ? dso__id_const(dso) : &dso_id_empty;
addr = cl_address(mem_info__daddr(he->mem_info)->al_addr, chk_double_cl);
ms = &mem_info__daddr(he->mem_info)->ms;
@@ -1757,8 +1800,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
if ((he->cpumode != PERF_RECORD_MISC_KERNEL) &&
map && !(map__prot(map) & PROT_EXEC) &&
(map__flags(map) & MAP_SHARED) &&
- (dso__id(dso)->maj || dso__id(dso)->min || dso__id(dso)->ino ||
- dso__id(dso)->ino_generation))
+ (!dso_id->mmap2_valid || (dso_id->maj == 0 && dso_id->min == 0)))
level = 's';
else if (!map)
level = 'X';
@@ -1842,21 +1884,20 @@ struct sort_entry sort_global_ins_lat = {
static int64_t
sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right)
{
- return left->p_stage_cyc - right->p_stage_cyc;
+ return left->weight3 - right->weight3;
}
static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width)
{
- return repsep_snprintf(bf, size, "%-*u", width,
- he->p_stage_cyc * he->stat.nr_events);
+ return repsep_snprintf(bf, size, "%-*u", width, he->weight3 * he->stat.nr_events);
}
static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf,
size_t size, unsigned int width)
{
- return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc);
+ return repsep_snprintf(bf, size, "%-*u", width, he->weight3);
}
struct sort_entry sort_local_p_stage_cyc = {
@@ -2489,25 +2530,51 @@ struct sort_dimension {
int taken;
};
-int __weak arch_support_sort_key(const char *sort_key __maybe_unused)
+static int arch_support_sort_key(const char *sort_key, struct perf_env *env)
{
+ const char *arch = perf_env__arch(env);
+
+ if (!strcmp("x86", arch) || !strcmp("powerpc", arch)) {
+ if (!strcmp(sort_key, "p_stage_cyc"))
+ return 1;
+ if (!strcmp(sort_key, "local_p_stage_cyc"))
+ return 1;
+ }
return 0;
}
-const char * __weak arch_perf_header_entry(const char *se_header)
-{
+static const char *arch_perf_header_entry(const char *se_header, struct perf_env *env)
+{
+ const char *arch = perf_env__arch(env);
+
+ if (!strcmp("x86", arch)) {
+ if (!strcmp(se_header, "Local Pipeline Stage Cycle"))
+ return "Local Retire Latency";
+ else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+ return "Retire Latency";
+ } else if (!strcmp("powerpc", arch)) {
+ if (!strcmp(se_header, "Local INSTR Latency"))
+ return "Finish Cyc";
+ else if (!strcmp(se_header, "INSTR Latency"))
+ return "Global Finish_cyc";
+ else if (!strcmp(se_header, "Local Pipeline Stage Cycle"))
+ return "Dispatch Cyc";
+ else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+ return "Global Dispatch_cyc";
+ }
return se_header;
}
-static void sort_dimension_add_dynamic_header(struct sort_dimension *sd)
+static void sort_dimension_add_dynamic_header(struct sort_dimension *sd, struct perf_env *env)
{
- sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header);
+ sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header, env);
}
#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_PID, "pid", sort_thread),
+ DIM(SORT_TGID, "tgid", sort_tgid),
DIM(SORT_COMM, "comm", sort_comm),
DIM(SORT_DSO, "dso", sort_dso),
DIM(SORT_SYM, "symbol", sort_sym),
@@ -2598,9 +2665,11 @@ struct hpp_dimension {
struct perf_hpp_fmt *fmt;
int taken;
int was_taken;
+ int mem_mode;
};
#define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
+#define DIM_MEM(d, n) { .name = n, .fmt = &perf_hpp__format[d], .mem_mode = 1, }
static struct hpp_dimension hpp_sort_dimensions[] = {
DIM(PERF_HPP__OVERHEAD, "overhead"),
@@ -2620,8 +2689,15 @@ static struct hpp_dimension hpp_sort_dimensions[] = {
DIM(PERF_HPP__WEIGHT2, "ins_lat"),
DIM(PERF_HPP__WEIGHT3, "retire_lat"),
DIM(PERF_HPP__WEIGHT3, "p_stage_cyc"),
+ /* used for output only when SORT_MODE__MEM */
+ DIM_MEM(PERF_HPP__MEM_STAT_OP, "op"),
+ DIM_MEM(PERF_HPP__MEM_STAT_CACHE, "cache"),
+ DIM_MEM(PERF_HPP__MEM_STAT_MEMORY, "memory"),
+ DIM_MEM(PERF_HPP__MEM_STAT_SNOOP, "snoop"),
+ DIM_MEM(PERF_HPP__MEM_STAT_DTLB, "dtlb"),
};
+#undef DIM_MEM
#undef DIM
struct hpp_sort_entry {
@@ -2641,18 +2717,22 @@ void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists)
}
static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
- struct hists *hists, int line __maybe_unused,
+ struct hists *hists, int line,
int *span __maybe_unused)
{
struct hpp_sort_entry *hse;
size_t len = fmt->user_len;
+ const char *hdr = "";
+
+ if (line == hists->hpp_list->nr_header_lines - 1)
+ hdr = fmt->name;
hse = container_of(fmt, struct hpp_sort_entry, hpp);
if (!len)
len = hists__col_len(hists, hse->se->se_width_idx);
- return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, fmt->name);
+ return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, hdr);
}
static int __sort__hpp_width(struct perf_hpp_fmt *fmt,
@@ -2884,9 +2964,10 @@ static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd,
}
static int __sort_dimension__add_hpp_output(struct sort_dimension *sd,
- struct perf_hpp_list *list)
+ struct perf_hpp_list *list,
+ int level)
{
- struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0);
+ struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level);
if (hse == NULL)
return -1;
@@ -3495,12 +3576,13 @@ static int __hpp_dimension__add(struct hpp_dimension *hd,
}
static int __sort_dimension__add_output(struct perf_hpp_list *list,
- struct sort_dimension *sd)
+ struct sort_dimension *sd,
+ int level)
{
if (sd->taken)
return 0;
- if (__sort_dimension__add_hpp_output(sd, list) < 0)
+ if (__sort_dimension__add_hpp_output(sd, list, level) < 0)
return -1;
sd->taken = 1;
@@ -3508,14 +3590,15 @@ static int __sort_dimension__add_output(struct perf_hpp_list *list,
}
static int __hpp_dimension__add_output(struct perf_hpp_list *list,
- struct hpp_dimension *hd)
+ struct hpp_dimension *hd,
+ int level)
{
struct perf_hpp_fmt *fmt;
if (hd->taken)
return 0;
- fmt = __hpp_dimension__alloc_hpp(hd, 0);
+ fmt = __hpp_dimension__alloc_hpp(hd, level);
if (!fmt)
return -1;
@@ -3532,11 +3615,11 @@ int hpp_dimension__add_output(unsigned col, bool implicit)
hd = &hpp_sort_dimensions[col];
if (implicit && !hd->was_taken)
return 0;
- return __hpp_dimension__add_output(&perf_hpp_list, hd);
+ return __hpp_dimension__add_output(&perf_hpp_list, hd, /*level=*/0);
}
int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
- struct evlist *evlist,
+ struct evlist *evlist, struct perf_env *env,
int level)
{
unsigned int i, j;
@@ -3549,7 +3632,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
*/
for (j = 0; j < ARRAY_SIZE(arch_specific_sort_keys); j++) {
if (!strcmp(arch_specific_sort_keys[j], tok) &&
- !arch_support_sort_key(tok)) {
+ !arch_support_sort_key(tok, env)) {
return 0;
}
}
@@ -3562,7 +3645,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) {
if (sd->name && !strcmp(dynamic_headers[j], sd->name))
- sort_dimension_add_dynamic_header(sd);
+ sort_dimension_add_dynamic_header(sd, env);
}
if (sd->entry == &sort_parent && parent_pattern) {
@@ -3601,15 +3684,6 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
return __sort_dimension__add(sd, list, level);
}
- for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
- struct hpp_dimension *hd = &hpp_sort_dimensions[i];
-
- if (strncasecmp(tok, hd->name, strlen(tok)))
- continue;
-
- return __hpp_dimension__add(hd, list, level);
- }
-
for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
struct sort_dimension *sd = &bstack_sort_dimensions[i];
@@ -3651,6 +3725,15 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
return 0;
}
+ for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
+ struct hpp_dimension *hd = &hpp_sort_dimensions[i];
+
+ if (strncasecmp(tok, hd->name, strlen(tok)))
+ continue;
+
+ return __hpp_dimension__add(hd, list, level);
+ }
+
if (!add_dynamic_entry(evlist, tok, level))
return 0;
@@ -3658,13 +3741,13 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
}
/* This should match with sort_dimension__add() above */
-static bool is_hpp_sort_key(const char *key)
+static bool is_hpp_sort_key(const char *key, struct perf_env *env)
{
unsigned i;
for (i = 0; i < ARRAY_SIZE(arch_specific_sort_keys); i++) {
if (!strcmp(arch_specific_sort_keys[i], key) &&
- !arch_support_sort_key(key)) {
+ !arch_support_sort_key(key, env)) {
return false;
}
}
@@ -3686,7 +3769,7 @@ static bool is_hpp_sort_key(const char *key)
}
static int setup_sort_list(struct perf_hpp_list *list, char *str,
- struct evlist *evlist)
+ struct evlist *evlist, struct perf_env *env)
{
char *tmp, *tok;
int ret = 0;
@@ -3715,7 +3798,7 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
}
if (*tok) {
- if (is_hpp_sort_key(tok)) {
+ if (is_hpp_sort_key(tok, env)) {
/* keep output (hpp) sort keys in the same level */
if (prev_was_hpp) {
bool next_same = (level == next_level);
@@ -3728,7 +3811,7 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
prev_was_hpp = false;
}
- ret = sort_dimension__add(list, tok, evlist, level);
+ ret = sort_dimension__add(list, tok, evlist, env, level);
if (ret == -EINVAL) {
if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok)))
ui__error("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
@@ -3857,7 +3940,7 @@ static char *setup_overhead(char *keys)
return keys;
}
-static int __setup_sorting(struct evlist *evlist)
+static int __setup_sorting(struct evlist *evlist, struct perf_env *env)
{
char *str;
const char *sort_keys;
@@ -3897,7 +3980,7 @@ static int __setup_sorting(struct evlist *evlist)
}
}
- ret = setup_sort_list(&perf_hpp_list, str, evlist);
+ ret = setup_sort_list(&perf_hpp_list, str, evlist, env);
free(str);
return ret;
@@ -4000,7 +4083,7 @@ void sort__setup_elide(FILE *output)
}
}
-int output_field_add(struct perf_hpp_list *list, const char *tok)
+int output_field_add(struct perf_hpp_list *list, const char *tok, int *level)
{
unsigned int i;
@@ -4013,16 +4096,25 @@ int output_field_add(struct perf_hpp_list *list, const char *tok)
if (!strcasecmp(tok, "weight"))
ui__warning("--fields weight shows the average value unlike in the --sort key.\n");
- return __hpp_dimension__add_output(list, hd);
+ if (hd->mem_mode && sort__mode != SORT_MODE__MEMORY)
+ continue;
+
+ return __hpp_dimension__add_output(list, hd, *level);
}
+ /*
+ * A non-output field will increase level so that it can be in a
+ * different hierarchy.
+ */
+ (*level)++;
+
for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
struct sort_dimension *sd = &common_sort_dimensions[i];
if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
continue;
- return __sort_dimension__add_output(list, sd);
+ return __sort_dimension__add_output(list, sd, *level);
}
for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
@@ -4034,7 +4126,7 @@ int output_field_add(struct perf_hpp_list *list, const char *tok)
if (sort__mode != SORT_MODE__BRANCH)
return -EINVAL;
- return __sort_dimension__add_output(list, sd);
+ return __sort_dimension__add_output(list, sd, *level);
}
for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
@@ -4046,7 +4138,7 @@ int output_field_add(struct perf_hpp_list *list, const char *tok)
if (sort__mode != SORT_MODE__MEMORY)
return -EINVAL;
- return __sort_dimension__add_output(list, sd);
+ return __sort_dimension__add_output(list, sd, *level);
}
return -ESRCH;
@@ -4056,10 +4148,11 @@ static int setup_output_list(struct perf_hpp_list *list, char *str)
{
char *tmp, *tok;
int ret = 0;
+ int level = 0;
for (tok = strtok_r(str, ", ", &tmp);
tok; tok = strtok_r(NULL, ", ", &tmp)) {
- ret = output_field_add(list, tok);
+ ret = output_field_add(list, tok, &level);
if (ret == -EINVAL) {
ui__error("Invalid --fields key: `%s'", tok);
break;
@@ -4123,16 +4216,16 @@ out:
return ret;
}
-int setup_sorting(struct evlist *evlist)
+int setup_sorting(struct evlist *evlist, struct perf_env *env)
{
int err;
- err = __setup_sorting(evlist);
+ err = __setup_sorting(evlist, env);
if (err < 0)
return err;
if (parent_pattern != default_parent_pattern) {
- err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1);
+ err = sort_dimension__add(&perf_hpp_list, "parent", evlist, env, -1);
if (err < 0)
return err;
}
@@ -4149,6 +4242,10 @@ int setup_sorting(struct evlist *evlist)
if (err < 0)
return err;
+ err = perf_hpp__alloc_mem_stats(&perf_hpp_list, evlist);
+ if (err < 0)
+ return err;
+
/* copy sort keys to output fields */
perf_hpp__setup_output_field(&perf_hpp_list);
/* and then copy output fields to sort keys */
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 180d36a2bea3..d7787958e06b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -6,6 +6,7 @@
#include "hist.h"
struct option;
+struct perf_env;
extern regex_t parent_regex;
extern const char *sort_order;
@@ -73,6 +74,7 @@ enum sort_type {
SORT_SYM_OFFSET,
SORT_ANNOTATE_DATA_TYPE_CACHELINE,
SORT_PARALLELISM,
+ SORT_TGID,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
@@ -129,7 +131,7 @@ extern struct sort_entry sort_thread;
struct evlist;
struct tep_handle;
-int setup_sorting(struct evlist *evlist);
+int setup_sorting(struct evlist *evlist, struct perf_env *env);
int setup_output_field(void);
void reset_output_field(void);
void sort__setup_elide(FILE *fp);
@@ -144,9 +146,9 @@ bool is_strict_order(const char *order);
int hpp_dimension__add_output(unsigned col, bool implicit);
void reset_dimensions(void);
int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
- struct evlist *evlist,
+ struct evlist *evlist, struct perf_env *env,
int level);
-int output_field_add(struct perf_hpp_list *list, const char *tok);
+int output_field_add(struct perf_hpp_list *list, const char *tok, int *level);
int64_t
sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right);
int64_t
diff --git a/tools/perf/util/spark.c b/tools/perf/util/spark.c
index 70272a8b81a6..65ca253cc22e 100644
--- a/tools/perf/util/spark.c
+++ b/tools/perf/util/spark.c
@@ -1,9 +1,7 @@
-#include <stdio.h>
-#include <limits.h>
-#include <string.h>
-#include <stdlib.h>
+// SPDX-License-Identifier: GPL-2.0
#include "spark.h"
-#include "stat.h"
+#include <limits.h>
+#include <linux/kernel.h>
#define SPARK_SHIFT 8
diff --git a/tools/perf/util/spark.h b/tools/perf/util/spark.h
index 25402d7d7a64..78597c38ef35 100644
--- a/tools/perf/util/spark.h
+++ b/tools/perf/util/spark.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef SPARK_H
#define SPARK_H 1
diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c
index 476e99896d5e..0f4907843ac1 100644
--- a/tools/perf/util/srccode.c
+++ b/tools/perf/util/srccode.c
@@ -16,7 +16,7 @@
#include "srccode.h"
#include "debug.h"
#include <internal/lib.h> // page_size
-#include "fncache.h"
+#include "hashmap.h"
#define MAXSRCCACHE (32*1024*1024)
#define MAXSRCFILES 64
@@ -92,7 +92,7 @@ static struct srcfile *find_srcfile(char *fn)
struct srcfile *h;
int fd;
unsigned long sz;
- unsigned hval = shash((unsigned char *)fn) % SRC_HTAB_SZ;
+ size_t hval = str_hash(fn) % SRC_HTAB_SZ;
hlist_for_each_entry (h, &srcfile_htab[hval], hash_nd) {
if (!strcmp(fn, h->fn)) {
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index f32d0d4f4bc9..3e3449e35dd4 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -524,12 +524,12 @@ static enum a2l_style addr2line_configure(struct child_process *a2l, const char
style = LLVM;
cached = true;
lines = 1;
- pr_debug("Detected LLVM addr2line style\n");
+ pr_debug3("Detected LLVM addr2line style\n");
} else if (ch == '0') {
style = GNU_BINUTILS;
cached = true;
lines = 3;
- pr_debug("Detected binutils addr2line style\n");
+ pr_debug3("Detected binutils addr2line style\n");
} else {
if (!symbol_conf.disable_add2line_warn) {
char *output = NULL;
@@ -595,7 +595,7 @@ static int read_addr2line_record(struct io *io,
if (io__getline(io, &line, &line_len) < 0 || !line_len)
goto error;
- pr_debug("%s %s: addr2line read address for sentinel: %s", __func__, dso_name, line);
+ pr_debug3("%s %s: addr2line read address for sentinel: %s", __func__, dso_name, line);
if (style == LLVM && line_len == 2 && line[0] == ',') {
/* Found the llvm-addr2line sentinel character. */
zfree(&line);
@@ -641,7 +641,7 @@ static int read_addr2line_record(struct io *io,
if (first && (io__getline(io, &line, &line_len) < 0 || !line_len))
goto error;
- pr_debug("%s %s: addr2line read line: %s", __func__, dso_name, line);
+ pr_debug3("%s %s: addr2line read line: %s", __func__, dso_name, line);
if (function != NULL)
*function = strdup(strim(line));
@@ -652,7 +652,7 @@ static int read_addr2line_record(struct io *io,
if (io__getline(io, &line, &line_len) < 0 || !line_len)
goto error;
- pr_debug("%s %s: addr2line filename:number : %s", __func__, dso_name, line);
+ pr_debug3("%s %s: addr2line filename:number : %s", __func__, dso_name, line);
if (filename_split(line, line_nr == NULL ? &dummy_line_nr : line_nr) == 0 &&
style == GNU_BINUTILS) {
ret = 0;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index e852ac0d9847..a67b991f4e81 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -50,15 +50,15 @@ static int aggr_header_lens[] = {
};
static const char *aggr_header_csv[] = {
- [AGGR_CORE] = "core,cpus,",
- [AGGR_CACHE] = "cache,cpus,",
- [AGGR_CLUSTER] = "cluster,cpus,",
- [AGGR_DIE] = "die,cpus,",
- [AGGR_SOCKET] = "socket,cpus,",
- [AGGR_NONE] = "cpu,",
- [AGGR_THREAD] = "comm-pid,",
- [AGGR_NODE] = "node,",
- [AGGR_GLOBAL] = ""
+ [AGGR_CORE] = "core,ctrs,",
+ [AGGR_CACHE] = "cache,ctrs,",
+ [AGGR_CLUSTER] = "cluster,ctrs,",
+ [AGGR_DIE] = "die,ctrs,",
+ [AGGR_SOCKET] = "socket,ctrs,",
+ [AGGR_NONE] = "cpu,",
+ [AGGR_THREAD] = "comm-pid,",
+ [AGGR_NODE] = "node,",
+ [AGGR_GLOBAL] = ""
};
static const char *aggr_header_std[] = {
@@ -304,7 +304,7 @@ static void print_aggr_id_std(struct perf_stat_config *config,
return;
}
- fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, 4, aggr_nr);
+ fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, /*strlen("ctrs")*/ 4, aggr_nr);
}
static void print_aggr_id_csv(struct perf_stat_config *config,
@@ -366,27 +366,27 @@ static void print_aggr_id_json(struct perf_stat_config *config, struct outstate
{
switch (config->aggr_mode) {
case AGGR_CORE:
- json_out(os, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d",
+ json_out(os, "\"core\" : \"S%d-D%d-C%d\", \"counters\" : %d",
id.socket, id.die, id.core, aggr_nr);
break;
case AGGR_CACHE:
- json_out(os, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d",
+ json_out(os, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"counters\" : %d",
id.socket, id.die, id.cache_lvl, id.cache, aggr_nr);
break;
case AGGR_CLUSTER:
- json_out(os, "\"cluster\" : \"S%d-D%d-CLS%d\", \"aggregate-number\" : %d",
+ json_out(os, "\"cluster\" : \"S%d-D%d-CLS%d\", \"counters\" : %d",
id.socket, id.die, id.cluster, aggr_nr);
break;
case AGGR_DIE:
- json_out(os, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d",
+ json_out(os, "\"die\" : \"S%d-D%d\", \"counters\" : %d",
id.socket, id.die, aggr_nr);
break;
case AGGR_SOCKET:
- json_out(os, "\"socket\" : \"S%d\", \"aggregate-number\" : %d",
+ json_out(os, "\"socket\" : \"S%d\", \"counters\" : %d",
id.socket, aggr_nr);
break;
case AGGR_NODE:
- json_out(os, "\"node\" : \"N%d\", \"aggregate-number\" : %d",
+ json_out(os, "\"node\" : \"N%d\", \"counters\" : %d",
id.node, aggr_nr);
break;
case AGGR_NONE:
@@ -798,40 +798,28 @@ static void abs_printout(struct perf_stat_config *config,
print_cgroup(config, os, evsel->cgrp);
}
-static bool is_mixed_hw_group(struct evsel *counter)
-{
- struct evlist *evlist = counter->evlist;
- u32 pmu_type = counter->core.attr.type;
- struct evsel *pos;
-
- if (counter->core.nr_members < 2)
- return false;
-
- evlist__for_each_entry(evlist, pos) {
- /* software events can be part of any hardware group */
- if (pos->core.attr.type == PERF_TYPE_SOFTWARE)
- continue;
- if (pmu_type == PERF_TYPE_SOFTWARE) {
- pmu_type = pos->core.attr.type;
- continue;
- }
- if (pmu_type != pos->core.attr.type)
- return true;
- }
-
- return false;
-}
-
-static bool evlist__has_hybrid(struct evlist *evlist)
+static bool evlist__has_hybrid_pmus(struct evlist *evlist)
{
struct evsel *evsel;
+ struct perf_pmu *last_core_pmu = NULL;
if (perf_pmus__num_core_pmus() == 1)
return false;
evlist__for_each_entry(evlist, evsel) {
- if (evsel->core.is_pmu_core)
+ if (evsel->core.is_pmu_core) {
+ struct perf_pmu *pmu = evsel__find_pmu(evsel);
+
+ if (pmu == last_core_pmu)
+ continue;
+
+ if (last_core_pmu == NULL) {
+ last_core_pmu = pmu;
+ continue;
+ }
+ /* A distinct core PMU. */
return true;
+ }
}
return false;
@@ -872,10 +860,8 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
ok = false;
if (counter->supported) {
- if (!evlist__has_hybrid(counter->evlist)) {
+ if (!evlist__has_hybrid_pmus(counter->evlist)) {
config->print_free_counters_hint = 1;
- if (is_mixed_hw_group(counter))
- config->print_mixed_hw_group_error = 1;
}
}
}
@@ -913,12 +899,11 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
print_noise(config, os, counter, noise, /*before_metric=*/true);
print_running(config, os, run, ena, /*before_metric=*/true);
from = perf_stat__print_shadow_stats_metricgroup(config, counter, aggr_idx,
- &num, from, &out,
- &config->metric_events);
+ &num, from, &out);
} while (from != NULL);
- } else
- perf_stat__print_shadow_stats(config, counter, uval, aggr_idx,
- &out, &config->metric_events);
+ } else {
+ perf_stat__print_shadow_stats(config, counter, uval, aggr_idx, &out);
+ }
} else {
pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, /*unit=*/NULL, /*val=*/0);
}
@@ -929,61 +914,6 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
}
}
-static void evsel__uniquify_counter(struct evsel *counter)
-{
- const char *name, *pmu_name;
- char *new_name, *config;
- int ret;
-
- /* No uniquification necessary. */
- if (!counter->needs_uniquify)
- return;
-
- /* The evsel was already uniquified. */
- if (counter->uniquified_name)
- return;
-
- /* Avoid checking to uniquify twice. */
- counter->uniquified_name = true;
-
- name = evsel__name(counter);
- pmu_name = counter->pmu->name;
- /* Already prefixed by the PMU name. */
- if (!strncmp(name, pmu_name, strlen(pmu_name)))
- return;
-
- config = strchr(name, '/');
- if (config) {
- int len = config - name;
-
- if (config[1] == '/') {
- /* case: event// */
- ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 2);
- } else {
- /* case: event/.../ */
- ret = asprintf(&new_name, "%s/%.*s,%s", pmu_name, len, name, config + 1);
- }
- } else {
- config = strchr(name, ':');
- if (config) {
- /* case: event:.. */
- int len = config - name;
-
- ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 1);
- } else {
- /* case: event */
- ret = asprintf(&new_name, "%s/%s/", pmu_name, name);
- }
- }
- if (ret > 0) {
- free(counter->name);
- counter->name = new_name;
- } else {
- /* ENOMEM from asprintf. */
- counter->uniquified_name = false;
- }
-}
-
/**
* should_skip_zero_count() - Check if the event should print 0 values.
* @config: The perf stat configuration (including aggregation mode).
@@ -1022,8 +952,16 @@ static bool should_skip_zero_counter(struct perf_stat_config *config,
return true;
/*
- * Many tool events are only gathered on the first index, skip other
- * zero values.
+ * In per-thread mode the aggr_map and aggr_get_id functions may be
+ * NULL, assume all 0 values should be output in that case.
+ */
+ if (!config->aggr_map || !config->aggr_get_id)
+ return false;
+
+ /*
+ * Tool events may be gathered on all logical CPUs, for example
+ * system_time, but for many the first index is the only one used, for
+ * example num_cores. Don't skip for the first index.
*/
if (evsel__is_tool(counter)) {
struct aggr_cpu_id own_id =
@@ -1031,15 +969,12 @@ static bool should_skip_zero_counter(struct perf_stat_config *config,
return !aggr_cpu_id__equal(id, &own_id);
}
-
/*
- * Skip value 0 when it's an uncore event and the given aggr id
- * does not belong to the PMU cpumask.
+ * Skip value 0 when the counter's cpumask doesn't match the given aggr
+ * id.
*/
- if (!counter->pmu || !counter->pmu->is_uncore)
- return false;
- perf_cpu_map__for_each_cpu(cpu, idx, counter->pmu->cpus) {
+ perf_cpu_map__for_each_cpu(cpu, idx, counter->core.cpus) {
struct aggr_cpu_id own_id = config->aggr_get_id(config, cpu);
if (aggr_cpu_id__equal(id, &own_id))
@@ -1066,16 +1001,21 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
os->evsel = counter;
/* Skip already merged uncore/hybrid events */
- if (counter->merged_stat)
- return;
-
- evsel__uniquify_counter(counter);
+ if (config->aggr_mode != AGGR_NONE) {
+ if (evsel__is_hybrid(counter)) {
+ if (config->hybrid_merge && counter->first_wildcard_match != NULL)
+ return;
+ } else {
+ if (counter->first_wildcard_match != NULL)
+ return;
+ }
+ }
val = aggr->counts.val;
ena = aggr->counts.ena;
run = aggr->counts.run;
- if (perf_stat__skip_metric_event(counter, &config->metric_events, ena, run))
+ if (perf_stat__skip_metric_event(counter, ena, run))
return;
if (val == 0 && should_skip_zero_counter(config, counter, &id))
@@ -1334,10 +1274,7 @@ static void print_metric_headers(struct perf_stat_config *config,
os.evsel = counter;
- perf_stat__print_shadow_stats(config, counter, 0,
- 0,
- &out,
- &config->metric_events);
+ perf_stat__print_shadow_stats(config, counter, 0, 0, &out);
}
if (!config->json_output)
@@ -1376,7 +1313,7 @@ static void print_header_interval_std(struct perf_stat_config *config,
case AGGR_CLUSTER:
case AGGR_CACHE:
case AGGR_CORE:
- fprintf(output, "#%*s %-*s cpus",
+ fprintf(output, "#%*s %-*s ctrs",
INTERVAL_LEN - 1, "time",
aggr_header_lens[config->aggr_mode],
aggr_header_std[config->aggr_mode]);
@@ -1575,11 +1512,6 @@ static void print_footer(struct perf_stat_config *config)
" echo 0 > /proc/sys/kernel/nmi_watchdog\n"
" perf stat ...\n"
" echo 1 > /proc/sys/kernel/nmi_watchdog\n");
-
- if (config->print_mixed_hw_group_error)
- fprintf(output,
- "The events in group usually have to be from "
- "the same PMU. Try reorganizing the group.\n");
}
static void print_percore(struct perf_stat_config *config,
@@ -1650,96 +1582,6 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist
print_metric_end(config, os);
}
-/* Should uniquify be disabled for the evlist? */
-static bool evlist__disable_uniquify(const struct evlist *evlist)
-{
- struct evsel *counter;
- struct perf_pmu *last_pmu = NULL;
- bool first = true;
-
- evlist__for_each_entry(evlist, counter) {
- /* If PMUs vary then uniquify can be useful. */
- if (!first && counter->pmu != last_pmu)
- return false;
- first = false;
- if (counter->pmu) {
- /* Allow uniquify for uncore PMUs. */
- if (!counter->pmu->is_core)
- return false;
- /* Keep hybrid event names uniquified for clarity. */
- if (perf_pmus__num_core_pmus() > 1)
- return false;
- }
- }
- return true;
-}
-
-static void evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config)
-{
- struct evsel *evsel;
-
- if (counter->merged_stat) {
- /* Counter won't be shown. */
- return;
- }
-
- if (counter->use_config_name || counter->is_libpfm_event) {
- /* Original name will be used. */
- return;
- }
-
- if (!config->hybrid_merge && evsel__is_hybrid(counter)) {
- /* Unique hybrid counters necessary. */
- counter->needs_uniquify = true;
- return;
- }
-
- if (counter->core.attr.type < PERF_TYPE_MAX && counter->core.attr.type != PERF_TYPE_RAW) {
- /* Legacy event, don't uniquify. */
- return;
- }
-
- if (counter->pmu && counter->pmu->is_core &&
- counter->alternate_hw_config != PERF_COUNT_HW_MAX) {
- /* A sysfs or json event replacing a legacy event, don't uniquify. */
- return;
- }
-
- if (config->aggr_mode == AGGR_NONE) {
- /* Always unique with no aggregation. */
- counter->needs_uniquify = true;
- return;
- }
-
- /*
- * Do other non-merged events in the evlist have the same name? If so
- * uniquify is necessary.
- */
- evlist__for_each_entry(counter->evlist, evsel) {
- if (evsel == counter || evsel->merged_stat)
- continue;
-
- if (evsel__name_is(counter, evsel__name(evsel))) {
- counter->needs_uniquify = true;
- return;
- }
- }
-}
-
-static void evlist__set_needs_uniquify(struct evlist *evlist, const struct perf_stat_config *config)
-{
- struct evsel *counter;
-
- if (evlist__disable_uniquify(evlist)) {
- evlist__for_each_entry(evlist, counter)
- counter->uniquified_name = true;
- return;
- }
-
- evlist__for_each_entry(evlist, counter)
- evsel__set_needs_uniquify(counter, config);
-}
-
void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
struct target *_target, struct timespec *ts,
int argc, const char **argv)
@@ -1751,7 +1593,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
.first = true,
};
- evlist__set_needs_uniquify(evlist, config);
+ evlist__uniquify_evsel_names(evlist, config);
if (config->iostat_run)
evlist->selected = evlist__first(evlist);
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index d83bda5824d2..abaf6b579bfc 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -635,14 +635,14 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
int aggr_idx,
int *num,
void *from,
- struct perf_stat_output_ctx *out,
- struct rblist *metric_events)
+ struct perf_stat_output_ctx *out)
{
struct metric_event *me;
struct metric_expr *mexp = from;
void *ctxp = out->ctx;
bool header_printed = false;
const char *name = NULL;
+ struct rblist *metric_events = &evsel->evlist->metric_events;
me = metricgroup__lookup(metric_events, evsel, false);
if (me == NULL)
@@ -683,8 +683,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct evsel *evsel,
double avg, int aggr_idx,
- struct perf_stat_output_ctx *out,
- struct rblist *metric_events)
+ struct perf_stat_output_ctx *out)
{
typedef void (*stat_print_function_t)(struct perf_stat_config *config,
const struct evsel *evsel,
@@ -735,7 +734,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
}
perf_stat__print_shadow_stats_metricgroup(config, evsel, aggr_idx,
- &num, NULL, out, metric_events);
+ &num, NULL, out);
if (num == 0) {
print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN,
@@ -748,7 +747,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
* if it's not running or not the metric event.
*/
bool perf_stat__skip_metric_event(struct evsel *evsel,
- struct rblist *metric_events,
u64 ena, u64 run)
{
if (!evsel->default_metricgroup)
@@ -757,5 +755,5 @@ bool perf_stat__skip_metric_event(struct evsel *evsel,
if (!ena || !run)
return true;
- return !metricgroup__lookup(metric_events, evsel, false);
+ return !metricgroup__lookup(&evsel->evlist->metric_events, evsel, false);
}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 1f7abd8754c7..50b1a92d16df 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -526,7 +526,7 @@ static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias)
struct perf_counts_values *aggr_counts_a = &ps_a->aggr[i].counts;
struct perf_counts_values *aggr_counts_b = &ps_b->aggr[i].counts;
- /* NB: don't increase aggr.nr for aliases */
+ ps_a->aggr[i].nr += ps_b->aggr[i].nr;
aggr_counts_a->val += aggr_counts_b->val;
aggr_counts_a->ena += aggr_counts_b->ena;
@@ -535,35 +535,6 @@ static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias)
return 0;
}
-/*
- * Events should have the same name, scale, unit, cgroup but on different core
- * PMUs or on different but matching uncore PMUs.
- */
-static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b)
-{
- if (strcmp(evsel__name(evsel_a), evsel__name(evsel_b)))
- return false;
-
- if (evsel_a->scale != evsel_b->scale)
- return false;
-
- if (evsel_a->cgrp != evsel_b->cgrp)
- return false;
-
- if (strcmp(evsel_a->unit, evsel_b->unit))
- return false;
-
- if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b))
- return false;
-
- if (evsel_a->pmu == evsel_b->pmu || evsel_a->pmu == NULL || evsel_b->pmu == NULL)
- return false;
-
- if (evsel_a->pmu->is_core)
- return evsel_b->pmu->is_core;
-
- return perf_pmu__name_no_suffix_match(evsel_a->pmu, evsel_b->pmu->name);
-}
static void evsel__merge_aliases(struct evsel *evsel)
{
@@ -572,10 +543,9 @@ static void evsel__merge_aliases(struct evsel *evsel)
alias = list_prepare_entry(evsel, &(evlist->core.entries), core.node);
list_for_each_entry_continue(alias, &evlist->core.entries, core.node) {
- /* Merge the same events on different PMUs. */
- if (evsel__is_alias(evsel, alias)) {
+ if (alias->first_wildcard_match == evsel) {
+ /* Merge the same events on different PMUs. */
evsel__merge_aggr_counters(evsel, alias);
- alias->merged_stat = true;
}
}
}
@@ -588,11 +558,7 @@ static bool evsel__should_merge_hybrid(const struct evsel *evsel,
static void evsel__merge_stats(struct evsel *evsel, struct perf_stat_config *config)
{
- /* this evsel is already merged */
- if (evsel->merged_stat)
- return;
-
- if (evsel->auto_merge_stats || evsel__should_merge_hybrid(evsel, config))
+ if (!evsel->pmu || !evsel->pmu->is_core || evsel__should_merge_hybrid(evsel, config))
evsel__merge_aliases(evsel);
}
@@ -803,8 +769,6 @@ int create_perf_stat_counter(struct evsel *evsel,
attr->enable_on_exec = 1;
}
- if (target__has_cpu(target) && !target__has_per_thread(target))
- return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu_map_idx);
-
- return evsel__open_per_thread(evsel, evsel->core.threads);
+ return evsel__open_per_cpu_and_thread(evsel, evsel__cpus(evsel), cpu_map_idx,
+ evsel->core.threads);
}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 2fda9acd7374..4b0f14ae4e5f 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -7,7 +7,6 @@
#include <sys/types.h>
#include <sys/resource.h>
#include "cpumap.h"
-#include "rblist.h"
#include "counts.h"
struct perf_cpu_map;
@@ -100,7 +99,6 @@ struct perf_stat_config {
int times;
int run_count;
int print_free_counters_hint;
- int print_mixed_hw_group_error;
const char *csv_sep;
struct stats *walltime_nsecs_stats;
struct rusage ru_data;
@@ -109,7 +107,6 @@ struct perf_stat_config {
aggr_get_id_t aggr_get_id;
struct cpu_aggr_map *cpus_aggr_map;
u64 *walltime_run;
- struct rblist metric_events;
int ctl_fd;
int ctl_fd_ack;
bool ctl_fd_close;
@@ -188,18 +185,14 @@ struct perf_stat_output_ctx {
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct evsel *evsel,
double avg, int aggr_idx,
- struct perf_stat_output_ctx *out,
- struct rblist *metric_events);
-bool perf_stat__skip_metric_event(struct evsel *evsel,
- struct rblist *metric_events,
- u64 ena, u64 run);
+ struct perf_stat_output_ctx *out);
+bool perf_stat__skip_metric_event(struct evsel *evsel, u64 ena, u64 run);
void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
struct evsel *evsel,
int aggr_idx,
int *num,
void *from,
- struct perf_stat_output_ctx *out,
- struct rblist *metric_events);
+ struct perf_stat_output_ctx *out);
int evlist__alloc_stats(struct perf_stat_config *config,
struct evlist *evlist, bool alloc_raw);
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index fbf6d0f73af9..6d2c280a1730 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -13,10 +13,6 @@
#include "maps.h"
#include "symbol.h"
#include "symsrc.h"
-#include "demangle-cxx.h"
-#include "demangle-ocaml.h"
-#include "demangle-java.h"
-#include "demangle-rust.h"
#include "machine.h"
#include "vdso.h"
#include "debug.h"
@@ -279,62 +275,6 @@ static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
return -1;
}
-static bool want_demangle(bool is_kernel_sym)
-{
- return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
-}
-
-/*
- * Demangle C++ function signature, typically replaced by demangle-cxx.cpp
- * version.
- */
-#ifndef HAVE_CXA_DEMANGLE_SUPPORT
-char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused,
- bool modifiers __maybe_unused)
-{
-#ifdef HAVE_LIBBFD_SUPPORT
- int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0);
-
- return bfd_demangle(NULL, str, flags);
-#elif defined(HAVE_CPLUS_DEMANGLE_SUPPORT)
- int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0);
-
- return cplus_demangle(str, flags);
-#else
- return NULL;
-#endif
-}
-#endif /* !HAVE_CXA_DEMANGLE_SUPPORT */
-
-static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
-{
- char *demangled = NULL;
-
- /*
- * We need to figure out if the object was created from C++ sources
- * DWARF DW_compile_unit has this, but we don't always have access
- * to it...
- */
- if (!want_demangle(dso__kernel(dso) || kmodule))
- return demangled;
-
- demangled = cxx_demangle_sym(elf_name, verbose > 0, verbose > 0);
- if (demangled == NULL) {
- demangled = ocaml_demangle_sym(elf_name);
- if (demangled == NULL) {
- demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
- }
- }
- else if (rust_is_mangled(demangled))
- /*
- * Input to Rust demangling is the BFD-demangled
- * name which it Rust-demangles in place.
- */
- rust_demangle_sym(demangled);
-
- return demangled;
-}
-
struct rel_info {
u32 nr_entries;
u32 *sorted;
@@ -620,7 +560,7 @@ static bool get_plt_got_name(GElf_Shdr *shdr, size_t i,
/* Get the associated symbol */
gelf_getsym(di->dynsym_data, vr->sym_idx, &sym);
sym_name = elf_sym__name(&sym, di->dynstr_data);
- demangled = demangle_sym(di->dso, 0, sym_name);
+ demangled = dso__demangle_sym(di->dso, /*kmodule=*/0, sym_name);
if (demangled != NULL)
sym_name = demangled;
@@ -818,7 +758,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
gelf_getsym(syms, get_rel_symidx(&ri, idx), &sym);
elf_name = elf_sym__name(&sym, symstrs);
- demangled = demangle_sym(dso, 0, elf_name);
+ demangled = dso__demangle_sym(dso, /*kmodule=*/0, elf_name);
if (demangled)
elf_name = demangled;
if (*elf_name)
@@ -847,11 +787,6 @@ out_elf_end:
return 0;
}
-char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
-{
- return demangle_sym(dso, kmodule, elf_name);
-}
-
/*
* Align offset to 4 bytes as needed for note name and descriptor data.
*/
@@ -1733,6 +1668,12 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
continue;
}
+ /* Reject RISCV ELF "mapping symbols" */
+ if (ehdr.e_machine == EM_RISCV) {
+ if (elf_name[0] == '$' && strchr("dx", elf_name[1]))
+ continue;
+ }
+
if (runtime_ss->opdsec && sym.st_shndx == runtime_ss->opdidx) {
u32 offset = sym.st_value - syms_ss->opdshdr.sh_addr;
u64 *opd = opddata->d_buf + offset;
@@ -1840,7 +1781,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
}
}
- demangled = demangle_sym(dso, kmodule, elf_name);
+ demangled = dso__demangle_sym(dso, kmodule, elf_name);
if (demangled != NULL)
elf_name = demangled;
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index c6f369b5d893..7201494c5c20 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -90,11 +90,23 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
{
FILE *fp;
int ret = -1;
- bool need_swap = false;
+ bool need_swap = false, elf32;
u8 e_ident[EI_NIDENT];
- size_t buf_size;
- void *buf;
int i;
+ union {
+ struct {
+ Elf32_Ehdr ehdr32;
+ Elf32_Phdr *phdr32;
+ };
+ struct {
+ Elf64_Ehdr ehdr64;
+ Elf64_Phdr *phdr64;
+ };
+ } hdrs;
+ void *phdr;
+ size_t phdr_size;
+ void *buf = NULL;
+ size_t buf_size = 0;
fp = fopen(filename, "r");
if (fp == NULL)
@@ -108,117 +120,79 @@ int filename__read_build_id(const char *filename, struct build_id *bid)
goto out;
need_swap = check_need_swap(e_ident[EI_DATA]);
+ elf32 = e_ident[EI_CLASS] == ELFCLASS32;
- /* for simplicity */
- fseek(fp, 0, SEEK_SET);
-
- if (e_ident[EI_CLASS] == ELFCLASS32) {
- Elf32_Ehdr ehdr;
- Elf32_Phdr *phdr;
-
- if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
- goto out;
+ if (fread(elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64,
+ elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64),
+ 1, fp) != 1)
+ goto out;
- if (need_swap) {
- ehdr.e_phoff = bswap_32(ehdr.e_phoff);
- ehdr.e_phentsize = bswap_16(ehdr.e_phentsize);
- ehdr.e_phnum = bswap_16(ehdr.e_phnum);
+ if (need_swap) {
+ if (elf32) {
+ hdrs.ehdr32.e_phoff = bswap_32(hdrs.ehdr32.e_phoff);
+ hdrs.ehdr32.e_phentsize = bswap_16(hdrs.ehdr32.e_phentsize);
+ hdrs.ehdr32.e_phnum = bswap_16(hdrs.ehdr32.e_phnum);
+ } else {
+ hdrs.ehdr64.e_phoff = bswap_64(hdrs.ehdr64.e_phoff);
+ hdrs.ehdr64.e_phentsize = bswap_16(hdrs.ehdr64.e_phentsize);
+ hdrs.ehdr64.e_phnum = bswap_16(hdrs.ehdr64.e_phnum);
}
+ }
+ phdr_size = elf32 ? hdrs.ehdr32.e_phentsize * hdrs.ehdr32.e_phnum
+ : hdrs.ehdr64.e_phentsize * hdrs.ehdr64.e_phnum;
+ phdr = malloc(phdr_size);
+ if (phdr == NULL)
+ goto out;
- buf_size = ehdr.e_phentsize * ehdr.e_phnum;
- buf = malloc(buf_size);
- if (buf == NULL)
- goto out;
-
- fseek(fp, ehdr.e_phoff, SEEK_SET);
- if (fread(buf, buf_size, 1, fp) != 1)
- goto out_free;
-
- for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
- void *tmp;
- long offset;
-
- if (need_swap) {
- phdr->p_type = bswap_32(phdr->p_type);
- phdr->p_offset = bswap_32(phdr->p_offset);
- phdr->p_filesz = bswap_32(phdr->p_filesz);
- }
-
- if (phdr->p_type != PT_NOTE)
- continue;
-
- buf_size = phdr->p_filesz;
- offset = phdr->p_offset;
- tmp = realloc(buf, buf_size);
- if (tmp == NULL)
- goto out_free;
-
- buf = tmp;
- fseek(fp, offset, SEEK_SET);
- if (fread(buf, buf_size, 1, fp) != 1)
- goto out_free;
+ fseek(fp, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET);
+ if (fread(phdr, phdr_size, 1, fp) != 1)
+ goto out_free;
- ret = read_build_id(buf, buf_size, bid, need_swap);
- if (ret == 0) {
- ret = bid->size;
- break;
- }
- }
- } else {
- Elf64_Ehdr ehdr;
- Elf64_Phdr *phdr;
+ if (elf32)
+ hdrs.phdr32 = phdr;
+ else
+ hdrs.phdr64 = phdr;
- if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
- goto out;
+ for (i = 0; i < elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum; i++) {
+ size_t p_filesz;
if (need_swap) {
- ehdr.e_phoff = bswap_64(ehdr.e_phoff);
- ehdr.e_phentsize = bswap_16(ehdr.e_phentsize);
- ehdr.e_phnum = bswap_16(ehdr.e_phnum);
+ if (elf32) {
+ hdrs.phdr32[i].p_type = bswap_32(hdrs.phdr32[i].p_type);
+ hdrs.phdr32[i].p_offset = bswap_32(hdrs.phdr32[i].p_offset);
+ hdrs.phdr32[i].p_filesz = bswap_32(hdrs.phdr32[i].p_offset);
+ } else {
+ hdrs.phdr64[i].p_type = bswap_32(hdrs.phdr64[i].p_type);
+ hdrs.phdr64[i].p_offset = bswap_64(hdrs.phdr64[i].p_offset);
+ hdrs.phdr64[i].p_filesz = bswap_64(hdrs.phdr64[i].p_filesz);
+ }
}
+ if ((elf32 ? hdrs.phdr32[i].p_type : hdrs.phdr64[i].p_type) != PT_NOTE)
+ continue;
- buf_size = ehdr.e_phentsize * ehdr.e_phnum;
- buf = malloc(buf_size);
- if (buf == NULL)
- goto out;
-
- fseek(fp, ehdr.e_phoff, SEEK_SET);
- if (fread(buf, buf_size, 1, fp) != 1)
- goto out_free;
-
- for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
+ p_filesz = elf32 ? hdrs.phdr32[i].p_filesz : hdrs.phdr64[i].p_filesz;
+ if (p_filesz > buf_size) {
void *tmp;
- long offset;
-
- if (need_swap) {
- phdr->p_type = bswap_32(phdr->p_type);
- phdr->p_offset = bswap_64(phdr->p_offset);
- phdr->p_filesz = bswap_64(phdr->p_filesz);
- }
-
- if (phdr->p_type != PT_NOTE)
- continue;
- buf_size = phdr->p_filesz;
- offset = phdr->p_offset;
+ buf_size = p_filesz;
tmp = realloc(buf, buf_size);
if (tmp == NULL)
goto out_free;
-
buf = tmp;
- fseek(fp, offset, SEEK_SET);
- if (fread(buf, buf_size, 1, fp) != 1)
- goto out_free;
+ }
+ fseek(fp, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET);
+ if (fread(buf, p_filesz, 1, fp) != 1)
+ goto out_free;
- ret = read_build_id(buf, buf_size, bid, need_swap);
- if (ret == 0) {
- ret = bid->size;
- break;
- }
+ ret = read_build_id(buf, p_filesz, bid, need_swap);
+ if (ret == 0) {
+ ret = bid->size;
+ break;
}
}
out_free:
free(buf);
+ free(phdr);
out:
fclose(fp);
return ret;
@@ -343,7 +317,7 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused,
struct symsrc *runtime_ss __maybe_unused,
int kmodule __maybe_unused)
{
- struct build_id bid;
+ struct build_id bid = { .size = 0, };
int ret;
ret = fd__is_64_bit(ss->fd);
@@ -381,13 +355,6 @@ void symbol__elf_init(void)
{
}
-char *dso__demangle_sym(struct dso *dso __maybe_unused,
- int kmodule __maybe_unused,
- const char *elf_name __maybe_unused)
-{
- return NULL;
-}
-
bool filename__has_section(const char *filename __maybe_unused, const char *sec __maybe_unused)
{
return false;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 11540219481b..e816e4220d33 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -19,6 +19,11 @@
#include "build-id.h"
#include "cap.h"
#include "cpumap.h"
+#include "debug.h"
+#include "demangle-cxx.h"
+#include "demangle-java.h"
+#include "demangle-ocaml.h"
+#include "demangle-rust-v0.h"
#include "dso.h"
#include "util.h" // lsdir()
#include "debug.h"
@@ -36,6 +41,7 @@
#include "header.h"
#include "path.h"
#include <linux/ctype.h>
+#include <linux/log2.h>
#include <linux/zalloc.h>
#include <elf.h>
@@ -98,10 +104,12 @@ static enum dso_binary_type binary_type_symtab[] = {
#define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab)
-static bool symbol_type__filter(char symbol_type)
+static bool symbol_type__filter(char __symbol_type)
{
- symbol_type = toupper(symbol_type);
- return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B';
+ // Since 'U' == undefined and 'u' == unique global symbol, we can't use toupper there
+ char symbol_type = toupper(__symbol_type);
+ return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B' ||
+ __symbol_type == 'u' || __symbol_type == 'l';
}
static int prefix_underscores_count(const char *str)
@@ -623,7 +631,7 @@ void dso__sort_by_name(struct dso *dso)
{
mutex_lock(dso__lock(dso));
if (!dso__sorted_by_name(dso)) {
- size_t len;
+ size_t len = 0;
dso__set_symbol_names(dso, symbols__sort_by_name(dso__symbols(dso), &len));
if (dso__symbol_names(dso)) {
@@ -1414,6 +1422,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
goto out_err;
}
}
+ map__zput(new_node->map);
free(new_node);
}
@@ -1804,7 +1813,6 @@ int dso__load(struct dso *dso, struct map *map)
struct symsrc *syms_ss = NULL, *runtime_ss = NULL;
bool kmod;
bool perfmap;
- struct build_id bid;
struct nscookie nsc;
char newmapname[PATH_MAX];
const char *map_path = dso__long_name(dso);
@@ -1865,6 +1873,8 @@ int dso__load(struct dso *dso, struct map *map)
*/
if (!dso__has_build_id(dso) &&
is_regular_file(dso__long_name(dso))) {
+ struct build_id bid = { .size = 0, };
+
__symbol__join_symfs(name, PATH_MAX, dso__long_name(dso));
if (filename__read_build_id(name, &bid) > 0)
dso__set_build_id(dso, &bid);
@@ -2113,7 +2123,7 @@ static bool filename__readable(const char *file)
static char *dso__find_kallsyms(struct dso *dso, struct map *map)
{
- struct build_id bid;
+ struct build_id bid = { .size = 0, };
char sbuild_id[SBUILD_ID_SIZE];
bool is_host = false;
char path[PATH_MAX];
@@ -2143,7 +2153,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map)
goto proc_kallsyms;
}
- build_id__sprintf(dso__bid(dso), sbuild_id);
+ build_id__snprintf(dso__bid(dso), sbuild_id, sizeof(sbuild_id));
/* Find kallsyms in build-id cache with kcore */
scnprintf(path, sizeof(path), "%s/%s/%s",
@@ -2646,3 +2656,79 @@ int symbol__validate_sym_arguments(void)
}
return 0;
}
+
+static bool want_demangle(bool is_kernel_sym)
+{
+ return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
+}
+
+/*
+ * Demangle C++ function signature, typically replaced by demangle-cxx.cpp
+ * version.
+ */
+#ifndef HAVE_CXA_DEMANGLE_SUPPORT
+char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused,
+ bool modifiers __maybe_unused)
+{
+#ifdef HAVE_LIBBFD_SUPPORT
+ int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0);
+
+ return bfd_demangle(NULL, str, flags);
+#elif defined(HAVE_CPLUS_DEMANGLE_SUPPORT)
+ int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0);
+
+ return cplus_demangle(str, flags);
+#else
+ return NULL;
+#endif
+}
+#endif /* !HAVE_CXA_DEMANGLE_SUPPORT */
+
+char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
+{
+ struct demangle rust_demangle = {
+ .style = DemangleStyleUnknown,
+ };
+ char *demangled = NULL;
+
+ /*
+ * We need to figure out if the object was created from C++ sources
+ * DWARF DW_compile_unit has this, but we don't always have access
+ * to it...
+ */
+ if (!want_demangle((dso && dso__kernel(dso)) || kmodule))
+ return demangled;
+
+ rust_demangle_demangle(elf_name, &rust_demangle);
+ if (rust_demangle_is_known(&rust_demangle)) {
+ /* A rust mangled name. */
+ if (rust_demangle.mangled_len == 0)
+ return demangled;
+
+ for (size_t buf_len = roundup_pow_of_two(rust_demangle.mangled_len * 2);
+ buf_len < 1024 * 1024; buf_len += 32) {
+ char *tmp = realloc(demangled, buf_len);
+
+ if (!tmp) {
+ /* Failure to grow output buffer, return what is there. */
+ return demangled;
+ }
+ demangled = tmp;
+ if (rust_demangle_display_demangle(&rust_demangle, demangled, buf_len,
+ /*alternate=*/true) == OverflowOk)
+ return demangled;
+ }
+ /* Buffer exceeded sensible bounds, return what is there. */
+ return demangled;
+ }
+
+ demangled = cxx_demangle_sym(elf_name, verbose > 0, verbose > 0);
+ if (demangled)
+ return demangled;
+
+ demangled = ocaml_demangle_sym(elf_name);
+ if (demangled)
+ return demangled;
+
+ return java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
+}
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index cd9aa82c7d5a..7a80d2c14d9b 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -43,7 +43,7 @@ struct symbol_conf {
report_individual_block,
inline_name,
disable_add2line_warn,
- buildid_mmap2,
+ no_buildid_mmap2,
guest_code,
lazy_load_kernel_maps,
keep_exited_threads,
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 2fc4d0537840..cb2c1ace304a 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -368,11 +368,11 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event,
struct machine *machine,
bool is_kernel)
{
- struct build_id bid;
+ struct build_id bid = { .size = 0, };
struct nsinfo *nsi;
struct nscookie nc;
struct dso *dso = NULL;
- struct dso_id id;
+ struct dso_id dso_id = dso_id_empty;
int rc;
if (is_kernel) {
@@ -380,12 +380,18 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event,
goto out;
}
- id.maj = event->maj;
- id.min = event->min;
- id.ino = event->ino;
- id.ino_generation = event->ino_generation;
+ if (event->header.misc & PERF_RECORD_MISC_MMAP_BUILD_ID) {
+ build_id__init(&dso_id.build_id, event->build_id, event->build_id_size);
+ } else {
+ dso_id.maj = event->maj;
+ dso_id.min = event->min;
+ dso_id.ino = event->ino;
+ dso_id.ino_generation = event->ino_generation;
+ dso_id.mmap2_valid = true;
+ dso_id.mmap2_ino_generation_valid = true;
+ };
- dso = dsos__findnew_id(&machine->dsos, event->filename, &id);
+ dso = dsos__findnew_id(&machine->dsos, event->filename, &dso_id);
if (dso && dso__has_build_id(dso)) {
bid = *dso__bid(dso);
rc = 0;
@@ -526,7 +532,7 @@ out:
event->mmap2.pid = tgid;
event->mmap2.tid = pid;
- if (symbol_conf.buildid_mmap2)
+ if (!symbol_conf.no_buildid_mmap2)
perf_record_mmap2__read_build_id(&event->mmap2, machine, false);
if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
@@ -684,7 +690,7 @@ static int perf_event__synthesize_modules_maps_cb(struct map *map, void *data)
return 0;
dso = map__dso(map);
- if (symbol_conf.buildid_mmap2) {
+ if (!symbol_conf.no_buildid_mmap2) {
size = PERF_ALIGN(dso__long_name_len(dso) + 1, sizeof(u64));
event->mmap2.header.type = PERF_RECORD_MMAP2;
event->mmap2.header.size = (sizeof(event->mmap2) -
@@ -728,9 +734,9 @@ int perf_event__synthesize_modules(const struct perf_tool *tool, perf_event__han
.process = process,
.machine = machine,
};
- size_t size = symbol_conf.buildid_mmap2
- ? sizeof(args.event->mmap2)
- : sizeof(args.event->mmap);
+ size_t size = symbol_conf.no_buildid_mmap2
+ ? sizeof(args.event->mmap)
+ : sizeof(args.event->mmap2);
args.event = zalloc(size + machine->id_hdr_size);
if (args.event == NULL) {
@@ -1118,8 +1124,8 @@ static int __perf_event__synthesize_kernel_mmap(const struct perf_tool *tool,
struct machine *machine)
{
union perf_event *event;
- size_t size = symbol_conf.buildid_mmap2 ?
- sizeof(event->mmap2) : sizeof(event->mmap);
+ size_t size = symbol_conf.no_buildid_mmap2 ?
+ sizeof(event->mmap) : sizeof(event->mmap2);
struct map *map = machine__kernel_map(machine);
struct kmap *kmap;
int err;
@@ -1153,7 +1159,7 @@ static int __perf_event__synthesize_kernel_mmap(const struct perf_tool *tool,
event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
}
- if (symbol_conf.buildid_mmap2) {
+ if (!symbol_conf.no_buildid_mmap2) {
size = snprintf(event->mmap2.filename, sizeof(event->mmap2.filename),
"%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
size = PERF_ALIGN(size, sizeof(u64));
@@ -1567,10 +1573,16 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
return result;
}
-void __weak arch_perf_synthesize_sample_weight(const struct perf_sample *data,
+static void perf_synthesize_sample_weight(const struct perf_sample *data,
__u64 *array, u64 type __maybe_unused)
{
*array = data->weight;
+
+ if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
+ *array &= 0xffffffff;
+ *array |= ((u64)data->ins_lat << 32);
+ *array |= ((u64)data->weight3 << 48);
+ }
}
static __u64 *copy_read_group_values(__u64 *array, __u64 read_format,
@@ -1730,7 +1742,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
}
if (type & PERF_SAMPLE_WEIGHT_TYPE) {
- arch_perf_synthesize_sample_weight(sample, array, type);
+ perf_synthesize_sample_weight(sample, array, type);
array++;
}
@@ -2045,7 +2057,7 @@ int perf_event__synthesize_event_update_name(const struct perf_tool *tool, struc
int perf_event__synthesize_event_update_cpus(const struct perf_tool *tool, struct evsel *evsel,
perf_event__handler_t process)
{
- struct synthesize_cpu_map_data syn_data = { .map = evsel->core.own_cpus };
+ struct synthesize_cpu_map_data syn_data = { .map = evsel->core.pmu_cpus };
struct perf_record_event_update *ev;
int err;
@@ -2126,7 +2138,7 @@ int perf_event__synthesize_extra_attr(const struct perf_tool *tool, struct evlis
}
}
- if (evsel->core.own_cpus) {
+ if (evsel->core.pmu_cpus) {
err = perf_event__synthesize_event_update_cpus(tool, evsel, process);
if (err < 0) {
pr_err("Couldn't synthesize evsel cpus.\n");
@@ -2248,7 +2260,9 @@ int perf_event__synthesize_build_id(const struct perf_tool *tool,
memset(&ev, 0, len);
- ev.build_id.size = min(bid->size, sizeof(ev.build_id.build_id));
+ ev.build_id.size = bid->size;
+ if (ev.build_id.size > sizeof(ev.build_id.build_id))
+ ev.build_id.size = sizeof(ev.build_id.build_id);
memcpy(ev.build_id.build_id, bid->data, ev.build_id.size);
ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
ev.build_id.header.misc = misc | PERF_RECORD_MISC_BUILD_ID_SIZE;
@@ -2308,7 +2322,9 @@ int perf_event__synthesize_mmap2_build_id(const struct perf_tool *tool,
ev.mmap2.len = len;
ev.mmap2.pgoff = pgoff;
- ev.mmap2.build_id_size = min(bid->size, sizeof(ev.mmap2.build_id));
+ ev.mmap2.build_id_size = bid->size;
+ if (ev.mmap2.build_id_size > sizeof(ev.mmap2.build_id))
+ ev.build_id.size = sizeof(ev.mmap2.build_id);
memcpy(ev.mmap2.build_id, bid->data, ev.mmap2.build_id_size);
ev.mmap2.prot = prot;
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
index b9c936b5cfeb..ee29615d68e5 100644
--- a/tools/perf/util/synthetic-events.h
+++ b/tools/perf/util/synthetic-events.h
@@ -92,6 +92,8 @@ int perf_event__synthesize_threads(const struct perf_tool *tool, perf_event__han
int perf_event__synthesize_tracing_data(const struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process);
int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, const struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
pid_t perf_event__synthesize_comm(const struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine);
+void perf_event__synthesize_final_bpf_metadata(struct perf_session *session,
+ perf_event__handler_t process);
int perf_tool__process_synth_event(const struct perf_tool *tool, union perf_event *event, struct machine *machine, perf_event__handler_t process);
diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c
index 0f383418e3df..8cf71bea295a 100644
--- a/tools/perf/util/target.c
+++ b/tools/perf/util/target.c
@@ -28,20 +28,6 @@ enum target_errno target__validate(struct target *target)
ret = TARGET_ERRNO__PID_OVERRIDE_CPU;
}
- /* UID and PID are mutually exclusive */
- if (target->tid && target->uid_str) {
- target->uid_str = NULL;
- if (ret == TARGET_ERRNO__SUCCESS)
- ret = TARGET_ERRNO__PID_OVERRIDE_UID;
- }
-
- /* UID and CPU are mutually exclusive */
- if (target->uid_str && target->cpu_list) {
- target->cpu_list = NULL;
- if (ret == TARGET_ERRNO__SUCCESS)
- ret = TARGET_ERRNO__UID_OVERRIDE_CPU;
- }
-
/* PID and SYSTEM are mutually exclusive */
if (target->tid && target->system_wide) {
target->system_wide = false;
@@ -49,13 +35,6 @@ enum target_errno target__validate(struct target *target)
ret = TARGET_ERRNO__PID_OVERRIDE_SYSTEM;
}
- /* UID and SYSTEM are mutually exclusive */
- if (target->uid_str && target->system_wide) {
- target->system_wide = false;
- if (ret == TARGET_ERRNO__SUCCESS)
- ret = TARGET_ERRNO__UID_OVERRIDE_SYSTEM;
- }
-
/* BPF and CPU are mutually exclusive */
if (target->bpf_str && target->cpu_list) {
target->cpu_list = NULL;
@@ -70,13 +49,6 @@ enum target_errno target__validate(struct target *target)
ret = TARGET_ERRNO__BPF_OVERRIDE_PID;
}
- /* BPF and UID are mutually exclusive */
- if (target->bpf_str && target->uid_str) {
- target->uid_str = NULL;
- if (ret == TARGET_ERRNO__SUCCESS)
- ret = TARGET_ERRNO__BPF_OVERRIDE_UID;
- }
-
/* BPF and THREADS are mutually exclusive */
if (target->bpf_str && target->per_thread) {
target->per_thread = false;
@@ -94,15 +66,13 @@ enum target_errno target__validate(struct target *target)
return ret;
}
-enum target_errno target__parse_uid(struct target *target)
+uid_t parse_uid(const char *str)
{
struct passwd pwd, *result;
char buf[1024];
- const char *str = target->uid_str;
- target->uid = UINT_MAX;
if (str == NULL)
- return TARGET_ERRNO__SUCCESS;
+ return UINT_MAX;
/* Try user name first */
getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
@@ -115,16 +85,15 @@ enum target_errno target__parse_uid(struct target *target)
int uid = strtol(str, &endptr, 10);
if (*endptr != '\0')
- return TARGET_ERRNO__INVALID_UID;
+ return UINT_MAX;
getpwuid_r(uid, &pwd, buf, sizeof(buf), &result);
if (result == NULL)
- return TARGET_ERRNO__USER_NOT_FOUND;
+ return UINT_MAX;
}
- target->uid = result->pw_uid;
- return TARGET_ERRNO__SUCCESS;
+ return result->pw_uid;
}
/*
@@ -132,20 +101,14 @@ enum target_errno target__parse_uid(struct target *target)
*/
static const char *target__error_str[] = {
"PID/TID switch overriding CPU",
- "PID/TID switch overriding UID",
- "UID switch overriding CPU",
"PID/TID switch overriding SYSTEM",
- "UID switch overriding SYSTEM",
"SYSTEM/CPU switch overriding PER-THREAD",
"BPF switch overriding CPU",
"BPF switch overriding PID/TID",
- "BPF switch overriding UID",
"BPF switch overriding THREAD",
- "Invalid User: %s",
- "Problems obtaining information for user %s",
};
-int target__strerror(struct target *target, int errnum,
+int target__strerror(struct target *target __maybe_unused, int errnum,
char *buf, size_t buflen)
{
int idx;
@@ -170,11 +133,6 @@ int target__strerror(struct target *target, int errnum,
snprintf(buf, buflen, "%s", msg);
break;
- case TARGET_ERRNO__INVALID_UID:
- case TARGET_ERRNO__USER_NOT_FOUND:
- snprintf(buf, buflen, msg, target->uid_str);
- break;
-
default:
/* cannot reach here */
break;
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index 2ee2cc30340f..84ebb9c940c6 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -9,9 +9,7 @@ struct target {
const char *pid;
const char *tid;
const char *cpu_list;
- const char *uid_str;
const char *bpf_str;
- uid_t uid;
bool system_wide;
bool uses_mmap;
bool default_per_cpu;
@@ -36,31 +34,24 @@ enum target_errno {
/* for target__validate() */
TARGET_ERRNO__PID_OVERRIDE_CPU = __TARGET_ERRNO__START,
- TARGET_ERRNO__PID_OVERRIDE_UID,
- TARGET_ERRNO__UID_OVERRIDE_CPU,
TARGET_ERRNO__PID_OVERRIDE_SYSTEM,
- TARGET_ERRNO__UID_OVERRIDE_SYSTEM,
TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD,
TARGET_ERRNO__BPF_OVERRIDE_CPU,
TARGET_ERRNO__BPF_OVERRIDE_PID,
- TARGET_ERRNO__BPF_OVERRIDE_UID,
TARGET_ERRNO__BPF_OVERRIDE_THREAD,
- /* for target__parse_uid() */
- TARGET_ERRNO__INVALID_UID,
- TARGET_ERRNO__USER_NOT_FOUND,
-
__TARGET_ERRNO__END,
};
enum target_errno target__validate(struct target *target);
-enum target_errno target__parse_uid(struct target *target);
+
+uid_t parse_uid(const char *str);
int target__strerror(struct target *target, int errnum, char *buf, size_t buflen);
static inline bool target__has_task(struct target *target)
{
- return target->tid || target->pid || target->uid_str;
+ return target->tid || target->pid;
}
static inline bool target__has_cpu(struct target *target)
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 89585f53c1d5..aa9c58bbf9d3 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -41,6 +41,7 @@ int thread__init_maps(struct thread *thread, struct machine *machine)
}
struct thread *thread__new(pid_t pid, pid_t tid)
+ NO_THREAD_SAFETY_ANALYSIS /* Allocation/creation is inherently single threaded. */
{
RC_STRUCT(thread) *_thread = zalloc(sizeof(*_thread));
struct thread *thread;
@@ -200,7 +201,8 @@ int thread__set_namespaces(struct thread *thread, u64 timestamp,
return ret;
}
-struct comm *thread__comm(struct thread *thread)
+static struct comm *__thread__comm(struct thread *thread)
+ SHARED_LOCKS_REQUIRED(thread__comm_lock(thread))
{
if (list_empty(thread__comm_list(thread)))
return NULL;
@@ -208,16 +210,30 @@ struct comm *thread__comm(struct thread *thread)
return list_first_entry(thread__comm_list(thread), struct comm, list);
}
+struct comm *thread__comm(struct thread *thread)
+{
+ struct comm *res = NULL;
+
+ down_read(thread__comm_lock(thread));
+ res = __thread__comm(thread);
+ up_read(thread__comm_lock(thread));
+ return res;
+}
+
struct comm *thread__exec_comm(struct thread *thread)
{
struct comm *comm, *last = NULL, *second_last = NULL;
+ down_read(thread__comm_lock(thread));
list_for_each_entry(comm, thread__comm_list(thread), list) {
- if (comm->exec)
+ if (comm->exec) {
+ up_read(thread__comm_lock(thread));
return comm;
+ }
second_last = last;
last = comm;
}
+ up_read(thread__comm_lock(thread));
/*
* 'last' with no start time might be the parent's comm of a synthesized
@@ -233,8 +249,9 @@ struct comm *thread__exec_comm(struct thread *thread)
static int ____thread__set_comm(struct thread *thread, const char *str,
u64 timestamp, bool exec)
+ EXCLUSIVE_LOCKS_REQUIRED(thread__comm_lock(thread))
{
- struct comm *new, *curr = thread__comm(thread);
+ struct comm *new, *curr = __thread__comm(thread);
/* Override the default :tid entry */
if (!thread__comm_set(thread)) {
@@ -285,8 +302,9 @@ int thread__set_comm_from_proc(struct thread *thread)
}
static const char *__thread__comm_str(struct thread *thread)
+ SHARED_LOCKS_REQUIRED(thread__comm_lock(thread))
{
- const struct comm *comm = thread__comm(thread);
+ const struct comm *comm = __thread__comm(thread);
if (!comm)
return NULL;
@@ -410,7 +428,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bo
}
void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
- struct addr_location *al)
+ bool symbols, struct addr_location *al)
{
size_t i;
const u8 cpumodes[] = {
@@ -421,7 +439,11 @@ void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
};
for (i = 0; i < ARRAY_SIZE(cpumodes); i++) {
- thread__find_symbol(thread, cpumodes[i], addr, al);
+ if (symbols)
+ thread__find_symbol(thread, cpumodes[i], addr, al);
+ else
+ thread__find_map(thread, cpumodes[i], addr, al);
+
if (al->map)
break;
}
@@ -471,6 +493,7 @@ uint16_t thread__e_machine(struct thread *thread, struct machine *machine)
if (parent) {
e_machine = thread__e_machine(parent, machine);
+ thread__put(parent);
thread__set_e_machine(thread, e_machine);
return e_machine;
}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index cd574a896418..310eaea344bb 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -126,7 +126,7 @@ struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
u64 addr, struct addr_location *al);
void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
- struct addr_location *al);
+ bool symbols, struct addr_location *al);
int thread__memcpy(struct thread *thread, struct machine *machine,
void *buf, u64 ip, int len, bool *is64bit);
@@ -236,14 +236,15 @@ static inline struct rw_semaphore *thread__namespaces_lock(struct thread *thread
return &RC_CHK_ACCESS(thread)->namespaces_lock;
}
-static inline struct list_head *thread__comm_list(struct thread *thread)
+static inline struct rw_semaphore *thread__comm_lock(struct thread *thread)
{
- return &RC_CHK_ACCESS(thread)->comm_list;
+ return &RC_CHK_ACCESS(thread)->comm_lock;
}
-static inline struct rw_semaphore *thread__comm_lock(struct thread *thread)
+static inline struct list_head *thread__comm_list(struct thread *thread)
+ SHARED_LOCKS_REQUIRED(thread__comm_lock(thread))
{
- return &RC_CHK_ACCESS(thread)->comm_lock;
+ return &RC_CHK_ACCESS(thread)->comm_list;
}
static inline u64 thread__db_id(const struct thread *thread)
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index b5f12390c355..ca193c1374ed 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -72,7 +72,7 @@ struct perf_thread_map *thread_map__new_by_tid(pid_t tid)
return threads;
}
-static struct perf_thread_map *__thread_map__new_all_cpus(uid_t uid)
+static struct perf_thread_map *thread_map__new_all_cpus(void)
{
DIR *proc;
int max_threads = 32, items, i;
@@ -98,15 +98,6 @@ static struct perf_thread_map *__thread_map__new_all_cpus(uid_t uid)
if (*end) /* only interested in proper numerical dirents */
continue;
- snprintf(path, sizeof(path), "/proc/%s", dirent->d_name);
-
- if (uid != UINT_MAX) {
- struct stat st;
-
- if (stat(path, &st) != 0 || st.st_uid != uid)
- continue;
- }
-
snprintf(path, sizeof(path), "/proc/%d/task", pid);
items = scandir(path, &namelist, filter, NULL);
if (items <= 0) {
@@ -157,24 +148,11 @@ out_free_namelist:
goto out_closedir;
}
-struct perf_thread_map *thread_map__new_all_cpus(void)
-{
- return __thread_map__new_all_cpus(UINT_MAX);
-}
-
-struct perf_thread_map *thread_map__new_by_uid(uid_t uid)
-{
- return __thread_map__new_all_cpus(uid);
-}
-
-struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid)
+struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid)
{
if (pid != -1)
return thread_map__new_by_pid(pid);
- if (tid == -1 && uid != UINT_MAX)
- return thread_map__new_by_uid(uid);
-
return thread_map__new_by_tid(tid);
}
@@ -289,15 +267,11 @@ out_free_threads:
goto out;
}
-struct perf_thread_map *thread_map__new_str(const char *pid, const char *tid,
- uid_t uid, bool all_threads)
+struct perf_thread_map *thread_map__new_str(const char *pid, const char *tid, bool all_threads)
{
if (pid)
return thread_map__new_by_pid_str(pid);
- if (!tid && uid != UINT_MAX)
- return thread_map__new_by_uid(uid);
-
if (all_threads)
return thread_map__new_all_cpus();
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index 00ec05fc1656..fc16d87f32fb 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -11,13 +11,11 @@ struct perf_record_thread_map;
struct perf_thread_map *thread_map__new_dummy(void);
struct perf_thread_map *thread_map__new_by_pid(pid_t pid);
struct perf_thread_map *thread_map__new_by_tid(pid_t tid);
-struct perf_thread_map *thread_map__new_by_uid(uid_t uid);
-struct perf_thread_map *thread_map__new_all_cpus(void);
-struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
+struct perf_thread_map *thread_map__new(pid_t pid, pid_t tid);
struct perf_thread_map *thread_map__new_event(struct perf_record_thread_map *event);
struct perf_thread_map *thread_map__new_str(const char *pid,
- const char *tid, uid_t uid, bool all_threads);
+ const char *tid, bool all_threads);
struct perf_thread_map *thread_map__new_by_tid_str(const char *tid_str);
diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c
index 3b7f390f26eb..e83c7ababc2a 100644
--- a/tools/perf/util/tool.c
+++ b/tools/perf/util/tool.c
@@ -1,12 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
#include "data.h"
#include "debug.h"
+#include "event.h"
#include "header.h"
#include "session.h"
#include "stat.h"
#include "tool.h"
#include "tsc.h"
+#include <linux/compiler.h>
#include <sys/mman.h>
+#include <stddef.h>
#include <unistd.h>
#ifdef HAVE_ZSTD_SUPPORT
@@ -17,7 +20,7 @@ static int perf_session__process_compressed_event(struct perf_session *session,
void *src;
size_t decomp_size, src_size;
u64 decomp_last_rem = 0;
- size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
+ size_t mmap_len, decomp_len = perf_session__env(session)->comp_mmap_len;
struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last;
if (decomp_last) {
@@ -43,8 +46,15 @@ static int perf_session__process_compressed_event(struct perf_session *session,
decomp->size = decomp_last_rem;
}
- src = (void *)event + sizeof(struct perf_record_compressed);
- src_size = event->pack.header.size - sizeof(struct perf_record_compressed);
+ if (event->header.type == PERF_RECORD_COMPRESSED) {
+ src = (void *)event + sizeof(struct perf_record_compressed);
+ src_size = event->pack.header.size - sizeof(struct perf_record_compressed);
+ } else if (event->header.type == PERF_RECORD_COMPRESSED2) {
+ src = (void *)event + sizeof(struct perf_record_compressed2);
+ src_size = event->pack2.data_size;
+ } else {
+ return -1;
+ }
decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size,
&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
@@ -230,6 +240,16 @@ static int perf_session__process_compressed_event_stub(struct perf_session *sess
return 0;
}
+static int perf_event__process_bpf_metadata_stub(struct perf_session *perf_session __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_bpf_metadata(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
void perf_tool__init(struct perf_tool *tool, bool ordered_events)
{
tool->ordered_events = ordered_events;
@@ -286,6 +306,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events)
tool->compressed = perf_session__process_compressed_event_stub;
#endif
tool->finished_init = process_event_op2_stub;
+ tool->bpf_metadata = perf_event__process_bpf_metadata_stub;
}
bool perf_tool__compressed_is_stub(const struct perf_tool *tool)
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index db1c7642b0d1..18b76ff0f26a 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -77,7 +77,8 @@ struct perf_tool {
stat,
stat_round,
feature,
- finished_init;
+ finished_init,
+ bpf_metadata;
event_op4 compressed;
event_op3 auxtrace;
bool ordered_events;
diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c
index 97b327d1ce4a..d99e699e646d 100644
--- a/tools/perf/util/tool_pmu.c
+++ b/tools/perf/util/tool_pmu.c
@@ -332,7 +332,7 @@ static bool has_pmem(void)
return has_pmem;
}
-bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result)
+bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result)
{
const struct cpu_topology *topology;
@@ -347,18 +347,60 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result)
return true;
case TOOL_PMU__EVENT_NUM_CPUS:
- *result = cpu__max_present_cpu().cpu;
+ if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
+ /* No evsel to be specific to. */
+ *result = cpu__max_present_cpu().cpu;
+ } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
+ /* Evsel just has specific CPUs. */
+ *result = perf_cpu_map__nr(evsel->core.cpus);
+ } else {
+ /*
+ * "Any CPU" event that can be scheduled on any CPU in
+ * the PMU's cpumask. The PMU cpumask should be saved in
+ * pmu_cpus. If not present fall back to max.
+ */
+ if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus))
+ *result = perf_cpu_map__nr(evsel->core.pmu_cpus);
+ else
+ *result = cpu__max_present_cpu().cpu;
+ }
return true;
case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
struct perf_cpu_map *online = cpu_map__online();
- if (online) {
+ if (!online)
+ return false;
+
+ if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
+ /* No evsel to be specific to. */
*result = perf_cpu_map__nr(online);
- perf_cpu_map__put(online);
- return true;
+ } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
+ /* Evsel just has specific CPUs. */
+ struct perf_cpu_map *tmp =
+ perf_cpu_map__intersect(online, evsel->core.cpus);
+
+ *result = perf_cpu_map__nr(tmp);
+ perf_cpu_map__put(tmp);
+ } else {
+ /*
+ * "Any CPU" event that can be scheduled on any CPU in
+ * the PMU's cpumask. The PMU cpumask should be saved in
+ * pmu_cpus, if not present then just the online cpu
+ * mask.
+ */
+ if (!perf_cpu_map__is_empty(evsel->core.pmu_cpus)) {
+ struct perf_cpu_map *tmp =
+ perf_cpu_map__intersect(online, evsel->core.pmu_cpus);
+
+ *result = perf_cpu_map__nr(tmp);
+ perf_cpu_map__put(tmp);
+ } else {
+ *result = perf_cpu_map__nr(online);
+ }
}
- return false;
+ perf_cpu_map__put(online);
+ return true;
}
case TOOL_PMU__EVENT_NUM_DIES:
topology = online_topology();
@@ -417,7 +459,7 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
val = 0;
if (cpu_map_idx == 0 && thread == 0) {
- if (!tool_pmu__read_event(ev, &val)) {
+ if (!tool_pmu__read_event(ev, evsel, &val)) {
count->lost++;
val = 0;
}
@@ -486,8 +528,14 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
delta_start *= 1000000000 / ticks_per_sec;
}
count->val = delta_start;
- count->ena = count->run = delta_start;
count->lost = 0;
+ /*
+ * The values of enabled and running must make a ratio of 100%. The
+ * exact values don't matter as long as they are non-zero to avoid
+ * issues with evsel__count_has_error.
+ */
+ count->ena++;
+ count->run++;
return 0;
}
@@ -496,19 +544,12 @@ struct perf_pmu *tool_pmu__new(void)
struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu));
if (!tool)
- goto out;
- tool->name = strdup("tool");
- if (!tool->name) {
- zfree(&tool);
- goto out;
- }
+ return NULL;
- tool->type = PERF_PMU_TYPE_TOOL;
- INIT_LIST_HEAD(&tool->aliases);
- INIT_LIST_HEAD(&tool->caps);
- INIT_LIST_HEAD(&tool->format);
+ if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) {
+ perf_pmu__delete(tool);
+ return NULL;
+ }
tool->events_table = find_core_events_table("common", "common");
-
-out:
return tool;
}
diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h
index c6ad1dd90a56..d642e7d73910 100644
--- a/tools/perf/util/tool_pmu.h
+++ b/tools/perf/util/tool_pmu.h
@@ -34,7 +34,7 @@ enum tool_pmu_event tool_pmu__str_to_event(const char *str);
bool tool_pmu__skip_event(const char *name);
int tool_pmu__num_skip_events(void);
-bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result);
+bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result);
u64 tool_pmu__cpu_slots_per_cycle(void);
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 4db3d1bd686c..b06e10a116bb 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -88,9 +88,9 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
else if (target->tid)
ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s",
target->tid);
- else if (target->uid_str != NULL)
+ else if (top->uid_str != NULL)
ret += SNPRINTF(bf + ret, size - ret, " (uid: %s",
- target->uid_str);
+ top->uid_str);
else
ret += SNPRINTF(bf + ret, size - ret, " (all");
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index 4c5588dbb131..04ff926846be 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -48,6 +48,7 @@ struct perf_top {
const char *sym_filter;
float min_percent;
unsigned int nr_threads_synthesize;
+ const char *uid_str;
struct {
struct ordered_events *in;
diff --git a/tools/perf/util/tp_pmu.c b/tools/perf/util/tp_pmu.c
new file mode 100644
index 000000000000..e7534a973247
--- /dev/null
+++ b/tools/perf/util/tp_pmu.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#include "tp_pmu.h"
+#include "pmus.h"
+#include <api/fs/fs.h>
+#include <api/fs/tracing_path.h>
+#include <api/io_dir.h>
+#include <linux/kernel.h>
+#include <errno.h>
+#include <string.h>
+
+int tp_pmu__id(const char *sys, const char *name)
+{
+ char *tp_dir = get_events_file(sys);
+ char path[PATH_MAX];
+ int id, err;
+
+ if (!tp_dir)
+ return -1;
+
+ scnprintf(path, PATH_MAX, "%s/%s/id", tp_dir, name);
+ put_events_file(tp_dir);
+ err = filename__read_int(path, &id);
+ if (err)
+ return err;
+
+ return id;
+}
+
+
+int tp_pmu__for_each_tp_event(const char *sys, void *state, tp_event_callback cb)
+{
+ char *evt_path;
+ struct io_dirent64 *evt_ent;
+ struct io_dir evt_dir;
+ int ret = 0;
+
+ evt_path = get_events_file(sys);
+ if (!evt_path)
+ return -errno;
+
+ io_dir__init(&evt_dir, open(evt_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (evt_dir.dirfd < 0) {
+ ret = -errno;
+ put_events_file(evt_path);
+ return ret;
+ }
+ put_events_file(evt_path);
+
+ while (!ret && (evt_ent = io_dir__readdir(&evt_dir))) {
+ if (!strcmp(evt_ent->d_name, ".")
+ || !strcmp(evt_ent->d_name, "..")
+ || !strcmp(evt_ent->d_name, "enable")
+ || !strcmp(evt_ent->d_name, "filter"))
+ continue;
+
+ ret = cb(state, sys, evt_ent->d_name);
+ if (ret)
+ break;
+ }
+ close(evt_dir.dirfd);
+ return ret;
+}
+
+int tp_pmu__for_each_tp_sys(void *state, tp_sys_callback cb)
+{
+ struct io_dirent64 *events_ent;
+ struct io_dir events_dir;
+ int ret = 0;
+ char *events_dir_path = get_tracing_file("events");
+
+ if (!events_dir_path)
+ return -errno;
+
+ io_dir__init(&events_dir, open(events_dir_path, O_CLOEXEC | O_DIRECTORY | O_RDONLY));
+ if (events_dir.dirfd < 0) {
+ ret = -errno;
+ put_events_file(events_dir_path);
+ return ret;
+ }
+ put_events_file(events_dir_path);
+
+ while (!ret && (events_ent = io_dir__readdir(&events_dir))) {
+ if (!strcmp(events_ent->d_name, ".") ||
+ !strcmp(events_ent->d_name, "..") ||
+ !strcmp(events_ent->d_name, "enable") ||
+ !strcmp(events_ent->d_name, "header_event") ||
+ !strcmp(events_ent->d_name, "header_page"))
+ continue;
+
+ ret = cb(state, events_ent->d_name);
+ if (ret)
+ break;
+ }
+ close(events_dir.dirfd);
+ return ret;
+}
+
+bool perf_pmu__is_tracepoint(const struct perf_pmu *pmu)
+{
+ return pmu->type == PERF_TYPE_TRACEPOINT;
+}
+
+struct for_each_event_args {
+ void *state;
+ pmu_event_callback cb;
+ const struct perf_pmu *pmu;
+};
+
+static int for_each_event_cb(void *state, const char *sys_name, const char *evt_name)
+{
+ struct for_each_event_args *args = state;
+ char name[2 * FILENAME_MAX + 2];
+ /* 16 possible hex digits and 22 other characters and \0. */
+ char encoding[16 + 22];
+ char *format = NULL;
+ size_t format_size;
+ struct pmu_event_info info = {
+ .pmu = args->pmu,
+ .pmu_name = args->pmu->name,
+ .event_type_desc = "Tracepoint event",
+ };
+ char *tp_dir = get_events_file(sys_name);
+ char path[PATH_MAX];
+ int id, err;
+
+ if (!tp_dir)
+ return -1;
+
+ scnprintf(path, sizeof(path), "%s/%s/id", tp_dir, evt_name);
+ err = filename__read_int(path, &id);
+ if (err == 0) {
+ snprintf(encoding, sizeof(encoding), "tracepoint/config=0x%x/", id);
+ info.encoding_desc = encoding;
+ }
+
+ scnprintf(path, sizeof(path), "%s/%s/format", tp_dir, evt_name);
+ put_events_file(tp_dir);
+ err = filename__read_str(path, &format, &format_size);
+ if (err == 0) {
+ info.long_desc = format;
+ for (size_t i = 0 ; i < format_size; i++) {
+ /* Swap tabs to spaces due to some rendering issues. */
+ if (format[i] == '\t')
+ format[i] = ' ';
+ }
+ }
+ snprintf(name, sizeof(name), "%s:%s", sys_name, evt_name);
+ info.name = name;
+ err = args->cb(args->state, &info);
+ free(format);
+ return err;
+}
+
+static int for_each_event_sys_cb(void *state, const char *sys_name)
+{
+ return tp_pmu__for_each_tp_event(sys_name, state, for_each_event_cb);
+}
+
+int tp_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb)
+{
+ struct for_each_event_args args = {
+ .state = state,
+ .cb = cb,
+ .pmu = pmu,
+ };
+
+ return tp_pmu__for_each_tp_sys(&args, for_each_event_sys_cb);
+}
+
+static int num_events_cb(void *state, const char *sys_name __maybe_unused,
+ const char *evt_name __maybe_unused)
+{
+ size_t *count = state;
+
+ (*count)++;
+ return 0;
+}
+
+static int num_events_sys_cb(void *state, const char *sys_name)
+{
+ return tp_pmu__for_each_tp_event(sys_name, state, num_events_cb);
+}
+
+size_t tp_pmu__num_events(struct perf_pmu *pmu __maybe_unused)
+{
+ size_t count = 0;
+
+ tp_pmu__for_each_tp_sys(&count, num_events_sys_cb);
+ return count;
+}
+
+bool tp_pmu__have_event(struct perf_pmu *pmu __maybe_unused, const char *name)
+{
+ char *dup_name, *colon;
+ int id;
+
+ colon = strchr(name, ':');
+ if (colon == NULL)
+ return false;
+
+ dup_name = strdup(name);
+ if (!dup_name)
+ return false;
+
+ colon = dup_name + (colon - name);
+ *colon = '\0';
+ id = tp_pmu__id(dup_name, colon + 1);
+ free(dup_name);
+ return id >= 0;
+}
diff --git a/tools/perf/util/tp_pmu.h b/tools/perf/util/tp_pmu.h
new file mode 100644
index 000000000000..30456bd6943d
--- /dev/null
+++ b/tools/perf/util/tp_pmu.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __TP_PMU_H
+#define __TP_PMU_H
+
+#include "pmu.h"
+
+typedef int (*tp_sys_callback)(void *state, const char *sys_name);
+typedef int (*tp_event_callback)(void *state, const char *sys_name, const char *evt_name);
+
+int tp_pmu__id(const char *sys, const char *name);
+int tp_pmu__for_each_tp_event(const char *sys, void *state, tp_event_callback cb);
+int tp_pmu__for_each_tp_sys(void *state, tp_sys_callback cb);
+
+bool perf_pmu__is_tracepoint(const struct perf_pmu *pmu);
+int tp_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb);
+size_t tp_pmu__num_events(struct perf_pmu *pmu);
+bool tp_pmu__have_event(struct perf_pmu *pmu, const char *name);
+
+#endif /* __TP_PMU_H */
diff --git a/tools/perf/util/trace.h b/tools/perf/util/trace.h
new file mode 100644
index 000000000000..fa8d480527a2
--- /dev/null
+++ b/tools/perf/util/trace.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef UTIL_TRACE_H
+#define UTIL_TRACE_H
+
+#include <stdio.h> /* for FILE */
+
+enum trace_summary_mode {
+ SUMMARY__NONE = 0,
+ SUMMARY__BY_TOTAL,
+ SUMMARY__BY_THREAD,
+ SUMMARY__BY_CGROUP,
+};
+
+#ifdef HAVE_BPF_SKEL
+
+int trace_prepare_bpf_summary(enum trace_summary_mode mode);
+void trace_start_bpf_summary(void);
+void trace_end_bpf_summary(void);
+int trace_print_bpf_summary(FILE *fp);
+void trace_cleanup_bpf_summary(void);
+
+#else /* !HAVE_BPF_SKEL */
+
+static inline int trace_prepare_bpf_summary(enum trace_summary_mode mode __maybe_unused)
+{
+ return -1;
+}
+static inline void trace_start_bpf_summary(void) {}
+static inline void trace_end_bpf_summary(void) {}
+static inline int trace_print_bpf_summary(FILE *fp __maybe_unused)
+{
+ return 0;
+}
+static inline void trace_cleanup_bpf_summary(void) {}
+
+#endif /* HAVE_BPF_SKEL */
+
+#endif /* UTIL_TRACE_H */
diff --git a/tools/perf/util/trace_augment.h b/tools/perf/util/trace_augment.h
index 57a3e5045937..4f729bc67753 100644
--- a/tools/perf/util/trace_augment.h
+++ b/tools/perf/util/trace_augment.h
@@ -1,6 +1,66 @@
#ifndef TRACE_AUGMENT_H
#define TRACE_AUGMENT_H
-#define TRACE_AUG_MAX_BUF 32 /* for buffer augmentation in perf trace */
+#include <linux/compiler.h>
+
+struct bpf_program;
+struct evlist;
+
+#ifdef HAVE_BPF_SKEL
+
+int augmented_syscalls__prepare(void);
+int augmented_syscalls__create_bpf_output(struct evlist *evlist);
+void augmented_syscalls__setup_bpf_output(void);
+int augmented_syscalls__set_filter_pids(unsigned int nr, pid_t *pids);
+int augmented_syscalls__get_map_fds(int *enter_fd, int *exit_fd, int *beauty_fd);
+struct bpf_program *augmented_syscalls__find_by_title(const char *name);
+struct bpf_program *augmented_syscalls__unaugmented(void);
+void augmented_syscalls__cleanup(void);
+
+#else /* !HAVE_BPF_SKEL */
+
+static inline int augmented_syscalls__prepare(void)
+{
+ return -1;
+}
+
+static inline int augmented_syscalls__create_bpf_output(struct evlist *evlist __maybe_unused)
+{
+ return -1;
+}
+
+static inline void augmented_syscalls__setup_bpf_output(void)
+{
+}
+
+static inline int augmented_syscalls__set_filter_pids(unsigned int nr __maybe_unused,
+ pid_t *pids __maybe_unused)
+{
+ return 0;
+}
+
+static inline int augmented_syscalls__get_map_fds(int *enter_fd __maybe_unused,
+ int *exit_fd __maybe_unused,
+ int *beauty_fd __maybe_unused)
+{
+ return -1;
+}
+
+static inline struct bpf_program *
+augmented_syscalls__find_by_title(const char *name __maybe_unused)
+{
+ return NULL;
+}
+
+static inline struct bpf_program *augmented_syscalls__unaugmented(void)
+{
+ return NULL;
+}
+
+static inline void augmented_syscalls__cleanup(void)
+{
+}
+
+#endif /* HAVE_BPF_SKEL */
#endif
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 793d11832694..ae70fb56a057 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -84,8 +84,11 @@ static int __report_module(struct addr_location *al, u64 ip,
char filename[PATH_MAX];
__symbol__join_symfs(filename, sizeof(filename), dso__long_name(dso));
- mod = dwfl_report_elf(ui->dwfl, dso__short_name(dso), filename, -1,
- base, false);
+ /* Don't hang up on device files like /dev/dri/renderD128. */
+ if (is_regular_file(filename)) {
+ mod = dwfl_report_elf(ui->dwfl, dso__short_name(dso), filename, -1,
+ base, false);
+ }
}
if (!mod) {
char filename[PATH_MAX];