summaryrefslogtreecommitdiff
path: root/tools/lib/perf
diff options
context:
space:
mode:
Diffstat (limited to 'tools/lib/perf')
-rw-r--r--tools/lib/perf/.gitignore5
-rw-r--r--tools/lib/perf/Documentation/Makefile2
-rw-r--r--tools/lib/perf/Documentation/examples/sampling.c2
-rw-r--r--tools/lib/perf/Documentation/libperf-sampling.txt2
-rw-r--r--tools/lib/perf/Documentation/libperf.txt6
-rw-r--r--tools/lib/perf/Makefile41
-rw-r--r--tools/lib/perf/cpumap.c397
-rw-r--r--tools/lib/perf/evlist.c158
-rw-r--r--tools/lib/perf/evsel.c61
-rw-r--r--tools/lib/perf/include/internal/cpumap.h14
-rw-r--r--tools/lib/perf/include/internal/evlist.h7
-rw-r--r--tools/lib/perf/include/internal/evsel.h81
-rw-r--r--tools/lib/perf/include/internal/mmap.h3
-rw-r--r--tools/lib/perf/include/internal/rc_check.h113
-rw-r--r--tools/lib/perf/include/perf/core.h2
-rw-r--r--tools/lib/perf/include/perf/cpumap.h82
-rw-r--r--tools/lib/perf/include/perf/event.h67
-rw-r--r--tools/lib/perf/include/perf/evlist.h1
-rw-r--r--tools/lib/perf/include/perf/threadmap.h1
-rw-r--r--tools/lib/perf/libperf.map11
-rw-r--r--tools/lib/perf/mmap.c90
-rw-r--r--tools/lib/perf/tests/test-cpumap.c4
-rw-r--r--tools/lib/perf/tests/test-evlist.c6
-rw-r--r--tools/lib/perf/tests/test-evsel.c2
-rw-r--r--tools/lib/perf/threadmap.c17
25 files changed, 928 insertions, 247 deletions
diff --git a/tools/lib/perf/.gitignore b/tools/lib/perf/.gitignore
new file mode 100644
index 000000000000..0f5b4af63f62
--- /dev/null
+++ b/tools/lib/perf/.gitignore
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+libperf.pc
+libperf.so.*
+tests-shared
+tests-static
diff --git a/tools/lib/perf/Documentation/Makefile b/tools/lib/perf/Documentation/Makefile
index 972754082a85..573ca5b27556 100644
--- a/tools/lib/perf/Documentation/Makefile
+++ b/tools/lib/perf/Documentation/Makefile
@@ -121,7 +121,7 @@ install-man: all
$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \
$(INSTALL) -m 644 $(MAN_7) $(DESTDIR)$(man7dir);
-install-html:
+install-html: $(MAN_HTML)
$(call QUIET_INSTALL, html) \
$(INSTALL) -d -m 755 $(DESTDIR)$(htmldir); \
$(INSTALL) -m 644 $(MAN_HTML) $(DESTDIR)$(htmldir); \
diff --git a/tools/lib/perf/Documentation/examples/sampling.c b/tools/lib/perf/Documentation/examples/sampling.c
index 8e1a926a9cfe..bc142f0664b5 100644
--- a/tools/lib/perf/Documentation/examples/sampling.c
+++ b/tools/lib/perf/Documentation/examples/sampling.c
@@ -39,7 +39,7 @@ int main(int argc, char **argv)
libperf_init(libperf_print);
- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
if (!cpus) {
fprintf(stderr, "failed to create cpus\n");
return -1;
diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt
index d6ca24f6ef78..2378980fab8a 100644
--- a/tools/lib/perf/Documentation/libperf-sampling.txt
+++ b/tools/lib/perf/Documentation/libperf-sampling.txt
@@ -97,7 +97,7 @@ In this case we will monitor all the available CPUs:
[source,c]
--
- 42 cpus = perf_cpu_map__new(NULL);
+ 42 cpus = perf_cpu_map__new_online_cpus();
43 if (!cpus) {
44 fprintf(stderr, "failed to create cpus\n");
45 return -1;
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index a8f1a237931b..4072bc9b7670 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -37,16 +37,15 @@ SYNOPSIS
struct perf_cpu_map;
- struct perf_cpu_map *perf_cpu_map__dummy_new(void);
+ struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
- struct perf_cpu_map *perf_cpu_map__read(FILE *file);
struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
void perf_cpu_map__put(struct perf_cpu_map *map);
int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
- bool perf_cpu_map__empty(const struct perf_cpu_map *map);
+ bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
int perf_cpu_map__max(struct perf_cpu_map *map);
bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu);
@@ -211,6 +210,7 @@ SYNOPSIS
struct perf_record_time_conv;
struct perf_record_header_feature;
struct perf_record_compressed;
+ struct perf_record_compressed2;
--
DESCRIPTION
diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index d8cad124e4c5..7fbb50b74c00 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -39,29 +39,10 @@ libdir = $(prefix)/$(libdir_relative)
libdir_SQ = $(subst ','\'',$(libdir))
libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-ifeq ("$(origin V)", "command line")
- VERBOSE = $(V)
-endif
-ifndef VERBOSE
- VERBOSE = 0
-endif
-
-ifeq ($(VERBOSE),1)
- Q =
-else
- Q = @
-endif
-
TEST_ARGS := $(if $(V),-v)
-# Set compile option CFLAGS
-ifdef EXTRA_CFLAGS
- CFLAGS := $(EXTRA_CFLAGS)
-else
- CFLAGS := -g -Wall
-endif
-
INCLUDES = \
+-I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi \
-I$(srctree)/tools/lib/perf/include \
-I$(srctree)/tools/lib/ \
-I$(srctree)/tools/include \
@@ -70,11 +51,12 @@ INCLUDES = \
-I$(srctree)/tools/include/uapi
# Append required CFLAGS
-override CFLAGS += $(EXTRA_WARNINGS)
-override CFLAGS += -Werror -Wall
+override CFLAGS += -g -Werror -Wall
override CFLAGS += -fPIC
override CFLAGS += $(INCLUDES)
override CFLAGS += -fvisibility=hidden
+override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += $(EXTRA_CFLAGS)
all:
@@ -118,7 +100,16 @@ $(LIBAPI)-clean:
$(call QUIET_CLEAN, libapi)
$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
-$(LIBPERF_IN): FORCE
+uapi-asm := $(OUTPUT)arch/$(SRCARCH)/include/generated/uapi/asm
+ifeq ($(SRCARCH),arm64)
+ syscall-y := $(uapi-asm)/unistd_64.h
+endif
+uapi-asm-generic:
+ $(if $(syscall-y),\
+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-headers obj=$(uapi-asm) \
+ generic=include/uapi/asm-generic $(syscall-y),)
+
+$(LIBPERF_IN): uapi-asm-generic FORCE
$(Q)$(MAKE) $(build)=libperf
$(LIBPERF_A): $(LIBPERF_IN)
@@ -139,7 +130,7 @@ all: fixdep
clean: $(LIBAPI)-clean
$(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \
*.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd tests/*.o LIBPERF-CFLAGS $(LIBPERF_PC) \
- $(TESTS_STATIC) $(TESTS_SHARED)
+ $(TESTS_STATIC) $(TESTS_SHARED) $(syscall-y)
TESTS_IN = tests-in.o
@@ -188,7 +179,7 @@ install_lib: libs
cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ)
HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h
-INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h threadmap.h xyarray.h
+INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h rc_check.h threadmap.h xyarray.h
INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf
INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS))
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index 6cd0be7c1bb4..4160e7d2e120 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
#include <perf/cpumap.h>
#include <stdlib.h>
#include <linux/refcount.h>
@@ -9,25 +10,38 @@
#include <unistd.h>
#include <ctype.h>
#include <limits.h>
+#include "internal.h"
+#include <api/fs/fs.h>
-static struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
+#define MAX_NR_CPUS 4096
+
+void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus)
{
- struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
+ RC_CHK_ACCESS(map)->nr = nr_cpus;
+}
- if (cpus != NULL) {
+struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
+{
+ RC_STRUCT(perf_cpu_map) *cpus;
+ struct perf_cpu_map *result;
+
+ if (nr_cpus == 0)
+ return NULL;
+
+ cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
+ if (ADD_RC_CHK(result, cpus)) {
cpus->nr = nr_cpus;
refcount_set(&cpus->refcnt, 1);
-
}
- return cpus;
+ return result;
}
-struct perf_cpu_map *perf_cpu_map__dummy_new(void)
+struct perf_cpu_map *perf_cpu_map__new_any_cpu(void)
{
struct perf_cpu_map *cpus = perf_cpu_map__alloc(1);
if (cpus)
- cpus->map[0].cpu = -1;
+ RC_CHK_ACCESS(cpus)->map[0].cpu = -1;
return cpus;
}
@@ -35,48 +49,79 @@ struct perf_cpu_map *perf_cpu_map__dummy_new(void)
static void cpu_map__delete(struct perf_cpu_map *map)
{
if (map) {
- WARN_ONCE(refcount_read(&map->refcnt) != 0,
+ WARN_ONCE(refcount_read(perf_cpu_map__refcnt(map)) != 0,
"cpu_map refcnt unbalanced\n");
- free(map);
+ RC_CHK_FREE(map);
}
}
struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map)
{
- if (map)
- refcount_inc(&map->refcnt);
- return map;
+ struct perf_cpu_map *result;
+
+ if (RC_CHK_GET(result, map))
+ refcount_inc(perf_cpu_map__refcnt(map));
+
+ return result;
}
void perf_cpu_map__put(struct perf_cpu_map *map)
{
- if (map && refcount_dec_and_test(&map->refcnt))
- cpu_map__delete(map);
+ if (map) {
+ if (refcount_dec_and_test(perf_cpu_map__refcnt(map)))
+ cpu_map__delete(map);
+ else
+ RC_CHK_PUT(map);
+ }
}
-static struct perf_cpu_map *cpu_map__default_new(void)
+static struct perf_cpu_map *cpu_map__new_sysconf(void)
{
struct perf_cpu_map *cpus;
- int nr_cpus;
+ int nr_cpus, nr_cpus_conf;
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
if (nr_cpus < 0)
return NULL;
+ nr_cpus_conf = sysconf(_SC_NPROCESSORS_CONF);
+ if (nr_cpus != nr_cpus_conf) {
+ pr_warning("Number of online CPUs (%d) differs from the number configured (%d) the CPU map will only cover the first %d CPUs.",
+ nr_cpus, nr_cpus_conf, nr_cpus);
+ }
+
cpus = perf_cpu_map__alloc(nr_cpus);
if (cpus != NULL) {
int i;
for (i = 0; i < nr_cpus; ++i)
- cpus->map[i].cpu = i;
+ RC_CHK_ACCESS(cpus)->map[i].cpu = i;
}
return cpus;
}
-struct perf_cpu_map *perf_cpu_map__default_new(void)
+static struct perf_cpu_map *cpu_map__new_sysfs_online(void)
+{
+ struct perf_cpu_map *cpus = NULL;
+ char *buf = NULL;
+ size_t buf_len;
+
+ if (sysfs__read_str("devices/system/cpu/online", &buf, &buf_len) >= 0) {
+ cpus = perf_cpu_map__new(buf);
+ free(buf);
+ }
+ return cpus;
+}
+
+struct perf_cpu_map *perf_cpu_map__new_online_cpus(void)
{
- return cpu_map__default_new();
+ struct perf_cpu_map *cpus = cpu_map__new_sysfs_online();
+
+ if (cpus)
+ return cpus;
+
+ return cpu_map__new_sysconf();
}
@@ -87,6 +132,11 @@ static int cmp_cpu(const void *a, const void *b)
return cpu_a->cpu - cpu_b->cpu;
}
+static struct perf_cpu __perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
+{
+ return RC_CHK_ACCESS(cpus)->map[idx];
+}
+
static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus)
{
size_t payload_size = nr_cpus * sizeof(struct perf_cpu);
@@ -94,89 +144,21 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu
int i, j;
if (cpus != NULL) {
- memcpy(cpus->map, tmp_cpus, payload_size);
- qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu);
+ memcpy(RC_CHK_ACCESS(cpus)->map, tmp_cpus, payload_size);
+ qsort(RC_CHK_ACCESS(cpus)->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu);
/* Remove dups */
j = 0;
for (i = 0; i < nr_cpus; i++) {
- if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu)
- cpus->map[j++].cpu = cpus->map[i].cpu;
- }
- cpus->nr = j;
- assert(j <= nr_cpus);
- }
- return cpus;
-}
-
-struct perf_cpu_map *perf_cpu_map__read(FILE *file)
-{
- struct perf_cpu_map *cpus = NULL;
- int nr_cpus = 0;
- struct perf_cpu *tmp_cpus = NULL, *tmp;
- int max_entries = 0;
- int n, cpu, prev;
- char sep;
-
- sep = 0;
- prev = -1;
- for (;;) {
- n = fscanf(file, "%u%c", &cpu, &sep);
- if (n <= 0)
- break;
- if (prev >= 0) {
- int new_max = nr_cpus + cpu - prev - 1;
-
- WARN_ONCE(new_max >= MAX_NR_CPUS, "Perf can support %d CPUs. "
- "Consider raising MAX_NR_CPUS\n", MAX_NR_CPUS);
-
- if (new_max >= max_entries) {
- max_entries = new_max + MAX_NR_CPUS / 2;
- tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
- if (tmp == NULL)
- goto out_free_tmp;
- tmp_cpus = tmp;
+ if (i == 0 ||
+ __perf_cpu_map__cpu(cpus, i).cpu !=
+ __perf_cpu_map__cpu(cpus, i - 1).cpu) {
+ RC_CHK_ACCESS(cpus)->map[j++].cpu =
+ __perf_cpu_map__cpu(cpus, i).cpu;
}
-
- while (++prev < cpu)
- tmp_cpus[nr_cpus++].cpu = prev;
- }
- if (nr_cpus == max_entries) {
- max_entries += MAX_NR_CPUS;
- tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
- if (tmp == NULL)
- goto out_free_tmp;
- tmp_cpus = tmp;
}
-
- tmp_cpus[nr_cpus++].cpu = cpu;
- if (n == 2 && sep == '-')
- prev = cpu;
- else
- prev = -1;
- if (n == 1 || sep == '\n')
- break;
+ perf_cpu_map__set_nr(cpus, j);
+ assert(j <= nr_cpus);
}
-
- if (nr_cpus > 0)
- cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
- else
- cpus = cpu_map__default_new();
-out_free_tmp:
- free(tmp_cpus);
- return cpus;
-}
-
-static struct perf_cpu_map *cpu_map__read_all_cpu_map(void)
-{
- struct perf_cpu_map *cpus = NULL;
- FILE *onlnf;
-
- onlnf = fopen("/sys/devices/system/cpu/online", "r");
- if (!onlnf)
- return cpu_map__default_new();
-
- cpus = perf_cpu_map__read(onlnf);
- fclose(onlnf);
return cpus;
}
@@ -190,7 +172,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
int max_entries = 0;
if (!cpu_list)
- return cpu_map__read_all_cpu_map();
+ return perf_cpu_map__new_online_cpus();
/*
* must handle the case of empty cpumap to cover
@@ -203,8 +185,8 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
while (isdigit(*cpu_list)) {
p = NULL;
start_cpu = strtoul(cpu_list, &p, 0);
- if (start_cpu >= INT_MAX
- || (*p != '\0' && *p != ',' && *p != '-'))
+ if (start_cpu >= INT16_MAX
+ || (*p != '\0' && *p != ',' && *p != '-' && *p != '\n'))
goto invalid;
if (*p == '-') {
@@ -212,7 +194,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
p = NULL;
end_cpu = strtoul(cpu_list, &p, 0);
- if (end_cpu >= INT_MAX || (*p != '\0' && *p != ','))
+ if (end_cpu >= INT16_MAX || (*p != '\0' && *p != ',' && *p != '\n'))
goto invalid;
if (end_cpu < start_cpu)
@@ -227,17 +209,17 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
for (; start_cpu <= end_cpu; start_cpu++) {
/* check for duplicates */
for (i = 0; i < nr_cpus; i++)
- if (tmp_cpus[i].cpu == (int)start_cpu)
+ if (tmp_cpus[i].cpu == (int16_t)start_cpu)
goto invalid;
if (nr_cpus == max_entries) {
- max_entries += MAX_NR_CPUS;
+ max_entries += max(end_cpu - start_cpu + 1, 16UL);
tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto invalid;
tmp_cpus = tmp;
}
- tmp_cpus[nr_cpus++].cpu = (int)start_cpu;
+ tmp_cpus[nr_cpus++].cpu = (int16_t)start_cpu;
}
if (*p)
++p;
@@ -245,38 +227,69 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
cpu_list = p;
}
- if (nr_cpus > 0)
+ if (nr_cpus > 0) {
cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
- else if (*cpu_list != '\0')
- cpus = cpu_map__default_new();
- else
- cpus = perf_cpu_map__dummy_new();
+ } else if (*cpu_list != '\0') {
+ pr_warning("Unexpected characters at end of cpu list ('%s'), using online CPUs.",
+ cpu_list);
+ cpus = perf_cpu_map__new_online_cpus();
+ } else {
+ cpus = perf_cpu_map__new_any_cpu();
+ }
invalid:
free(tmp_cpus);
out:
return cpus;
}
+struct perf_cpu_map *perf_cpu_map__new_int(int cpu)
+{
+ struct perf_cpu_map *cpus = perf_cpu_map__alloc(1);
+
+ if (cpus)
+ RC_CHK_ACCESS(cpus)->map[0].cpu = cpu;
+
+ return cpus;
+}
+
+static int __perf_cpu_map__nr(const struct perf_cpu_map *cpus)
+{
+ return RC_CHK_ACCESS(cpus)->nr;
+}
+
struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
{
struct perf_cpu result = {
.cpu = -1
};
- if (cpus && idx < cpus->nr)
- return cpus->map[idx];
+ if (cpus && idx < __perf_cpu_map__nr(cpus))
+ return __perf_cpu_map__cpu(cpus, idx);
return result;
}
int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
{
- return cpus ? cpus->nr : 1;
+ return cpus ? __perf_cpu_map__nr(cpus) : 1;
}
-bool perf_cpu_map__empty(const struct perf_cpu_map *map)
+bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map)
{
- return map ? map->map[0].cpu == -1 : true;
+ return map ? __perf_cpu_map__cpu(map, 0).cpu == -1 : true;
+}
+
+bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map)
+{
+ if (!map)
+ return true;
+
+ return __perf_cpu_map__nr(map) == 1 && __perf_cpu_map__cpu(map, 0).cpu == -1;
+}
+
+bool perf_cpu_map__is_empty(const struct perf_cpu_map *map)
+{
+ return map == NULL;
}
int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
@@ -287,10 +300,10 @@ int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
return -1;
low = 0;
- high = cpus->nr;
+ high = __perf_cpu_map__nr(cpus);
while (low < high) {
int idx = (low + high) / 2;
- struct perf_cpu cpu_at_idx = cpus->map[idx];
+ struct perf_cpu cpu_at_idx = __perf_cpu_map__cpu(cpus, idx);
if (cpu_at_idx.cpu == cpu.cpu)
return idx;
@@ -309,14 +322,58 @@ bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
return perf_cpu_map__idx(cpus, cpu) != -1;
}
+bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, const struct perf_cpu_map *rhs)
+{
+ int nr;
+
+ if (lhs == rhs)
+ return true;
+
+ if (!lhs || !rhs)
+ return false;
+
+ nr = __perf_cpu_map__nr(lhs);
+ if (nr != __perf_cpu_map__nr(rhs))
+ return false;
+
+ for (int idx = 0; idx < nr; idx++) {
+ if (__perf_cpu_map__cpu(lhs, idx).cpu != __perf_cpu_map__cpu(rhs, idx).cpu)
+ return false;
+ }
+ return true;
+}
+
+bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map)
+{
+ return map && __perf_cpu_map__cpu(map, 0).cpu == -1;
+}
+
+struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map)
+{
+ struct perf_cpu cpu, result = {
+ .cpu = -1
+ };
+ int idx;
+
+ perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
+ result = cpu;
+ break;
+ }
+ return result;
+}
+
struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
{
struct perf_cpu result = {
.cpu = -1
};
- // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well.
- return map->nr > 0 ? map->map[map->nr - 1] : result;
+ if (!map)
+ return result;
+
+ // The CPUs are always sorted and nr is always > 0 as 0 length map is
+ // encoded as NULL.
+ return __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1);
}
/** Is 'b' a subset of 'a'. */
@@ -324,15 +381,15 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu
{
if (a == b || !b)
return true;
- if (!a || b->nr > a->nr)
+ if (!a || __perf_cpu_map__nr(b) > __perf_cpu_map__nr(a))
return false;
- for (int i = 0, j = 0; i < a->nr; i++) {
- if (a->map[i].cpu > b->map[j].cpu)
+ for (int i = 0, j = 0; i < __perf_cpu_map__nr(a); i++) {
+ if (__perf_cpu_map__cpu(a, i).cpu > __perf_cpu_map__cpu(b, j).cpu)
return false;
- if (a->map[i].cpu == b->map[j].cpu) {
+ if (__perf_cpu_map__cpu(a, i).cpu == __perf_cpu_map__cpu(b, j).cpu) {
j++;
- if (j == b->nr)
+ if (j == __perf_cpu_map__nr(b))
return true;
}
}
@@ -340,53 +397,101 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu
}
/*
- * Merge two cpumaps
+ * Merge two cpumaps.
*
- * orig either gets freed and replaced with a new map, or reused
- * with no reference count change (similar to "realloc")
- * other has its reference count increased.
+ * If 'other' is subset of '*orig', '*orig' keeps itself with no reference count
+ * change (similar to "realloc").
+ *
+ * If '*orig' is subset of 'other', '*orig' reuses 'other' with its reference
+ * count increased.
+ *
+ * Otherwise, '*orig' gets freed and replaced with a new map.
*/
-
-struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
- struct perf_cpu_map *other)
+int perf_cpu_map__merge(struct perf_cpu_map **orig, struct perf_cpu_map *other)
{
struct perf_cpu *tmp_cpus;
int tmp_len;
int i, j, k;
struct perf_cpu_map *merged;
- if (perf_cpu_map__is_subset(orig, other))
- return orig;
- if (perf_cpu_map__is_subset(other, orig)) {
- perf_cpu_map__put(orig);
- return perf_cpu_map__get(other);
+ if (perf_cpu_map__is_subset(*orig, other))
+ return 0;
+ if (perf_cpu_map__is_subset(other, *orig)) {
+ perf_cpu_map__put(*orig);
+ *orig = perf_cpu_map__get(other);
+ return 0;
}
- tmp_len = orig->nr + other->nr;
+ tmp_len = __perf_cpu_map__nr(*orig) + __perf_cpu_map__nr(other);
tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
if (!tmp_cpus)
- return NULL;
+ return -ENOMEM;
/* Standard merge algorithm from wikipedia */
i = j = k = 0;
- while (i < orig->nr && j < other->nr) {
- if (orig->map[i].cpu <= other->map[j].cpu) {
- if (orig->map[i].cpu == other->map[j].cpu)
+ while (i < __perf_cpu_map__nr(*orig) && j < __perf_cpu_map__nr(other)) {
+ if (__perf_cpu_map__cpu(*orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) {
+ if (__perf_cpu_map__cpu(*orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu)
j++;
- tmp_cpus[k++] = orig->map[i++];
+ tmp_cpus[k++] = __perf_cpu_map__cpu(*orig, i++);
} else
- tmp_cpus[k++] = other->map[j++];
+ tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
}
- while (i < orig->nr)
- tmp_cpus[k++] = orig->map[i++];
+ while (i < __perf_cpu_map__nr(*orig))
+ tmp_cpus[k++] = __perf_cpu_map__cpu(*orig, i++);
- while (j < other->nr)
- tmp_cpus[k++] = other->map[j++];
+ while (j < __perf_cpu_map__nr(other))
+ tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
assert(k <= tmp_len);
merged = cpu_map__trim_new(k, tmp_cpus);
free(tmp_cpus);
- perf_cpu_map__put(orig);
+ perf_cpu_map__put(*orig);
+ *orig = merged;
+ return 0;
+}
+
+struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
+ struct perf_cpu_map *other)
+{
+ int i, j, k;
+ struct perf_cpu_map *merged;
+
+ if (perf_cpu_map__is_subset(other, orig))
+ return perf_cpu_map__get(orig);
+ if (perf_cpu_map__is_subset(orig, other))
+ return perf_cpu_map__get(other);
+
+ i = j = k = 0;
+ while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
+ if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu)
+ i++;
+ else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu)
+ j++;
+ else { /* CPUs match. */
+ i++;
+ j++;
+ k++;
+ }
+ }
+ if (k == 0) /* Maps are completely disjoint. */
+ return NULL;
+
+ merged = perf_cpu_map__alloc(k);
+ if (!merged)
+ return NULL;
+ /* Entries are added to merged in sorted order, so no need to sort again. */
+ i = j = k = 0;
+ while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
+ if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu)
+ i++;
+ else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu)
+ j++;
+ else {
+ j++;
+ RC_CHK_ACCESS(merged)->map[k++] = __perf_cpu_map__cpu(orig, i++);
+ }
+ }
return merged;
}
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 61b637f29b82..3ed023f4b190 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -36,20 +36,88 @@ void perf_evlist__init(struct perf_evlist *evlist)
static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
struct perf_evsel *evsel)
{
+ if (perf_cpu_map__is_empty(evsel->cpus)) {
+ if (perf_cpu_map__is_empty(evsel->pmu_cpus)) {
+ /*
+ * Assume the unset PMU cpus were for a system-wide
+ * event, like a software or tracepoint.
+ */
+ evsel->pmu_cpus = perf_cpu_map__new_online_cpus();
+ }
+ if (evlist->has_user_cpus && !evsel->system_wide) {
+ /*
+ * Use the user CPUs unless the evsel is set to be
+ * system wide, such as the dummy event.
+ */
+ evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
+ } else {
+ /*
+ * System wide and other modes, assume the cpu map
+ * should be set to all PMU CPUs.
+ */
+ evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus);
+ }
+ }
/*
- * We already have cpus for evsel (via PMU sysfs) so
- * keep it, if there's no target cpu list defined.
+ * Avoid "any CPU"(-1) for uncore and PMUs that require a CPU, even if
+ * requested.
*/
- if (evsel->system_wide) {
+ if (evsel->requires_cpu && perf_cpu_map__has_any_cpu(evsel->cpus)) {
perf_cpu_map__put(evsel->cpus);
- evsel->cpus = perf_cpu_map__new(NULL);
- } else if (!evsel->own_cpus || evlist->has_user_cpus ||
- (!evsel->requires_cpu && perf_cpu_map__empty(evlist->user_requested_cpus))) {
+ evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus);
+ }
+
+ /*
+ * Globally requested CPUs replace user requested unless the evsel is
+ * set to be system wide.
+ */
+ if (evlist->has_user_cpus && !evsel->system_wide) {
+ assert(!perf_cpu_map__has_any_cpu(evlist->user_requested_cpus));
+ if (!perf_cpu_map__equal(evsel->cpus, evlist->user_requested_cpus)) {
+ perf_cpu_map__put(evsel->cpus);
+ evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
+ }
+ }
+
+ /* Ensure cpus only references valid PMU CPUs. */
+ if (!perf_cpu_map__has_any_cpu(evsel->cpus) &&
+ !perf_cpu_map__is_subset(evsel->pmu_cpus, evsel->cpus)) {
+ struct perf_cpu_map *tmp = perf_cpu_map__intersect(evsel->pmu_cpus, evsel->cpus);
+
perf_cpu_map__put(evsel->cpus);
- evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
- } else if (evsel->cpus != evsel->own_cpus) {
+ evsel->cpus = tmp;
+ }
+
+ /*
+ * Was event requested on all the PMU's CPUs but the user requested is
+ * any CPU (-1)? If so switch to using any CPU (-1) to reduce the number
+ * of events.
+ */
+ if (!evsel->system_wide &&
+ !evsel->requires_cpu &&
+ perf_cpu_map__equal(evsel->cpus, evsel->pmu_cpus) &&
+ perf_cpu_map__has_any_cpu(evlist->user_requested_cpus)) {
perf_cpu_map__put(evsel->cpus);
- evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
+ evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
+ }
+
+ /* Sanity check assert before the evsel is potentially removed. */
+ assert(!evsel->requires_cpu || !perf_cpu_map__has_any_cpu(evsel->cpus));
+
+ /*
+ * Empty cpu lists would eventually get opened as "any" so remove
+ * genuinely empty ones before they're opened in the wrong place.
+ */
+ if (perf_cpu_map__is_empty(evsel->cpus)) {
+ struct perf_evsel *next = perf_evlist__next(evlist, evsel);
+
+ perf_evlist__remove(evlist, evsel);
+ /* Keep idx contiguous */
+ if (next)
+ list_for_each_entry_from(next, &evlist->entries, node)
+ next->idx--;
+
+ return;
}
if (evsel->system_wide) {
@@ -60,16 +128,20 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
evsel->threads = perf_thread_map__get(evlist->threads);
}
- evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
+ perf_cpu_map__merge(&evlist->all_cpus, evsel->cpus);
}
static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
- struct perf_evsel *evsel;
+ struct perf_evsel *evsel, *n;
evlist->needs_map_propagation = true;
- perf_evlist__for_each_evsel(evlist, evsel)
+ /* Clear the all_cpus set which will be merged into during propagation. */
+ perf_cpu_map__put(evlist->all_cpus);
+ evlist->all_cpus = NULL;
+
+ list_for_each_entry_safe(evsel, n, &evlist->entries, node)
__perf_evlist__propagate_maps(evlist, evsel);
}
@@ -233,10 +305,10 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist)
static void perf_evlist__id_hash(struct perf_evlist *evlist,
struct perf_evsel *evsel,
- int cpu, int thread, u64 id)
+ int cpu_map_idx, int thread, u64 id)
{
int hash;
- struct perf_sample_id *sid = SID(evsel, cpu, thread);
+ struct perf_sample_id *sid = SID(evsel, cpu_map_idx, thread);
sid->id = id;
sid->evsel = evsel;
@@ -254,21 +326,27 @@ void perf_evlist__reset_id_hash(struct perf_evlist *evlist)
void perf_evlist__id_add(struct perf_evlist *evlist,
struct perf_evsel *evsel,
- int cpu, int thread, u64 id)
+ int cpu_map_idx, int thread, u64 id)
{
- perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
+ if (!SID(evsel, cpu_map_idx, thread))
+ return;
+
+ perf_evlist__id_hash(evlist, evsel, cpu_map_idx, thread, id);
evsel->id[evsel->ids++] = id;
}
int perf_evlist__id_add_fd(struct perf_evlist *evlist,
struct perf_evsel *evsel,
- int cpu, int thread, int fd)
+ int cpu_map_idx, int thread, int fd)
{
u64 read_data[4] = { 0, };
int id_idx = 1; /* The first entry is the counter value */
u64 id;
int ret;
+ if (!SID(evsel, cpu_map_idx, thread))
+ return -1;
+
ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
if (!ret)
goto add;
@@ -297,7 +375,7 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,
id = read_data[id_idx];
add:
- perf_evlist__id_add(evlist, evsel, cpu, thread, id);
+ perf_evlist__id_add(evlist, evsel, cpu_map_idx, thread, id);
return 0;
}
@@ -604,7 +682,7 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
/* One for each CPU */
nr_mmaps = perf_cpu_map__nr(evlist->all_cpus);
- if (perf_cpu_map__empty(evlist->all_cpus)) {
+ if (perf_cpu_map__has_any_cpu_or_is_empty(evlist->all_cpus)) {
/* Plus one for each thread */
nr_mmaps += perf_thread_map__nr(evlist->threads);
/* Minus the per-thread CPU (-1) */
@@ -638,7 +716,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
return -ENOMEM;
- if (perf_cpu_map__empty(cpus))
+ if (perf_cpu_map__has_any_cpu_or_is_empty(cpus))
return mmap_per_thread(evlist, ops, mp);
return mmap_per_cpu(evlist, ops, mp);
@@ -687,15 +765,14 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,
void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader)
{
- struct perf_evsel *first, *last, *evsel;
-
- first = list_first_entry(list, struct perf_evsel, node);
- last = list_last_entry(list, struct perf_evsel, node);
-
- leader->nr_members = last->idx - first->idx + 1;
+ struct perf_evsel *evsel;
+ int n = 0;
- __perf_evlist__for_each_entry(list, evsel)
+ __perf_evlist__for_each_entry(list, evsel) {
evsel->leader = leader;
+ n++;
+ }
+ leader->nr_members = n;
}
void perf_evlist__set_leader(struct perf_evlist *evlist)
@@ -704,7 +781,32 @@ void perf_evlist__set_leader(struct perf_evlist *evlist)
struct perf_evsel *first = list_entry(evlist->entries.next,
struct perf_evsel, node);
- evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
__perf_evlist__set_leader(&evlist->entries, first);
}
}
+
+int perf_evlist__nr_groups(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ int nr_groups = 0;
+
+ perf_evlist__for_each_evsel(evlist, evsel) {
+ /*
+ * evsels by default have a nr_members of 1, and they are their
+ * own leader. If the nr_members is >1 then this is an
+ * indication of a group.
+ */
+ if (evsel->leader == evsel && evsel->nr_members > 1)
+ nr_groups++;
+ }
+ return nr_groups;
+}
+
+void perf_evlist__go_system_wide(struct perf_evlist *evlist, struct perf_evsel *evsel)
+{
+ if (!evsel->system_wide) {
+ evsel->system_wide = true;
+ if (evlist->needs_map_propagation)
+ __perf_evlist__propagate_maps(evlist, evsel);
+ }
+}
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index 8b51b008a81f..13a307fc75ae 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -5,6 +5,7 @@
#include <perf/evsel.h>
#include <perf/cpumap.h>
#include <perf/threadmap.h>
+#include <linux/hash.h>
#include <linux/list.h>
#include <internal/evsel.h>
#include <linux/zalloc.h>
@@ -23,6 +24,7 @@ void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr,
int idx)
{
INIT_LIST_HEAD(&evsel->node);
+ INIT_LIST_HEAD(&evsel->per_stream_periods);
evsel->attr = *attr;
evsel->idx = idx;
evsel->leader = evsel;
@@ -38,8 +40,19 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr)
return evsel;
}
+void perf_evsel__exit(struct perf_evsel *evsel)
+{
+ assert(evsel->fd == NULL); /* If not fds were not closed. */
+ assert(evsel->mmap == NULL); /* If not munmap wasn't called. */
+ assert(evsel->sample_id == NULL); /* If not free_id wasn't called. */
+ perf_cpu_map__put(evsel->cpus);
+ perf_cpu_map__put(evsel->pmu_cpus);
+ perf_thread_map__put(evsel->threads);
+}
+
void perf_evsel__delete(struct perf_evsel *evsel)
{
+ perf_evsel__exit(evsel);
free(evsel);
}
@@ -120,7 +133,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
static struct perf_cpu_map *empty_cpu_map;
if (empty_cpu_map == NULL) {
- empty_cpu_map = perf_cpu_map__dummy_new();
+ empty_cpu_map = perf_cpu_map__new_any_cpu();
if (empty_cpu_map == NULL)
return -ENOMEM;
}
@@ -531,10 +544,56 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
void perf_evsel__free_id(struct perf_evsel *evsel)
{
+ struct perf_sample_id_period *pos, *n;
+
xyarray__delete(evsel->sample_id);
evsel->sample_id = NULL;
zfree(&evsel->id);
evsel->ids = 0;
+
+ perf_evsel_for_each_per_thread_period_safe(evsel, n, pos) {
+ list_del_init(&pos->node);
+ free(pos);
+ }
+}
+
+bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel)
+{
+ return (evsel->attr.sample_type & PERF_SAMPLE_READ) &&
+ (evsel->attr.sample_type & PERF_SAMPLE_TID) &&
+ evsel->attr.inherit;
+}
+
+u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, bool per_thread)
+{
+ struct hlist_head *head;
+ struct perf_sample_id_period *res;
+ int hash;
+
+ if (!per_thread)
+ return &sid->period;
+
+ hash = hash_32(tid, PERF_SAMPLE_ID__HLIST_BITS);
+ head = &sid->periods[hash];
+
+ hlist_for_each_entry(res, head, hnode)
+ if (res->tid == tid)
+ return &res->period;
+
+ if (sid->evsel == NULL)
+ return NULL;
+
+ res = zalloc(sizeof(struct perf_sample_id_period));
+ if (res == NULL)
+ return NULL;
+
+ INIT_LIST_HEAD(&res->node);
+ res->tid = tid;
+
+ list_add_tail(&res->node, &sid->evsel->per_stream_periods);
+ hlist_add_head(&res->hnode, &sid->periods[hash]);
+
+ return &res->period;
}
void perf_counts_values__scale(struct perf_counts_values *count,
diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h
index 35dd29642296..e2be2d17c32b 100644
--- a/tools/lib/perf/include/internal/cpumap.h
+++ b/tools/lib/perf/include/internal/cpumap.h
@@ -4,6 +4,7 @@
#include <linux/refcount.h>
#include <perf/cpumap.h>
+#include <internal/rc_check.h>
/**
* A sized, reference counted, sorted array of integers representing CPU
@@ -12,7 +13,7 @@
* gaps if CPU numbers were used. For events associated with a pid, rather than
* a CPU, a single dummy map with an entry of -1 is used.
*/
-struct perf_cpu_map {
+DECLARE_RC_STRUCT(perf_cpu_map) {
refcount_t refcnt;
/** Length of the map array. */
int nr;
@@ -20,11 +21,14 @@ struct perf_cpu_map {
struct perf_cpu map[];
};
-#ifndef MAX_NR_CPUS
-#define MAX_NR_CPUS 2048
-#endif
-
+struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus);
int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b);
+void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus);
+
+static inline refcount_t *perf_cpu_map__refcnt(struct perf_cpu_map *map)
+{
+ return &RC_CHK_ACCESS(map)->refcnt;
+}
#endif /* __LIBPERF_INTERNAL_CPUMAP_H */
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
index 850f07070036..f43bdb9b6227 100644
--- a/tools/lib/perf/include/internal/evlist.h
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -17,7 +17,6 @@ struct perf_mmap_param;
struct perf_evlist {
struct list_head entries;
int nr_entries;
- int nr_groups;
bool has_user_cpus;
bool needs_map_propagation;
/**
@@ -127,13 +126,15 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist);
void perf_evlist__id_add(struct perf_evlist *evlist,
struct perf_evsel *evsel,
- int cpu, int thread, u64 id);
+ int cpu_map_idx, int thread, u64 id);
int perf_evlist__id_add_fd(struct perf_evlist *evlist,
struct perf_evsel *evsel,
- int cpu, int thread, int fd);
+ int cpu_map_idx, int thread, int fd);
void perf_evlist__reset_id_hash(struct perf_evlist *evlist);
void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader);
+
+void perf_evlist__go_system_wide(struct perf_evlist *evlist, struct perf_evsel *evsel);
#endif /* __LIBPERF_INTERNAL_EVLIST_H */
diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h
index a99a75d9e78f..fefe64ba5e26 100644
--- a/tools/lib/perf/include/internal/evsel.h
+++ b/tools/lib/perf/include/internal/evsel.h
@@ -11,6 +11,32 @@
struct perf_thread_map;
struct xyarray;
+/**
+ * The per-thread accumulated period storage node.
+ */
+struct perf_sample_id_period {
+ struct list_head node;
+ struct hlist_node hnode;
+ /* Holds total ID period value for PERF_SAMPLE_READ processing. */
+ u64 period;
+ /* The TID that the values belongs to */
+ u32 tid;
+};
+
+/**
+ * perf_evsel_for_each_per_thread_period_safe - safely iterate thru all the
+ * per_stream_periods
+ * @evlist:perf_evsel instance to iterate
+ * @item: struct perf_sample_id_period iterator
+ * @tmp: struct perf_sample_id_period temp iterator
+ */
+#define perf_evsel_for_each_per_thread_period_safe(evsel, tmp, item) \
+ list_for_each_entry_safe(item, tmp, &(evsel)->per_stream_periods, node)
+
+
+#define PERF_SAMPLE_ID__HLIST_BITS 4
+#define PERF_SAMPLE_ID__HLIST_SIZE (1 << PERF_SAMPLE_ID__HLIST_BITS)
+
/*
* Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are
* more than one entry in the evlist.
@@ -34,15 +60,46 @@ struct perf_sample_id {
pid_t machine_pid;
struct perf_cpu vcpu;
- /* Holds total ID period value for PERF_SAMPLE_READ processing. */
- u64 period;
+ /*
+ * Per-thread, and global event counts are mutually exclusive:
+ * Whilst it is possible to combine events into a group with differing
+ * values of PERF_SAMPLE_READ, it is not valid to have inconsistent
+ * values for `inherit`. Therefore it is not possible to have a
+ * situation where a per-thread event is sampled as a global event;
+ * all !inherit groups are global, and all groups where the sampling
+ * event is inherit + PERF_SAMPLE_READ will be per-thread. Any event
+ * that is part of such a group that is inherit but not PERF_SAMPLE_READ
+ * will be read as per-thread. If such an event can also trigger a
+ * sample (such as with sample_period > 0) then it will not cause
+ * `read_format` to be included in its PERF_RECORD_SAMPLE, and
+ * therefore will not expose the per-thread group members as global.
+ */
+ union {
+ /*
+ * Holds total ID period value for PERF_SAMPLE_READ processing
+ * (when period is not per-thread).
+ */
+ u64 period;
+ /*
+ * Holds total ID period value for PERF_SAMPLE_READ processing
+ * (when period is per-thread).
+ */
+ struct hlist_head periods[PERF_SAMPLE_ID__HLIST_SIZE];
+ };
};
struct perf_evsel {
struct list_head node;
struct perf_event_attr attr;
+ /** The commonly used cpu map of CPUs the event should be opened upon, etc. */
struct perf_cpu_map *cpus;
- struct perf_cpu_map *own_cpus;
+ /**
+ * The cpu map read from the PMU. For core PMUs this is the list of all
+ * CPUs the event can be opened upon. For other PMUs this is the default
+ * cpu map for opening the event on, for example, the first CPU on a
+ * socket for an uncore event.
+ */
+ struct perf_cpu_map *pmu_cpus;
struct perf_thread_map *threads;
struct xyarray *fd;
struct xyarray *mmap;
@@ -51,13 +108,17 @@ struct perf_evsel {
u32 ids;
struct perf_evsel *leader;
+ /* For events where the read_format value is per-thread rather than
+ * global, stores the per-thread cumulative period */
+ struct list_head per_stream_periods;
+
/* parse modifier helper */
int nr_members;
/*
* system_wide is for events that need to be on every CPU, irrespective
- * of user requested CPUs or threads. Map propagation will set cpus to
- * this event's own_cpus, whereby they will contribute to evlist
- * all_cpus.
+ * of user requested CPUs or threads. Tha main example of this is the
+ * dummy event. Map propagation will set cpus for this event to all CPUs
+ * as software PMU events like dummy, have a CPU map that is empty.
*/
bool system_wide;
/*
@@ -65,11 +126,14 @@ struct perf_evsel {
* i.e. it cannot be the 'any CPU' value of -1.
*/
bool requires_cpu;
+ /** Is the PMU for the event a core one? Effects the handling of own_cpus. */
+ bool is_pmu_core;
int idx;
};
void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr,
int idx);
+void perf_evsel__exit(struct perf_evsel *evsel);
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
void perf_evsel__close_fd(struct perf_evsel *evsel);
void perf_evsel__free_fd(struct perf_evsel *evsel);
@@ -79,4 +143,9 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter);
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
void perf_evsel__free_id(struct perf_evsel *evsel);
+bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel);
+
+u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid,
+ bool per_thread);
+
#endif /* __LIBPERF_INTERNAL_EVSEL_H */
diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h
index 5a062af8e9d8..5f08cab61ece 100644
--- a/tools/lib/perf/include/internal/mmap.h
+++ b/tools/lib/perf/include/internal/mmap.h
@@ -33,7 +33,8 @@ struct perf_mmap {
bool overwrite;
u64 flush;
libperf_unmap_cb_t unmap_cb;
- char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+ void *event_copy;
+ size_t event_copy_sz;
struct perf_mmap *next;
};
diff --git a/tools/lib/perf/include/internal/rc_check.h b/tools/lib/perf/include/internal/rc_check.h
new file mode 100644
index 000000000000..f80ddfc80129
--- /dev/null
+++ b/tools/lib/perf/include/internal/rc_check.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __LIBPERF_INTERNAL_RC_CHECK_H
+#define __LIBPERF_INTERNAL_RC_CHECK_H
+
+#include <stdlib.h>
+#include <linux/zalloc.h>
+
+/*
+ * Enable reference count checking implicitly with leak checking, which is
+ * integrated into address sanitizer.
+ */
+#if defined(__SANITIZE_ADDRESS__) || defined(LEAK_SANITIZER) || defined(ADDRESS_SANITIZER)
+#define REFCNT_CHECKING 1
+#elif defined(__has_feature)
+#if __has_feature(address_sanitizer) || __has_feature(leak_sanitizer)
+#define REFCNT_CHECKING 1
+#endif
+#endif
+
+/*
+ * Shared reference count checking macros.
+ *
+ * Reference count checking is an approach to sanitizing the use of reference
+ * counted structs. It leverages address and leak sanitizers to make sure gets
+ * are paired with a put. Reference count checking adds a malloc-ed layer of
+ * indirection on a get, and frees it on a put. A missed put will be reported as
+ * a memory leak. A double put will be reported as a double free. Accessing
+ * after a put will cause a use-after-free and/or a segfault.
+ */
+
+#ifndef REFCNT_CHECKING
+/* Replaces "struct foo" so that the pointer may be interposed. */
+#define DECLARE_RC_STRUCT(struct_name) \
+ struct struct_name
+
+/* Declare a reference counted struct variable. */
+#define RC_STRUCT(struct_name) struct struct_name
+
+/*
+ * Interpose the indirection. Result will hold the indirection and object is the
+ * reference counted struct.
+ */
+#define ADD_RC_CHK(result, object) (result = object, object)
+
+/* Strip the indirection layer. */
+#define RC_CHK_ACCESS(object) object
+
+/* Frees the object and the indirection layer. */
+#define RC_CHK_FREE(object) free(object)
+
+/* A get operation adding the indirection layer. */
+#define RC_CHK_GET(result, object) ADD_RC_CHK(result, object)
+
+/* A put operation removing the indirection layer. */
+#define RC_CHK_PUT(object) {}
+
+/* Pointer equality when the indirection may or may not be there. */
+#define RC_CHK_EQUAL(object1, object2) (object1 == object2)
+
+#else
+
+/* Replaces "struct foo" so that the pointer may be interposed. */
+#define DECLARE_RC_STRUCT(struct_name) \
+ struct original_##struct_name; \
+ struct struct_name { \
+ struct original_##struct_name *orig; \
+ }; \
+ struct original_##struct_name
+
+/* Declare a reference counted struct variable. */
+#define RC_STRUCT(struct_name) struct original_##struct_name
+
+/*
+ * Interpose the indirection. Result will hold the indirection and object is the
+ * reference counted struct.
+ */
+#define ADD_RC_CHK(result, object) \
+ ( \
+ object ? (result = malloc(sizeof(*result)), \
+ result ? (result->orig = object, result) \
+ : (result = NULL, NULL)) \
+ : (result = NULL, NULL) \
+ )
+
+/* Strip the indirection layer. */
+#define RC_CHK_ACCESS(object) object->orig
+
+/* Frees the object and the indirection layer. */
+#define RC_CHK_FREE(object) \
+ do { \
+ zfree(&object->orig); \
+ free(object); \
+ } while(0)
+
+/* A get operation adding the indirection layer. */
+#define RC_CHK_GET(result, object) ADD_RC_CHK(result, (object ? object->orig : NULL))
+
+/* A put operation removing the indirection layer. */
+#define RC_CHK_PUT(object) \
+ do { \
+ if (object) { \
+ object->orig = NULL; \
+ free(object); \
+ } \
+ } while(0)
+
+/* Pointer equality when the indirection may or may not be there. */
+#define RC_CHK_EQUAL(object1, object2) (object1 == object2 || \
+ (object1 && object2 && object1->orig == object2->orig))
+
+#endif
+
+#endif /* __LIBPERF_INTERNAL_RC_CHECK_H */
diff --git a/tools/lib/perf/include/perf/core.h b/tools/lib/perf/include/perf/core.h
index a3f6d68edad7..06cc132d88cf 100644
--- a/tools/lib/perf/include/perf/core.h
+++ b/tools/lib/perf/include/perf/core.h
@@ -5,7 +5,7 @@
#include <stdarg.h>
#ifndef LIBPERF_API
-#define LIBPERF_API __attribute__((visibility("default")))
+#define LIBPERF_API extern __attribute__((visibility("default")))
#endif
enum libperf_print_level {
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 3f43f770cdac..58cc5c5fa47c 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -3,35 +3,101 @@
#define __LIBPERF_CPUMAP_H
#include <perf/core.h>
-#include <stdio.h>
#include <stdbool.h>
+#include <stdint.h>
/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
struct perf_cpu {
- int cpu;
+ int16_t cpu;
+};
+
+struct perf_cache {
+ int cache_lvl;
+ int cache;
};
struct perf_cpu_map;
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
+/**
+ * perf_cpu_map__new_any_cpu - a map with a singular "any CPU"/dummy -1 value.
+ */
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
+/**
+ * perf_cpu_map__new_online_cpus - a map read from
+ * /sys/devices/system/cpu/online if
+ * available. If reading wasn't possible a map
+ * is created using the online processors
+ * assuming the first 'n' processors are all
+ * online.
+ */
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void);
+/**
+ * perf_cpu_map__new - create a map from the given cpu_list such as "0-7". If no
+ * cpu_list argument is provided then
+ * perf_cpu_map__new_online_cpus is returned.
+ */
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
+/** perf_cpu_map__new_int - create a map with the one given cpu. */
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_int(int cpu);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
- struct perf_cpu_map *other);
+LIBPERF_API int perf_cpu_map__merge(struct perf_cpu_map **orig,
+ struct perf_cpu_map *other);
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
+ struct perf_cpu_map *other);
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if index
+ * is invalid.
+ */
LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
+/**
+ * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a
+ * cpu of -1 for an invalid index, this makes an empty map
+ * look like it contains the "any CPU"/dummy value. Otherwise
+ * the result is the number CPUs in the map plus one if the
+ * "any CPU"/dummy value is present.
+ */
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
-LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
+ */
+LIBPERF_API bool perf_cpu_map__has_any_cpu_or_is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__is_any_cpu_or_is_empty - is map either empty or the "any CPU"/dummy value.
+ */
+LIBPERF_API bool perf_cpu_map__is_any_cpu_or_is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__is_empty - does the map contain no values and it doesn't
+ * contain the special "any CPU"/dummy value.
+ */
+LIBPERF_API bool perf_cpu_map__is_empty(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__min - the minimum CPU value or -1 if empty or just the "any CPU"/dummy value.
+ */
+LIBPERF_API struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map);
+/**
+ * perf_cpu_map__max - the maximum CPU value or -1 if empty or just the "any CPU"/dummy value.
+ */
LIBPERF_API struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map);
LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
+LIBPERF_API bool perf_cpu_map__equal(const struct perf_cpu_map *lhs,
+ const struct perf_cpu_map *rhs);
+/**
+ * perf_cpu_map__any_cpu - Does the map contain the "any CPU"/dummy -1 value?
+ */
+LIBPERF_API bool perf_cpu_map__has_any_cpu(const struct perf_cpu_map *map);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \
for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \
(idx) < perf_cpu_map__nr(cpus); \
(idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx))
+#define perf_cpu_map__for_each_cpu_skip_any(_cpu, idx, cpus) \
+ for ((idx) = 0, (_cpu) = perf_cpu_map__cpu(cpus, idx); \
+ (idx) < perf_cpu_map__nr(cpus); \
+ (idx)++, (_cpu) = perf_cpu_map__cpu(cpus, idx)) \
+ if ((_cpu).cpu != -1)
+
#define perf_cpu_map__for_each_idx(idx, cpus) \
for ((idx) = 0; (idx) < perf_cpu_map__nr(cpus); (idx)++)
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index ad47d7b31046..43a8cb04994f 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -70,11 +70,19 @@ struct perf_record_lost {
__u64 lost;
};
+#define PERF_RECORD_MISC_LOST_SAMPLES_BPF (1 << 15)
+
struct perf_record_lost_samples {
struct perf_event_header header;
__u64 lost;
};
+#define MAX_ID_HDR_ENTRIES 6
+struct perf_record_lost_samples_and_ids {
+ struct perf_record_lost_samples lost;
+ __u64 sample_ids[MAX_ID_HDR_ENTRIES];
+};
+
/*
* PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID | PERF_FORMAT_LOST
*/
@@ -143,12 +151,34 @@ struct perf_record_switch {
__u32 next_prev_tid;
};
+struct perf_record_callchain_deferred {
+ struct perf_event_header header;
+ /*
+ * This is to match kernel and (deferred) user stacks together.
+ * The kernel part will be in the sample callchain array after
+ * the PERF_CONTEXT_USER_DEFERRED entry.
+ */
+ __u64 cookie;
+ __u64 nr;
+ __u64 ips[];
+};
+
struct perf_record_header_attr {
struct perf_event_header header;
struct perf_event_attr attr;
- __u64 id[];
+ /*
+ * Array of u64 id follows here but we cannot use a flexible array
+ * because size of attr in the data can be different then current
+ * version. Please use perf_record_header_attr_id() below.
+ *
+ * __u64 id[]; // do not use this
+ */
};
+/* Returns the pointer to id array based on the actual attr size. */
+#define perf_record_header_attr_id(evt) \
+ ((void *)&(evt)->attr.attr + (evt)->attr.attr.size)
+
enum {
PERF_CPU_MAP__CPUS = 0,
PERF_CPU_MAP__MASK = 1,
@@ -273,6 +303,7 @@ struct perf_record_header_event_type {
struct perf_record_header_tracing_data {
struct perf_event_header header;
__u32 size;
+ __u32 pad;
};
#define PERF_RECORD_MISC_BUILD_ID_SIZE (1 << 15)
@@ -378,7 +409,8 @@ enum {
PERF_STAT_CONFIG_TERM__AGGR_MODE = 0,
PERF_STAT_CONFIG_TERM__INTERVAL = 1,
PERF_STAT_CONFIG_TERM__SCALE = 2,
- PERF_STAT_CONFIG_TERM__MAX = 3,
+ PERF_STAT_CONFIG_TERM__AGGR_LEVEL = 3,
+ PERF_STAT_CONFIG_TERM__MAX = 4,
};
struct perf_record_stat_config_entry {
@@ -438,6 +470,32 @@ struct perf_record_compressed {
char data[];
};
+/*
+ * `header.size` includes the padding we are going to add while writing the record.
+ * `data_size` only includes the size of `data[]` itself.
+ */
+struct perf_record_compressed2 {
+ struct perf_event_header header;
+ __u64 data_size;
+ char data[];
+};
+
+#define BPF_METADATA_KEY_LEN 64
+#define BPF_METADATA_VALUE_LEN 256
+#define BPF_PROG_NAME_LEN KSYM_NAME_LEN
+
+struct perf_record_bpf_metadata_entry {
+ char key[BPF_METADATA_KEY_LEN];
+ char value[BPF_METADATA_VALUE_LEN];
+};
+
+struct perf_record_bpf_metadata {
+ struct perf_event_header header;
+ char prog_name[BPF_PROG_NAME_LEN];
+ __u64 nr_entries;
+ struct perf_record_bpf_metadata_entry entries[];
+};
+
enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_USER_TYPE_START = 64,
PERF_RECORD_HEADER_ATTR = 64,
@@ -459,6 +517,8 @@ enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_HEADER_FEATURE = 80,
PERF_RECORD_COMPRESSED = 81,
PERF_RECORD_FINISHED_INIT = 82,
+ PERF_RECORD_COMPRESSED2 = 83,
+ PERF_RECORD_BPF_METADATA = 84,
PERF_RECORD_HEADER_MAX
};
@@ -475,6 +535,7 @@ union perf_event {
struct perf_record_read read;
struct perf_record_throttle throttle;
struct perf_record_sample sample;
+ struct perf_record_callchain_deferred callchain_deferred;
struct perf_record_bpf_event bpf;
struct perf_record_ksymbol ksymbol;
struct perf_record_text_poke_event text_poke;
@@ -499,6 +560,8 @@ union perf_event {
struct perf_record_time_conv time_conv;
struct perf_record_header_feature feat;
struct perf_record_compressed pack;
+ struct perf_record_compressed2 pack2;
+ struct perf_record_bpf_metadata bpf_metadata;
};
#endif /* __LIBPERF_EVENT_H */
diff --git a/tools/lib/perf/include/perf/evlist.h b/tools/lib/perf/include/perf/evlist.h
index 9ca399d49bb4..e894b770779e 100644
--- a/tools/lib/perf/include/perf/evlist.h
+++ b/tools/lib/perf/include/perf/evlist.h
@@ -47,4 +47,5 @@ LIBPERF_API struct perf_mmap *perf_evlist__next_mmap(struct perf_evlist *evlist,
(pos) = perf_evlist__next_mmap((evlist), (pos), overwrite))
LIBPERF_API void perf_evlist__set_leader(struct perf_evlist *evlist);
+LIBPERF_API int perf_evlist__nr_groups(struct perf_evlist *evlist);
#endif /* __LIBPERF_EVLIST_H */
diff --git a/tools/lib/perf/include/perf/threadmap.h b/tools/lib/perf/include/perf/threadmap.h
index 8b40e7777cea..44deb815b817 100644
--- a/tools/lib/perf/include/perf/threadmap.h
+++ b/tools/lib/perf/include/perf/threadmap.h
@@ -14,6 +14,7 @@ LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int idx,
LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int idx);
LIBPERF_API int perf_thread_map__nr(struct perf_thread_map *threads);
LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx);
+LIBPERF_API int perf_thread_map__idx(struct perf_thread_map *map, pid_t pid);
LIBPERF_API struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
LIBPERF_API void perf_thread_map__put(struct perf_thread_map *map);
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 190b56ae923a..fdd8304fe9d0 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -1,15 +1,18 @@
LIBPERF_0.0.1 {
global:
libperf_init;
- perf_cpu_map__dummy_new;
- perf_cpu_map__default_new;
+ perf_cpu_map__new_any_cpu;
+ perf_cpu_map__new_online_cpus;
perf_cpu_map__get;
perf_cpu_map__put;
perf_cpu_map__new;
- perf_cpu_map__read;
perf_cpu_map__nr;
perf_cpu_map__cpu;
- perf_cpu_map__empty;
+ perf_cpu_map__has_any_cpu_or_is_empty;
+ perf_cpu_map__is_any_cpu_or_is_empty;
+ perf_cpu_map__is_empty;
+ perf_cpu_map__has_any_cpu;
+ perf_cpu_map__min;
perf_cpu_map__max;
perf_cpu_map__has;
perf_thread_map__new_array;
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index 0d1634cedf44..ec124eb0ec0a 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -19,6 +19,7 @@
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
bool overwrite, libperf_unmap_cb_t unmap_cb)
{
+ /* Assume fields were zero initialized. */
map->fd = -1;
map->overwrite = overwrite;
map->unmap_cb = unmap_cb;
@@ -51,13 +52,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
void perf_mmap__munmap(struct perf_mmap *map)
{
- if (map && map->base != NULL) {
+ if (!map)
+ return;
+
+ zfree(&map->event_copy);
+ map->event_copy_sz = 0;
+ if (map->base) {
munmap(map->base, perf_mmap__mmap_len(map));
map->base = NULL;
map->fd = -1;
refcount_set(&map->refcnt, 0);
}
- if (map && map->unmap_cb)
+ if (map->unmap_cb)
map->unmap_cb(map);
}
@@ -223,9 +229,17 @@ static union perf_event *perf_mmap__read(struct perf_mmap *map,
*/
if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
unsigned int offset = *startp;
- unsigned int len = min(sizeof(*event), size), cpy;
+ unsigned int len = size, cpy;
void *dst = map->event_copy;
+ if (size > map->event_copy_sz) {
+ dst = realloc(map->event_copy, size);
+ if (!dst)
+ return NULL;
+ map->event_copy = dst;
+ map->event_copy_sz = size;
+ }
+
do {
cpy = min(map->mask + 1 - (offset & map->mask), len);
memcpy(dst, &data[offset & map->mask], cpy);
@@ -265,7 +279,7 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map)
if (!refcount_read(&map->refcnt))
return NULL;
- /* non-overwirte doesn't pause the ringbuffer */
+ /* non-overwrite doesn't pause the ringbuffer */
if (!map->overwrite)
map->end = perf_mmap__read_head(map);
@@ -392,6 +406,72 @@ static u64 read_perf_counter(unsigned int counter)
static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); }
+/* __riscv_xlen contains the witdh of the native base integer, here 64-bit */
+#elif defined(__riscv) && __riscv_xlen == 64
+
+/* TODO: implement rv32 support */
+
+#define CSR_CYCLE 0xc00
+#define CSR_TIME 0xc01
+
+#define csr_read(csr) \
+({ \
+ register unsigned long __v; \
+ __asm__ __volatile__ ("csrr %0, %1" \
+ : "=r" (__v) \
+ : "i" (csr) : ); \
+ __v; \
+})
+
+static unsigned long csr_read_num(int csr_num)
+{
+#define switchcase_csr_read(__csr_num, __val) {\
+ case __csr_num: \
+ __val = csr_read(__csr_num); \
+ break; }
+#define switchcase_csr_read_2(__csr_num, __val) {\
+ switchcase_csr_read(__csr_num + 0, __val) \
+ switchcase_csr_read(__csr_num + 1, __val)}
+#define switchcase_csr_read_4(__csr_num, __val) {\
+ switchcase_csr_read_2(__csr_num + 0, __val) \
+ switchcase_csr_read_2(__csr_num + 2, __val)}
+#define switchcase_csr_read_8(__csr_num, __val) {\
+ switchcase_csr_read_4(__csr_num + 0, __val) \
+ switchcase_csr_read_4(__csr_num + 4, __val)}
+#define switchcase_csr_read_16(__csr_num, __val) {\
+ switchcase_csr_read_8(__csr_num + 0, __val) \
+ switchcase_csr_read_8(__csr_num + 8, __val)}
+#define switchcase_csr_read_32(__csr_num, __val) {\
+ switchcase_csr_read_16(__csr_num + 0, __val) \
+ switchcase_csr_read_16(__csr_num + 16, __val)}
+
+ unsigned long ret = 0;
+
+ switch (csr_num) {
+ switchcase_csr_read_32(CSR_CYCLE, ret)
+ default:
+ break;
+ }
+
+ return ret;
+#undef switchcase_csr_read_32
+#undef switchcase_csr_read_16
+#undef switchcase_csr_read_8
+#undef switchcase_csr_read_4
+#undef switchcase_csr_read_2
+#undef switchcase_csr_read
+}
+
+static u64 read_perf_counter(unsigned int counter)
+{
+ return csr_read_num(CSR_CYCLE + counter);
+}
+
+static u64 read_timestamp(void)
+{
+ return csr_read_num(CSR_TIME);
+}
+
#else
static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
static u64 read_timestamp(void) { return 0; }
@@ -428,7 +508,7 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count
idx = READ_ONCE(pc->index);
cnt = READ_ONCE(pc->offset);
if (pc->cap_user_rdpmc && idx) {
- s64 evcnt = read_perf_counter(idx - 1);
+ u64 evcnt = read_perf_counter(idx - 1);
u16 width = READ_ONCE(pc->pmc_width);
evcnt <<= 64 - width;
diff --git a/tools/lib/perf/tests/test-cpumap.c b/tools/lib/perf/tests/test-cpumap.c
index 87b0510a556f..c998b1dae863 100644
--- a/tools/lib/perf/tests/test-cpumap.c
+++ b/tools/lib/perf/tests/test-cpumap.c
@@ -21,7 +21,7 @@ int test_cpumap(int argc, char **argv)
libperf_init(libperf_print);
- cpus = perf_cpu_map__dummy_new();
+ cpus = perf_cpu_map__new_any_cpu();
if (!cpus)
return -1;
@@ -29,7 +29,7 @@ int test_cpumap(int argc, char **argv)
perf_cpu_map__put(cpus);
perf_cpu_map__put(cpus);
- cpus = perf_cpu_map__default_new();
+ cpus = perf_cpu_map__new_online_cpus();
if (!cpus)
return -1;
diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
index ed616fc19b4f..10f70cb41ff1 100644
--- a/tools/lib/perf/tests/test-evlist.c
+++ b/tools/lib/perf/tests/test-evlist.c
@@ -46,7 +46,7 @@ static int test_stat_cpu(void)
};
int err, idx;
- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
__T("failed to create cpus", cpus);
evlist = perf_evlist__new();
@@ -261,7 +261,7 @@ static int test_mmap_thread(void)
threads = perf_thread_map__new_dummy();
__T("failed to create threads", threads);
- cpus = perf_cpu_map__dummy_new();
+ cpus = perf_cpu_map__new_any_cpu();
__T("failed to create cpus", cpus);
perf_thread_map__set_pid(threads, 0, pid);
@@ -350,7 +350,7 @@ static int test_mmap_cpus(void)
attr.config = id;
- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
__T("failed to create cpus", cpus);
evlist = perf_evlist__new();
diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c
index a11fc51bfb68..545ec3150546 100644
--- a/tools/lib/perf/tests/test-evsel.c
+++ b/tools/lib/perf/tests/test-evsel.c
@@ -27,7 +27,7 @@ static int test_stat_cpu(void)
};
int err, idx;
- cpus = perf_cpu_map__new(NULL);
+ cpus = perf_cpu_map__new_online_cpus();
__T("failed to create cpus", cpus);
evsel = perf_evsel__new(&attr);
diff --git a/tools/lib/perf/threadmap.c b/tools/lib/perf/threadmap.c
index 07968f3ea093..db431b036f57 100644
--- a/tools/lib/perf/threadmap.c
+++ b/tools/lib/perf/threadmap.c
@@ -97,5 +97,22 @@ int perf_thread_map__nr(struct perf_thread_map *threads)
pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx)
{
+ if (!map) {
+ assert(idx == 0);
+ return -1;
+ }
+
return map->map[idx].pid;
}
+
+int perf_thread_map__idx(struct perf_thread_map *threads, pid_t pid)
+{
+ if (!threads)
+ return pid == -1 ? 0 : -1;
+
+ for (int i = 0; i < threads->nr; ++i) {
+ if (threads->map[i].pid == pid)
+ return i;
+ }
+ return -1;
+}