summaryrefslogtreecommitdiff
path: root/tools/lib
diff options
context:
space:
mode:
Diffstat (limited to 'tools/lib')
-rw-r--r--tools/lib/perf/Documentation/libperf.txt3
-rw-r--r--tools/lib/perf/evsel.c80
-rw-r--r--tools/lib/perf/include/internal/evsel.h1
-rw-r--r--tools/lib/perf/include/internal/mmap.h3
-rw-r--r--tools/lib/perf/include/internal/tests.h32
-rw-r--r--tools/lib/perf/include/internal/xyarray.h9
-rw-r--r--tools/lib/perf/include/perf/bpf_perf.h31
-rw-r--r--tools/lib/perf/include/perf/event.h7
-rw-r--r--tools/lib/perf/include/perf/evsel.h3
-rw-r--r--tools/lib/perf/libperf.map3
-rw-r--r--tools/lib/perf/mmap.c88
-rw-r--r--tools/lib/perf/tests/Makefile6
-rw-r--r--tools/lib/perf/tests/test-evsel.c66
13 files changed, 327 insertions, 5 deletions
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index 0c74c30ed23a..63ae5e0195ce 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -136,6 +136,9 @@ SYNOPSIS
struct perf_thread_map *threads);
void perf_evsel__close(struct perf_evsel *evsel);
void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
+ int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
+ void perf_evsel__munmap(struct perf_evsel *evsel);
+ void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread);
int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
int perf_evsel__enable(struct perf_evsel *evsel);
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index 4dc06289f4c7..bd8c2f19ef74 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -11,10 +11,12 @@
#include <stdlib.h>
#include <internal/xyarray.h>
#include <internal/cpumap.h>
+#include <internal/mmap.h>
#include <internal/threadmap.h>
#include <internal/lib.h>
#include <linux/string.h>
#include <sys/ioctl.h>
+#include <sys/mman.h>
void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr)
{
@@ -38,6 +40,7 @@ void perf_evsel__delete(struct perf_evsel *evsel)
}
#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y))
+#define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL)
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
@@ -55,6 +58,13 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
return evsel->fd != NULL ? 0 : -ENOMEM;
}
+static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+ evsel->mmap = xyarray__new(ncpus, nthreads, sizeof(struct perf_mmap));
+
+ return evsel->mmap != NULL ? 0 : -ENOMEM;
+}
+
static int
sys_perf_event_open(struct perf_event_attr *attr,
pid_t pid, int cpu, int group_fd,
@@ -156,6 +166,72 @@ void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
perf_evsel__close_fd_cpu(evsel, cpu);
}
+void perf_evsel__munmap(struct perf_evsel *evsel)
+{
+ int cpu, thread;
+
+ if (evsel->fd == NULL || evsel->mmap == NULL)
+ return;
+
+ for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+ for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
+ int fd = FD(evsel, cpu, thread);
+ struct perf_mmap *map = MMAP(evsel, cpu, thread);
+
+ if (fd < 0)
+ continue;
+
+ perf_mmap__munmap(map);
+ }
+ }
+
+ xyarray__delete(evsel->mmap);
+ evsel->mmap = NULL;
+}
+
+int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
+{
+ int ret, cpu, thread;
+ struct perf_mmap_param mp = {
+ .prot = PROT_READ | PROT_WRITE,
+ .mask = (pages * page_size) - 1,
+ };
+
+ if (evsel->fd == NULL || evsel->mmap)
+ return -EINVAL;
+
+ if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0)
+ return -ENOMEM;
+
+ for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+ for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
+ int fd = FD(evsel, cpu, thread);
+ struct perf_mmap *map = MMAP(evsel, cpu, thread);
+
+ if (fd < 0)
+ continue;
+
+ perf_mmap__init(map, NULL, false, NULL);
+
+ ret = perf_mmap__mmap(map, &mp, fd, cpu);
+ if (ret) {
+ perf_evsel__munmap(evsel);
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread)
+{
+ if (FD(evsel, cpu, thread) < 0 || MMAP(evsel, cpu, thread) == NULL)
+ return NULL;
+
+ return MMAP(evsel, cpu, thread)->base;
+}
+
int perf_evsel__read_size(struct perf_evsel *evsel)
{
u64 read_format = evsel->attr.read_format;
@@ -191,6 +267,10 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
if (FD(evsel, cpu, thread) < 0)
return -EINVAL;
+ if (MMAP(evsel, cpu, thread) &&
+ !perf_mmap__read_self(MMAP(evsel, cpu, thread), count))
+ return 0;
+
if (readn(FD(evsel, cpu, thread), count->values, size) <= 0)
return -errno;
diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h
index 1ffd083b235e..1c067d088bc6 100644
--- a/tools/lib/perf/include/internal/evsel.h
+++ b/tools/lib/perf/include/internal/evsel.h
@@ -41,6 +41,7 @@ struct perf_evsel {
struct perf_cpu_map *own_cpus;
struct perf_thread_map *threads;
struct xyarray *fd;
+ struct xyarray *mmap;
struct xyarray *sample_id;
u64 *id;
u32 ids;
diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h
index be7556e0a2b2..5e3422f40ed5 100644
--- a/tools/lib/perf/include/internal/mmap.h
+++ b/tools/lib/perf/include/internal/mmap.h
@@ -11,6 +11,7 @@
#define PERF_SAMPLE_MAX_SIZE (1 << 16)
struct perf_mmap;
+struct perf_counts_values;
typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map);
@@ -52,4 +53,6 @@ void perf_mmap__put(struct perf_mmap *map);
u64 perf_mmap__read_head(struct perf_mmap *map);
+int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count);
+
#endif /* __LIBPERF_INTERNAL_MMAP_H */
diff --git a/tools/lib/perf/include/internal/tests.h b/tools/lib/perf/include/internal/tests.h
index 2093e8868a67..29425c2dabe1 100644
--- a/tools/lib/perf/include/internal/tests.h
+++ b/tools/lib/perf/include/internal/tests.h
@@ -3,11 +3,32 @@
#define __LIBPERF_INTERNAL_TESTS_H
#include <stdio.h>
+#include <unistd.h>
int tests_failed;
+int tests_verbose;
+
+static inline int get_verbose(char **argv, int argc)
+{
+ int c;
+ int verbose = 0;
+
+ while ((c = getopt(argc, argv, "v")) != -1) {
+ switch (c)
+ {
+ case 'v':
+ verbose = 1;
+ break;
+ default:
+ break;
+ }
+ }
+ return verbose;
+}
#define __T_START \
do { \
+ tests_verbose = get_verbose(argv, argc); \
fprintf(stdout, "- running %s...", __FILE__); \
fflush(NULL); \
tests_failed = 0; \
@@ -30,4 +51,15 @@ do {
} \
} while (0)
+#define __T_VERBOSE(...) \
+do { \
+ if (tests_verbose) { \
+ if (tests_verbose == 1) { \
+ fputc('\n', stderr); \
+ tests_verbose++; \
+ } \
+ fprintf(stderr, ##__VA_ARGS__); \
+ } \
+} while (0)
+
#endif /* __LIBPERF_INTERNAL_TESTS_H */
diff --git a/tools/lib/perf/include/internal/xyarray.h b/tools/lib/perf/include/internal/xyarray.h
index 51e35d6c8ec4..f10af3da7b21 100644
--- a/tools/lib/perf/include/internal/xyarray.h
+++ b/tools/lib/perf/include/internal/xyarray.h
@@ -18,11 +18,18 @@ struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
void xyarray__delete(struct xyarray *xy);
void xyarray__reset(struct xyarray *xy);
-static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
+static inline void *__xyarray__entry(struct xyarray *xy, int x, int y)
{
return &xy->contents[x * xy->row_size + y * xy->entry_size];
}
+static inline void *xyarray__entry(struct xyarray *xy, size_t x, size_t y)
+{
+ if (x >= xy->max_x || y >= xy->max_y)
+ return NULL;
+ return __xyarray__entry(xy, x, y);
+}
+
static inline int xyarray__max_y(struct xyarray *xy)
{
return xy->max_y;
diff --git a/tools/lib/perf/include/perf/bpf_perf.h b/tools/lib/perf/include/perf/bpf_perf.h
new file mode 100644
index 000000000000..e7cf6ba7b674
--- /dev/null
+++ b/tools/lib/perf/include/perf/bpf_perf.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __LIBPERF_BPF_PERF_H
+#define __LIBPERF_BPF_PERF_H
+
+#include <linux/types.h> /* for __u32 */
+
+/*
+ * bpf_perf uses a hashmap, the attr_map, to track all the leader programs.
+ * The hashmap is pinned in bpffs. flock() on this file is used to ensure
+ * no concurrent access to the attr_map. The key of attr_map is struct
+ * perf_event_attr, and the value is struct perf_event_attr_map_entry.
+ *
+ * struct perf_event_attr_map_entry contains two __u32 IDs, bpf_link of the
+ * leader prog, and the diff_map. Each perf-stat session holds a reference
+ * to the bpf_link to make sure the leader prog is attached to sched_switch
+ * tracepoint.
+ *
+ * Since the hashmap only contains IDs of the bpf_link and diff_map, it
+ * does not hold any references to the leader program. Once all perf-stat
+ * sessions of these events exit, the leader prog, its maps, and the
+ * perf_events will be freed.
+ */
+struct perf_event_attr_map_entry {
+ __u32 link_id;
+ __u32 diff_map_id;
+};
+
+/* default attr_map name */
+#define BPF_PERF_DEFAULT_ATTR_MAP_PATH "perf_attr_map"
+
+#endif /* __LIBPERF_BPF_PERF_H */
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index d82054225fcc..4d0c02ba3f7d 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -8,6 +8,8 @@
#include <linux/bpf.h>
#include <sys/types.h> /* pid_t */
+#define event_contains(obj, mem) ((obj).header.size > offsetof(typeof(obj), mem))
+
struct perf_record_mmap {
struct perf_event_header header;
__u32 pid, tid;
@@ -346,8 +348,9 @@ struct perf_record_time_conv {
__u64 time_zero;
__u64 time_cycles;
__u64 time_mask;
- bool cap_user_time_zero;
- bool cap_user_time_short;
+ __u8 cap_user_time_zero;
+ __u8 cap_user_time_short;
+ __u8 reserved[6]; /* For alignment */
};
struct perf_record_header_feature {
diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h
index c82ec39a4ad0..60eae25076d3 100644
--- a/tools/lib/perf/include/perf/evsel.h
+++ b/tools/lib/perf/include/perf/evsel.h
@@ -27,6 +27,9 @@ LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *
struct perf_thread_map *threads);
LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
+LIBPERF_API int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
+LIBPERF_API void perf_evsel__munmap(struct perf_evsel *evsel);
+LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread);
LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 7be1af8a546c..c0c7ceb11060 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -23,6 +23,9 @@ LIBPERF_0.0.1 {
perf_evsel__disable;
perf_evsel__open;
perf_evsel__close;
+ perf_evsel__mmap;
+ perf_evsel__munmap;
+ perf_evsel__mmap_base;
perf_evsel__read;
perf_evsel__cpus;
perf_evsel__threads;
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index 79d5ed6c38cc..c89dfa5f67b3 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -8,9 +8,11 @@
#include <linux/perf_event.h>
#include <perf/mmap.h>
#include <perf/event.h>
+#include <perf/evsel.h>
#include <internal/mmap.h>
#include <internal/lib.h>
#include <linux/kernel.h>
+#include <linux/math64.h>
#include "internal.h"
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
@@ -273,3 +275,89 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map)
return event;
}
+
+#if defined(__i386__) || defined(__x86_64__)
+static u64 read_perf_counter(unsigned int counter)
+{
+ unsigned int low, high;
+
+ asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+
+ return low | ((u64)high) << 32;
+}
+
+static u64 read_timestamp(void)
+{
+ unsigned int low, high;
+
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+ return low | ((u64)high) << 32;
+}
+#else
+static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; }
+static u64 read_timestamp(void) { return 0; }
+#endif
+
+int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count)
+{
+ struct perf_event_mmap_page *pc = map->base;
+ u32 seq, idx, time_mult = 0, time_shift = 0;
+ u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;
+
+ if (!pc || !pc->cap_user_rdpmc)
+ return -1;
+
+ do {
+ seq = READ_ONCE(pc->lock);
+ barrier();
+
+ count->ena = READ_ONCE(pc->time_enabled);
+ count->run = READ_ONCE(pc->time_running);
+
+ if (pc->cap_user_time && count->ena != count->run) {
+ cyc = read_timestamp();
+ time_mult = READ_ONCE(pc->time_mult);
+ time_shift = READ_ONCE(pc->time_shift);
+ time_offset = READ_ONCE(pc->time_offset);
+
+ if (pc->cap_user_time_short) {
+ time_cycles = READ_ONCE(pc->time_cycles);
+ time_mask = READ_ONCE(pc->time_mask);
+ }
+ }
+
+ idx = READ_ONCE(pc->index);
+ cnt = READ_ONCE(pc->offset);
+ if (pc->cap_user_rdpmc && idx) {
+ s64 evcnt = read_perf_counter(idx - 1);
+ u16 width = READ_ONCE(pc->pmc_width);
+
+ evcnt <<= 64 - width;
+ evcnt >>= 64 - width;
+ cnt += evcnt;
+ } else
+ return -1;
+
+ barrier();
+ } while (READ_ONCE(pc->lock) != seq);
+
+ if (count->ena != count->run) {
+ u64 delta;
+
+ /* Adjust for cap_usr_time_short, a nop if not */
+ cyc = time_cycles + ((cyc - time_cycles) & time_mask);
+
+ delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);
+
+ count->ena += delta;
+ if (idx)
+ count->run += delta;
+
+ cnt = mul_u64_u64_div64(cnt, count->ena, count->run);
+ }
+
+ count->val = cnt;
+
+ return 0;
+}
diff --git a/tools/lib/perf/tests/Makefile b/tools/lib/perf/tests/Makefile
index 96841775feaf..b536cc9a26dd 100644
--- a/tools/lib/perf/tests/Makefile
+++ b/tools/lib/perf/tests/Makefile
@@ -5,6 +5,8 @@ TESTS = test-cpumap test-threadmap test-evlist test-evsel
TESTS_SO := $(addsuffix -so,$(TESTS))
TESTS_A := $(addsuffix -a,$(TESTS))
+TEST_ARGS := $(if $(V),-v)
+
# Set compile option CFLAGS
ifdef EXTRA_CFLAGS
CFLAGS := $(EXTRA_CFLAGS)
@@ -28,9 +30,9 @@ all: $(TESTS_A) $(TESTS_SO)
run:
@echo "running static:"
- @for i in $(TESTS_A); do ./$$i; done
+ @for i in $(TESTS_A); do ./$$i $(TEST_ARGS); done
@echo "running dynamic:"
- @for i in $(TESTS_SO); do LD_LIBRARY_PATH=../ ./$$i; done
+ @for i in $(TESTS_SO); do LD_LIBRARY_PATH=../ ./$$i $(TEST_ARGS); done
clean:
$(call QUIET_CLEAN, tests)$(RM) $(TESTS_A) $(TESTS_SO)
diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c
index 0ad82d7a2a51..288b5feaefe2 100644
--- a/tools/lib/perf/tests/test-evsel.c
+++ b/tools/lib/perf/tests/test-evsel.c
@@ -120,6 +120,70 @@ static int test_stat_thread_enable(void)
return 0;
}
+static int test_stat_user_read(int event)
+{
+ struct perf_counts_values counts = { .val = 0 };
+ struct perf_thread_map *threads;
+ struct perf_evsel *evsel;
+ struct perf_event_mmap_page *pc;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = event,
+ };
+ int err, i;
+
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ evsel = perf_evsel__new(&attr);
+ __T("failed to create evsel", evsel);
+
+ err = perf_evsel__open(evsel, NULL, threads);
+ __T("failed to open evsel", err == 0);
+
+ err = perf_evsel__mmap(evsel, 0);
+ __T("failed to mmap evsel", err == 0);
+
+ pc = perf_evsel__mmap_base(evsel, 0, 0);
+
+#if defined(__i386__) || defined(__x86_64__)
+ __T("userspace counter access not supported", pc->cap_user_rdpmc);
+ __T("userspace counter access not enabled", pc->index);
+ __T("userspace counter width not set", pc->pmc_width >= 32);
+#endif
+
+ perf_evsel__read(evsel, 0, 0, &counts);
+ __T("failed to read value for evsel", counts.val != 0);
+
+ for (i = 0; i < 5; i++) {
+ volatile int count = 0x10000 << i;
+ __u64 start, end, last = 0;
+
+ __T_VERBOSE("\tloop = %u, ", count);
+
+ perf_evsel__read(evsel, 0, 0, &counts);
+ start = counts.val;
+
+ while (count--) ;
+
+ perf_evsel__read(evsel, 0, 0, &counts);
+ end = counts.val;
+
+ __T("invalid counter data", (end - start) > last);
+ last = end - start;
+ __T_VERBOSE("count = %llu\n", end - start);
+ }
+
+ perf_evsel__munmap(evsel);
+ perf_evsel__close(evsel);
+ perf_evsel__delete(evsel);
+
+ perf_thread_map__put(threads);
+ return 0;
+}
+
int main(int argc, char **argv)
{
__T_START;
@@ -129,6 +193,8 @@ int main(int argc, char **argv)
test_stat_cpu();
test_stat_thread();
test_stat_thread_enable();
+ test_stat_user_read(PERF_COUNT_HW_INSTRUCTIONS);
+ test_stat_user_read(PERF_COUNT_HW_CPU_CYCLES);
__T_END;
return tests_failed == 0 ? 0 : -1;