summaryrefslogtreecommitdiff
path: root/tools/lib/perf/evsel.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-01-18 06:32:11 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-18 06:32:11 +0200
commit57d17378a4a042401b0c2fe211e5a0e3a276cb3d (patch)
treebe65e51a7b11cccb903b44708b98b3af47477304 /tools/lib/perf/evsel.c
parentf0033681f0fe8421baf8db125e57fa6157824c2d (diff)
parent9bce13ea88f85344b765abe5d3dabdd0f44dc177 (diff)
Merge tag 'perf-tools-for-v5.17-2022-01-16' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tool updates from Arnaldo Carvalho de Melo: "New features: - Add 'trace' subcommand for 'perf ftrace', setting the stage for more 'perf ftrace' subcommands. Not using a subcommand yields the previous behaviour of 'perf ftrace'. - Add 'latency' subcommand to 'perf ftrace', that can use the function graph tracer or a BPF optimized one, via the -b/--use-bpf option. E.g.: $ sudo perf ftrace latency -a -T mutex_lock sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 4596 | ######################## | 1 - 2 us | 1680 | ######### | 2 - 4 us | 1106 | ##### | 4 - 8 us | 546 | ## | 8 - 16 us | 562 | ### | 16 - 32 us | 1 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - 2 ms | 0 | | 2 - 4 ms | 0 | | 4 - 8 ms | 0 | | 8 - 16 ms | 0 | | 16 - 32 ms | 0 | | 32 - 64 ms | 0 | | 64 - 128 ms | 0 | | 128 - 256 ms | 0 | | 256 - 512 ms | 0 | | 512 - 1024 ms | 0 | | 1 - ... s | 0 | | The original implementation of this command was in the bcc tool. - Support --cputype option for hybrid events in 'perf stat'. Improvements: - Call chain improvements for ARM64. - No need to do any affinity setup when profiling pids. - Reduce multiplexing with duration_time in 'perf stat' metrics. - Improve error message for uncore events, stating that some event groups can only be used in system wide (-a) mode. - perf stat metric group leader fixes/improvements, including arch specific changes to better support Intel topdown events. - Probe non-deprecated sysfs path first, i.e. try the path /sys/devices/system/cpu/cpuN/topology/core_cpus first, then the old /sys/devices/system/cpu/cpuN/topology/thread_siblings. - Disable debuginfod by default in 'perf record', to avoid stalls on distros such as Fedora 35. - Use unbuffered output in 'perf bench' when pipe/tee'ing to a file. - Enable ignore_missing_thread in 'perf trace'. Fixes: - Avoid TUI crash when navigating in the annotation of recursive functions. 
- Fix hex dump character output in 'perf script'. - Fix JSON indentation to 4 spaces standard in the ARM vendor event files. - Fix use after free in metric__new(). - Fix IS_ERR_OR_NULL() usage in the perf BPF loader. - Fix up cross-arch register support, i.e. when printing register names take into account the architecture where the perf.data file was collected. - Fix SMT fallback with large core counts. - Don't lower case MetricExpr when parsing JSON files so as not to lose info such as the ":G" event modifier in metrics. perf test: - Add basic stress test for sigtrap handling to 'perf test'. - Fix 'perf test' failures on s/390. - Enable system wide for metricgroups test in 'perf test'. - Use 3 digits for test numbering now we can have more tests. Arch specific: - Add events for Arm Neoverse N2 in the ARM JSON vendor event files. - Support PERF_MEM_LVLNUM encodings in powerpc, that came from a single patch series, where I incorrectly merged the kernel bits, that were then reverted after coordination with Michael Ellerman and Stephen Rothwell. - Add ARM SPE total latency as PERF_SAMPLE_WEIGHT. - Update AMD documentation, with info on raw event encoding. - Add support for global and local variants of the "p_stage_cyc" sort key, applicable to perf.data files collected on powerpc. - Remove duplicate and incorrect aux size checks in the ARM CoreSight ETM code. Refactorings: - Add a perf_cpu abstraction to disambiguate CPUs and CPU map indexes, fixing problems along the way. - Document CPU map methods. UAPI sync: - Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy'. - Sync UAPI files with the kernel sources: drm, msr-index, cpufeatures. Build system: - Enable warnings through HOSTCFLAGS. - Drop requirement for libstdc++.so for libopencsd check. libperf: - Make libperf adopt perf_counts_values__scale() from tools/perf/util/. 
- Add a stat multiplexing test to libperf" * tag 'perf-tools-for-v5.17-2022-01-16' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (115 commits) perf record: Disable debuginfod by default perf evlist: No need to do any affinity setup when profiling pids perf cpumap: Add is_dummy() method perf metric: Fix metric_leader perf cputopo: Fix CPU topology reading on s/390 perf metricgroup: Fix use after free in metric__new() libperf tests: Update a use of the new cpumap API perf arm: Fix off-by-one directory path tools arch x86: Sync the msr-index.h copy with the kernel sources tools headers cpufeatures: Sync with the kernel sources tools headers UAPI: Update tools's copy of drm.h header tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy' perf pmu-events: Don't lower case MetricExpr perf expr: Add debug logging for literals perf tools: Probe non-deprecated sysfs path 1st perf tools: Fix SMT fallback with large core counts perf cpumap: Give CPUs their own type perf stat: Correct first_shadow_cpu to return index perf script: Fix flipped index and cpu perf c2c: Use more intention revealing iterator ...
Diffstat (limited to 'tools/lib/perf/evsel.c')
-rw-r--r--tools/lib/perf/evsel.c111
1 files changed, 67 insertions, 44 deletions
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index 8441e3e1aaac..7ea86a44eae5 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -43,18 +43,22 @@ void perf_evsel__delete(struct perf_evsel *evsel)
free(evsel);
}
-#define FD(e, x, y) ((int *) xyarray__entry(e->fd, x, y))
-#define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL)
+#define FD(_evsel, _cpu_map_idx, _thread) \
+ ((int *)xyarray__entry(_evsel->fd, _cpu_map_idx, _thread))
+#define MMAP(_evsel, _cpu_map_idx, _thread) \
+ (_evsel->mmap ? ((struct perf_mmap *) xyarray__entry(_evsel->mmap, _cpu_map_idx, _thread)) \
+ : NULL)
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
if (evsel->fd) {
- int cpu, thread;
- for (cpu = 0; cpu < ncpus; cpu++) {
+ int idx, thread;
+
+ for (idx = 0; idx < ncpus; idx++) {
for (thread = 0; thread < nthreads; thread++) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, idx, thread);
if (fd)
*fd = -1;
@@ -74,13 +78,13 @@ static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthre
static int
sys_perf_event_open(struct perf_event_attr *attr,
- pid_t pid, int cpu, int group_fd,
+ pid_t pid, struct perf_cpu cpu, int group_fd,
unsigned long flags)
{
- return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+ return syscall(__NR_perf_event_open, attr, pid, cpu.cpu, group_fd, flags);
}
-static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd)
+static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, int *group_fd)
{
struct perf_evsel *leader = evsel->leader;
int *fd;
@@ -97,7 +101,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou
if (!leader->fd)
return -ENOTCONN;
- fd = FD(leader, cpu, thread);
+ fd = FD(leader, cpu_map_idx, thread);
if (fd == NULL || *fd == -1)
return -EBADF;
@@ -109,7 +113,8 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou
int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
{
- int cpu, thread, err = 0;
+ struct perf_cpu cpu;
+ int idx, thread, err = 0;
if (cpus == NULL) {
static struct perf_cpu_map *empty_cpu_map;
@@ -139,21 +144,21 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
return -ENOMEM;
- for (cpu = 0; cpu < cpus->nr; cpu++) {
+ perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
for (thread = 0; thread < threads->nr; thread++) {
int fd, group_fd, *evsel_fd;
- evsel_fd = FD(evsel, cpu, thread);
+ evsel_fd = FD(evsel, idx, thread);
if (evsel_fd == NULL)
return -EINVAL;
- err = get_group_fd(evsel, cpu, thread, &group_fd);
+ err = get_group_fd(evsel, idx, thread, &group_fd);
if (err < 0)
return err;
fd = sys_perf_event_open(&evsel->attr,
threads->map[thread].pid,
- cpus->map[cpu], group_fd, 0);
+ cpu, group_fd, 0);
if (fd < 0)
return -errno;
@@ -165,12 +170,12 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
return err;
}
-static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
+static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
int thread;
for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, cpu_map_idx, thread);
if (fd && *fd >= 0) {
close(*fd);
@@ -181,10 +186,8 @@ static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
void perf_evsel__close_fd(struct perf_evsel *evsel)
{
- int cpu;
-
- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
- perf_evsel__close_fd_cpu(evsel, cpu);
+ for (int idx = 0; idx < xyarray__max_x(evsel->fd); idx++)
+ perf_evsel__close_fd_cpu(evsel, idx);
}
void perf_evsel__free_fd(struct perf_evsel *evsel)
@@ -202,29 +205,29 @@ void perf_evsel__close(struct perf_evsel *evsel)
perf_evsel__free_fd(evsel);
}
-void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
+void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
if (evsel->fd == NULL)
return;
- perf_evsel__close_fd_cpu(evsel, cpu);
+ perf_evsel__close_fd_cpu(evsel, cpu_map_idx);
}
void perf_evsel__munmap(struct perf_evsel *evsel)
{
- int cpu, thread;
+ int idx, thread;
if (evsel->fd == NULL || evsel->mmap == NULL)
return;
- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+ for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, idx, thread);
if (fd == NULL || *fd < 0)
continue;
- perf_mmap__munmap(MMAP(evsel, cpu, thread));
+ perf_mmap__munmap(MMAP(evsel, idx, thread));
}
}
@@ -234,7 +237,7 @@ void perf_evsel__munmap(struct perf_evsel *evsel)
int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
{
- int ret, cpu, thread;
+ int ret, idx, thread;
struct perf_mmap_param mp = {
.prot = PROT_READ | PROT_WRITE,
.mask = (pages * page_size) - 1,
@@ -246,15 +249,16 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0)
return -ENOMEM;
- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+ for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, idx, thread);
struct perf_mmap *map;
+ struct perf_cpu cpu = perf_cpu_map__cpu(evsel->cpus, idx);
if (fd == NULL || *fd < 0)
continue;
- map = MMAP(evsel, cpu, thread);
+ map = MMAP(evsel, idx, thread);
perf_mmap__init(map, NULL, false, NULL);
ret = perf_mmap__mmap(map, &mp, *fd, cpu);
@@ -268,14 +272,14 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
return 0;
}
-void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread)
+void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread)
{
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, cpu_map_idx, thread);
- if (fd == NULL || *fd < 0 || MMAP(evsel, cpu, thread) == NULL)
+ if (fd == NULL || *fd < 0 || MMAP(evsel, cpu_map_idx, thread) == NULL)
return NULL;
- return MMAP(evsel, cpu, thread)->base;
+ return MMAP(evsel, cpu_map_idx, thread)->base;
}
int perf_evsel__read_size(struct perf_evsel *evsel)
@@ -303,19 +307,19 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
return size;
}
-int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count)
{
size_t size = perf_evsel__read_size(evsel);
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, cpu_map_idx, thread);
memset(count, 0, sizeof(*count));
if (fd == NULL || *fd < 0)
return -EINVAL;
- if (MMAP(evsel, cpu, thread) &&
- !perf_mmap__read_self(MMAP(evsel, cpu, thread), count))
+ if (MMAP(evsel, cpu_map_idx, thread) &&
+ !perf_mmap__read_self(MMAP(evsel, cpu_map_idx, thread), count))
return 0;
if (readn(*fd, count->values, size) <= 0)
@@ -326,13 +330,13 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
int ioc, void *arg,
- int cpu)
+ int cpu_map_idx)
{
int thread;
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
int err;
- int *fd = FD(evsel, cpu, thread);
+ int *fd = FD(evsel, cpu_map_idx, thread);
if (fd == NULL || *fd < 0)
return -1;
@@ -346,9 +350,9 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
return 0;
}
-int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu)
+int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu);
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx);
}
int perf_evsel__enable(struct perf_evsel *evsel)
@@ -361,9 +365,9 @@ int perf_evsel__enable(struct perf_evsel *evsel)
return err;
}
-int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu)
+int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu);
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu_map_idx);
}
int perf_evsel__disable(struct perf_evsel *evsel)
@@ -431,3 +435,22 @@ void perf_evsel__free_id(struct perf_evsel *evsel)
zfree(&evsel->id);
evsel->ids = 0;
}
+
+void perf_counts_values__scale(struct perf_counts_values *count,
+ bool scale, __s8 *pscaled)
+{
+ s8 scaled = 0;
+
+ if (scale) {
+ if (count->run == 0) {
+ scaled = -1;
+ count->val = 0;
+ } else if (count->run < count->ena) {
+ scaled = 1;
+ count->val = (u64)((double)count->val * count->ena / count->run);
+ }
+ }
+
+ if (pscaled)
+ *pscaled = scaled;
+}