diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-18 06:32:11 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-18 06:32:11 +0200 |
commit | 57d17378a4a042401b0c2fe211e5a0e3a276cb3d (patch) | |
tree | be65e51a7b11cccb903b44708b98b3af47477304 /tools/lib/perf/tests/test-evlist.c | |
parent | f0033681f0fe8421baf8db125e57fa6157824c2d (diff) | |
parent | 9bce13ea88f85344b765abe5d3dabdd0f44dc177 (diff) |
Merge tag 'perf-tools-for-v5.17-2022-01-16' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tool updates from Arnaldo Carvalho de Melo:
"New features:
- Add 'trace' subcommand for 'perf ftrace', setting the stage for
more 'perf ftrace' subcommands. Not using a subcommand yields the
previous behaviour of 'perf ftrace'.
- Add 'latency' subcommand to 'perf ftrace', that can use the
function graph tracer or a BPF optimized one, via the -b/--use-bpf
option.
E.g.:
$ sudo perf ftrace latency -a -T mutex_lock sleep 1
# DURATION | COUNT | GRAPH |
0 - 1 us | 4596 | ######################## |
1 - 2 us | 1680 | ######### |
2 - 4 us | 1106 | ##### |
4 - 8 us | 546 | ## |
8 - 16 us | 562 | ### |
16 - 32 us | 1 | |
32 - 64 us | 0 | |
64 - 128 us | 0 | |
128 - 256 us | 0 | |
256 - 512 us | 0 | |
512 - 1024 us | 0 | |
1 - 2 ms | 0 | |
2 - 4 ms | 0 | |
4 - 8 ms | 0 | |
8 - 16 ms | 0 | |
16 - 32 ms | 0 | |
32 - 64 ms | 0 | |
64 - 128 ms | 0 | |
128 - 256 ms | 0 | |
256 - 512 ms | 0 | |
512 - 1024 ms | 0 | |
1 - ... s | 0 | |
The original implementation of this command was in the bcc tool.
- Support --cputype option for hybrid events in 'perf stat'.
Improvements:
- Call chain improvements for ARM64.
- No need to do any affinity setup when profiling pids.
- Reduce multiplexing with duration_time in 'perf stat' metrics.
- Improve error message for uncore events, stating that some event
groups are can only be used in system wide (-a) mode.
- perf stat metric group leader fixes/improvements, including arch
specific changes to better support Intel topdown events.
- Probe non-deprecated sysfs path first, i.e. try the path
/sys/devices/system/cpu/cpuN/topology/thread_siblings first, then
the old /sys/devices/system/cpu/cpuN/topology/core_cpus.
- Disable debuginfod by default in 'perf record', to avoid stalls on
distros such as Fedora 35.
- Use unbuffered output in 'perf bench' when pipe/tee'ing to a file.
- Enable ignore_missing_thread in 'perf trace'
Fixes:
- Avoid TUI crash when navigating in the annotation of recursive
functions.
- Fix hex dump character output in 'perf script'.
- Fix JSON indentation to 4 spaces standard in the ARM vendor event
files.
- Fix use after free in metric__new().
- Fix IS_ERR_OR_NULL() usage in the perf BPF loader.
- Fix up cross-arch register support, i.e. when printing register
names take into account the architecture where the perf.data file
was collected.
- Fix SMT fallback with large core counts.
- Don't lower case MetricExpr when parsing JSON files so as not to
lose info such as the ":G" event modifier in metrics.
perf test:
- Add basic stress test for sigtrap handling to 'perf test'.
- Fix 'perf test' failures on s/390
- Enable system wide for metricgroups test in 'perf test´.
- Use 3 digits for test numbering now we can have more tests.
Arch specific:
- Add events for Arm Neoverse N2 in the ARM JSON vendor event files
- Support PERF_MEM_LVLNUM encodings in powerpc, that came from a
single patch series, where I incorrectly merged the kernel bits,
that were then reverted after coordination with Michael Ellerman
and Stephen Rothwell.
- Add ARM SPE total latency as PERF_SAMPLE_WEIGHT.
- Update AMD documentation, with info on raw event encoding.
- Add support for global and local variants of the "p_stage_cyc" sort
key, applicable to perf.data files collected on powerpc.
- Remove duplicate and incorrect aux size checks in the ARM CoreSight
ETM code.
Refactorings:
- Add a perf_cpu abstraction to disambiguate CPUs and CPU map
indexes, fixing problems along the way.
- Document CPU map methods.
UAPI sync:
- Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench
mem memcpy'
- Sync UAPI files with the kernel sources: drm, msr-index,
cpufeatures.
Build system
- Enable warnings through HOSTCFLAGS.
- Drop requirement for libstdc++.so for libopencsd check
libperf:
- Make libperf adopt perf_counts_values__scale() from tools/perf/util/.
- Add a stat multiplexing test to libperf"
* tag 'perf-tools-for-v5.17-2022-01-16' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (115 commits)
perf record: Disable debuginfod by default
perf evlist: No need to do any affinity setup when profiling pids
perf cpumap: Add is_dummy() method
perf metric: Fix metric_leader
perf cputopo: Fix CPU topology reading on s/390
perf metricgroup: Fix use after free in metric__new()
libperf tests: Update a use of the new cpumap API
perf arm: Fix off-by-one directory path
tools arch x86: Sync the msr-index.h copy with the kernel sources
tools headers cpufeatures: Sync with the kernel sources
tools headers UAPI: Update tools's copy of drm.h header
tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy'
perf pmu-events: Don't lower case MetricExpr
perf expr: Add debug logging for literals
perf tools: Probe non-deprecated sysfs path 1st
perf tools: Fix SMT fallback with large core counts
perf cpumap: Give CPUs their own type
perf stat: Correct first_shadow_cpu to return index
perf script: Fix flipped index and cpu
perf c2c: Use more intention revealing iterator
...
Diffstat (limited to 'tools/lib/perf/tests/test-evlist.c')
-rw-r--r-- | tools/lib/perf/tests/test-evlist.c | 162 |
1 files changed, 160 insertions, 2 deletions
diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index ce91a582f0e4..b3479dfa9a1c 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -21,6 +21,9 @@ #include "tests.h" #include <internal/evsel.h> +#define EVENT_NUM 15 +#define WAIT_COUNT 100000000UL + static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) { @@ -331,7 +334,8 @@ static int test_mmap_cpus(void) }; cpu_set_t saved_mask; char path[PATH_MAX]; - int id, err, cpu, tmp; + int id, err, tmp; + struct perf_cpu cpu; union perf_event *event; int count = 0; @@ -374,7 +378,7 @@ static int test_mmap_cpus(void) cpu_set_t mask; CPU_ZERO(&mask); - CPU_SET(cpu, &mask); + CPU_SET(cpu.cpu, &mask); err = sched_setaffinity(0, sizeof(mask), &mask); __T("sched_setaffinity failed", err == 0); @@ -413,6 +417,159 @@ static int test_mmap_cpus(void) return 0; } +static double display_error(long long average, + long long high, + long long low, + long long expected) +{ + double error; + + error = (((double)average - expected) / expected) * 100.0; + + __T_VERBOSE(" Expected: %lld\n", expected); + __T_VERBOSE(" High: %lld Low: %lld Average: %lld\n", + high, low, average); + + __T_VERBOSE(" Average Error = %.2f%%\n", error); + + return error; +} + +static int test_stat_multiplexing(void) +{ + struct perf_counts_values expected_counts = { .val = 0 }; + struct perf_counts_values counts[EVENT_NUM] = {{ .val = 0 },}; + struct perf_thread_map *threads; + struct perf_evlist *evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_INSTRUCTIONS, + .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING, + .disabled = 1, + }; + int err, i, nonzero = 0; + unsigned long count; + long long max = 0, min = 0, avg = 0; + double error = 0.0; + s8 scaled = 0; + + /* read for non-multiplexing event count */ + threads = perf_thread_map__new_dummy(); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + + evsel = perf_evsel__new(&attr); + __T("failed to create evsel", evsel); + + err = perf_evsel__open(evsel, NULL, threads); + __T("failed to open evsel", err == 0); + + err = perf_evsel__enable(evsel); + __T("failed to enable evsel", err == 0); + + /* wait loop */ + count = WAIT_COUNT; + while (count--) + ; + + perf_evsel__read(evsel, 0, 0, &expected_counts); + __T("failed to read value for evsel", expected_counts.val != 0); + __T("failed to read non-multiplexing event count", + expected_counts.ena == expected_counts.run); + + err = perf_evsel__disable(evsel); + __T("failed to enable evsel", err == 0); + + perf_evsel__close(evsel); + perf_evsel__delete(evsel); + + perf_thread_map__put(threads); + + /* read for multiplexing event count */ + threads = perf_thread_map__new_dummy(); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + + evlist = perf_evlist__new(); + __T("failed to create evlist", evlist); + + for (i = 0; i < EVENT_NUM; i++) { + evsel = perf_evsel__new(&attr); + __T("failed to create evsel", evsel); + + perf_evlist__add(evlist, evsel); + } + perf_evlist__set_maps(evlist, NULL, threads); + + err = perf_evlist__open(evlist); + __T("failed to open evsel", err == 0); + + perf_evlist__enable(evlist); + + /* wait loop */ + count = WAIT_COUNT; + while (count--) + ; + + i = 0; + perf_evlist__for_each_evsel(evlist, evsel) { + perf_evsel__read(evsel, 0, 0, &counts[i]); + __T("failed to read value for evsel", counts[i].val != 0); + i++; + } + + perf_evlist__disable(evlist); + + min = counts[0].val; + for (i = 0; i < EVENT_NUM; i++) { + __T_VERBOSE("Event %2d -- Raw count = %lu, run = %lu, enable = %lu\n", + i, counts[i].val, counts[i].run, counts[i].ena); + + perf_counts_values__scale(&counts[i], true, &scaled); + if (scaled == 1) { + __T_VERBOSE("\t Scaled count = %lu (%.2lf%%, %lu/%lu)\n", + counts[i].val, + (double)counts[i].run / (double)counts[i].ena * 100.0, + counts[i].run, counts[i].ena); + } else if (scaled == -1) { + __T_VERBOSE("\t Not Running\n"); + } else { + __T_VERBOSE("\t Not Scaling\n"); + } + + if (counts[i].val > max) + max = counts[i].val; + + if (counts[i].val < min) + min = counts[i].val; + + avg += counts[i].val; + + if (counts[i].val != 0) + nonzero++; + } + + if (nonzero != 0) + avg = avg / nonzero; + else + avg = 0; + + error = display_error(avg, max, min, expected_counts.val); + + __T("Error out of range!", ((error <= 1.0) && (error >= -1.0))); + + perf_evlist__close(evlist); + perf_evlist__delete(evlist); + + perf_thread_map__put(threads); + + return 0; +} + int test_evlist(int argc, char **argv) { __T_START; @@ -424,6 +581,7 @@ int test_evlist(int argc, char **argv) test_stat_thread_enable(); test_mmap_thread(); test_mmap_cpus(); + test_stat_multiplexing(); __T_END; return tests_failed == 0 ? 0 : -1; |