summaryrefslogtreecommitdiff
path: root/tools/lib/perf/tests/test-evlist.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-01-18 06:32:11 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-18 06:32:11 +0200
commit57d17378a4a042401b0c2fe211e5a0e3a276cb3d (patch)
treebe65e51a7b11cccb903b44708b98b3af47477304 /tools/lib/perf/tests/test-evlist.c
parentf0033681f0fe8421baf8db125e57fa6157824c2d (diff)
parent9bce13ea88f85344b765abe5d3dabdd0f44dc177 (diff)
Merge tag 'perf-tools-for-v5.17-2022-01-16' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tool updates from Arnaldo Carvalho de Melo: "New features: - Add 'trace' subcommand for 'perf ftrace', setting the stage for more 'perf ftrace' subcommands. Not using a subcommand yields the previous behaviour of 'perf ftrace'. - Add 'latency' subcommand to 'perf ftrace', that can use the function graph tracer or a BPF optimized one, via the -b/--use-bpf option. E.g.: $ sudo perf ftrace latency -a -T mutex_lock sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 4596 | ######################## | 1 - 2 us | 1680 | ######### | 2 - 4 us | 1106 | ##### | 4 - 8 us | 546 | ## | 8 - 16 us | 562 | ### | 16 - 32 us | 1 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - 2 ms | 0 | | 2 - 4 ms | 0 | | 4 - 8 ms | 0 | | 8 - 16 ms | 0 | | 16 - 32 ms | 0 | | 32 - 64 ms | 0 | | 64 - 128 ms | 0 | | 128 - 256 ms | 0 | | 256 - 512 ms | 0 | | 512 - 1024 ms | 0 | | 1 - ... s | 0 | | The original implementation of this command was in the bcc tool. - Support --cputype option for hybrid events in 'perf stat'. Improvements: - Call chain improvements for ARM64. - No need to do any affinity setup when profiling pids. - Reduce multiplexing with duration_time in 'perf stat' metrics. - Improve error message for uncore events, stating that some event groups are can only be used in system wide (-a) mode. - perf stat metric group leader fixes/improvements, including arch specific changes to better support Intel topdown events. - Probe non-deprecated sysfs path first, i.e. try the path /sys/devices/system/cpu/cpuN/topology/thread_siblings first, then the old /sys/devices/system/cpu/cpuN/topology/core_cpus. - Disable debuginfod by default in 'perf record', to avoid stalls on distros such as Fedora 35. - Use unbuffered output in 'perf bench' when pipe/tee'ing to a file. - Enable ignore_missing_thread in 'perf trace' Fixes: - Avoid TUI crash when navigating in the annotation of recursive functions. - Fix hex dump character output in 'perf script'. - Fix JSON indentation to 4 spaces standard in the ARM vendor event files. - Fix use after free in metric__new(). - Fix IS_ERR_OR_NULL() usage in the perf BPF loader. - Fix up cross-arch register support, i.e. when printing register names take into account the architecture where the perf.data file was collected. - Fix SMT fallback with large core counts. - Don't lower case MetricExpr when parsing JSON files so as not to lose info such as the ":G" event modifier in metrics. perf test: - Add basic stress test for sigtrap handling to 'perf test'. - Fix 'perf test' failures on s/390 - Enable system wide for metricgroups test in 'perf test´. - Use 3 digits for test numbering now we can have more tests. Arch specific: - Add events for Arm Neoverse N2 in the ARM JSON vendor event files - Support PERF_MEM_LVLNUM encodings in powerpc, that came from a single patch series, where I incorrectly merged the kernel bits, that were then reverted after coordination with Michael Ellerman and Stephen Rothwell. - Add ARM SPE total latency as PERF_SAMPLE_WEIGHT. - Update AMD documentation, with info on raw event encoding. - Add support for global and local variants of the "p_stage_cyc" sort key, applicable to perf.data files collected on powerpc. - Remove duplicate and incorrect aux size checks in the ARM CoreSight ETM code. Refactorings: - Add a perf_cpu abstraction to disambiguate CPUs and CPU map indexes, fixing problems along the way. - Document CPU map methods. UAPI sync: - Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy' - Sync UAPI files with the kernel sources: drm, msr-index, cpufeatures. Build system - Enable warnings through HOSTCFLAGS. - Drop requirement for libstdc++.so for libopencsd check libperf: - Make libperf adopt perf_counts_values__scale() from tools/perf/util/. - Add a stat multiplexing test to libperf" * tag 'perf-tools-for-v5.17-2022-01-16' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (115 commits) perf record: Disable debuginfod by default perf evlist: No need to do any affinity setup when profiling pids perf cpumap: Add is_dummy() method perf metric: Fix metric_leader perf cputopo: Fix CPU topology reading on s/390 perf metricgroup: Fix use after free in metric__new() libperf tests: Update a use of the new cpumap API perf arm: Fix off-by-one directory path tools arch x86: Sync the msr-index.h copy with the kernel sources tools headers cpufeatures: Sync with the kernel sources tools headers UAPI: Update tools's copy of drm.h header tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy' perf pmu-events: Don't lower case MetricExpr perf expr: Add debug logging for literals perf tools: Probe non-deprecated sysfs path 1st perf tools: Fix SMT fallback with large core counts perf cpumap: Give CPUs their own type perf stat: Correct first_shadow_cpu to return index perf script: Fix flipped index and cpu perf c2c: Use more intention revealing iterator ...
Diffstat (limited to 'tools/lib/perf/tests/test-evlist.c')
-rw-r--r--tools/lib/perf/tests/test-evlist.c162
1 files changed, 160 insertions, 2 deletions
diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c
index ce91a582f0e4..b3479dfa9a1c 100644
--- a/tools/lib/perf/tests/test-evlist.c
+++ b/tools/lib/perf/tests/test-evlist.c
@@ -21,6 +21,9 @@
#include "tests.h"
#include <internal/evsel.h>
+#define EVENT_NUM 15
+#define WAIT_COUNT 100000000UL
+
static int libperf_print(enum libperf_print_level level,
const char *fmt, va_list ap)
{
@@ -331,7 +334,8 @@ static int test_mmap_cpus(void)
};
cpu_set_t saved_mask;
char path[PATH_MAX];
- int id, err, cpu, tmp;
+ int id, err, tmp;
+ struct perf_cpu cpu;
union perf_event *event;
int count = 0;
@@ -374,7 +378,7 @@ static int test_mmap_cpus(void)
cpu_set_t mask;
CPU_ZERO(&mask);
- CPU_SET(cpu, &mask);
+ CPU_SET(cpu.cpu, &mask);
err = sched_setaffinity(0, sizeof(mask), &mask);
__T("sched_setaffinity failed", err == 0);
@@ -413,6 +417,159 @@ static int test_mmap_cpus(void)
return 0;
}
+static double display_error(long long average,
+ long long high,
+ long long low,
+ long long expected)
+{
+ double error;
+
+ error = (((double)average - expected) / expected) * 100.0;
+
+ __T_VERBOSE(" Expected: %lld\n", expected);
+ __T_VERBOSE(" High: %lld Low: %lld Average: %lld\n",
+ high, low, average);
+
+ __T_VERBOSE(" Average Error = %.2f%%\n", error);
+
+ return error;
+}
+
+static int test_stat_multiplexing(void)
+{
+ struct perf_counts_values expected_counts = { .val = 0 };
+ struct perf_counts_values counts[EVENT_NUM] = {{ .val = 0 },};
+ struct perf_thread_map *threads;
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_INSTRUCTIONS,
+ .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING,
+ .disabled = 1,
+ };
+ int err, i, nonzero = 0;
+ unsigned long count;
+ long long max = 0, min = 0, avg = 0;
+ double error = 0.0;
+ s8 scaled = 0;
+
+ /* read for non-multiplexing event count */
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ evsel = perf_evsel__new(&attr);
+ __T("failed to create evsel", evsel);
+
+ err = perf_evsel__open(evsel, NULL, threads);
+ __T("failed to open evsel", err == 0);
+
+ err = perf_evsel__enable(evsel);
+ __T("failed to enable evsel", err == 0);
+
+ /* wait loop */
+ count = WAIT_COUNT;
+ while (count--)
+ ;
+
+ perf_evsel__read(evsel, 0, 0, &expected_counts);
+ __T("failed to read value for evsel", expected_counts.val != 0);
+ __T("failed to read non-multiplexing event count",
+ expected_counts.ena == expected_counts.run);
+
+ err = perf_evsel__disable(evsel);
+ __T("failed to enable evsel", err == 0);
+
+ perf_evsel__close(evsel);
+ perf_evsel__delete(evsel);
+
+ perf_thread_map__put(threads);
+
+ /* read for multiplexing event count */
+ threads = perf_thread_map__new_dummy();
+ __T("failed to create threads", threads);
+
+ perf_thread_map__set_pid(threads, 0, 0);
+
+ evlist = perf_evlist__new();
+ __T("failed to create evlist", evlist);
+
+ for (i = 0; i < EVENT_NUM; i++) {
+ evsel = perf_evsel__new(&attr);
+ __T("failed to create evsel", evsel);
+
+ perf_evlist__add(evlist, evsel);
+ }
+ perf_evlist__set_maps(evlist, NULL, threads);
+
+ err = perf_evlist__open(evlist);
+ __T("failed to open evsel", err == 0);
+
+ perf_evlist__enable(evlist);
+
+ /* wait loop */
+ count = WAIT_COUNT;
+ while (count--)
+ ;
+
+ i = 0;
+ perf_evlist__for_each_evsel(evlist, evsel) {
+ perf_evsel__read(evsel, 0, 0, &counts[i]);
+ __T("failed to read value for evsel", counts[i].val != 0);
+ i++;
+ }
+
+ perf_evlist__disable(evlist);
+
+ min = counts[0].val;
+ for (i = 0; i < EVENT_NUM; i++) {
+ __T_VERBOSE("Event %2d -- Raw count = %lu, run = %lu, enable = %lu\n",
+ i, counts[i].val, counts[i].run, counts[i].ena);
+
+ perf_counts_values__scale(&counts[i], true, &scaled);
+ if (scaled == 1) {
+ __T_VERBOSE("\t Scaled count = %lu (%.2lf%%, %lu/%lu)\n",
+ counts[i].val,
+ (double)counts[i].run / (double)counts[i].ena * 100.0,
+ counts[i].run, counts[i].ena);
+ } else if (scaled == -1) {
+ __T_VERBOSE("\t Not Running\n");
+ } else {
+ __T_VERBOSE("\t Not Scaling\n");
+ }
+
+ if (counts[i].val > max)
+ max = counts[i].val;
+
+ if (counts[i].val < min)
+ min = counts[i].val;
+
+ avg += counts[i].val;
+
+ if (counts[i].val != 0)
+ nonzero++;
+ }
+
+ if (nonzero != 0)
+ avg = avg / nonzero;
+ else
+ avg = 0;
+
+ error = display_error(avg, max, min, expected_counts.val);
+
+ __T("Error out of range!", ((error <= 1.0) && (error >= -1.0)));
+
+ perf_evlist__close(evlist);
+ perf_evlist__delete(evlist);
+
+ perf_thread_map__put(threads);
+
+ return 0;
+}
+
int test_evlist(int argc, char **argv)
{
__T_START;
@@ -424,6 +581,7 @@ int test_evlist(int argc, char **argv)
test_stat_thread_enable();
test_mmap_thread();
test_mmap_cpus();
+ test_stat_multiplexing();
__T_END;
return tests_failed == 0 ? 0 : -1;