summary | refs | log | tree | commit | diff
path: root/tools/perf/bench
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/bench')
-rw-r--r-- tools/perf/bench/Build 44
-rw-r--r-- tools/perf/bench/bench.h 11
-rw-r--r-- tools/perf/bench/breakpoint.c 24
-rw-r--r-- tools/perf/bench/epoll-ctl.c 9
-rw-r--r-- tools/perf/bench/epoll-wait.c 16
-rw-r--r-- tools/perf/bench/evlist-open-close.c 77
-rw-r--r-- tools/perf/bench/find-bit-bench.c 10
-rw-r--r-- tools/perf/bench/futex-hash.c 9
-rw-r--r-- tools/perf/bench/futex-lock-pi.c 20
-rw-r--r-- tools/perf/bench/futex-requeue.c 21
-rw-r--r-- tools/perf/bench/futex-wake-parallel.c 33
-rw-r--r-- tools/perf/bench/futex-wake.c 19
-rw-r--r-- tools/perf/bench/futex.c 64
-rw-r--r-- tools/perf/bench/futex.h 5
-rw-r--r-- tools/perf/bench/inject-buildid.c 20
-rw-r--r-- tools/perf/bench/mem-functions.c 390
-rw-r--r-- tools/perf/bench/mem-memcpy-arch.h 2
-rw-r--r-- tools/perf/bench/mem-memcpy-x86-64-asm-def.h 8
-rw-r--r-- tools/perf/bench/mem-memcpy-x86-64-asm.S 2
-rw-r--r-- tools/perf/bench/mem-memset-arch.h 2
-rw-r--r-- tools/perf/bench/mem-memset-x86-64-asm-def.h 8
-rw-r--r-- tools/perf/bench/mem-memset-x86-64-asm.S 2
-rw-r--r-- tools/perf/bench/numa.c 55
-rw-r--r-- tools/perf/bench/pmu-scan.c 187
-rw-r--r-- tools/perf/bench/sched-messaging.c 120
-rw-r--r-- tools/perf/bench/sched-pipe.c 186
-rw-r--r-- tools/perf/bench/sched-seccomp-notify.c 178
-rw-r--r-- tools/perf/bench/synthesize.c 30
-rw-r--r-- tools/perf/bench/syscall.c 119
-rw-r--r-- tools/perf/bench/uprobe.c 213
30 files changed, 1585 insertions, 299 deletions
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 6b6155a8ad09..b558ab98719f 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -1,22 +1,26 @@
-perf-y += sched-messaging.o
-perf-y += sched-pipe.o
-perf-y += syscall.o
-perf-y += mem-functions.o
-perf-y += futex-hash.o
-perf-y += futex-wake.o
-perf-y += futex-wake-parallel.o
-perf-y += futex-requeue.o
-perf-y += futex-lock-pi.o
-perf-y += epoll-wait.o
-perf-y += epoll-ctl.o
-perf-y += synthesize.o
-perf-y += kallsyms-parse.o
-perf-y += find-bit-bench.o
-perf-y += inject-buildid.o
-perf-y += evlist-open-close.o
-perf-y += breakpoint.o
+perf-bench-y += sched-messaging.o
+perf-bench-y += sched-pipe.o
+perf-bench-y += sched-seccomp-notify.o
+perf-bench-y += syscall.o
+perf-bench-y += mem-functions.o
+perf-bench-y += futex.o
+perf-bench-y += futex-hash.o
+perf-bench-y += futex-wake.o
+perf-bench-y += futex-wake-parallel.o
+perf-bench-y += futex-requeue.o
+perf-bench-y += futex-lock-pi.o
+perf-bench-y += epoll-wait.o
+perf-bench-y += epoll-ctl.o
+perf-bench-y += synthesize.o
+perf-bench-y += kallsyms-parse.o
+perf-bench-y += find-bit-bench.o
+perf-bench-y += inject-buildid.o
+perf-bench-y += evlist-open-close.o
+perf-bench-y += breakpoint.o
+perf-bench-y += pmu-scan.o
+perf-bench-y += uprobe.o
-perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
-perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
+perf-bench-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
+perf-bench-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
-perf-$(CONFIG_NUMA) += numa.o
+perf-bench-$(CONFIG_NUMA) += numa.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index a5d49b3b6a09..8519eb5a42fa 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -21,9 +21,14 @@ extern struct timeval bench__start, bench__end, bench__runtime;
int bench_numa(int argc, const char **argv);
int bench_sched_messaging(int argc, const char **argv);
int bench_sched_pipe(int argc, const char **argv);
+int bench_sched_seccomp_notify(int argc, const char **argv);
int bench_syscall_basic(int argc, const char **argv);
+int bench_syscall_getpgid(int argc, const char **argv);
+int bench_syscall_fork(int argc, const char **argv);
+int bench_syscall_execve(int argc, const char **argv);
int bench_mem_memcpy(int argc, const char **argv);
int bench_mem_memset(int argc, const char **argv);
+int bench_mem_mmap(int argc, const char **argv);
int bench_mem_find_bit(int argc, const char **argv);
int bench_futex_hash(int argc, const char **argv);
int bench_futex_wake(int argc, const char **argv);
@@ -39,6 +44,12 @@ int bench_inject_build_id(int argc, const char **argv);
int bench_evlist_open_close(int argc, const char **argv);
int bench_breakpoint_thread(int argc, const char **argv);
int bench_breakpoint_enable(int argc, const char **argv);
+int bench_uprobe_baseline(int argc, const char **argv);
+int bench_uprobe_empty(int argc, const char **argv);
+int bench_uprobe_trace_printk(int argc, const char **argv);
+int bench_uprobe_empty_ret(int argc, const char **argv);
+int bench_uprobe_trace_printk_ret(int argc, const char **argv);
+int bench_pmu_scan(int argc, const char **argv);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/breakpoint.c b/tools/perf/bench/breakpoint.c
index 41385f89ffc7..dfd18f5db97d 100644
--- a/tools/perf/bench/breakpoint.c
+++ b/tools/perf/bench/breakpoint.c
@@ -47,6 +47,7 @@ struct breakpoint {
static int breakpoint_setup(void *addr)
{
struct perf_event_attr attr = { .size = 0, };
+ int fd;
attr.type = PERF_TYPE_BREAKPOINT;
attr.size = sizeof(attr);
@@ -56,7 +57,12 @@ static int breakpoint_setup(void *addr)
attr.bp_addr = (unsigned long)addr;
attr.bp_type = HW_BREAKPOINT_RW;
attr.bp_len = HW_BREAKPOINT_LEN_1;
- return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
+ fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
+
+ if (fd < 0)
+ fd = -errno;
+
+ return fd;
}
static void *passive_thread(void *arg)
@@ -122,8 +128,14 @@ int bench_breakpoint_thread(int argc, const char **argv)
for (i = 0; i < thread_params.nbreakpoints; i++) {
breakpoints[i].fd = breakpoint_setup(&breakpoints[i].watched);
- if (breakpoints[i].fd == -1)
+
+ if (breakpoints[i].fd < 0) {
+ if (breakpoints[i].fd == -ENODEV) {
+ printf("Skipping perf bench breakpoint thread: No hardware support\n");
+ return 0;
+ }
exit((perror("perf_event_open"), EXIT_FAILURE));
+ }
}
gettimeofday(&start, NULL);
for (i = 0; i < thread_params.nparallel; i++) {
@@ -196,8 +208,14 @@ int bench_breakpoint_enable(int argc, const char **argv)
exit(EXIT_FAILURE);
}
fd = breakpoint_setup(&watched);
- if (fd == -1)
+
+ if (fd < 0) {
+ if (fd == -ENODEV) {
+ printf("Skipping perf bench breakpoint enable: No hardware support\n");
+ return 0;
+ }
exit((perror("perf_event_open"), EXIT_FAILURE));
+ }
nthreads = enable_params.npassive + enable_params.nactive;
threads = calloc(nthreads, sizeof(threads[0]));
if (!threads)
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index 521d1ff97b06..d66d852b90e4 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -232,7 +232,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
if (!noaffinity)
pthread_attr_init(&thread_attr);
- nrcpus = perf_cpu_map__nr(cpu);
+ nrcpus = cpu__max_cpu().cpu;
cpuset = CPU_ALLOC(nrcpus);
BUG_ON(!cpuset);
size = CPU_ALLOC_SIZE(nrcpus);
@@ -330,7 +330,7 @@ int bench_epoll_ctl(int argc, const char **argv)
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
- cpu = perf_cpu_map__new(NULL);
+ cpu = perf_cpu_map__new_online_cpus();
if (!cpu)
goto errmem;
@@ -421,6 +421,11 @@ int bench_epoll_ctl(int argc, const char **argv)
print_summary();
close(epollfd);
+ perf_cpu_map__put(cpu);
+ for (i = 0; i < nthreads; i++)
+ free(worker[i].fdmap);
+
+ free(worker);
return ret;
errmem:
err(EXIT_FAILURE, "calloc");
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index c1cdf03c075d..20fe4f72b4af 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -309,7 +309,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
if (!noaffinity)
pthread_attr_init(&thread_attr);
- nrcpus = perf_cpu_map__nr(cpu);
+ nrcpus = cpu__max_cpu().cpu;
cpuset = CPU_ALLOC(nrcpus);
BUG_ON(!cpuset);
size = CPU_ALLOC_SIZE(nrcpus);
@@ -420,7 +420,12 @@ static int cmpworker(const void *p1, const void *p2)
struct worker *w1 = (struct worker *) p1;
struct worker *w2 = (struct worker *) p2;
- return w1->tid > w2->tid;
+
+ if (w1->tid > w2->tid)
+ return 1;
+ if (w1->tid < w2->tid)
+ return -1;
+ return 0;
}
int bench_epoll_wait(int argc, const char **argv)
@@ -444,7 +449,7 @@ int bench_epoll_wait(int argc, const char **argv)
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
- cpu = perf_cpu_map__new(NULL);
+ cpu = perf_cpu_map__new_online_cpus();
if (!cpu)
goto errmem;
@@ -549,6 +554,11 @@ int bench_epoll_wait(int argc, const char **argv)
print_summary();
close(epollfd);
+ perf_cpu_map__put(cpu);
+ for (i = 0; i < nthreads; i++)
+ free(worker[i].fdmap);
+
+ free(worker);
return ret;
errmem:
err(EXIT_FAILURE, "calloc");
diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c
index 5a27691469ed..faf9c34b4a5d 100644
--- a/tools/perf/bench/evlist-open-close.c
+++ b/tools/perf/bench/evlist-open-close.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
@@ -46,25 +47,6 @@ static struct record_opts opts = {
.ctl_fd_ack = -1,
};
-static const struct option options[] = {
- OPT_STRING('e', "event", &event_string, "event", "event selector. use 'perf list' to list available events"),
- OPT_INTEGER('n', "nr-events", &nr_events,
- "number of dummy events to create (default 1). If used with -e, it clones those events n times (1 = no change)"),
- OPT_INTEGER('i', "iterations", &iterations, "Number of iterations used to compute average (default=100)"),
- OPT_BOOLEAN('a', "all-cpus", &opts.target.system_wide, "system-wide collection from all CPUs"),
- OPT_STRING('C', "cpu", &opts.target.cpu_list, "cpu", "list of cpus where to open events"),
- OPT_STRING('p', "pid", &opts.target.pid, "pid", "record events on existing process id"),
- OPT_STRING('t', "tid", &opts.target.tid, "tid", "record events on existing thread id"),
- OPT_STRING('u', "uid", &opts.target.uid_str, "user", "user to profile"),
- OPT_BOOLEAN(0, "per-thread", &opts.target.per_thread, "use per-thread mmaps"),
- OPT_END()
-};
-
-static const char *const bench_usage[] = {
- "perf bench internals evlist-open-close <options>",
- NULL
-};
-
static int evlist__count_evsel_fds(struct evlist *evlist)
{
struct evsel *evsel;
@@ -76,7 +58,7 @@ static int evlist__count_evsel_fds(struct evlist *evlist)
return cnt;
}
-static struct evlist *bench__create_evlist(char *evstr)
+static struct evlist *bench__create_evlist(char *evstr, const char *uid_str)
{
struct parse_events_error err;
struct evlist *evlist = evlist__new();
@@ -97,6 +79,18 @@ static struct evlist *bench__create_evlist(char *evstr)
goto out_delete_evlist;
}
parse_events_error__exit(&err);
+ if (uid_str) {
+ uid_t uid = parse_uid(uid_str);
+
+ if (uid == UINT_MAX) {
+ pr_err("Invalid User: %s", uid_str);
+ ret = -EINVAL;
+ goto out_delete_evlist;
+ }
+ ret = parse_uid_filter(evlist, uid);
+ if (ret)
+ goto out_delete_evlist;
+ }
ret = evlist__create_maps(evlist, &opts.target);
if (ret < 0) {
pr_err("Not enough memory to create thread/cpu maps\n");
@@ -136,10 +130,10 @@ static int bench__do_evlist_open_close(struct evlist *evlist)
return 0;
}
-static int bench_evlist_open_close__run(char *evstr)
+static int bench_evlist_open_close__run(char *evstr, const char *uid_str)
{
// used to print statistics only
- struct evlist *evlist = bench__create_evlist(evstr);
+ struct evlist *evlist = bench__create_evlist(evstr, uid_str);
double time_average, time_stddev;
struct timeval start, end, diff;
struct stats time_stats;
@@ -161,7 +155,7 @@ static int bench_evlist_open_close__run(char *evstr)
for (i = 0; i < iterations; i++) {
pr_debug("Started iteration %d\n", i);
- evlist = bench__create_evlist(evstr);
+ evlist = bench__create_evlist(evstr, uid_str);
if (!evlist)
return -ENOMEM;
@@ -225,6 +219,30 @@ out_error:
int bench_evlist_open_close(int argc, const char **argv)
{
+ const char *uid_str = NULL;
+ const struct option options[] = {
+ OPT_STRING('e', "event", &event_string, "event",
+ "event selector. use 'perf list' to list available events"),
+ OPT_INTEGER('n', "nr-events", &nr_events,
+ "number of dummy events to create (default 1). If used with -e, it clones those events n times (1 = no change)"),
+ OPT_INTEGER('i', "iterations", &iterations,
+ "Number of iterations used to compute average (default=100)"),
+ OPT_BOOLEAN('a', "all-cpus", &opts.target.system_wide,
+ "system-wide collection from all CPUs"),
+ OPT_STRING('C', "cpu", &opts.target.cpu_list, "cpu",
+ "list of cpus where to open events"),
+ OPT_STRING('p', "pid", &opts.target.pid, "pid",
+ "record events on existing process id"),
+ OPT_STRING('t', "tid", &opts.target.tid, "tid",
+ "record events on existing thread id"),
+ OPT_STRING('u', "uid", &uid_str, "user", "user to profile"),
+ OPT_BOOLEAN(0, "per-thread", &opts.target.per_thread, "use per-thread mmaps"),
+ OPT_END()
+ };
+ const char *const bench_usage[] = {
+ "perf bench internals evlist-open-close <options>",
+ NULL
+ };
char *evstr, errbuf[BUFSIZ];
int err;
@@ -241,15 +259,8 @@ int bench_evlist_open_close(int argc, const char **argv)
goto out;
}
- err = target__parse_uid(&opts.target);
- if (err) {
- target__strerror(&opts.target, err, errbuf, sizeof(errbuf));
- pr_err("%s", errbuf);
- goto out;
- }
-
- /* Enable ignoring missing threads when -u/-p option is defined. */
- opts.ignore_missing_thread = opts.target.uid != UINT_MAX || opts.target.pid;
+ /* Enable ignoring missing threads when -p option is defined. */
+ opts.ignore_missing_thread = opts.target.pid;
evstr = bench__repeat_event_string(event_string, nr_events);
if (!evstr) {
@@ -257,7 +268,7 @@ int bench_evlist_open_close(int argc, const char **argv)
goto out;
}
- err = bench_evlist_open_close__run(evstr);
+ err = bench_evlist_open_close__run(evstr, uid_str);
free(evstr);
out:
diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c
index d103c3136983..e697c20951bc 100644
--- a/tools/perf/bench/find-bit-bench.c
+++ b/tools/perf/bench/find-bit-bench.c
@@ -37,7 +37,7 @@ static noinline void workload(int val)
accumulator++;
}
-#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__)
+#if defined(__i386__) || defined(__x86_64__)
static bool asm_test_bit(long nr, const unsigned long *addr)
{
bool oldbit;
@@ -61,7 +61,6 @@ static int do_for_each_set_bit(unsigned int num_bits)
double time_average, time_stddev;
unsigned int bit, i, j;
unsigned int set_bits, skip;
- unsigned int old;
init_stats(&fb_time_stats);
init_stats(&tb_time_stats);
@@ -73,7 +72,10 @@ static int do_for_each_set_bit(unsigned int num_bits)
__set_bit(i, to_test);
for (i = 0; i < outer_iterations; i++) {
- old = accumulator;
+#ifndef NDEBUG
+ unsigned int old = accumulator;
+#endif
+
gettimeofday(&start, NULL);
for (j = 0; j < inner_iterations; j++) {
for_each_set_bit(bit, to_test, num_bits)
@@ -85,7 +87,9 @@ static int do_for_each_set_bit(unsigned int num_bits)
runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
update_stats(&fb_time_stats, runtime_us);
+#ifndef NDEBUG
old = accumulator;
+#endif
gettimeofday(&start, NULL);
for (j = 0; j < inner_iterations; j++) {
for (bit = 0; bit < num_bits; bit++) {
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 2005a3fa3026..7e29f04da744 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -21,6 +21,7 @@
#include <linux/zalloc.h>
#include <sys/time.h>
#include <sys/mman.h>
+#include <sys/prctl.h>
#include <perf/cpumap.h>
#include "../util/mutex.h"
@@ -50,9 +51,11 @@ struct worker {
static struct bench_futex_parameters params = {
.nfutexes = 1024,
.runtime = 10,
+ .nbuckets = -1,
};
static const struct option options[] = {
+ OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('r', "runtime", &params.runtime, "Specify runtime (in seconds)"),
OPT_UINTEGER('f', "futexes", &params.nfutexes, "Specify amount of futexes per threads"),
@@ -118,6 +121,7 @@ static void print_summary(void)
printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
!params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
(int)bench__runtime.tv_sec);
+ futex_print_nbuckets(&params);
}
int bench_futex_hash(int argc, const char **argv)
@@ -138,7 +142,7 @@ int bench_futex_hash(int argc, const char **argv)
exit(EXIT_FAILURE);
}
- cpu = perf_cpu_map__new(NULL);
+ cpu = perf_cpu_map__new_online_cpus();
if (!cpu)
goto errmem;
@@ -161,6 +165,7 @@ int bench_futex_hash(int argc, const char **argv)
if (!params.fshared)
futex_flag = FUTEX_PRIVATE_FLAG;
+ futex_set_nbuckets_param(&params);
printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
getpid(), params.nthreads, params.nfutexes, params.fshared ? "shared":"private", params.runtime);
@@ -174,7 +179,7 @@ int bench_futex_hash(int argc, const char **argv)
pthread_attr_init(&thread_attr);
gettimeofday(&bench__start, NULL);
- nrcpus = perf_cpu_map__nr(cpu);
+ nrcpus = cpu__max_cpu().cpu;
cpuset = CPU_ALLOC(nrcpus);
BUG_ON(!cpuset);
size = CPU_ALLOC_SIZE(nrcpus);
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 2d0417949727..40640b674427 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -41,10 +41,12 @@ static struct stats throughput_stats;
static struct cond thread_parent, thread_worker;
static struct bench_futex_parameters params = {
+ .nbuckets = -1,
.runtime = 10,
};
static const struct option options[] = {
+ OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('r', "runtime", &params.runtime, "Specify runtime (in seconds)"),
OPT_BOOLEAN( 'M', "multi", &params.multi, "Use multiple futexes"),
@@ -67,6 +69,7 @@ static void print_summary(void)
printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
!params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
(int)bench__runtime.tv_sec);
+ futex_print_nbuckets(&params);
}
static void toggle_done(int sig __maybe_unused,
@@ -118,12 +121,11 @@ static void *workerfn(void *arg)
return NULL;
}
-static void create_threads(struct worker *w, pthread_attr_t thread_attr,
- struct perf_cpu_map *cpu)
+static void create_threads(struct worker *w, struct perf_cpu_map *cpu)
{
cpu_set_t *cpuset;
unsigned int i;
- int nrcpus = perf_cpu_map__nr(cpu);
+ int nrcpus = cpu__max_cpu().cpu;
size_t size;
threads_starting = params.nthreads;
@@ -133,6 +135,9 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
size = CPU_ALLOC_SIZE(nrcpus);
for (i = 0; i < params.nthreads; i++) {
+ pthread_attr_t thread_attr;
+
+ pthread_attr_init(&thread_attr);
worker[i].tid = i;
if (params.multi) {
@@ -154,6 +159,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
}
+ pthread_attr_destroy(&thread_attr);
}
CPU_FREE(cpuset);
}
@@ -163,14 +169,13 @@ int bench_futex_lock_pi(int argc, const char **argv)
int ret = 0;
unsigned int i;
struct sigaction act;
- pthread_attr_t thread_attr;
struct perf_cpu_map *cpu;
argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
if (argc)
goto err;
- cpu = perf_cpu_map__new(NULL);
+ cpu = perf_cpu_map__new_online_cpus();
if (!cpu)
err(EXIT_FAILURE, "calloc");
@@ -201,13 +206,12 @@ int bench_futex_lock_pi(int argc, const char **argv)
mutex_init(&thread_lock);
cond_init(&thread_parent);
cond_init(&thread_worker);
+ futex_set_nbuckets_param(&params);
threads_starting = params.nthreads;
- pthread_attr_init(&thread_attr);
gettimeofday(&bench__start, NULL);
- create_threads(worker, thread_attr, cpu);
- pthread_attr_destroy(&thread_attr);
+ create_threads(worker, cpu);
mutex_lock(&thread_lock);
while (threads_starting)
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index 69ad896f556c..0748b0fd689e 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -42,6 +42,7 @@ static unsigned int threads_starting;
static int futex_flag = 0;
static struct bench_futex_parameters params = {
+ .nbuckets = -1,
/*
* How many tasks to requeue at a time.
* Default to 1 in order to make the kernel work more.
@@ -50,6 +51,7 @@ static struct bench_futex_parameters params = {
};
static const struct option options[] = {
+ OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('q', "nrequeue", &params.nrequeue, "Specify amount of threads to requeue at once"),
OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"),
@@ -77,6 +79,7 @@ static void print_summary(void)
params.nthreads,
requeuetime_avg / USEC_PER_MSEC,
rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
+ futex_print_nbuckets(&params);
}
static void *workerfn(void *arg __maybe_unused)
@@ -121,12 +124,11 @@ static void *workerfn(void *arg __maybe_unused)
return NULL;
}
-static void block_threads(pthread_t *w,
- pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
+static void block_threads(pthread_t *w, struct perf_cpu_map *cpu)
{
cpu_set_t *cpuset;
unsigned int i;
- int nrcpus = perf_cpu_map__nr(cpu);
+ int nrcpus = cpu__max_cpu().cpu;
size_t size;
threads_starting = params.nthreads;
@@ -137,6 +139,9 @@ static void block_threads(pthread_t *w,
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
+ pthread_attr_t thread_attr;
+
+ pthread_attr_init(&thread_attr);
CPU_ZERO_S(size, cpuset);
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
@@ -149,6 +154,7 @@ static void block_threads(pthread_t *w,
CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
}
+ pthread_attr_destroy(&thread_attr);
}
CPU_FREE(cpuset);
}
@@ -165,14 +171,13 @@ int bench_futex_requeue(int argc, const char **argv)
int ret = 0;
unsigned int i, j;
struct sigaction act;
- pthread_attr_t thread_attr;
struct perf_cpu_map *cpu;
argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
if (argc)
goto err;
- cpu = perf_cpu_map__new(NULL);
+ cpu = perf_cpu_map__new_online_cpus();
if (!cpu)
err(EXIT_FAILURE, "cpu_map__new");
@@ -202,6 +207,8 @@ int bench_futex_requeue(int argc, const char **argv)
if (params.broadcast)
params.nrequeue = params.nthreads;
+ futex_set_nbuckets_param(&params);
+
printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), "
"%d at a time.\n\n", getpid(), params.nthreads,
params.fshared ? "shared":"private", &futex1,
@@ -209,7 +216,6 @@ int bench_futex_requeue(int argc, const char **argv)
init_stats(&requeued_stats);
init_stats(&requeuetime_stats);
- pthread_attr_init(&thread_attr);
mutex_init(&thread_lock);
cond_init(&thread_parent);
cond_init(&thread_worker);
@@ -219,7 +225,7 @@ int bench_futex_requeue(int argc, const char **argv)
struct timeval start, end, runtime;
/* create, launch & block all threads */
- block_threads(worker, thread_attr, cpu);
+ block_threads(worker, cpu);
/* make sure all threads are already blocked */
mutex_lock(&thread_lock);
@@ -301,7 +307,6 @@ int bench_futex_requeue(int argc, const char **argv)
cond_destroy(&thread_parent);
cond_destroy(&thread_worker);
mutex_destroy(&thread_lock);
- pthread_attr_destroy(&thread_attr);
print_summary();
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 6682e49d0ee0..6aede7c46b33 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -57,9 +57,12 @@ static struct stats waketime_stats, wakeup_stats;
static unsigned int threads_starting;
static int futex_flag = 0;
-static struct bench_futex_parameters params;
+static struct bench_futex_parameters params = {
+ .nbuckets = -1,
+};
static const struct option options[] = {
+ OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('w', "nwakers", &params.nwakes, "Specify amount of waking threads"),
OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"),
@@ -95,10 +98,12 @@ static void *waking_workerfn(void *arg)
return NULL;
}
-static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
+static void wakeup_threads(struct thread_data *td)
{
unsigned int i;
+ pthread_attr_t thread_attr;
+ pthread_attr_init(&thread_attr);
pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
pthread_barrier_init(&barrier, NULL, params.nwakes + 1);
@@ -122,6 +127,7 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
err(EXIT_FAILURE, "pthread_join");
pthread_barrier_destroy(&barrier);
+ pthread_attr_destroy(&thread_attr);
}
static void *blocked_workerfn(void *arg __maybe_unused)
@@ -142,12 +148,11 @@ static void *blocked_workerfn(void *arg __maybe_unused)
return NULL;
}
-static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
- struct perf_cpu_map *cpu)
+static void block_threads(pthread_t *w, struct perf_cpu_map *cpu)
{
cpu_set_t *cpuset;
unsigned int i;
- int nrcpus = perf_cpu_map__nr(cpu);
+ int nrcpus = cpu__max_cpu().cpu;
size_t size;
threads_starting = params.nthreads;
@@ -158,6 +163,9 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
+ pthread_attr_t thread_attr;
+
+ pthread_attr_init(&thread_attr);
CPU_ZERO_S(size, cpuset);
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
@@ -170,6 +178,7 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
}
+ pthread_attr_destroy(&thread_attr);
}
CPU_FREE(cpuset);
}
@@ -212,6 +221,7 @@ static void print_summary(void)
params.nthreads,
waketime_avg / USEC_PER_MSEC,
rel_stddev_stats(waketime_stddev, waketime_avg));
+ futex_print_nbuckets(&params);
}
@@ -238,7 +248,6 @@ int bench_futex_wake_parallel(int argc, const char **argv)
int ret = 0;
unsigned int i, j;
struct sigaction act;
- pthread_attr_t thread_attr;
struct thread_data *waking_worker;
struct perf_cpu_map *cpu;
@@ -259,7 +268,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
err(EXIT_FAILURE, "mlockall");
}
- cpu = perf_cpu_map__new(NULL);
+ cpu = perf_cpu_map__new_online_cpus();
if (!cpu)
err(EXIT_FAILURE, "calloc");
@@ -286,6 +295,8 @@ int bench_futex_wake_parallel(int argc, const char **argv)
if (!params.fshared)
futex_flag = FUTEX_PRIVATE_FLAG;
+ futex_set_nbuckets_param(&params);
+
printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
"futex %p), %d threads waking up %d at a time.\n\n",
getpid(), params.nthreads, params.fshared ? "shared":"private",
@@ -294,7 +305,6 @@ int bench_futex_wake_parallel(int argc, const char **argv)
init_stats(&wakeup_stats);
init_stats(&waketime_stats);
- pthread_attr_init(&thread_attr);
mutex_init(&thread_lock);
cond_init(&thread_parent);
cond_init(&thread_worker);
@@ -305,7 +315,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
err(EXIT_FAILURE, "calloc");
/* create, launch & block all threads */
- block_threads(blocked_worker, thread_attr, cpu);
+ block_threads(blocked_worker, cpu);
/* make sure all threads are already blocked */
mutex_lock(&thread_lock);
@@ -314,10 +324,10 @@ int bench_futex_wake_parallel(int argc, const char **argv)
cond_broadcast(&thread_worker);
mutex_unlock(&thread_lock);
- usleep(100000);
+ usleep(200000);
/* Ok, all threads are patiently blocked, start waking folks up */
- wakeup_threads(waking_worker, thread_attr);
+ wakeup_threads(waking_worker);
for (i = 0; i < params.nthreads; i++) {
ret = pthread_join(blocked_worker[i], NULL);
@@ -336,7 +346,6 @@ int bench_futex_wake_parallel(int argc, const char **argv)
cond_destroy(&thread_parent);
cond_destroy(&thread_worker);
mutex_destroy(&thread_lock);
- pthread_attr_destroy(&thread_attr);
print_summary();
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 9ecab6620a87..a31fc1563862 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -42,6 +42,7 @@ static unsigned int threads_starting;
static int futex_flag = 0;
static struct bench_futex_parameters params = {
+ .nbuckets = -1,
/*
* How many wakeups to do at a time.
* Default to 1 in order to make the kernel work more.
@@ -50,6 +51,7 @@ static struct bench_futex_parameters params = {
};
static const struct option options[] = {
+ OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"),
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"),
OPT_UINTEGER('w', "nwakes", &params.nwakes, "Specify amount of threads to wake at once"),
OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"),
@@ -93,15 +95,15 @@ static void print_summary(void)
params.nthreads,
waketime_avg / USEC_PER_MSEC,
rel_stddev_stats(waketime_stddev, waketime_avg));
+ futex_print_nbuckets(&params);
}
-static void block_threads(pthread_t *w,
- pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
+static void block_threads(pthread_t *w, struct perf_cpu_map *cpu)
{
cpu_set_t *cpuset;
unsigned int i;
size_t size;
- int nrcpus = perf_cpu_map__nr(cpu);
+ int nrcpus = cpu__max_cpu().cpu;
threads_starting = params.nthreads;
cpuset = CPU_ALLOC(nrcpus);
@@ -110,6 +112,9 @@ static void block_threads(pthread_t *w,
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
+ pthread_attr_t thread_attr;
+
+ pthread_attr_init(&thread_attr);
CPU_ZERO_S(size, cpuset);
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
@@ -122,6 +127,7 @@ static void block_threads(pthread_t *w,
CPU_FREE(cpuset);
err(EXIT_FAILURE, "pthread_create");
}
+ pthread_attr_destroy(&thread_attr);
}
CPU_FREE(cpuset);
}
@@ -138,7 +144,6 @@ int bench_futex_wake(int argc, const char **argv)
int ret = 0;
unsigned int i, j;
struct sigaction act;
- pthread_attr_t thread_attr;
struct perf_cpu_map *cpu;
argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
@@ -147,7 +152,7 @@ int bench_futex_wake(int argc, const char **argv)
exit(EXIT_FAILURE);
}
- cpu = perf_cpu_map__new(NULL);
+ cpu = perf_cpu_map__new_online_cpus();
if (!cpu)
err(EXIT_FAILURE, "calloc");
@@ -178,7 +183,6 @@ int bench_futex_wake(int argc, const char **argv)
init_stats(&wakeup_stats);
init_stats(&waketime_stats);
- pthread_attr_init(&thread_attr);
mutex_init(&thread_lock);
cond_init(&thread_parent);
cond_init(&thread_worker);
@@ -188,7 +192,7 @@ int bench_futex_wake(int argc, const char **argv)
struct timeval start, end, runtime;
/* create, launch & block all threads */
- block_threads(worker, thread_attr, cpu);
+ block_threads(worker, cpu);
/* make sure all threads are already blocked */
mutex_lock(&thread_lock);
@@ -228,7 +232,6 @@ int bench_futex_wake(int argc, const char **argv)
cond_destroy(&thread_parent);
cond_destroy(&thread_worker);
mutex_destroy(&thread_lock);
- pthread_attr_destroy(&thread_attr);
print_summary();
diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c
new file mode 100644
index 000000000000..1968c9d00b5b
--- /dev/null
+++ b/tools/perf/bench/futex.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+
+#include "futex.h"
+
+#ifndef PR_FUTEX_HASH
+#define PR_FUTEX_HASH 78
+# define PR_FUTEX_HASH_SET_SLOTS 1
+# define PR_FUTEX_HASH_GET_SLOTS 2
+#endif // PR_FUTEX_HASH
+
+/*
+ * Apply the requested futex hash bucket count to the current process.
+ *
+ * A negative params->nbuckets is ignored. Any other value is passed to
+ * prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS); if that call fails the
+ * failure is reported and the program exits with EXIT_FAILURE.
+ */
+void futex_set_nbuckets_param(struct bench_futex_parameters *params)
+{
+	int ret, saved_errno;
+
+	if (params->nbuckets < 0)
+		return;
+
+	ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, 0);
+	if (!ret)
+		return;
+
+	/* printf() may overwrite errno, but err() below still has to report it. */
+	saved_errno = errno;
+	printf("Requesting %d hash buckets failed: %d/%m\n",
+	       params->nbuckets, ret);
+	errno = saved_errno;
+	err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
+}
+
+/*
+ * Print one line describing the futex hash configuration in effect.
+ *
+ * The bucket count is read back with
+ * prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS). When params->nbuckets is
+ * non-negative the value read back must equal it; a failed query or a
+ * mismatch terminates the program with EXIT_FAILURE. When params->nbuckets
+ * is negative, a result <= 0 is reported as the global hash and a positive
+ * result as an auto-resized bucket count.
+ */
+void futex_print_nbuckets(struct bench_futex_parameters *params)
+{
+	char *futex_hash_mode;
+	int nbuckets, len;
+
+	nbuckets = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS);
+	if (params->nbuckets >= 0) {
+		if (nbuckets < 0) {
+			/* Keep the prctl() errno intact across printf(). */
+			int saved_errno = errno;
+
+			printf("Can't query number of buckets: %m\n");
+			errno = saved_errno;
+			err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
+		}
+		if (nbuckets != params->nbuckets) {
+			printf("Requested number of hash buckets is not currently used.\n");
+			printf("Requested: %d in usage: %d\n", params->nbuckets, nbuckets);
+			/*
+			 * prctl() succeeded on this path, so errno holds nothing
+			 * meaningful: use errx() to avoid printing a stale error.
+			 */
+			errx(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
+		}
+		if (params->nbuckets == 0)
+			len = asprintf(&futex_hash_mode, "Futex hashing: global hash");
+		else
+			len = asprintf(&futex_hash_mode, "Futex hashing: %d hash buckets",
+				       params->nbuckets);
+	} else {
+		if (nbuckets <= 0) {
+			len = asprintf(&futex_hash_mode, "Futex hashing: global hash");
+		} else {
+			len = asprintf(&futex_hash_mode, "Futex hashing: auto resized to %d buckets",
+				       nbuckets);
+		}
+	}
+	/* asprintf() returns a negative value when it could not build the string. */
+	if (len < 0)
+		err(EXIT_FAILURE, "ENOMEM, futex_hash_mode");
+	printf("%s\n", futex_hash_mode);
+	free(futex_hash_mode);
+}
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index ebdc2b032afc..fcb72d682cf8 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -8,6 +8,7 @@
#ifndef _FUTEX_H
#define _FUTEX_H
+#include <stdbool.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
@@ -25,6 +26,7 @@ struct bench_futex_parameters {
unsigned int nfutexes;
unsigned int nwakes;
unsigned int nrequeue;
+ int nbuckets;
};
/**
@@ -143,4 +145,7 @@ futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
val, opflags);
}
+void futex_set_nbuckets_param(struct bench_futex_parameters *params);
+void futex_print_nbuckets(struct bench_futex_parameters *params);
+
#endif /* _FUTEX_H */
diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c
index 4561bda0ce6a..aad572a78d7f 100644
--- a/tools/perf/bench/inject-buildid.c
+++ b/tools/perf/bench/inject-buildid.c
@@ -12,6 +12,7 @@
#include <linux/time64.h>
#include <linux/list.h>
#include <linux/err.h>
+#include <linux/zalloc.h>
#include <internal/lib.h>
#include <subcmd/parse-options.h>
@@ -51,7 +52,7 @@ struct bench_dso {
static int nr_dsos;
static struct bench_dso *dsos;
-extern int cmd_inject(int argc, const char *argv[]);
+extern int main(int argc, const char **argv);
static const struct option options[] = {
OPT_UINTEGER('i', "iterations", &iterations,
@@ -79,7 +80,7 @@ static int add_dso(const char *fpath, const struct stat *sb __maybe_unused,
int typeflag, struct FTW *ftwbuf __maybe_unused)
{
struct bench_dso *dso = &dsos[nr_dsos];
- struct build_id bid;
+ struct build_id bid = { .size = 0, };
if (typeflag == FTW_D || typeflag == FTW_SL)
return 0;
@@ -122,7 +123,7 @@ static void release_dso(void)
for (i = 0; i < nr_dsos; i++) {
struct bench_dso *dso = &dsos[i];
- free(dso->name);
+ zfree(&dso->name);
}
free(dsos);
}
@@ -293,7 +294,7 @@ static int setup_injection(struct bench_data *data, bool build_id_all)
if (data->pid == 0) {
const char **inject_argv;
- int inject_argc = 2;
+ int inject_argc = 3;
close(data->input_pipe[1]);
close(data->output_pipe[0]);
@@ -317,15 +318,16 @@ static int setup_injection(struct bench_data *data, bool build_id_all)
if (inject_argv == NULL)
exit(1);
- inject_argv[0] = strdup("inject");
- inject_argv[1] = strdup("-b");
+ inject_argv[0] = strdup("perf");
+ inject_argv[1] = strdup("inject");
+ inject_argv[2] = strdup("-b");
if (build_id_all)
- inject_argv[2] = strdup("--buildid-all");
+ inject_argv[3] = strdup("--buildid-all");
/* signal that we're ready to go */
close(ready_pipe[1]);
- cmd_inject(inject_argc, inject_argv);
+ main(inject_argc, inject_argv);
exit(0);
}
@@ -361,7 +363,7 @@ static int inject_build_id(struct bench_data *data, u64 *max_rss)
return -1;
for (i = 0; i < nr_mmaps; i++) {
- int idx = rand() % (nr_dsos - 1);
+ int idx = rand() % nr_dsos;
struct bench_dso *dso = &dsos[idx];
u64 timestamp = rand() % 1000000;
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 19d45c377ac1..2908a3a796c9 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -22,27 +22,39 @@
#include <string.h>
#include <unistd.h>
#include <sys/time.h>
+#include <sys/mman.h>
#include <errno.h>
#include <linux/time64.h>
-#include <linux/zalloc.h>
+#include <linux/log2.h>
#define K 1024
+#define PAGE_SHIFT_4KB 12
+#define PAGE_SHIFT_2MB 21
+#define PAGE_SHIFT_1GB 30
+
static const char *size_str = "1MB";
static const char *function_str = "all";
-static int nr_loops = 1;
+static const char *page_size_str = "4KB";
+static const char *chunk_size_str = "0";
+static unsigned int nr_loops = 1;
static bool use_cycles;
static int cycles_fd;
+static unsigned int seed;
-static const struct option options[] = {
+static const struct option bench_common_options[] = {
OPT_STRING('s', "size", &size_str, "1MB",
"Specify the size of the memory buffers. "
"Available units: B, KB, MB, GB and TB (case insensitive)"),
+ OPT_STRING('p', "page", &page_size_str, "4KB",
+ "Specify page-size for mapping memory buffers. "
+ "Available sizes: 4KB, 2MB, 1GB (case insensitive)"),
+
OPT_STRING('f', "function", &function_str, "all",
"Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
- OPT_INTEGER('l', "nr_loops", &nr_loops,
+ OPT_UINTEGER('l', "nr_loops", &nr_loops,
"Specify the number of loops to run. (default: 1)"),
OPT_BOOLEAN('c', "cycles", &use_cycles,
@@ -51,15 +63,56 @@ static const struct option options[] = {
OPT_END()
};
+static const struct option bench_mem_options[] = {
+ OPT_STRING('k', "chunk", &chunk_size_str, "0",
+ "Specify the chunk-size for each invocation. "
+ "Available units: B, KB, MB, GB and TB (case insensitive)"),
+ OPT_PARENT(bench_common_options),
+ OPT_END()
+};
+
+union bench_clock {
+ u64 cycles;
+ struct timeval tv;
+};
+
+struct bench_params {
+ size_t size;
+ size_t size_total;
+ size_t chunk_size;
+ unsigned int nr_loops;
+ unsigned int page_shift;
+ unsigned int seed;
+};
+
+struct bench_mem_info {
+ const struct function *functions;
+ int (*do_op)(const struct function *r, struct bench_params *p,
+ void *src, void *dst, union bench_clock *rt);
+ const char *const *usage;
+ const struct option *options;
+ bool alloc_src;
+};
+
+typedef bool (*mem_init_t)(struct bench_mem_info *, struct bench_params *,
+ void **, void **);
+typedef void (*mem_fini_t)(struct bench_mem_info *, struct bench_params *,
+ void **, void **);
typedef void *(*memcpy_t)(void *, const void *, size_t);
typedef void *(*memset_t)(void *, int, size_t);
+typedef void (*mmap_op_t)(void *, size_t, unsigned int, bool);
struct function {
const char *name;
const char *desc;
- union {
- memcpy_t memcpy;
- memset_t memset;
+ struct {
+ mem_init_t init;
+ mem_fini_t fini;
+ union {
+ memcpy_t memcpy;
+ memset_t memset;
+ mmap_op_t mmap_op;
+ };
} fn;
};
@@ -91,6 +144,34 @@ static u64 get_cycles(void)
return clk;
}
+/*
+ * Sample the active clock into @t: get_cycles() when use_cycles is set,
+ * otherwise a gettimeofday() timestamp.
+ */
+static void clock_get(union bench_clock *t)
+{
+	if (use_cycles) {
+		t->cycles = get_cycles();
+		return;
+	}
+
+	BUG_ON(gettimeofday(&t->tv, NULL));
+}
+
+/*
+ * Return @e minus @s, using the member of union bench_clock that
+ * use_cycles selects.
+ */
+static union bench_clock clock_diff(union bench_clock *s, union bench_clock *e)
+{
+	union bench_clock delta;
+
+	if (!use_cycles) {
+		timersub(&e->tv, &s->tv, &delta.tv);
+		return delta;
+	}
+
+	delta.cycles = e->cycles - s->cycles;
+	return delta;
+}
+
+/*
+ * Add @b onto @a in place, using the member of union bench_clock that
+ * use_cycles selects.
+ */
+static void clock_accum(union bench_clock *a, union bench_clock *b)
+{
+	if (!use_cycles) {
+		timeradd(&a->tv, &b->tv, &a->tv);
+		return;
+	}
+
+	a->cycles += b->cycles;
+}
+
static double timeval2double(struct timeval *ts)
{
return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
@@ -107,54 +188,40 @@ static double timeval2double(struct timeval *ts)
printf(" %14lf GB/sec\n", x / K / K / K); \
} while (0)
-struct bench_mem_info {
- const struct function *functions;
- u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
- double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
- const char *const *usage;
- bool alloc_src;
-};
-
-static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
+static void __bench_mem_function(struct bench_mem_info *info, struct bench_params *p,
+ int r_idx)
{
const struct function *r = &info->functions[r_idx];
double result_bps = 0.0;
- u64 result_cycles = 0;
- void *src = NULL, *dst = zalloc(size);
+ union bench_clock rt = { 0 };
+ void *src = NULL, *dst = NULL;
printf("# function '%s' (%s)\n", r->name, r->desc);
- if (dst == NULL)
- goto out_alloc_failed;
-
- if (info->alloc_src) {
- src = zalloc(size);
- if (src == NULL)
- goto out_alloc_failed;
- }
+ if (r->fn.init && r->fn.init(info, p, &src, &dst))
+ goto out_init_failed;
if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Copying %s bytes ...\n\n", size_str);
- if (use_cycles) {
- result_cycles = info->do_cycles(r, size, src, dst);
- } else {
- result_bps = info->do_gettimeofday(r, size, src, dst);
- }
+ if (info->do_op(r, p, src, dst, &rt))
+ goto out_test_failed;
switch (bench_format) {
case BENCH_FORMAT_DEFAULT:
if (use_cycles) {
- printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
+ printf(" %14lf cycles/byte\n", (double)rt.cycles/(double)p->size_total);
} else {
+ result_bps = (double)p->size_total/timeval2double(&rt.tv);
print_bps(result_bps);
}
break;
case BENCH_FORMAT_SIMPLE:
if (use_cycles) {
- printf("%lf\n", (double)result_cycles/size_total);
+ printf("%lf\n", (double)rt.cycles/(double)p->size_total);
} else {
+ result_bps = (double)p->size_total/timeval2double(&rt.tv);
printf("%lf\n", result_bps);
}
break;
@@ -164,22 +231,23 @@ static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t
break;
}
+out_test_failed:
out_free:
- free(src);
- free(dst);
+ if (r->fn.fini) r->fn.fini(info, p, &src, &dst);
return;
-out_alloc_failed:
- printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
+out_init_failed:
+ printf("# Memory allocation failed - maybe size (%s) %s?\n", size_str,
+ p->page_shift != PAGE_SHIFT_4KB ? "has insufficient hugepages" : "is too large");
goto out_free;
}
static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
{
int i;
- size_t size;
- double size_total;
+ struct bench_params p = { 0 };
+ unsigned int page_size;
- argc = parse_options(argc, argv, options, info->usage, 0);
+ argc = parse_options(argc, argv, info->options, info->usage, 0);
if (use_cycles) {
i = init_cycles();
@@ -189,17 +257,37 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
}
}
- size = (size_t)perf_atoll((char *)size_str);
- size_total = (double)size * nr_loops;
+ p.nr_loops = nr_loops;
+ p.size = (size_t)perf_atoll((char *)size_str);
- if ((s64)size <= 0) {
+ if ((s64)p.size <= 0) {
fprintf(stderr, "Invalid size:%s\n", size_str);
return 1;
}
+ p.size_total = p.size * p.nr_loops;
+
+ p.chunk_size = (size_t)perf_atoll((char *)chunk_size_str);
+ if ((s64)p.chunk_size < 0 || (s64)p.chunk_size > (s64)p.size) {
+ fprintf(stderr, "Invalid chunk_size:%s\n", chunk_size_str);
+ return 1;
+ }
+ if (!p.chunk_size)
+ p.chunk_size = p.size;
+
+ page_size = (unsigned int)perf_atoll((char *)page_size_str);
+ if (page_size != (1 << PAGE_SHIFT_4KB) &&
+ page_size != (1 << PAGE_SHIFT_2MB) &&
+ page_size != (1 << PAGE_SHIFT_1GB)) {
+ fprintf(stderr, "Invalid page-size:%s\n", page_size_str);
+ return 1;
+ }
+ p.page_shift = ilog2(page_size);
+
+ p.seed = seed;
if (!strncmp(function_str, "all", 3)) {
for (i = 0; info->functions[i].name; i++)
- __bench_mem_function(info, i, size, size_total);
+ __bench_mem_function(info, &p, i);
return 0;
}
@@ -218,7 +306,7 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
return 1;
}
- __bench_mem_function(info, i, size, size_total);
+ __bench_mem_function(info, &p, i);
return 0;
}
@@ -235,47 +323,81 @@ static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
fn(dst, src, size);
}
-static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+static int do_memcpy(const struct function *r, struct bench_params *p,
+ void *src, void *dst, union bench_clock *rt)
{
- u64 cycle_start = 0ULL, cycle_end = 0ULL;
+ union bench_clock start, end;
memcpy_t fn = r->fn.memcpy;
- int i;
- memcpy_prefault(fn, size, src, dst);
+ memcpy_prefault(fn, p->size, src, dst);
+
+ clock_get(&start);
+ for (unsigned int i = 0; i < p->nr_loops; ++i)
+ for (size_t off = 0; off < p->size; off += p->chunk_size)
+ fn(dst + off, src + off, min(p->chunk_size, p->size - off));
+ clock_get(&end);
- cycle_start = get_cycles();
- for (i = 0; i < nr_loops; ++i)
- fn(dst, src, size);
- cycle_end = get_cycles();
+ *rt = clock_diff(&start, &end);
- return cycle_end - cycle_start;
+ return 0;
}
-static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
+static void *bench_mmap(size_t size, bool populate, unsigned int page_shift)
{
- struct timeval tv_start, tv_end, tv_diff;
- memcpy_t fn = r->fn.memcpy;
- int i;
+ void *p;
+ int extra = populate ? MAP_POPULATE : 0;
+
+ if (page_shift != PAGE_SHIFT_4KB)
+ extra |= MAP_HUGETLB | (page_shift << MAP_HUGE_SHIFT);
+
+ p = mmap(NULL, size, PROT_READ|PROT_WRITE,
+ extra | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+
+ return p == MAP_FAILED ? NULL : p;
+}
+
+/* Unmap the @size byte mapping at @p; a NULL @p is silently ignored. */
+static void bench_munmap(void *p, size_t size)
+{
+	if (!p)
+		return;
+
+	munmap(p, size);
+}
+
+static bool mem_alloc(struct bench_mem_info *info, struct bench_params *p,
+ void **src, void **dst)
+{
+ bool failed;
- memcpy_prefault(fn, size, src, dst);
+ *dst = bench_mmap(p->size, true, p->page_shift);
+ failed = *dst == NULL;
- BUG_ON(gettimeofday(&tv_start, NULL));
- for (i = 0; i < nr_loops; ++i)
- fn(dst, src, size);
- BUG_ON(gettimeofday(&tv_end, NULL));
+ if (info->alloc_src) {
+ *src = bench_mmap(p->size, true, p->page_shift);
+ failed = failed || *src == NULL;
+ }
+
+ return failed;
+}
- timersub(&tv_end, &tv_start, &tv_diff);
+static void mem_free(struct bench_mem_info *info __maybe_unused,
+ struct bench_params *p __maybe_unused,
+ void **src, void **dst)
+{
+ bench_munmap(*dst, p->size);
+ bench_munmap(*src, p->size);
- return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
+ *dst = *src = NULL;
}
struct function memcpy_functions[] = {
{ .name = "default",
.desc = "Default memcpy() provided by glibc",
+ .fn.init = mem_alloc,
+ .fn.fini = mem_free,
.fn.memcpy = memcpy },
#ifdef HAVE_ARCH_X86_64_SUPPORT
-# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
+# define MEMCPY_FN(_fn, _init, _fini, _name, _desc) \
+ {.name = _name, .desc = _desc, .fn.memcpy = _fn, .fn.init = _init, .fn.fini = _fini },
# include "mem-memcpy-x86-64-asm-def.h"
# undef MEMCPY_FN
#endif
@@ -292,55 +414,36 @@ int bench_mem_memcpy(int argc, const char **argv)
{
struct bench_mem_info info = {
.functions = memcpy_functions,
- .do_cycles = do_memcpy_cycles,
- .do_gettimeofday = do_memcpy_gettimeofday,
+ .do_op = do_memcpy,
.usage = bench_mem_memcpy_usage,
+ .options = bench_mem_options,
.alloc_src = true,
};
return bench_mem_common(argc, argv, &info);
}
-static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
-{
- u64 cycle_start = 0ULL, cycle_end = 0ULL;
- memset_t fn = r->fn.memset;
- int i;
-
- /*
- * We prefault the freshly allocated memory range here,
- * to not measure page fault overhead:
- */
- fn(dst, -1, size);
-
- cycle_start = get_cycles();
- for (i = 0; i < nr_loops; ++i)
- fn(dst, i, size);
- cycle_end = get_cycles();
-
- return cycle_end - cycle_start;
-}
-
-static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
+static int do_memset(const struct function *r, struct bench_params *p,
+ void *src __maybe_unused, void *dst, union bench_clock *rt)
{
- struct timeval tv_start, tv_end, tv_diff;
+ union bench_clock start, end;
memset_t fn = r->fn.memset;
- int i;
/*
* We prefault the freshly allocated memory range here,
* to not measure page fault overhead:
*/
- fn(dst, -1, size);
+ fn(dst, -1, p->size);
- BUG_ON(gettimeofday(&tv_start, NULL));
- for (i = 0; i < nr_loops; ++i)
- fn(dst, i, size);
- BUG_ON(gettimeofday(&tv_end, NULL));
+ clock_get(&start);
+ for (unsigned int i = 0; i < p->nr_loops; ++i)
+ for (size_t off = 0; off < p->size; off += p->chunk_size)
+ fn(dst + off, i, min(p->chunk_size, p->size - off));
+ clock_get(&end);
- timersub(&tv_end, &tv_start, &tv_diff);
+ *rt = clock_diff(&start, &end);
- return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
+ return 0;
}
static const char * const bench_mem_memset_usage[] = {
@@ -351,10 +454,13 @@ static const char * const bench_mem_memset_usage[] = {
static const struct function memset_functions[] = {
{ .name = "default",
.desc = "Default memset() provided by glibc",
+ .fn.init = mem_alloc,
+ .fn.fini = mem_free,
.fn.memset = memset },
#ifdef HAVE_ARCH_X86_64_SUPPORT
-# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
+# define MEMSET_FN(_fn, _init, _fini, _name, _desc) \
+ {.name = _name, .desc = _desc, .fn.memset = _fn, .fn.init = _init, .fn.fini = _fini },
# include "mem-memset-x86-64-asm-def.h"
# undef MEMSET_FN
#endif
@@ -366,9 +472,91 @@ int bench_mem_memset(int argc, const char **argv)
{
struct bench_mem_info info = {
.functions = memset_functions,
- .do_cycles = do_memset_cycles,
- .do_gettimeofday = do_memset_gettimeofday,
+ .do_op = do_memset,
.usage = bench_mem_memset_usage,
+ .options = bench_mem_options,
+ };
+
+ return bench_mem_common(argc, argv, &info);
+}
+
+/*
+ * Touch one byte in every page of the @size byte buffer at @dst.
+ *
+ * Pages are 1 << @page_shift bytes each. The byte touched is at offset 0
+ * of the page, or at a rand()-chosen offset within the page when @random
+ * is set. Each touched byte is read and written back with the page index
+ * added to it.
+ */
+static void mmap_page_touch(void *dst, size_t size, unsigned int page_shift, bool random)
+{
+	unsigned long npages = size / (1 << page_shift);
+	unsigned long within = 0;
+	char *page = dst;
+
+	for (unsigned long i = 0; i < npages; i++, page += 1 << page_shift) {
+		if (random)
+			within = rand() % (1 << page_shift);
+
+		page[within] = page[within] + i;
+	}
+}
+
+/*
+ * Run the mmap benchmark for function entry @r.
+ *
+ * For each of p->nr_loops iterations a fresh buffer is mapped (populated
+ * up front only for the entry named "populate"), handed to the entry's
+ * mmap_op, and then unmapped. The time from just before the mapping to
+ * just after the op is added to @accum; the unmap is not timed.
+ *
+ * Returns 0 on success, or -1 after printing a message if a mapping fails.
+ */
+static int do_mmap(const struct function *r, struct bench_params *p,
+		   void *src __maybe_unused, void *dst __maybe_unused,
+		   union bench_clock *accum)
+{
+	const bool populate = !strcmp(r->name, "populate");
+	mmap_op_t touch = r->fn.mmap_op;
+	union bench_clock t0, t1, elapsed;
+
+	/* Only reseed rand() when a non-zero seed was supplied. */
+	if (p->seed)
+		srand(p->seed);
+
+	for (unsigned int loop = 0; loop < p->nr_loops; loop++) {
+		clock_get(&t0);
+		dst = bench_mmap(p->size, populate, p->page_shift);
+		if (dst == NULL) {
+			printf("# Memory allocation failed - maybe size (%s) %s?\n", size_str,
+			       p->page_shift != PAGE_SHIFT_4KB ? "has insufficient hugepages" : "is too large");
+			return -1;
+		}
+
+		touch(dst, p->size, p->page_shift, p->seed);
+		clock_get(&t1);
+
+		elapsed = clock_diff(&t0, &t1);
+		clock_accum(accum, &elapsed);
+
+		bench_munmap(dst, p->size);
+	}
+
+	return 0;
+}
+
+static const char * const bench_mem_mmap_usage[] = {
+ "perf bench mem mmap <options>",
+ NULL
+};
+
+static const struct function mmap_functions[] = {
+ { .name = "demand",
+ .desc = "Demand loaded mmap()",
+ .fn.mmap_op = mmap_page_touch },
+
+ { .name = "populate",
+ .desc = "Eagerly populated mmap()",
+ .fn.mmap_op = mmap_page_touch },
+
+ { .name = NULL, }
+};
+
+int bench_mem_mmap(int argc, const char **argv)
+{
+ static const struct option bench_mmap_options[] = {
+ OPT_UINTEGER('r', "randomize", &seed,
+ "Seed to randomize page access offset."),
+ OPT_PARENT(bench_common_options),
+ OPT_END()
+ };
+
+ struct bench_mem_info info = {
+ .functions = mmap_functions,
+ .do_op = do_mmap,
+ .usage = bench_mem_mmap_usage,
+ .options = bench_mmap_options,
};
return bench_mem_common(argc, argv, &info);
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
index 5bcaec5601a8..852e48cfd8fe 100644
--- a/tools/perf/bench/mem-memcpy-arch.h
+++ b/tools/perf/bench/mem-memcpy-arch.h
@@ -2,7 +2,7 @@
#ifdef HAVE_ARCH_X86_64_SUPPORT
-#define MEMCPY_FN(fn, name, desc) \
+#define MEMCPY_FN(fn, init, fini, name, desc) \
void *fn(void *, const void *, size_t);
#include "mem-memcpy-x86-64-asm-def.h"
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
index 50ae8bd58296..f43038f4448b 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -1,13 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
MEMCPY_FN(memcpy_orig,
+ mem_alloc,
+ mem_free,
"x86-64-unrolled",
"unrolled memcpy() in arch/x86/lib/memcpy_64.S")
MEMCPY_FN(__memcpy,
+ mem_alloc,
+ mem_free,
"x86-64-movsq",
"movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
-
-MEMCPY_FN(memcpy_erms,
- "x86-64-movsb",
- "movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index 6eb45a2aa8db..1b9fef7efcdc 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -2,7 +2,7 @@
/* Various wrappers to make the kernel .S file build in user-space: */
-// memcpy_orig and memcpy_erms are being defined as SYM_L_LOCAL but we need it
+// memcpy_orig is being defined as SYM_L_LOCAL but we need it
#define SYM_FUNC_START_LOCAL(name) \
SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#define memcpy MEMCPY /* don't hide glibc's memcpy() */
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
index 53f45482663f..278c5da12d63 100644
--- a/tools/perf/bench/mem-memset-arch.h
+++ b/tools/perf/bench/mem-memset-arch.h
@@ -2,7 +2,7 @@
#ifdef HAVE_ARCH_X86_64_SUPPORT
-#define MEMSET_FN(fn, name, desc) \
+#define MEMSET_FN(fn, init, fini, name, desc) \
void *fn(void *, int, size_t);
#include "mem-memset-x86-64-asm-def.h"
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
index dac6d2b7c39b..80ad1b7ea770 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h
@@ -1,13 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
MEMSET_FN(memset_orig,
+ mem_alloc,
+ mem_free,
"x86-64-unrolled",
"unrolled memset() in arch/x86/lib/memset_64.S")
MEMSET_FN(__memset,
+ mem_alloc,
+ mem_free,
"x86-64-stosq",
"movsq-based memset() in arch/x86/lib/memset_64.S")
-
-MEMSET_FN(memset_erms,
- "x86-64-stosb",
- "movsb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
index 6f093c483842..abd26c95f1aa 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm.S
+++ b/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// memset_orig and memset_erms are being defined as SYM_L_LOCAL but we need it
+// memset_orig is being defined as SYM_L_LOCAL but we need it
#define SYM_FUNC_START_LOCAL(name) \
SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#define memset MEMSET /* don't hide glibc's memset() */
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 9717c6c17433..19be2aaf4dc0 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -27,6 +27,7 @@
#include <sys/resource.h>
#include <sys/wait.h>
#include <sys/prctl.h>
+#include <sys/stat.h>
#include <sys/types.h>
#include <linux/kernel.h>
#include <linux/time64.h>
@@ -35,6 +36,7 @@
#include "../util/header.h"
#include "../util/mutex.h"
+#include <api/fs/fs.h>
#include <numa.h>
#include <numaif.h>
@@ -533,6 +535,57 @@ static int parse_cpu_list(const char *arg)
return 0;
}
+/*
+ * Check whether a CPU is online
+ *
+ * Returns:
+ *     1 -> if CPU is online
+ *     0 -> if CPU is offline (or has no sysfs directory)
+ *    -1 -> error case
+ */
+static int is_cpu_online(unsigned int cpu)
+{
+	char *str;
+	size_t len;	/* deliberately not called "strlen", which would shadow libc */
+	char buf[256];
+	int status = -1;
+	struct stat statbuf;
+
+	/* cpu is unsigned, so it is formatted with %u rather than %d. */
+	snprintf(buf, sizeof(buf),
+		 "/sys/devices/system/cpu/cpu%u", cpu);
+	if (stat(buf, &statbuf) != 0)
+		return 0;
+
+	/*
+	 * Check if /sys/devices/system/cpu/cpux/online file
+	 * exists. In some cases cpu0 won't have an online file since
+	 * it is not expected to be turned off generally.
+	 * In kernels without CONFIG_HOTPLUG_CPU, this
+	 * file won't exist.
+	 */
+	snprintf(buf, sizeof(buf),
+		 "/sys/devices/system/cpu/cpu%u/online", cpu);
+	if (stat(buf, &statbuf) != 0)
+		return 1;
+
+	/*
+	 * Read online file using sysfs__read_str.
+	 * If read or open fails, return -1.
+	 * If read succeeds, return value from file
+	 * which gets stored in "str".
+	 * The path given here has no "/sys/" prefix.
+	 */
+	snprintf(buf, sizeof(buf),
+		 "devices/system/cpu/cpu%u/online", cpu);
+
+	if (sysfs__read_str(buf, &str, &len) < 0)
+		return status;
+
+	status = atoi(str);
+
+	free(str);
+	return status;
+}
+
static int parse_setup_cpu_list(void)
{
struct thread_data *td;
@@ -847,7 +900,7 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val
if (g->p.data_rand_walk) {
u32 lfsr = nr + loop + val;
- int j;
+ long j;
for (i = 0; i < words/1024; i++) {
long start, end;
diff --git a/tools/perf/bench/pmu-scan.c b/tools/perf/bench/pmu-scan.c
new file mode 100644
index 000000000000..14a464ad8cea
--- /dev/null
+++ b/tools/perf/bench/pmu-scan.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark scanning sysfs files for PMU information.
+ *
+ * Copyright 2023 Google LLC.
+ */
+#include <errno.h>
+#include <stdio.h>
+#include "bench.h"
+#include "util/debug.h"
+#include "util/pmu.h"
+#include "util/pmus.h"
+#include "util/stat.h"
+#include <linux/atomic.h>
+#include <linux/err.h>
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+
+static unsigned int iterations = 100;
+
+struct pmu_scan_result {
+ char *name;
+ int nr_aliases;
+ int nr_formats;
+ int nr_caps;
+ bool is_core;
+};
+
+static const struct option options[] = {
+ OPT_UINTEGER('i', "iterations", &iterations,
+ "Number of iterations used to compute average"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench internals pmu-scan <options>",
+ NULL
+};
+
+static int nr_pmus;
+static struct pmu_scan_result *results;
+
+/*
+ * Scan every PMU once and append one entry per PMU to the global results
+ * array (nr_pmus counts the entries): its name, is_core flag, and the
+ * number of caps, event aliases and formats. The PMU list is destroyed
+ * before returning, on success and on failure alike.
+ *
+ * Returns 0 on success, or -ENOMEM when the array or a name copy cannot be
+ * allocated. Entries recorded before the failure stay in results.
+ */
+static int save_result(void)
+{
+	struct perf_pmu *pmu = NULL;
+	struct list_head *list;
+	struct pmu_scan_result *r;
+	int ret = 0;
+
+	while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+		/* Grow via a temporary so results stays valid if realloc() fails. */
+		r = realloc(results, (nr_pmus + 1) * sizeof(*r));
+		if (r == NULL) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		results = r;
+		r = results + nr_pmus;
+
+		/* The saved name is later used to look the PMU up again. */
+		r->name = strdup(pmu->name);
+		if (r->name == NULL) {
+			ret = -ENOMEM;
+			break;
+		}
+		r->is_core = pmu->is_core;
+		r->nr_caps = pmu->nr_caps;
+
+		r->nr_aliases = perf_pmu__num_events(pmu);
+
+		r->nr_formats = 0;
+		list_for_each(list, &pmu->format)
+			r->nr_formats++;
+
+		pr_debug("pmu[%d] name=%s, nr_caps=%d, nr_aliases=%d, nr_formats=%d\n",
+			nr_pmus, r->name, r->nr_caps, r->nr_aliases, r->nr_formats);
+		nr_pmus++;
+	}
+
+	perf_pmus__destroy();
+	return ret;
+}
+
+/*
+ * Compare the currently loaded PMUs against the saved results.
+ *
+ * Each saved entry (only core entries when @core_only is set) is looked up
+ * by name and its caps, event alias and format counts are compared with
+ * the saved ones. Returns 0 if everything matches, or -1 after logging the
+ * first PMU that is missing or differs.
+ */
+static int check_result(bool core_only)
+{
+	for (int i = 0; i < nr_pmus; i++) {
+		struct pmu_scan_result *saved = &results[i];
+		struct list_head *pos;
+		struct perf_pmu *pmu;
+		int count;
+
+		if (core_only && !saved->is_core)
+			continue;
+
+		pmu = perf_pmus__find(saved->name);
+		if (!pmu) {
+			pr_err("Cannot find PMU %s\n", saved->name);
+			return -1;
+		}
+
+		if (pmu->nr_caps != (u32)saved->nr_caps) {
+			pr_err("Unmatched number of event caps in %s: expect %d vs got %d\n",
+				pmu->name, saved->nr_caps, pmu->nr_caps);
+			return -1;
+		}
+
+		count = perf_pmu__num_events(pmu);
+		if (count != saved->nr_aliases) {
+			pr_err("Unmatched number of event aliases in %s: expect %d vs got %d\n",
+				pmu->name, saved->nr_aliases, count);
+			return -1;
+		}
+
+		/* Formats are kept on a list, so count them by walking it. */
+		count = 0;
+		list_for_each(pos, &pmu->format)
+			count++;
+		if (count != saved->nr_formats) {
+			pr_err("Unmatched number of event formats in %s: expect %d vs got %d\n",
+				pmu->name, saved->nr_formats, count);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/* Free every saved PMU name and the results array, then reset both globals. */
+static void delete_result(void)
+{
+	int i = 0;
+
+	while (i < nr_pmus) {
+		free(results[i].name);
+		i++;
+	}
+
+	free(results);
+	results = NULL;
+	nr_pmus = 0;
+}
+
+/*
+ * Time the PMU scan and report the average duration.
+ *
+ * A reference snapshot is taken first with save_result(). Two passes then
+ * run "iterations" timed scans each: core PMUs only, then all PMUs. After
+ * every scan the loaded PMUs are verified against the snapshot and
+ * destroyed again.
+ *
+ * Returns 0 on success and a negative value if the snapshot cannot be
+ * taken or a verification fails. The saved results are always freed.
+ */
+static int run_pmu_scan(void)
+{
+	struct stats stats;
+	struct timeval start, end, diff;
+	double time_average, time_stddev;
+	u64 runtime_us;
+	int ret = 0;
+
+	pr_info("Computing performance of sysfs PMU event scan for %u times\n",
+		iterations);
+
+	if (save_result() < 0) {
+		pr_err("Failed to initialize PMU scan result\n");
+		ret = -1;
+		goto out;
+	}
+
+	for (int j = 0; j < 2; j++) {
+		bool core_only = (j == 0);
+
+		/*
+		 * Start each pass with fresh statistics, otherwise the
+		 * all-PMU average would also include the core-only samples.
+		 */
+		init_stats(&stats);
+
+		for (unsigned int i = 0; i < iterations; i++) {
+			gettimeofday(&start, NULL);
+			if (core_only)
+				perf_pmus__scan_core(NULL);
+			else
+				perf_pmus__scan(NULL);
+			gettimeofday(&end, NULL);
+			timersub(&end, &start, &diff);
+			runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+			update_stats(&stats, runtime_us);
+
+			ret = check_result(core_only);
+			perf_pmus__destroy();
+			/* Propagate a failed verification instead of reporting success. */
+			if (ret < 0)
+				goto out;
+		}
+		time_average = avg_stats(&stats);
+		time_stddev = stddev_stats(&stats);
+		pr_info("  Average%s PMU scanning took: %.3f usec (+- %.3f usec)\n",
+			core_only ? " core" : "", time_average, time_stddev);
+	}
+out:
+	delete_result();
+	return ret;
+}
+
+/*
+ * Entry point of the pmu-scan benchmark: parse the command line, reject
+ * leftover arguments with the usage text and EXIT_FAILURE, then run the
+ * scan and return its result.
+ */
+int bench_pmu_scan(int argc, const char **argv)
+{
+	argc = parse_options(argc, argv, options, bench_usage, 0);
+	if (argc != 0) {
+		usage_with_options(bench_usage, options);
+		exit(EXIT_FAILURE);
+	}
+
+	return run_pmu_scan();
+}
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index 488f6e6ba1a5..93dcd9dba3d0 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -27,6 +27,7 @@
#include <poll.h>
#include <limits.h>
#include <err.h>
+#include <linux/list.h>
#include <linux/time64.h>
#define DATASIZE 100
@@ -35,8 +36,12 @@ static bool use_pipes = false;
static unsigned int nr_loops = 100;
static bool thread_mode = false;
static unsigned int num_groups = 10;
+static unsigned int total_children = 0;
+static struct list_head sender_contexts = LIST_HEAD_INIT(sender_contexts);
+static struct list_head receiver_contexts = LIST_HEAD_INIT(receiver_contexts);
struct sender_context {
+ struct list_head list;
unsigned int num_fds;
int ready_out;
int wakefd;
@@ -44,12 +49,20 @@ struct sender_context {
};
struct receiver_context {
+ struct list_head list;
unsigned int num_packets;
int in_fds[2];
int ready_out;
int wakefd;
};
+union messaging_worker {
+ pthread_t thread;
+ pid_t pid;
+};
+
+static union messaging_worker *worker_tab;
+
static void fdpair(int fds[2])
{
if (use_pipes) {
@@ -93,7 +106,7 @@ static void *sender(struct sender_context *ctx)
again:
ret = write(ctx->out_fds[j], data + done,
- sizeof(data)-done);
+ sizeof(data) - done);
if (ret < 0)
err(EXIT_FAILURE, "SENDER: write");
done += ret;
@@ -134,30 +147,12 @@ again:
return NULL;
}
-static pthread_t create_worker(void *ctx, void *(*func)(void *))
+static void create_thread_worker(union messaging_worker *worker,
+ void *ctx, void *(*func)(void *))
{
pthread_attr_t attr;
- pthread_t childid;
int ret;
- if (!thread_mode) {
- /* process mode */
- /* Fork the receiver. */
- switch (fork()) {
- case -1:
- err(EXIT_FAILURE, "fork()");
- break;
- case 0:
- (*func) (ctx);
- exit(0);
- break;
- default:
- break;
- }
-
- return (pthread_t)0;
- }
-
if (pthread_attr_init(&attr) != 0)
err(EXIT_FAILURE, "pthread_attr_init:");
@@ -166,14 +161,37 @@ static pthread_t create_worker(void *ctx, void *(*func)(void *))
err(EXIT_FAILURE, "pthread_attr_setstacksize");
#endif
- ret = pthread_create(&childid, &attr, func, ctx);
+ ret = pthread_create(&worker->thread, &attr, func, ctx);
if (ret != 0)
err(EXIT_FAILURE, "pthread_create failed");
- return childid;
+ pthread_attr_destroy(&attr);
+}
+
+/*
+ * Process-mode worker creation: fork a child that runs func(ctx) and then
+ * exits with status 0.  The fork() result is stored in worker->pid, so the
+ * parent ends up holding the child's pid.  A failed fork() is fatal.
+ */
+static void create_process_worker(union messaging_worker *worker,
+				  void *ctx, void *(*func)(void *))
+{
+	pid_t child = fork();
+
+	worker->pid = child;
+
+	switch (child) {
+	case -1:
+		err(EXIT_FAILURE, "fork()");
+		break;
+	case 0:
+		/* Child: run the worker body, never return to the caller. */
+		func(ctx);
+		exit(0);
+		break;
+	default:
+		/* Parent: nothing else to do. */
+		break;
+	}
+}
+
+/*
+ * Start one worker running func(ctx) and record its handle in *worker:
+ * a forked process by default, a pthread when thread_mode is set.
+ *
+ * Both helpers return void, so they are called as plain statements; a
+ * 'return <expr>;' in a void function is not valid ISO C.
+ */
+static void create_worker(union messaging_worker *worker,
+			  void *ctx, void *(*func)(void *))
+{
+	if (thread_mode)
+		create_thread_worker(worker, ctx, func);
+	else
+		create_process_worker(worker, ctx, func);
+}
-static void reap_worker(pthread_t id)
+static void reap_worker(union messaging_worker *worker)
{
int proc_status;
void *thread_status;
@@ -184,23 +202,24 @@ static void reap_worker(pthread_t id)
if (!WIFEXITED(proc_status))
exit(1);
} else {
- pthread_join(id, &thread_status);
+ pthread_join(worker->thread, &thread_status);
}
}
/* One group of senders and receivers */
-static unsigned int group(pthread_t *pth,
+static unsigned int group(union messaging_worker *worker,
unsigned int num_fds,
int ready_out,
int wakefd)
{
unsigned int i;
- struct sender_context *snd_ctx = malloc(sizeof(struct sender_context)
- + num_fds * sizeof(int));
+ struct sender_context *snd_ctx = malloc(sizeof(struct sender_context) +
+ num_fds * sizeof(int));
if (!snd_ctx)
err(EXIT_FAILURE, "malloc()");
+ list_add(&snd_ctx->list, &sender_contexts);
for (i = 0; i < num_fds; i++) {
int fds[2];
struct receiver_context *ctx = malloc(sizeof(*ctx));
@@ -208,6 +227,7 @@ static unsigned int group(pthread_t *pth,
if (!ctx)
err(EXIT_FAILURE, "malloc()");
+ list_add(&ctx->list, &receiver_contexts);
/* Create the pipe between client and server */
fdpair(fds);
@@ -218,7 +238,7 @@ static unsigned int group(pthread_t *pth,
ctx->ready_out = ready_out;
ctx->wakefd = wakefd;
- pth[i] = create_worker(ctx, (void *)receiver);
+ create_worker(worker + i, ctx, (void *)receiver);
snd_ctx->out_fds[i] = fds[1];
if (!thread_mode)
@@ -231,7 +251,7 @@ static unsigned int group(pthread_t *pth,
snd_ctx->wakefd = wakefd;
snd_ctx->num_fds = num_fds;
- pth[num_fds+i] = create_worker(snd_ctx, (void *)sender);
+ create_worker(worker + num_fds + i, snd_ctx, (void *)sender);
}
/* Close the fds we have left */
@@ -243,6 +263,17 @@ static unsigned int group(pthread_t *pth,
return num_fds * 2;
}
+/*
+ * Signal handler for abnormal termination: send SIGKILL to every child
+ * recorded in worker_tab so far, so forked workers are not left behind.
+ */
+static void sig_handler(int sig __maybe_unused)
+{
+	union messaging_worker *w = worker_tab;
+	union messaging_worker *end = worker_tab + total_children;
+
+	for (; w != end; w++)
+		kill(w->pid, SIGKILL);
+}
+
static const struct option options[] = {
OPT_BOOLEAN('p', "pipe", &use_pipes,
"Use pipe() instead of socketpair()"),
@@ -260,26 +291,30 @@ static const char * const bench_sched_message_usage[] = {
int bench_sched_messaging(int argc, const char **argv)
{
- unsigned int i, total_children;
+ unsigned int i;
struct timeval start, stop, diff;
unsigned int num_fds = 20;
int readyfds[2], wakefds[2];
char dummy;
- pthread_t *pth_tab;
+ struct sender_context *pos, *n;
argc = parse_options(argc, argv, options,
bench_sched_message_usage, 0);
- pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t));
- if (!pth_tab)
+ worker_tab = malloc(num_fds * 2 * num_groups * sizeof(union messaging_worker));
+ if (!worker_tab)
err(EXIT_FAILURE, "main:malloc()");
fdpair(readyfds);
fdpair(wakefds);
- total_children = 0;
+ if (!thread_mode) {
+ signal(SIGINT, sig_handler);
+ signal(SIGTERM, sig_handler);
+ }
+
for (i = 0; i < num_groups; i++)
- total_children += group(pth_tab+total_children, num_fds,
+ total_children += group(worker_tab + total_children, num_fds,
readyfds[1], wakefds[0]);
/* Wait for everyone to be ready */
@@ -295,7 +330,7 @@ int bench_sched_messaging(int argc, const char **argv)
/* Reap them all */
for (i = 0; i < total_children; i++)
- reap_worker(pth_tab[i]);
+ reap_worker(worker_tab + i);
gettimeofday(&stop, NULL);
@@ -323,7 +358,14 @@ int bench_sched_messaging(int argc, const char **argv)
break;
}
- free(pth_tab);
-
+ free(worker_tab);
+ list_for_each_entry_safe(pos, n, &sender_contexts, list) {
+ list_del_init(&pos->list);
+ free(pos);
+ }
+ list_for_each_entry_safe(pos, n, &receiver_contexts, list) {
+ list_del_init(&pos->list);
+ free(pos);
+ }
return 0;
}
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index a960e7a93aec..70139036d68f 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -10,7 +10,9 @@
* Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
*/
#include <subcmd/parse-options.h>
+#include <api/fs/fs.h>
#include "bench.h"
+#include "util/cgroup.h"
#include <unistd.h>
#include <stdio.h>
@@ -19,7 +21,9 @@
#include <sys/wait.h>
#include <string.h>
#include <errno.h>
+#include <fcntl.h>
#include <assert.h>
+#include <sys/epoll.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/syscall.h>
@@ -31,6 +35,9 @@ struct thread_data {
int nr;
int pipe_read;
int pipe_write;
+ struct epoll_event epoll_ev;
+ int epoll_fd;
+ bool cgroup_failed;
pthread_t pthread;
};
@@ -40,9 +47,50 @@ static int loops = LOOPS_DEFAULT;
/* Use processes by default: */
static bool threaded;
+static bool nonblocking;
+static char *cgrp_names[2];
+static struct cgroup *cgrps[2];
+
+/*
+ * Option callback for -G/--cgroups: split "SEND,RECV" at the first comma
+ * and store private copies of the two names in cgrp_names[0] and
+ * cgrp_names[1].
+ *
+ * Returns 0 on success, -1 if the comma is missing or memory runs out.
+ * On failure cgrp_names[] is left untouched and nothing is leaked; names
+ * stored by an earlier use of the option are released before being
+ * replaced.
+ */
+static int parse_two_cgroups(const struct option *opt __maybe_unused,
+			     const char *str, int unset __maybe_unused)
+{
+	const char *comma = strchr(str, ',');
+	char *send, *recv;
+
+	if (comma == NULL) {
+		fprintf(stderr, "it should have two cgroup names: %s\n", str);
+		return -1;
+	}
+
+	send = strndup(str, comma - str);
+	recv = strdup(comma + 1);
+	if (send == NULL || recv == NULL) {
+		fprintf(stderr, "memory allocation failure\n");
+		/* free(NULL) is a no-op, so this covers a partial failure. */
+		free(send);
+		free(recv);
+		return -1;
+	}
+
+	/* Drop names from a previous -G so repeating the option cannot leak. */
+	free(cgrp_names[0]);
+	free(cgrp_names[1]);
+	cgrp_names[0] = send;
+	cgrp_names[1] = recv;
+
+	return 0;
+}
+
static const struct option options[] = {
+ OPT_BOOLEAN('n', "nonblocking", &nonblocking, "Use non-blocking operations"),
OPT_INTEGER('l', "loop", &loops, "Specify number of loops"),
OPT_BOOLEAN('T', "threaded", &threaded, "Specify threads/process based task setup"),
+ OPT_CALLBACK('G', "cgroups", NULL, "SEND,RECV",
+ "Put sender and receivers in given cgroups",
+ parse_two_cgroups),
OPT_END()
};
@@ -51,24 +99,115 @@ static const char * const bench_sched_pipe_usage[] = {
NULL
};
+/*
+ * Move the calling task into the cgroup named by cgrp_names[nr].
+ *
+ * Returns 0 when no cgroup name was configured for this slot or when the
+ * task id was written successfully, and -1 on any failure (after printing
+ * a message and, where possible, a hint).
+ */
+static int enter_cgroup(int nr)
+{
+ char buf[32];
+ int fd, len, ret;
+ int saved_errno;
+ struct cgroup *cgrp;
+ pid_t pid;
+
+ /* No cgroup requested for this slot: nothing to do. */
+ if (cgrp_names[nr] == NULL)
+ return 0;
+
+ /* Create the cgroup object on first use and cache it in cgrps[nr]. */
+ if (cgrps[nr] == NULL) {
+ cgrps[nr] = cgroup__new(cgrp_names[nr], /*do_open=*/true);
+ if (cgrps[nr] == NULL)
+ goto err;
+ }
+ cgrp = cgrps[nr];
+
+ /* In threaded mode the id written is the thread id, not the process id. */
+ if (threaded)
+ pid = syscall(__NR_gettid);
+ else
+ pid = getpid();
+
+ snprintf(buf, sizeof(buf), "%d\n", pid);
+ len = strlen(buf);
+
+ /* try cgroup v2 interface first */
+ if (threaded)
+ fd = openat(cgrp->fd, "cgroup.threads", O_WRONLY);
+ else
+ fd = openat(cgrp->fd, "cgroup.procs", O_WRONLY);
+
+ /* try cgroup v1 if failed */
+ if (fd < 0 && errno == ENOENT)
+ fd = openat(cgrp->fd, "tasks", O_WRONLY);
+
+ if (fd < 0)
+ goto err;
+
+ ret = write(fd, buf, len);
+ close(fd);
+
+ /* A short or failed write means the task was not moved. */
+ if (ret != len) {
+ printf("Cannot enter to cgroup: %s\n", cgrp->name);
+ return -1;
+ }
+ return 0;
+
+err:
+ /* Capture errno before printf() gets a chance to overwrite it. */
+ saved_errno = errno;
+ printf("Failed to open cgroup file in %s\n", cgrp_names[nr]);
+
+ if (saved_errno == ENOENT) {
+ char mnt[PATH_MAX];
+
+ if (cgroupfs_find_mountpoint(mnt, sizeof(mnt), "perf_event") == 0)
+ printf(" Hint: create the cgroup first, like 'mkdir %s/%s'\n",
+ mnt, cgrp_names[nr]);
+ } else if (saved_errno == EACCES && geteuid() > 0) {
+ printf(" Hint: try to run as root\n");
+ }
+
+ return -1;
+}
+
+/*
+ * Release the cgroup reference and the name string held for slot 'nr'.
+ *
+ * Both globals are reset to NULL afterwards: enter_cgroup() decides what
+ * to do by testing them against NULL, so stale pointers must not survive
+ * the release.
+ */
+static void exit_cgroup(int nr)
+{
+	cgroup__put(cgrps[nr]);
+	cgrps[nr] = NULL;
+
+	free(cgrp_names[nr]);
+	cgrp_names[nr] = NULL;
+}
+
+/*
+ * Read one int from td->pipe_read and return read()'s result.
+ *
+ * When 'nonblocking' is set, each attempt is preceded by an epoll_wait()
+ * on td->epoll_fd (whose negative result is returned as-is), and a read
+ * that fails with EWOULDBLOCK is retried from the wait.
+ */
+static inline int read_pipe(struct thread_data *td)
+{
+	int rc, scratch;
+
+	for (;;) {
+		if (nonblocking) {
+			rc = epoll_wait(td->epoll_fd, &td->epoll_ev, 1, -1);
+			if (rc < 0)
+				return rc;
+		}
+
+		rc = read(td->pipe_read, &scratch, sizeof(int));
+		if (!nonblocking || rc >= 0 || errno != EWOULDBLOCK)
+			return rc;
+	}
+}
+
static void *worker_thread(void *__tdata)
{
struct thread_data *td = __tdata;
- int m = 0, i;
- int ret;
+ int i, ret, m = 0;
+
+ ret = enter_cgroup(td->nr);
+ if (ret < 0) {
+ td->cgroup_failed = true;
+ return NULL;
+ }
+
+ if (nonblocking) {
+ td->epoll_ev.events = EPOLLIN;
+ td->epoll_fd = epoll_create(1);
+ BUG_ON(td->epoll_fd < 0);
+ BUG_ON(epoll_ctl(td->epoll_fd, EPOLL_CTL_ADD, td->pipe_read, &td->epoll_ev) < 0);
+ }
for (i = 0; i < loops; i++) {
- if (!td->nr) {
- ret = read(td->pipe_read, &m, sizeof(int));
- BUG_ON(ret != sizeof(int));
- ret = write(td->pipe_write, &m, sizeof(int));
- BUG_ON(ret != sizeof(int));
- } else {
- ret = write(td->pipe_write, &m, sizeof(int));
- BUG_ON(ret != sizeof(int));
- ret = read(td->pipe_read, &m, sizeof(int));
- BUG_ON(ret != sizeof(int));
- }
+ ret = write(td->pipe_write, &m, sizeof(int));
+ BUG_ON(ret != sizeof(int));
+ ret = read_pipe(td);
+ BUG_ON(ret != sizeof(int));
}
return NULL;
@@ -76,7 +215,8 @@ static void *worker_thread(void *__tdata)
int bench_sched_pipe(int argc, const char **argv)
{
- struct thread_data threads[2], *td;
+ struct thread_data threads[2] = {};
+ struct thread_data *td;
int pipe_1[2], pipe_2[2];
struct timeval start, stop, diff;
unsigned long long result_usec = 0;
@@ -88,13 +228,16 @@ int bench_sched_pipe(int argc, const char **argv)
* discarding returned value of read(), write()
* causes error in building environment for perf
*/
- int __maybe_unused ret, wait_stat;
+ int __maybe_unused ret, wait_stat, flags = 0;
pid_t pid, retpid __maybe_unused;
argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0);
- BUG_ON(pipe(pipe_1));
- BUG_ON(pipe(pipe_2));
+ if (nonblocking)
+ flags |= O_NONBLOCK;
+
+ BUG_ON(pipe2(pipe_1, flags));
+ BUG_ON(pipe2(pipe_2, flags));
gettimeofday(&start, NULL);
@@ -112,9 +255,7 @@ int bench_sched_pipe(int argc, const char **argv)
}
}
-
if (threaded) {
-
for (t = 0; t < nr_threads; t++) {
td = threads + t;
@@ -128,7 +269,6 @@ int bench_sched_pipe(int argc, const char **argv)
ret = pthread_join(td->pthread, NULL);
BUG_ON(ret);
}
-
} else {
pid = fork();
assert(pid >= 0);
@@ -147,6 +287,12 @@ int bench_sched_pipe(int argc, const char **argv)
gettimeofday(&stop, NULL);
timersub(&stop, &start, &diff);
+ exit_cgroup(0);
+ exit_cgroup(1);
+
+ if (threads[0].cgroup_failed || threads[1].cgroup_failed)
+ return 0;
+
switch (bench_format) {
case BENCH_FORMAT_DEFAULT:
printf("# Executed %d pipe operations between two %s\n\n",
diff --git a/tools/perf/bench/sched-seccomp-notify.c b/tools/perf/bench/sched-seccomp-notify.c
new file mode 100644
index 000000000000..269c1f4a6852
--- /dev/null
+++ b/tools/perf/bench/sched-seccomp-notify.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <subcmd/parse-options.h>
+#include "bench.h"
+
+#include <uapi/linux/filter.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <linux/unistd.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <linux/time64.h>
+#include <uapi/linux/seccomp.h>
+#include <sys/prctl.h>
+
+#include <unistd.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <string.h>
+#include <errno.h>
+#include <err.h>
+#include <inttypes.h>
+
+#define LOOPS_DEFAULT 1000000UL
+static uint64_t loops = LOOPS_DEFAULT;
+static bool sync_mode;
+
+/*
+ * Command-line options: -l/--loop stores into 'loops', -s/--sync-mode
+ * stores into 'sync_mode'.
+ */
+static const struct option options[] = {
+ OPT_U64('l', "loop", &loops, "Specify number of loops"),
+ OPT_BOOLEAN('s', "sync-mode", &sync_mode,
+ "Enable the synchronous mode for seccomp notifications"),
+ OPT_END()
+};
+
+/* Usage lines handed to parse_options(); NULL-terminated. */
+static const char * const bench_seccomp_usage[] = {
+	"perf bench sched seccomp-notify <options>",
+	NULL
+};
+
+/* Thin wrapper that issues the seccomp system call through syscall(). */
+static int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+	long rc = syscall(__NR_seccomp, op, flags, args);
+
+	return rc;
+}
+
+/*
+ * Install a seccomp filter that answers system call number 'nr' with
+ * SECCOMP_RET_USER_NOTIF and every other system call with
+ * SECCOMP_RET_ALLOW.  'flags' is passed through to seccomp(); the return
+ * value is whatever seccomp(SECCOMP_SET_MODE_FILTER, ...) returns.
+ */
+static int user_notif_syscall(int nr, unsigned int flags)
+{
+ struct sock_filter filter[] = {
+ /* Load the system call number from struct seccomp_data. */
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ /* Equal to 'nr': fall through; otherwise skip one instruction. */
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
+}
+
+#define USER_NOTIF_MAGIC INT_MAX
+/*
+ * Serve 'loops' notifications on 'listener': receive one request, check
+ * that it is for gettid, and reply with USER_NOTIF_MAGIC as the system
+ * call's return value.  Any ioctl failure or unexpected system call
+ * terminates the program.
+ */
+static void user_notification_sync_loop(int listener)
+{
+	uint64_t served;
+
+	for (served = 0; served < loops; served++) {
+		struct seccomp_notif request;
+		struct seccomp_notif_resp reply;
+
+		memset(&request, 0, sizeof(request));
+		if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &request) != 0)
+			err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_RECV failed");
+
+		if (request.data.nr != __NR_gettid)
+			errx(EXIT_FAILURE, "unexpected syscall: %d", request.data.nr);
+
+		reply.flags = 0;
+		reply.val = USER_NOTIF_MAGIC;
+		reply.error = 0;
+		reply.id = request.id;
+		if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &reply) != 0)
+			err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_SEND failed");
+	}
+}
+
+#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
+#endif
+/*
+ * Benchmark seccomp user notifications.
+ *
+ * A filter that turns gettid into a user notification is installed, then
+ * a child is forked that keeps calling gettid for as long as the answer
+ * is USER_NOTIF_MAGIC.  The parent answers 'loops' notifications, kills
+ * the child, checks that it died from SIGKILL and prints the elapsed
+ * time in the selected bench_format.
+ *
+ * The measured interval starts before the filter is installed and ends
+ * after the child has been reaped.  Returns 0; setup and runtime errors
+ * terminate the program.
+ */
+int bench_sched_seccomp_notify(int argc, const char **argv)
+{
+	struct timeval start, stop, diff;
+	unsigned long long result_usec = 0;
+	int status, listener;
+	pid_t pid;
+	long ret;
+
+	argc = parse_options(argc, argv, options, bench_seccomp_usage, 0);
+
+	gettimeofday(&start, NULL);
+
+	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	listener = user_notif_syscall(__NR_gettid,
+				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
+	if (listener < 0)
+		err(EXIT_FAILURE, "can't create a notification descriptor");
+
+	pid = fork();
+	if (pid < 0)
+		err(EXIT_FAILURE, "fork");
+	if (pid == 0) {
+		if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0))
+			err(EXIT_FAILURE, "can't set the parent death signal");
+		/* Keep trapping into the listener until it stops answering. */
+		while (1) {
+			ret = syscall(__NR_gettid);
+			if (ret == USER_NOTIF_MAGIC)
+				continue;
+			break;
+		}
+		_exit(1);
+	}
+
+	if (sync_mode) {
+		/* The ioctl takes exactly one argument: the flags word. */
+		if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
+			  SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP))
+			err(EXIT_FAILURE,
+			    "can't set SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP");
+	}
+	user_notification_sync_loop(listener);
+
+	kill(pid, SIGKILL);
+	if (waitpid(pid, &status, 0) != pid)
+		err(EXIT_FAILURE, "waitpid(%d) failed", pid);
+	if (!WIFSIGNALED(status) || WTERMSIG(status) != SIGKILL)
+		errx(EXIT_FAILURE, "unexpected exit code: %d", status);
+
+	gettimeofday(&stop, NULL);
+	timersub(&stop, &start, &diff);
+
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		printf("# Executed %" PRIu64 " system calls\n\n",
+		       loops);
+
+		result_usec = diff.tv_sec * USEC_PER_SEC;
+		result_usec += diff.tv_usec;
+
+		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+		       (unsigned long) diff.tv_sec,
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+
+		printf(" %14lf usecs/op\n",
+		       (double)result_usec / (double)loops);
+		printf(" %14d ops/sec\n",
+		       (int)((double)loops /
+			     ((double)result_usec / (double)USEC_PER_SEC)));
+		break;
+
+	case BENCH_FORMAT_SIMPLE:
+		printf("%lu.%03lu\n",
+		       (unsigned long) diff.tv_sec,
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+		break;
+
+	default:
+		/* Reaching here means bench_format holds an unknown value. */
+		fprintf(stderr, "Unknown format:%d\n", bench_format);
+		exit(1);
+		break;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
index 7401ebbac100..265d49a913d9 100644
--- a/tools/perf/bench/synthesize.c
+++ b/tools/perf/bench/synthesize.c
@@ -6,6 +6,7 @@
*
* Copyright 2019 Google LLC.
*/
+#include <errno.h>
#include <stdio.h>
#include "bench.h"
#include "../util/debug.h"
@@ -49,7 +50,7 @@ static const char *const bench_usage[] = {
static atomic_t event_count;
-static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
+static int process_synthesized_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event __maybe_unused,
struct perf_sample *sample __maybe_unused,
struct machine *machine __maybe_unused)
@@ -114,12 +115,16 @@ static int run_single_threaded(void)
.pid = "self",
};
struct perf_thread_map *threads;
+ struct perf_env host_env;
int err;
perf_set_singlethreaded();
- session = perf_session__new(NULL, NULL);
+ perf_env__init(&host_env);
+ session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL,
+ /*trace_event_repipe=*/false, &host_env);
if (IS_ERR(session)) {
pr_err("Session creation failed.\n");
+ perf_env__exit(&host_env);
return PTR_ERR(session);
}
threads = thread_map__new_by_pid(getpid());
@@ -144,6 +149,7 @@ err_out:
perf_thread_map__put(threads);
perf_session__delete(session);
+ perf_env__exit(&host_env);
return err;
}
@@ -154,17 +160,21 @@ static int do_run_multi_threaded(struct target *target,
u64 runtime_us;
unsigned int i;
double time_average, time_stddev, event_average, event_stddev;
- int err;
+ int err = 0;
struct stats time_stats, event_stats;
struct perf_session *session;
+ struct perf_env host_env;
+ perf_env__init(&host_env);
init_stats(&time_stats);
init_stats(&event_stats);
for (i = 0; i < multi_iterations; i++) {
- session = perf_session__new(NULL, NULL);
- if (IS_ERR(session))
- return PTR_ERR(session);
-
+ session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL,
+ /*trace_event_repipe=*/false, &host_env);
+ if (IS_ERR(session)) {
+ err = PTR_ERR(session);
+ goto err_out;
+ }
atomic_set(&event_count, 0);
gettimeofday(&start, NULL);
err = __machine__synthesize_threads(&session->machines.host,
@@ -175,7 +185,7 @@ static int do_run_multi_threaded(struct target *target,
nr_threads_synthesize);
if (err) {
perf_session__delete(session);
- return err;
+ goto err_out;
}
gettimeofday(&end, NULL);
@@ -198,7 +208,9 @@ static int do_run_multi_threaded(struct target *target,
printf(" Average time per event %.3f usec\n",
time_average / event_average);
- return 0;
+err_out:
+ perf_env__exit(&host_env);
+ return err;
}
static int run_multi_threaded(void)
diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c
index 9b751016f4b6..e7dc216f717f 100644
--- a/tools/perf/bench/syscall.c
+++ b/tools/perf/bench/syscall.c
@@ -14,11 +14,15 @@
#include <sys/time.h>
#include <sys/syscall.h>
#include <sys/types.h>
+#include <sys/wait.h>
#include <unistd.h>
#include <stdlib.h>
-#define LOOPS_DEFAULT 10000000
-static int loops = LOOPS_DEFAULT;
+#ifndef __NR_fork
+#define __NR_fork -1
+#endif
+
+static int loops;
static const struct option options[] = {
OPT_INTEGER('l', "loop", &loops, "Specify number of loops"),
@@ -30,25 +34,108 @@ static const char * const bench_syscall_usage[] = {
NULL
};
-int bench_syscall_basic(int argc, const char **argv)
+/*
+ * One fork() round trip: the child exits immediately with status 0 and
+ * the parent waits for it.  A failing fork() or waitpid() prints a
+ * message and exits with status 1.
+ */
+static void test_fork(void)
+{
+	pid_t child = fork();
+
+	if (child == 0)
+		exit(0);
+
+	if (child < 0) {
+		fprintf(stderr, "fork failed\n");
+		exit(1);
+	}
+
+	if (waitpid(child, NULL, 0) < 0) {
+		fprintf(stderr, "waitpid failed\n");
+		exit(1);
+	}
+}
+
+/*
+ * One fork() + execve() round trip: the child replaces itself with
+ * /bin/true and the parent waits for it.  A failing fork(), execve() or
+ * waitpid() prints a message and exits with status 1.
+ */
+static void test_execve(void)
+{
+	const char *binary = "/bin/true";
+	char *const child_argv[] = { (char *)binary, NULL };
+	pid_t child = fork();
+
+	if (child == 0) {
+		execve(binary, child_argv, NULL);
+		/* Only reached when execve() itself failed. */
+		fprintf(stderr, "execve /bin/true failed\n");
+		exit(1);
+	}
+
+	if (child < 0) {
+		fprintf(stderr, "fork failed\n");
+		exit(1);
+	}
+
+	if (waitpid(child, NULL, 0) < 0) {
+		fprintf(stderr, "waitpid failed\n");
+		exit(1);
+	}
+}
+
+static int bench_syscall_common(int argc, const char **argv, int syscall)
{
struct timeval start, stop, diff;
unsigned long long result_usec = 0;
+ const char *name = NULL;
int i;
+ switch (syscall) {
+ case __NR_fork:
+ case __NR_execve:
+ /* Limit default loop to 10000 times to save time */
+ loops = 10000;
+ break;
+ default:
+ loops = 10000000;
+ break;
+ }
+
+ /* Options -l and --loops override default above */
argc = parse_options(argc, argv, options, bench_syscall_usage, 0);
gettimeofday(&start, NULL);
- for (i = 0; i < loops; i++)
- getppid();
+ for (i = 0; i < loops; i++) {
+ switch (syscall) {
+ case __NR_getppid:
+ getppid();
+ break;
+ case __NR_getpgid:
+ getpgid(0);
+ break;
+ case __NR_fork:
+ test_fork();
+ break;
+ case __NR_execve:
+ test_execve();
+ default:
+ break;
+ }
+ }
gettimeofday(&stop, NULL);
timersub(&stop, &start, &diff);
+ switch (syscall) {
+ case __NR_getppid:
+ name = "getppid()";
+ break;
+ case __NR_getpgid:
+ name = "getpgid()";
+ break;
+ case __NR_fork:
+ name = "fork()";
+ break;
+ case __NR_execve:
+ name = "execve()";
+ break;
+ default:
+ break;
+ }
+
switch (bench_format) {
case BENCH_FORMAT_DEFAULT:
- printf("# Executed %'d getppid() calls\n", loops);
+ printf("# Executed %'d %s calls\n", loops, name);
result_usec = diff.tv_sec * 1000000;
result_usec += diff.tv_usec;
@@ -79,3 +166,23 @@ int bench_syscall_basic(int argc, const char **argv)
return 0;
}
+
+/* Entry point: run the common syscall benchmark with getppid(). */
+int bench_syscall_basic(int argc, const char **argv)
+{
+ return bench_syscall_common(argc, argv, __NR_getppid);
+}
+
+/* Entry point: run the common syscall benchmark with getpgid(). */
+int bench_syscall_getpgid(int argc, const char **argv)
+{
+ return bench_syscall_common(argc, argv, __NR_getpgid);
+}
+
+/* Entry point: run the common syscall benchmark with fork(). */
+int bench_syscall_fork(int argc, const char **argv)
+{
+ return bench_syscall_common(argc, argv, __NR_fork);
+}
+
+/* Entry point: run the common syscall benchmark with execve(). */
+int bench_syscall_execve(int argc, const char **argv)
+{
+ return bench_syscall_common(argc, argv, __NR_execve);
+}
diff --git a/tools/perf/bench/uprobe.c b/tools/perf/bench/uprobe.c
new file mode 100644
index 000000000000..0b90275862e1
--- /dev/null
+++ b/tools/perf/bench/uprobe.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * uprobe.c
+ *
+ * uprobe benchmarks
+ *
+ * Copyright (C) 2023, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "../perf.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../builtin.h"
+#include "bench.h"
+#include <linux/compiler.h>
+#include <linux/time64.h>
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#define LOOPS_DEFAULT 1000
+static int loops = LOOPS_DEFAULT;
+
+/*
+ * Benchmark variants.  BASELINE attaches nothing; each of the others
+ * selects the BPF program of the matching name (empty, trace_printk,
+ * empty_ret, trace_printk_ret) to attach before the timed loop.
+ */
+enum bench_uprobe {
+ BENCH_UPROBE__BASELINE,
+ BENCH_UPROBE__EMPTY,
+ BENCH_UPROBE__TRACE_PRINTK,
+ BENCH_UPROBE__EMPTY_RET,
+ BENCH_UPROBE__TRACE_PRINTK_RET,
+};
+
+/* Command-line options: -l/--loop stores into 'loops'. */
+static const struct option options[] = {
+ OPT_INTEGER('l', "loop", &loops, "Specify number of loops"),
+ OPT_END()
+};
+
+/* Usage lines handed to parse_options(); NULL-terminated. */
+static const char * const bench_uprobe_usage[] = {
+ "perf bench uprobe <options>",
+ NULL
+};
+
+#ifdef HAVE_BPF_SKEL
+#include "bpf_skel/bench_uprobe.skel.h"
+
+/*
+ * Attach skel->progs.<prog> as a uprobe in libc.so.6 and store the link
+ * in skel->links.<prog>.  On failure, set 'err' to -errno, print a
+ * message and jump to the 'cleanup' label.
+ *
+ * Relies on 'skel', 'uprobe_opts', 'err' and a 'cleanup' label being
+ * visible where the macro is expanded.  Wrapped in do { } while (0) so
+ * that it expands to exactly one statement.
+ */
+#define bench_uprobe__attach_uprobe(prog) \
+	do { \
+		skel->links.prog = bpf_program__attach_uprobe_opts(/*prog=*/skel->progs.prog, \
+								   /*pid=*/-1, \
+								   /*binary_path=*/"libc.so.6", \
+								   /*func_offset=*/0, \
+								   /*opts=*/&uprobe_opts); \
+		if (!skel->links.prog) { \
+			err = -errno; \
+			fprintf(stderr, "Failed to attach bench uprobe \"%s\": %s\n", #prog, strerror(errno)); \
+			goto cleanup; \
+		} \
+	} while (0)
+
+struct bench_uprobe_bpf *skel;
+
+/*
+ * Open and load the bench_uprobe BPF skeleton into the global 'skel',
+ * then attach the program selected by 'bench' to usleep in libc.so.6
+ * (nothing is attached for BENCH_UPROBE__BASELINE).
+ *
+ * Returns 0 on success.  On any failure the skeleton is destroyed, 'skel'
+ * is reset to NULL and a non-zero value is returned; the caller tests for
+ * a negative result.
+ */
+static int bench_uprobe__setup_bpf_skel(enum bench_uprobe bench)
+{
+	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+	int err;
+
+	/* Open the BPF application */
+	skel = bench_uprobe_bpf__open();
+	if (!skel) {
+		fprintf(stderr, "Failed to open uprobes bench BPF skeleton\n");
+		return -1;
+	}
+
+	/* Load and verify it */
+	err = bench_uprobe_bpf__load(skel);
+	if (err) {
+		fprintf(stderr, "Failed to load and verify BPF skeleton\n");
+		goto cleanup;
+	}
+
+	uprobe_opts.func_name = "usleep";
+	switch (bench) {
+	case BENCH_UPROBE__BASELINE:							break;
+	case BENCH_UPROBE__EMPTY:	 bench_uprobe__attach_uprobe(empty);		break;
+	case BENCH_UPROBE__TRACE_PRINTK: bench_uprobe__attach_uprobe(trace_printk);	break;
+	case BENCH_UPROBE__EMPTY_RET:	 bench_uprobe__attach_uprobe(empty_ret);	break;
+	case BENCH_UPROBE__TRACE_PRINTK_RET: bench_uprobe__attach_uprobe(trace_printk_ret); break;
+	default:
+		fprintf(stderr, "Invalid bench: %d\n", bench);
+		/* err is 0 after the successful load; make the failure visible. */
+		err = -1;
+		goto cleanup;
+	}
+
+	return err;
+cleanup:
+	bench_uprobe_bpf__destroy(skel);
+	skel = NULL;
+	return err;
+}
+
+/* Destroy the global skeleton, if there is one, and reset 'skel' to NULL. */
+static void bench_uprobe__teardown_bpf_skel(void)
+{
+	if (!skel)
+		return;
+
+	bench_uprobe_bpf__destroy(skel);
+	skel = NULL;
+}
+#else
+/* Built without HAVE_BPF_SKEL: setup reports success and teardown does nothing. */
+static int bench_uprobe__setup_bpf_skel(enum bench_uprobe bench __maybe_unused) { return 0; }
+static void bench_uprobe__teardown_bpf_skel(void) {}
+#endif
+
+/*
+ * Print the default-format report for one run to 'fp'.
+ *
+ * 'diff' is the total elapsed time expressed in 'unit's, 'name' labels
+ * the benchmarked call.  The first run seen becomes the baseline; later
+ * runs also report their delta to that baseline and, when it differs
+ * from the baseline, to the previous run.  Both are kept in function
+ * static state.
+ *
+ * Returns the number of characters written.  All output goes to 'fp'.
+ */
+static int bench_uprobe_format__default_fprintf(const char *name, const char *unit, u64 diff, FILE *fp)
+{
+	static u64 baseline, previous;
+	s64 diff_to_baseline = diff - baseline,
+	    diff_to_previous = diff - previous;
+	int printed = fprintf(fp, "# Executed %'d %s calls\n", loops, name);
+
+	printed += fprintf(fp, " %14s: %'" PRIu64 " %ss", "Total time", diff, unit);
+
+	if (baseline) {
+		printed += fprintf(fp, " %s%'" PRId64 " to baseline", diff_to_baseline > 0 ? "+" : "", diff_to_baseline);
+
+		/* Write to fp like every other line, and count the output. */
+		if (previous != baseline)
+			printed += fprintf(fp, " %s%'" PRId64 " to previous", diff_to_previous > 0 ? "+" : "", diff_to_previous);
+	}
+
+	printed += fprintf(fp, "\n\n %'.3f %ss/op", (double)diff / (double)loops, unit);
+
+	if (baseline) {
+		printed += fprintf(fp, " %'.3f %ss/op to baseline", (double)diff_to_baseline / (double)loops, unit);
+
+		if (previous != baseline)
+			printed += fprintf(fp, " %'.3f %ss/op to previous", (double)diff_to_previous / (double)loops, unit);
+	} else {
+		/* First run: remember it as the reference for later runs. */
+		baseline = diff;
+	}
+
+	fputc('\n', fp);
+
+	previous = diff;
+
+	/* +1 accounts for the newline written by fputc() above. */
+	return printed + 1;
+}
+
+/*
+ * Common driver for the uprobe benchmarks: parse the options, set up the
+ * BPF skeleton for every variant except the baseline, time 'loops' calls
+ * of usleep(USEC_PER_MSEC) and report the elapsed microseconds in the
+ * selected bench_format.  Always returns 0, including when the skeleton
+ * setup fails (the timed loop is skipped in that case).
+ */
+static int bench_uprobe(int argc, const char **argv, enum bench_uprobe bench)
+{
+	const char *label = "usleep(1000)";
+	const char *unit_name = "usec";
+	struct timespec t0, t1;
+	u64 elapsed;
+	int remaining;
+
+	argc = parse_options(argc, argv, options, bench_uprobe_usage, 0);
+
+	if (bench != BENCH_UPROBE__BASELINE) {
+		if (bench_uprobe__setup_bpf_skel(bench) < 0)
+			return 0;
+	}
+
+	clock_gettime(CLOCK_REALTIME, &t0);
+
+	for (remaining = loops; remaining > 0; remaining--)
+		usleep(USEC_PER_MSEC);
+
+	clock_gettime(CLOCK_REALTIME, &t1);
+
+	/* Nanosecond difference first, then scaled down to microseconds. */
+	elapsed = t1.tv_sec * NSEC_PER_SEC + t1.tv_nsec - (t0.tv_sec * NSEC_PER_SEC + t0.tv_nsec);
+	elapsed /= NSEC_PER_USEC;
+
+	if (bench_format == BENCH_FORMAT_DEFAULT) {
+		bench_uprobe_format__default_fprintf(label, unit_name, elapsed, stdout);
+	} else if (bench_format == BENCH_FORMAT_SIMPLE) {
+		printf("%" PRIu64 "\n", elapsed);
+	} else {
+		/* An unknown output format is treated as fatal. */
+		fprintf(stderr, "Unknown format:%d\n", bench_format);
+		exit(1);
+	}
+
+	if (bench != BENCH_UPROBE__BASELINE)
+		bench_uprobe__teardown_bpf_skel();
+
+	return 0;
+}
+
+/* Entry point: timed loop with no BPF program attached. */
+int bench_uprobe_baseline(int argc, const char **argv)
+{
+ return bench_uprobe(argc, argv, BENCH_UPROBE__BASELINE);
+}
+
+/* Entry point: timed loop with the BENCH_UPROBE__EMPTY variant. */
+int bench_uprobe_empty(int argc, const char **argv)
+{
+ return bench_uprobe(argc, argv, BENCH_UPROBE__EMPTY);
+}
+
+/* Entry point: timed loop with the BENCH_UPROBE__TRACE_PRINTK variant. */
+int bench_uprobe_trace_printk(int argc, const char **argv)
+{
+ return bench_uprobe(argc, argv, BENCH_UPROBE__TRACE_PRINTK);
+}
+
+/* Entry point: timed loop with the BENCH_UPROBE__EMPTY_RET variant. */
+int bench_uprobe_empty_ret(int argc, const char **argv)
+{
+ return bench_uprobe(argc, argv, BENCH_UPROBE__EMPTY_RET);
+}
+
+/* Entry point: timed loop with the BENCH_UPROBE__TRACE_PRINTK_RET variant. */
+int bench_uprobe_trace_printk_ret(int argc, const char **argv)
+{
+ return bench_uprobe(argc, argv, BENCH_UPROBE__TRACE_PRINTK_RET);
+}