diff options
Diffstat (limited to 'tools/perf/bench')
24 files changed, 610 insertions, 211 deletions
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index c2ab30907ae7..b558ab98719f 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -1,25 +1,26 @@ -perf-y += sched-messaging.o -perf-y += sched-pipe.o -perf-y += sched-seccomp-notify.o -perf-y += syscall.o -perf-y += mem-functions.o -perf-y += futex-hash.o -perf-y += futex-wake.o -perf-y += futex-wake-parallel.o -perf-y += futex-requeue.o -perf-y += futex-lock-pi.o -perf-y += epoll-wait.o -perf-y += epoll-ctl.o -perf-y += synthesize.o -perf-y += kallsyms-parse.o -perf-y += find-bit-bench.o -perf-y += inject-buildid.o -perf-y += evlist-open-close.o -perf-y += breakpoint.o -perf-y += pmu-scan.o -perf-y += uprobe.o +perf-bench-y += sched-messaging.o +perf-bench-y += sched-pipe.o +perf-bench-y += sched-seccomp-notify.o +perf-bench-y += syscall.o +perf-bench-y += mem-functions.o +perf-bench-y += futex.o +perf-bench-y += futex-hash.o +perf-bench-y += futex-wake.o +perf-bench-y += futex-wake-parallel.o +perf-bench-y += futex-requeue.o +perf-bench-y += futex-lock-pi.o +perf-bench-y += epoll-wait.o +perf-bench-y += epoll-ctl.o +perf-bench-y += synthesize.o +perf-bench-y += kallsyms-parse.o +perf-bench-y += find-bit-bench.o +perf-bench-y += inject-buildid.o +perf-bench-y += evlist-open-close.o +perf-bench-y += breakpoint.o +perf-bench-y += pmu-scan.o +perf-bench-y += uprobe.o -perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o -perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o +perf-bench-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o +perf-bench-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o -perf-$(CONFIG_NUMA) += numa.o +perf-bench-$(CONFIG_NUMA) += numa.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 9f736423af53..8519eb5a42fa 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -28,6 +28,7 @@ int bench_syscall_fork(int argc, const char **argv); int bench_syscall_execve(int argc, const char **argv); int bench_mem_memcpy(int argc, const char **argv); int bench_mem_memset(int argc, const char **argv); +int bench_mem_mmap(int argc, const char **argv); int bench_mem_find_bit(int argc, const char **argv); int bench_futex_hash(int argc, const char **argv); int bench_futex_wake(int argc, const char **argv); diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index d3db73dac66a..d66d852b90e4 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -232,7 +232,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) pthread_attr_init(&thread_attr); - nrcpus = perf_cpu_map__nr(cpu); + nrcpus = cpu__max_cpu().cpu; cpuset = CPU_ALLOC(nrcpus); BUG_ON(!cpuset); size = CPU_ALLOC_SIZE(nrcpus); diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 06bb3187660a..20fe4f72b4af 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -309,7 +309,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) pthread_attr_init(&thread_attr); - nrcpus = perf_cpu_map__nr(cpu); + nrcpus = cpu__max_cpu().cpu; cpuset = CPU_ALLOC(nrcpus); BUG_ON(!cpuset); size = CPU_ALLOC_SIZE(nrcpus); @@ -420,7 +420,12 @@ static int cmpworker(const void *p1, const void *p2) struct worker *w1 = (struct worker *) p1; struct worker *w2 = (struct worker *) p2; - return w1->tid > w2->tid; + + if (w1->tid > w2->tid) + return 1; + if (w1->tid < w2->tid) + return -1; + return 0; } int bench_epoll_wait(int argc, const char **argv) diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c index 5a27691469ed..faf9c34b4a5d 100644 --- a/tools/perf/bench/evlist-open-close.c +++ b/tools/perf/bench/evlist-open-close.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <errno.h> #include <inttypes.h> #include <stdio.h> #include <stdlib.h> @@ -46,25 +47,6 @@ static struct record_opts opts = { .ctl_fd_ack = -1, }; -static const struct option options[] = { - OPT_STRING('e', "event", &event_string, "event", "event selector. use 'perf list' to list available events"), - OPT_INTEGER('n', "nr-events", &nr_events, - "number of dummy events to create (default 1). If used with -e, it clones those events n times (1 = no change)"), - OPT_INTEGER('i', "iterations", &iterations, "Number of iterations used to compute average (default=100)"), - OPT_BOOLEAN('a', "all-cpus", &opts.target.system_wide, "system-wide collection from all CPUs"), - OPT_STRING('C', "cpu", &opts.target.cpu_list, "cpu", "list of cpus where to open events"), - OPT_STRING('p', "pid", &opts.target.pid, "pid", "record events on existing process id"), - OPT_STRING('t', "tid", &opts.target.tid, "tid", "record events on existing thread id"), - OPT_STRING('u', "uid", &opts.target.uid_str, "user", "user to profile"), - OPT_BOOLEAN(0, "per-thread", &opts.target.per_thread, "use per-thread mmaps"), - OPT_END() -}; - -static const char *const bench_usage[] = { - "perf bench internals evlist-open-close <options>", - NULL -}; - static int evlist__count_evsel_fds(struct evlist *evlist) { struct evsel *evsel; @@ -76,7 +58,7 @@ static int evlist__count_evsel_fds(struct evlist *evlist) return cnt; } -static struct evlist *bench__create_evlist(char *evstr) +static struct evlist *bench__create_evlist(char *evstr, const char *uid_str) { struct parse_events_error err; struct evlist *evlist = evlist__new(); @@ -97,6 +79,18 @@ static struct evlist *bench__create_evlist(char *evstr) goto out_delete_evlist; } parse_events_error__exit(&err); + if (uid_str) { + uid_t uid = parse_uid(uid_str); + + if (uid == UINT_MAX) { + pr_err("Invalid User: %s", uid_str); + ret = -EINVAL; + goto out_delete_evlist; + } + ret = parse_uid_filter(evlist, uid); + if (ret) + goto out_delete_evlist; + } ret = evlist__create_maps(evlist, &opts.target); if (ret < 0) { pr_err("Not enough memory to create thread/cpu maps\n"); @@ -136,10 +130,10 @@ static int bench__do_evlist_open_close(struct evlist *evlist) return 0; } -static int bench_evlist_open_close__run(char *evstr) +static int bench_evlist_open_close__run(char *evstr, const char *uid_str) { // used to print statistics only - struct evlist *evlist = bench__create_evlist(evstr); + struct evlist *evlist = bench__create_evlist(evstr, uid_str); double time_average, time_stddev; struct timeval start, end, diff; struct stats time_stats; @@ -161,7 +155,7 @@ static int bench_evlist_open_close__run(char *evstr) for (i = 0; i < iterations; i++) { pr_debug("Started iteration %d\n", i); - evlist = bench__create_evlist(evstr); + evlist = bench__create_evlist(evstr, uid_str); if (!evlist) return -ENOMEM; @@ -225,6 +219,30 @@ out_error: int bench_evlist_open_close(int argc, const char **argv) { + const char *uid_str = NULL; + const struct option options[] = { + OPT_STRING('e', "event", &event_string, "event", + "event selector. use 'perf list' to list available events"), + OPT_INTEGER('n', "nr-events", &nr_events, + "number of dummy events to create (default 1). If used with -e, it clones those events n times (1 = no change)"), + OPT_INTEGER('i', "iterations", &iterations, + "Number of iterations used to compute average (default=100)"), + OPT_BOOLEAN('a', "all-cpus", &opts.target.system_wide, + "system-wide collection from all CPUs"), + OPT_STRING('C', "cpu", &opts.target.cpu_list, "cpu", + "list of cpus where to open events"), + OPT_STRING('p', "pid", &opts.target.pid, "pid", + "record events on existing process id"), + OPT_STRING('t', "tid", &opts.target.tid, "tid", + "record events on existing thread id"), + OPT_STRING('u', "uid", &uid_str, "user", "user to profile"), + OPT_BOOLEAN(0, "per-thread", &opts.target.per_thread, "use per-thread mmaps"), + OPT_END() + }; + const char *const bench_usage[] = { + "perf bench internals evlist-open-close <options>", + NULL + }; char *evstr, errbuf[BUFSIZ]; int err; @@ -241,15 +259,8 @@ int bench_evlist_open_close(int argc, const char **argv) goto out; } - err = target__parse_uid(&opts.target); - if (err) { - target__strerror(&opts.target, err, errbuf, sizeof(errbuf)); - pr_err("%s", errbuf); - goto out; - } - - /* Enable ignoring missing threads when -u/-p option is defined. */ - opts.ignore_missing_thread = opts.target.uid != UINT_MAX || opts.target.pid; + /* Enable ignoring missing threads when -p option is defined. */ + opts.ignore_missing_thread = opts.target.pid; evstr = bench__repeat_event_string(event_string, nr_events); if (!evstr) { @@ -257,7 +268,7 @@ int bench_evlist_open_close(int argc, const char **argv) goto out; } - err = bench_evlist_open_close__run(evstr); + err = bench_evlist_open_close__run(evstr, uid_str); free(evstr); out: diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c index 7e25b0e413f6..e697c20951bc 100644 --- a/tools/perf/bench/find-bit-bench.c +++ b/tools/perf/bench/find-bit-bench.c @@ -37,7 +37,7 @@ static noinline void workload(int val) accumulator++; } -#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__) +#if defined(__i386__) || defined(__x86_64__) static bool asm_test_bit(long nr, const unsigned long *addr) { bool oldbit; diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 0c69d20efa32..7e29f04da744 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -21,6 +21,7 @@ #include <linux/zalloc.h> #include <sys/time.h> #include <sys/mman.h> +#include <sys/prctl.h> #include <perf/cpumap.h> #include "../util/mutex.h" @@ -50,9 +51,11 @@ struct worker { static struct bench_futex_parameters params = { .nfutexes = 1024, .runtime = 10, + .nbuckets = -1, }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('r', "runtime", ¶ms.runtime, "Specify runtime (in seconds)"), OPT_UINTEGER('f', "futexes", ¶ms.nfutexes, "Specify amount of futexes per threads"), @@ -118,6 +121,7 @@ static void print_summary(void) printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", !params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), (int)bench__runtime.tv_sec); + futex_print_nbuckets(¶ms); } int bench_futex_hash(int argc, const char **argv) @@ -161,6 +165,7 @@ int bench_futex_hash(int argc, const char **argv) if (!params.fshared) futex_flag = FUTEX_PRIVATE_FLAG; + futex_set_nbuckets_param(¶ms); printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n", getpid(), params.nthreads, params.nfutexes, params.fshared ? "shared":"private", params.runtime); @@ -174,7 +179,7 @@ int bench_futex_hash(int argc, const char **argv) pthread_attr_init(&thread_attr); gettimeofday(&bench__start, NULL); - nrcpus = perf_cpu_map__nr(cpu); + nrcpus = cpu__max_cpu().cpu; cpuset = CPU_ALLOC(nrcpus); BUG_ON(!cpuset); size = CPU_ALLOC_SIZE(nrcpus); diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 7a4973346180..40640b674427 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -41,10 +41,12 @@ static struct stats throughput_stats; static struct cond thread_parent, thread_worker; static struct bench_futex_parameters params = { + .nbuckets = -1, .runtime = 10, }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('r', "runtime", ¶ms.runtime, "Specify runtime (in seconds)"), OPT_BOOLEAN( 'M', "multi", ¶ms.multi, "Use multiple futexes"), @@ -67,6 +69,7 @@ static void print_summary(void) printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", !params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), (int)bench__runtime.tv_sec); + futex_print_nbuckets(¶ms); } static void toggle_done(int sig __maybe_unused, @@ -122,7 +125,7 @@ static void create_threads(struct worker *w, struct perf_cpu_map *cpu) { cpu_set_t *cpuset; unsigned int i; - int nrcpus = perf_cpu_map__nr(cpu); + int nrcpus = cpu__max_cpu().cpu; size_t size; threads_starting = params.nthreads; @@ -203,6 +206,7 @@ int bench_futex_lock_pi(int argc, const char **argv) mutex_init(&thread_lock); cond_init(&thread_parent); cond_init(&thread_worker); + futex_set_nbuckets_param(¶ms); threads_starting = params.nthreads; gettimeofday(&bench__start, NULL); diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index d9ad736c1a3e..0748b0fd689e 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -42,6 +42,7 @@ static unsigned int threads_starting; static int futex_flag = 0; static struct bench_futex_parameters params = { + .nbuckets = -1, /* * How many tasks to requeue at a time. * Default to 1 in order to make the kernel work more. @@ -50,6 +51,7 @@ static struct bench_futex_parameters params = { }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('q', "nrequeue", ¶ms.nrequeue, "Specify amount of threads to requeue at once"), OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), @@ -77,6 +79,7 @@ static void print_summary(void) params.nthreads, requeuetime_avg / USEC_PER_MSEC, rel_stddev_stats(requeuetime_stddev, requeuetime_avg)); + futex_print_nbuckets(¶ms); } static void *workerfn(void *arg __maybe_unused) @@ -125,7 +128,7 @@ static void block_threads(pthread_t *w, struct perf_cpu_map *cpu) { cpu_set_t *cpuset; unsigned int i; - int nrcpus = perf_cpu_map__nr(cpu); + int nrcpus = cpu__max_cpu().cpu; size_t size; threads_starting = params.nthreads; @@ -204,6 +207,8 @@ int bench_futex_requeue(int argc, const char **argv) if (params.broadcast) params.nrequeue = params.nthreads; + futex_set_nbuckets_param(¶ms); + printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), " "%d at a time.\n\n", getpid(), params.nthreads, params.fshared ? "shared":"private", &futex1, diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index b66df553e561..6aede7c46b33 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -57,9 +57,12 @@ static struct stats waketime_stats, wakeup_stats; static unsigned int threads_starting; static int futex_flag = 0; -static struct bench_futex_parameters params; +static struct bench_futex_parameters params = { + .nbuckets = -1, +}; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('w', "nwakers", ¶ms.nwakes, "Specify amount of waking threads"), OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), @@ -149,7 +152,7 @@ static void block_threads(pthread_t *w, struct perf_cpu_map *cpu) { cpu_set_t *cpuset; unsigned int i; - int nrcpus = perf_cpu_map__nr(cpu); + int nrcpus = cpu__max_cpu().cpu; size_t size; threads_starting = params.nthreads; @@ -218,6 +221,7 @@ static void print_summary(void) params.nthreads, waketime_avg / USEC_PER_MSEC, rel_stddev_stats(waketime_stddev, waketime_avg)); + futex_print_nbuckets(¶ms); } @@ -291,6 +295,8 @@ int bench_futex_wake_parallel(int argc, const char **argv) if (!params.fshared) futex_flag = FUTEX_PRIVATE_FLAG; + futex_set_nbuckets_param(¶ms); + printf("Run summary [PID %d]: blocking on %d threads (at [%s] " "futex %p), %d threads waking up %d at a time.\n\n", getpid(), params.nthreads, params.fshared ? "shared":"private", @@ -318,7 +324,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) cond_broadcast(&thread_worker); mutex_unlock(&thread_lock); - usleep(100000); + usleep(200000); /* Ok, all threads are patiently blocked, start waking folks up */ wakeup_threads(waking_worker); diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 690fd6d3da13..a31fc1563862 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -42,6 +42,7 @@ static unsigned int threads_starting; static int futex_flag = 0; static struct bench_futex_parameters params = { + .nbuckets = -1, /* * How many wakeups to do at a time. * Default to 1 in order to make the kernel work more. @@ -50,6 +51,7 @@ static struct bench_futex_parameters params = { }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('w', "nwakes", ¶ms.nwakes, "Specify amount of threads to wake at once"), OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), @@ -93,6 +95,7 @@ static void print_summary(void) params.nthreads, waketime_avg / USEC_PER_MSEC, rel_stddev_stats(waketime_stddev, waketime_avg)); + futex_print_nbuckets(¶ms); } static void block_threads(pthread_t *w, struct perf_cpu_map *cpu) @@ -100,7 +103,7 @@ static void block_threads(pthread_t *w, struct perf_cpu_map *cpu) cpu_set_t *cpuset; unsigned int i; size_t size; - int nrcpus = perf_cpu_map__nr(cpu); + int nrcpus = cpu__max_cpu().cpu; threads_starting = params.nthreads; cpuset = CPU_ALLOC(nrcpus); diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c new file mode 100644 index 000000000000..1968c9d00b5b --- /dev/null +++ b/tools/perf/bench/futex.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <err.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/prctl.h> + +#include "futex.h" + +#ifndef PR_FUTEX_HASH +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define PR_FUTEX_HASH_GET_SLOTS 2 +#endif // PR_FUTEX_HASH + +void futex_set_nbuckets_param(struct bench_futex_parameters *params) +{ + int ret; + + if (params->nbuckets < 0) + return; + + ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, 0); + if (ret) { + printf("Requesting %d hash buckets failed: %d/%m\n", + params->nbuckets, ret); + err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); + } +} + +void futex_print_nbuckets(struct bench_futex_parameters *params) +{ + char *futex_hash_mode; + int ret; + + ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS); + if (params->nbuckets >= 0) { + if (ret != params->nbuckets) { + if (ret < 0) { + printf("Can't query number of buckets: %m\n"); + err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); + } + printf("Requested number of hash buckets does not currently used.\n"); + printf("Requested: %d in usage: %d\n", params->nbuckets, ret); + err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); + } + if (params->nbuckets == 0) + ret = asprintf(&futex_hash_mode, "Futex hashing: global hash"); + else + ret = asprintf(&futex_hash_mode, "Futex hashing: %d hash buckets", + params->nbuckets); + } else { + if (ret <= 0) { + ret = asprintf(&futex_hash_mode, "Futex hashing: global hash"); + } else { + ret = asprintf(&futex_hash_mode, "Futex hashing: auto resized to %d buckets", + ret); + } + } + if (ret < 0) + err(EXIT_FAILURE, "ENOMEM, futex_hash_mode"); + printf("%s\n", futex_hash_mode); + free(futex_hash_mode); +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index ebdc2b032afc..fcb72d682cf8 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -8,6 +8,7 @@ #ifndef _FUTEX_H #define _FUTEX_H +#include <stdbool.h> #include <unistd.h> #include <sys/syscall.h> #include <sys/types.h> @@ -25,6 +26,7 @@ struct bench_futex_parameters { unsigned int nfutexes; unsigned int nwakes; unsigned int nrequeue; + int nbuckets; }; /** @@ -143,4 +145,7 @@ futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, val, opflags); } +void futex_set_nbuckets_param(struct bench_futex_parameters *params); +void futex_print_nbuckets(struct bench_futex_parameters *params); + #endif /* _FUTEX_H */ diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index a759eb2328be..aad572a78d7f 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -52,7 +52,7 @@ struct bench_dso { static int nr_dsos; static struct bench_dso *dsos; -extern int cmd_inject(int argc, const char *argv[]); +extern int main(int argc, const char **argv); static const struct option options[] = { OPT_UINTEGER('i', "iterations", &iterations, @@ -80,7 +80,7 @@ static int add_dso(const char *fpath, const struct stat *sb __maybe_unused, int typeflag, struct FTW *ftwbuf __maybe_unused) { struct bench_dso *dso = &dsos[nr_dsos]; - struct build_id bid; + struct build_id bid = { .size = 0, }; if (typeflag == FTW_D || typeflag == FTW_SL) return 0; @@ -294,7 +294,7 @@ static int setup_injection(struct bench_data *data, bool build_id_all) if (data->pid == 0) { const char **inject_argv; - int inject_argc = 2; + int inject_argc = 3; close(data->input_pipe[1]); close(data->output_pipe[0]); @@ -318,15 +318,16 @@ static int setup_injection(struct bench_data *data, bool build_id_all) if (inject_argv == NULL) exit(1); - inject_argv[0] = strdup("inject"); - inject_argv[1] = strdup("-b"); + inject_argv[0] = strdup("perf"); + inject_argv[1] = strdup("inject"); + inject_argv[2] = strdup("-b"); if (build_id_all) - inject_argv[2] = strdup("--buildid-all"); + inject_argv[3] = strdup("--buildid-all"); /* signal that we're ready to go */ close(ready_pipe[1]); - cmd_inject(inject_argc, inject_argv); + main(inject_argc, inject_argv); exit(0); } diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 19d45c377ac1..2908a3a796c9 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -22,27 +22,39 @@ #include <string.h> #include <unistd.h> #include <sys/time.h> +#include <sys/mman.h> #include <errno.h> #include <linux/time64.h> -#include <linux/zalloc.h> +#include <linux/log2.h> #define K 1024 +#define PAGE_SHIFT_4KB 12 +#define PAGE_SHIFT_2MB 21 +#define PAGE_SHIFT_1GB 30 + static const char *size_str = "1MB"; static const char *function_str = "all"; -static int nr_loops = 1; +static const char *page_size_str = "4KB"; +static const char *chunk_size_str = "0"; +static unsigned int nr_loops = 1; static bool use_cycles; static int cycles_fd; +static unsigned int seed; -static const struct option options[] = { +static const struct option bench_common_options[] = { OPT_STRING('s', "size", &size_str, "1MB", "Specify the size of the memory buffers. " "Available units: B, KB, MB, GB and TB (case insensitive)"), + OPT_STRING('p', "page", &page_size_str, "4KB", + "Specify page-size for mapping memory buffers. " + "Available sizes: 4KB, 2MB, 1GB (case insensitive)"), + OPT_STRING('f', "function", &function_str, "all", "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"), - OPT_INTEGER('l', "nr_loops", &nr_loops, + OPT_UINTEGER('l', "nr_loops", &nr_loops, "Specify the number of loops to run. (default: 1)"), OPT_BOOLEAN('c', "cycles", &use_cycles, @@ -51,15 +63,56 @@ static const struct option options[] = { OPT_END() }; +static const struct option bench_mem_options[] = { + OPT_STRING('k', "chunk", &chunk_size_str, "0", + "Specify the chunk-size for each invocation. " + "Available units: B, KB, MB, GB and TB (case insensitive)"), + OPT_PARENT(bench_common_options), + OPT_END() +}; + +union bench_clock { + u64 cycles; + struct timeval tv; +}; + +struct bench_params { + size_t size; + size_t size_total; + size_t chunk_size; + unsigned int nr_loops; + unsigned int page_shift; + unsigned int seed; +}; + +struct bench_mem_info { + const struct function *functions; + int (*do_op)(const struct function *r, struct bench_params *p, + void *src, void *dst, union bench_clock *rt); + const char *const *usage; + const struct option *options; + bool alloc_src; +}; + +typedef bool (*mem_init_t)(struct bench_mem_info *, struct bench_params *, + void **, void **); +typedef void (*mem_fini_t)(struct bench_mem_info *, struct bench_params *, + void **, void **); typedef void *(*memcpy_t)(void *, const void *, size_t); typedef void *(*memset_t)(void *, int, size_t); +typedef void (*mmap_op_t)(void *, size_t, unsigned int, bool); struct function { const char *name; const char *desc; - union { - memcpy_t memcpy; - memset_t memset; + struct { + mem_init_t init; + mem_fini_t fini; + union { + memcpy_t memcpy; + memset_t memset; + mmap_op_t mmap_op; + }; } fn; }; @@ -91,6 +144,34 @@ static u64 get_cycles(void) return clk; } +static void clock_get(union bench_clock *t) +{ + if (use_cycles) + t->cycles = get_cycles(); + else + BUG_ON(gettimeofday(&t->tv, NULL)); +} + +static union bench_clock clock_diff(union bench_clock *s, union bench_clock *e) +{ + union bench_clock t; + + if (use_cycles) + t.cycles = e->cycles - s->cycles; + else + timersub(&e->tv, &s->tv, &t.tv); + + return t; +} + +static void clock_accum(union bench_clock *a, union bench_clock *b) +{ + if (use_cycles) + a->cycles += b->cycles; + else + timeradd(&a->tv, &b->tv, &a->tv); +} + static double timeval2double(struct timeval *ts) { return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC; @@ -107,54 +188,40 @@ static double timeval2double(struct timeval *ts) printf(" %14lf GB/sec\n", x / K / K / K); \ } while (0) -struct bench_mem_info { - const struct function *functions; - u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst); - double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst); - const char *const *usage; - bool alloc_src; -}; - -static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) +static void __bench_mem_function(struct bench_mem_info *info, struct bench_params *p, + int r_idx) { const struct function *r = &info->functions[r_idx]; double result_bps = 0.0; - u64 result_cycles = 0; - void *src = NULL, *dst = zalloc(size); + union bench_clock rt = { 0 }; + void *src = NULL, *dst = NULL; printf("# function '%s' (%s)\n", r->name, r->desc); - if (dst == NULL) - goto out_alloc_failed; - - if (info->alloc_src) { - src = zalloc(size); - if (src == NULL) - goto out_alloc_failed; - } + if (r->fn.init && r->fn.init(info, p, &src, &dst)) + goto out_init_failed; if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Copying %s bytes ...\n\n", size_str); - if (use_cycles) { - result_cycles = info->do_cycles(r, size, src, dst); - } else { - result_bps = info->do_gettimeofday(r, size, src, dst); - } + if (info->do_op(r, p, src, dst, &rt)) + goto out_test_failed; switch (bench_format) { case BENCH_FORMAT_DEFAULT: if (use_cycles) { - printf(" %14lf cycles/byte\n", (double)result_cycles/size_total); + printf(" %14lf cycles/byte\n", (double)rt.cycles/(double)p->size_total); } else { + result_bps = (double)p->size_total/timeval2double(&rt.tv); print_bps(result_bps); } break; case BENCH_FORMAT_SIMPLE: if (use_cycles) { - printf("%lf\n", (double)result_cycles/size_total); + printf("%lf\n", (double)rt.cycles/(double)p->size_total); } else { + result_bps = (double)p->size_total/timeval2double(&rt.tv); printf("%lf\n", result_bps); } break; @@ -164,22 +231,23 @@ static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t break; } +out_test_failed: out_free: - free(src); - free(dst); + if (r->fn.fini) r->fn.fini(info, p, &src, &dst); return; -out_alloc_failed: - printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str); +out_init_failed: + printf("# Memory allocation failed - maybe size (%s) %s?\n", size_str, + p->page_shift != PAGE_SHIFT_4KB ? "has insufficient hugepages" : "is too large"); goto out_free; } static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) { int i; - size_t size; - double size_total; + struct bench_params p = { 0 }; + unsigned int page_size; - argc = parse_options(argc, argv, options, info->usage, 0); + argc = parse_options(argc, argv, info->options, info->usage, 0); if (use_cycles) { i = init_cycles(); @@ -189,17 +257,37 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * } } - size = (size_t)perf_atoll((char *)size_str); - size_total = (double)size * nr_loops; + p.nr_loops = nr_loops; + p.size = (size_t)perf_atoll((char *)size_str); - if ((s64)size <= 0) { + if ((s64)p.size <= 0) { fprintf(stderr, "Invalid size:%s\n", size_str); return 1; } + p.size_total = p.size * p.nr_loops; + + p.chunk_size = (size_t)perf_atoll((char *)chunk_size_str); + if ((s64)p.chunk_size < 0 || (s64)p.chunk_size > (s64)p.size) { + fprintf(stderr, "Invalid chunk_size:%s\n", chunk_size_str); + return 1; + } + if (!p.chunk_size) + p.chunk_size = p.size; + + page_size = (unsigned int)perf_atoll((char *)page_size_str); + if (page_size != (1 << PAGE_SHIFT_4KB) && + page_size != (1 << PAGE_SHIFT_2MB) && + page_size != (1 << PAGE_SHIFT_1GB)) { + fprintf(stderr, "Invalid page-size:%s\n", page_size_str); + return 1; + } + p.page_shift = ilog2(page_size); + + p.seed = seed; if (!strncmp(function_str, "all", 3)) { for (i = 0; info->functions[i].name; i++) - __bench_mem_function(info, i, size, size_total); + __bench_mem_function(info, &p, i); return 0; } @@ -218,7 +306,7 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * return 1; } - __bench_mem_function(info, i, size, size_total); + __bench_mem_function(info, &p, i); return 0; } @@ -235,47 +323,81 @@ static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst) fn(dst, src, size); } -static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) +static int do_memcpy(const struct function *r, struct bench_params *p, + void *src, void *dst, union bench_clock *rt) { - u64 cycle_start = 0ULL, cycle_end = 0ULL; + union bench_clock start, end; memcpy_t fn = r->fn.memcpy; - int i; - memcpy_prefault(fn, size, src, dst); + memcpy_prefault(fn, p->size, src, dst); + + clock_get(&start); + for (unsigned int i = 0; i < p->nr_loops; ++i) + for (size_t off = 0; off < p->size; off += p->chunk_size) + fn(dst + off, src + off, min(p->chunk_size, p->size - off)); + clock_get(&end); - cycle_start = get_cycles(); - for (i = 0; i < nr_loops; ++i) - fn(dst, src, size); - cycle_end = get_cycles(); + *rt = clock_diff(&start, &end); - return cycle_end - cycle_start; + return 0; } -static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst) +static void *bench_mmap(size_t size, bool populate, unsigned int page_shift) { - struct timeval tv_start, tv_end, tv_diff; - memcpy_t fn = r->fn.memcpy; - int i; + void *p; + int extra = populate ? MAP_POPULATE : 0; + + if (page_shift != PAGE_SHIFT_4KB) + extra |= MAP_HUGETLB | (page_shift << MAP_HUGE_SHIFT); + + p = mmap(NULL, size, PROT_READ|PROT_WRITE, + extra | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + + return p == MAP_FAILED ? NULL : p; +} + +static void bench_munmap(void *p, size_t size) +{ + if (p) + munmap(p, size); +} + +static bool mem_alloc(struct bench_mem_info *info, struct bench_params *p, + void **src, void **dst) +{ + bool failed; - memcpy_prefault(fn, size, src, dst); + *dst = bench_mmap(p->size, true, p->page_shift); + failed = *dst == NULL; - BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < nr_loops; ++i) - fn(dst, src, size); - BUG_ON(gettimeofday(&tv_end, NULL)); + if (info->alloc_src) { + *src = bench_mmap(p->size, true, p->page_shift); + failed = failed || *src == NULL; + } + + return failed; +} - timersub(&tv_end, &tv_start, &tv_diff); +static void mem_free(struct bench_mem_info *info __maybe_unused, + struct bench_params *p __maybe_unused, + void **src, void **dst) +{ + bench_munmap(*dst, p->size); + bench_munmap(*src, p->size); - return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); + *dst = *src = NULL; } struct function memcpy_functions[] = { { .name = "default", .desc = "Default memcpy() provided by glibc", + .fn.init = mem_alloc, + .fn.fini = mem_free, .fn.memcpy = memcpy }, #ifdef HAVE_ARCH_X86_64_SUPPORT -# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, +# define MEMCPY_FN(_fn, _init, _fini, _name, _desc) \ + {.name = _name, .desc = _desc, .fn.memcpy = _fn, .fn.init = _init, .fn.fini = _fini }, # include "mem-memcpy-x86-64-asm-def.h" # undef MEMCPY_FN #endif @@ -292,55 +414,36 @@ int bench_mem_memcpy(int argc, const char **argv) { struct bench_mem_info info = { .functions = memcpy_functions, - .do_cycles = do_memcpy_cycles, - .do_gettimeofday = do_memcpy_gettimeofday, + .do_op = do_memcpy, .usage = bench_mem_memcpy_usage, + .options = bench_mem_options, .alloc_src = true, }; return bench_mem_common(argc, argv, &info); } -static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst) -{ - u64 cycle_start = 0ULL, cycle_end = 0ULL; - memset_t fn = r->fn.memset; - int i; - - /* - * We prefault the freshly allocated memory range here, - * to not measure page fault overhead: - */ - fn(dst, -1, size); - - cycle_start = get_cycles(); - for (i = 0; i < nr_loops; ++i) - fn(dst, i, size); - cycle_end = get_cycles(); - - return cycle_end - cycle_start; -} - -static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst) +static int do_memset(const struct function *r, struct bench_params *p, + void *src __maybe_unused, void *dst, union bench_clock *rt) { - struct timeval tv_start, tv_end, tv_diff; + union bench_clock start, end; memset_t fn = r->fn.memset; - int i; /* * We prefault the freshly allocated memory range here, * to not measure page fault overhead: */ - fn(dst, -1, size); + fn(dst, -1, p->size); - BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < nr_loops; ++i) - fn(dst, i, size); - BUG_ON(gettimeofday(&tv_end, NULL)); + clock_get(&start); + for (unsigned int i = 0; i < p->nr_loops; ++i) + for (size_t off = 0; off < p->size; off += p->chunk_size) + fn(dst + off, i, min(p->chunk_size, p->size - off)); + clock_get(&end); - timersub(&tv_end, &tv_start, &tv_diff); + *rt = clock_diff(&start, &end); - return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); + return 0; } static const char * const bench_mem_memset_usage[] = { @@ -351,10 +454,13 @@ static const char * const bench_mem_memset_usage[] = { static const struct function memset_functions[] = { { .name = "default", .desc = "Default memset() provided by glibc", + .fn.init = mem_alloc, + .fn.fini = mem_free, .fn.memset = memset }, #ifdef HAVE_ARCH_X86_64_SUPPORT -# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, +# define MEMSET_FN(_fn, _init, _fini, _name, _desc) \ + {.name = _name, .desc = _desc, .fn.memset = _fn, .fn.init = _init, .fn.fini = _fini }, # include "mem-memset-x86-64-asm-def.h" # undef MEMSET_FN #endif @@ -366,9 +472,91 @@ int bench_mem_memset(int argc, const char **argv) { struct bench_mem_info info = { .functions = memset_functions, - .do_cycles = do_memset_cycles, - .do_gettimeofday = do_memset_gettimeofday, + .do_op = do_memset, .usage = bench_mem_memset_usage, + .options = bench_mem_options, + }; + + return bench_mem_common(argc, argv, &info); +} + +static void mmap_page_touch(void *dst, size_t size, unsigned int page_shift, bool random) +{ + unsigned long npages = size / (1 << page_shift); + unsigned long offset = 0, r = 0; + + for (unsigned long i = 0; i < npages; i++) { + if (random) + r = rand() % (1 << page_shift); + + *((char *)dst + offset + r) = *(char *)(dst + offset + r) + i; + offset += 1 << page_shift; + } +} + +static int do_mmap(const struct function *r, struct bench_params *p, + void *src __maybe_unused, void *dst __maybe_unused, + union bench_clock *accum) +{ + union bench_clock start, end, diff; + mmap_op_t fn = r->fn.mmap_op; + bool populate = strcmp(r->name, "populate") == 0; + + if (p->seed) + srand(p->seed); + + for (unsigned int i = 0; i < p->nr_loops; i++) { + clock_get(&start); + dst = bench_mmap(p->size, populate, p->page_shift); + if (!dst) + goto out; + + fn(dst, p->size, p->page_shift, p->seed); + clock_get(&end); + diff = clock_diff(&start, &end); + clock_accum(accum, &diff); + + bench_munmap(dst, p->size); + } + + return 0; +out: + printf("# Memory allocation failed - maybe size (%s) %s?\n", size_str, + p->page_shift != PAGE_SHIFT_4KB ? "has insufficient hugepages" : "is too large"); + return -1; +} + +static const char * const bench_mem_mmap_usage[] = { + "perf bench mem mmap <options>", + NULL +}; + +static const struct function mmap_functions[] = { + { .name = "demand", + .desc = "Demand loaded mmap()", + .fn.mmap_op = mmap_page_touch }, + + { .name = "populate", + .desc = "Eagerly populated mmap()", + .fn.mmap_op = mmap_page_touch }, + + { .name = NULL, } +}; + +int bench_mem_mmap(int argc, const char **argv) +{ + static const struct option bench_mmap_options[] = { + OPT_UINTEGER('r', "randomize", &seed, + "Seed to randomize page access offset."), + OPT_PARENT(bench_common_options), + OPT_END() + }; + + struct bench_mem_info info = { + .functions = mmap_functions, + .do_op = do_mmap, + .usage = bench_mem_mmap_usage, + .options = bench_mmap_options, }; return bench_mem_common(argc, argv, &info); diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h index 5bcaec5601a8..852e48cfd8fe 100644 --- a/tools/perf/bench/mem-memcpy-arch.h +++ b/tools/perf/bench/mem-memcpy-arch.h @@ -2,7 +2,7 @@ #ifdef HAVE_ARCH_X86_64_SUPPORT -#define MEMCPY_FN(fn, name, desc) \ +#define MEMCPY_FN(fn, init, fini, name, desc) \ void *fn(void *, const void *, size_t); #include "mem-memcpy-x86-64-asm-def.h" diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h index 6188e19d3129..f43038f4448b 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h @@ -1,9 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ MEMCPY_FN(memcpy_orig, + mem_alloc, + mem_free, "x86-64-unrolled", "unrolled memcpy() in arch/x86/lib/memcpy_64.S") MEMCPY_FN(__memcpy, + mem_alloc, + mem_free, "x86-64-movsq", "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h index 53f45482663f..278c5da12d63 100644 --- a/tools/perf/bench/mem-memset-arch.h +++ b/tools/perf/bench/mem-memset-arch.h @@ -2,7 +2,7 @@ #ifdef HAVE_ARCH_X86_64_SUPPORT -#define MEMSET_FN(fn, name, desc) \ +#define MEMSET_FN(fn, init, fini, name, desc) \ void *fn(void *, int, size_t); #include "mem-memset-x86-64-asm-def.h" diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h index 247c72fdfb9d..80ad1b7ea770 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h @@ -1,9 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ MEMSET_FN(memset_orig, + mem_alloc, + mem_free, "x86-64-unrolled", "unrolled memset() in arch/x86/lib/memset_64.S") MEMSET_FN(__memset, + mem_alloc, + mem_free, "x86-64-stosq", "movsq-based memset() in arch/x86/lib/memset_64.S") diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 1fbd7c947abc..19be2aaf4dc0 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -27,6 +27,7 @@ #include <sys/resource.h> #include <sys/wait.h> #include <sys/prctl.h> +#include <sys/stat.h> #include <sys/types.h> #include <linux/kernel.h> #include <linux/time64.h> @@ -35,6 +36,7 @@ #include "../util/header.h" #include "../util/mutex.h" +#include <api/fs/fs.h> #include <numa.h> #include <numaif.h> @@ -533,6 +535,57 @@ static int parse_cpu_list(const char *arg) return 0; } +/* + * Check whether a CPU is online + * + * Returns: + * 1 -> if CPU is online + * 0 -> if CPU is offline + * -1 -> error case + */ +static int is_cpu_online(unsigned int cpu) +{ + char *str; + size_t strlen; + char buf[256]; + int status = -1; + struct stat statbuf; + + snprintf(buf, sizeof(buf), + "/sys/devices/system/cpu/cpu%d", cpu); + if (stat(buf, &statbuf) != 0) + return 0; + + /* + * Check if /sys/devices/system/cpu/cpux/online file + * exists. Some cases cpu0 won't have online file since + * it is not expected to be turned off generally. + * In kernels without CONFIG_HOTPLUG_CPU, this + * file won't exist + */ + snprintf(buf, sizeof(buf), + "/sys/devices/system/cpu/cpu%d/online", cpu); + if (stat(buf, &statbuf) != 0) + return 1; + + /* + * Read online file using sysfs__read_str. + * If read or open fails, return -1. + * If read succeeds, return value from file + * which gets stored in "str" + */ + snprintf(buf, sizeof(buf), + "devices/system/cpu/cpu%d/online", cpu); + + if (sysfs__read_str(buf, &str, &strlen) < 0) + return status; + + status = atoi(str); + + free(str); + return status; +} + static int parse_setup_cpu_list(void) { struct thread_data *td; diff --git a/tools/perf/bench/pmu-scan.c b/tools/perf/bench/pmu-scan.c index 9e4d36486f62..14a464ad8cea 100644 --- a/tools/perf/bench/pmu-scan.c +++ b/tools/perf/bench/pmu-scan.c @@ -4,6 +4,7 @@ * * Copyright 2023 Google LLC. */ +#include <errno.h> #include <stdio.h> #include "bench.h" #include "util/debug.h" diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 3af6d3c55aba..70139036d68f 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -23,6 +23,7 @@ #include <errno.h> #include <fcntl.h> #include <assert.h> +#include <sys/epoll.h> #include <sys/time.h> #include <sys/types.h> #include <sys/syscall.h> @@ -34,6 +35,8 @@ struct thread_data { int nr; int pipe_read; int pipe_write; + struct epoll_event epoll_ev; + int epoll_fd; bool cgroup_failed; pthread_t pthread; }; @@ -44,6 +47,7 @@ static int loops = LOOPS_DEFAULT; /* Use processes by default: */ static bool threaded; +static bool nonblocking; static char *cgrp_names[2]; static struct cgroup *cgrps[2]; @@ -81,6 +85,7 @@ out: } static const struct option options[] = { + OPT_BOOLEAN('n', "nonblocking", &nonblocking, "Use non-blocking operations"), OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), OPT_BOOLEAN('T', "threaded", &threaded, "Specify threads/process based task setup"), OPT_CALLBACK('G', "cgroups", NULL, "SEND,RECV", @@ -165,11 +170,25 @@ static void exit_cgroup(int nr) free(cgrp_names[nr]); } +static inline int read_pipe(struct thread_data *td) +{ + int ret, m; +retry: + if (nonblocking) { + ret = epoll_wait(td->epoll_fd, &td->epoll_ev, 1, -1); + if (ret < 0) + return ret; + } + ret = read(td->pipe_read, &m, sizeof(int)); + if (nonblocking && ret < 0 && errno == EWOULDBLOCK) + goto retry; + return ret; +} + static void *worker_thread(void *__tdata) { struct thread_data *td = __tdata; - int m = 0, i; - int ret; + int i, ret, m = 0; ret = enter_cgroup(td->nr); if (ret < 0) { @@ -177,18 +196,18 @@ static void *worker_thread(void *__tdata) return NULL; } + if (nonblocking) { + td->epoll_ev.events = EPOLLIN; + td->epoll_fd = epoll_create(1); + BUG_ON(td->epoll_fd < 0); + BUG_ON(epoll_ctl(td->epoll_fd, EPOLL_CTL_ADD, td->pipe_read, &td->epoll_ev) < 0); + } + for (i = 0; i < loops; i++) { - if (!td->nr) { - ret = read(td->pipe_read, &m, sizeof(int)); - BUG_ON(ret != sizeof(int)); - ret = write(td->pipe_write, &m, sizeof(int)); - BUG_ON(ret != sizeof(int)); - } else { - ret = write(td->pipe_write, &m, sizeof(int)); - BUG_ON(ret != sizeof(int)); - ret = read(td->pipe_read, &m, sizeof(int)); - BUG_ON(ret != sizeof(int)); - } + ret = write(td->pipe_write, &m, sizeof(int)); + BUG_ON(ret != sizeof(int)); + ret = read_pipe(td); + BUG_ON(ret != sizeof(int)); } return NULL; @@ -209,13 +228,16 @@ int bench_sched_pipe(int argc, const char **argv) * discarding returned value of read(), write() * causes error in building environment for perf */ - int __maybe_unused ret, wait_stat; + int __maybe_unused ret, wait_stat, flags = 0; pid_t pid, retpid __maybe_unused; argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0); - BUG_ON(pipe(pipe_1)); - BUG_ON(pipe(pipe_2)); + if (nonblocking) + flags |= O_NONBLOCK; + + BUG_ON(pipe2(pipe_1, flags)); + BUG_ON(pipe2(pipe_2, flags)); gettimeofday(&start, NULL); diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c index 7401ebbac100..265d49a913d9 100644 --- a/tools/perf/bench/synthesize.c +++ b/tools/perf/bench/synthesize.c @@ -6,6 +6,7 @@ * * Copyright 2019 Google LLC. */ +#include <errno.h> #include <stdio.h> #include "bench.h" #include "../util/debug.h" @@ -49,7 +50,7 @@ static const char *const bench_usage[] = { static atomic_t event_count; -static int process_synthesized_event(struct perf_tool *tool __maybe_unused, +static int process_synthesized_event(const struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct perf_sample *sample __maybe_unused, struct machine *machine __maybe_unused) @@ -114,12 +115,16 @@ static int run_single_threaded(void) .pid = "self", }; struct perf_thread_map *threads; + struct perf_env host_env; int err; perf_set_singlethreaded(); - session = perf_session__new(NULL, NULL); + perf_env__init(&host_env); + session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL, + /*trace_event_repipe=*/false, &host_env); if (IS_ERR(session)) { pr_err("Session creation failed.\n"); + perf_env__exit(&host_env); return PTR_ERR(session); } threads = thread_map__new_by_pid(getpid()); @@ -144,6 +149,7 @@ err_out: perf_thread_map__put(threads); perf_session__delete(session); + perf_env__exit(&host_env); return err; } @@ -154,17 +160,21 @@ static int do_run_multi_threaded(struct target *target, u64 runtime_us; unsigned int i; double time_average, time_stddev, event_average, event_stddev; - int err; + int err = 0; struct stats time_stats, event_stats; struct perf_session *session; + struct perf_env host_env; + perf_env__init(&host_env); init_stats(&time_stats); init_stats(&event_stats); for (i = 0; i < multi_iterations; i++) { - session = perf_session__new(NULL, NULL); - if (IS_ERR(session)) - return PTR_ERR(session); - + session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL, + /*trace_event_repipe=*/false, &host_env); + if (IS_ERR(session)) { + err = PTR_ERR(session); + goto err_out; + } atomic_set(&event_count, 0); gettimeofday(&start, NULL); err = __machine__synthesize_threads(&session->machines.host, @@ -175,7 +185,7 @@ static int do_run_multi_threaded(struct target *target, nr_threads_synthesize); if (err) { perf_session__delete(session); - return err; + goto err_out; } gettimeofday(&end, NULL); @@ -198,7 +208,9 @@ static int do_run_multi_threaded(struct target *target, printf(" Average time per event %.3f usec\n", time_average / event_average); - return 0; +err_out: + perf_env__exit(&host_env); + return err; } static int run_multi_threaded(void) diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c index ea4dfc07cbd6..e7dc216f717f 100644 --- a/tools/perf/bench/syscall.c +++ b/tools/perf/bench/syscall.c @@ -22,8 +22,7 @@ #define __NR_fork -1 #endif -#define LOOPS_DEFAULT 10000000 -static int loops = LOOPS_DEFAULT; +static int loops; static const struct option options[] = { OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), @@ -80,6 +79,18 @@ static int bench_syscall_common(int argc, const char **argv, int syscall) const char *name = NULL; int i; + switch (syscall) { + case __NR_fork: + case __NR_execve: + /* Limit default loop to 10000 times to save time */ + loops = 10000; + break; + default: + loops = 10000000; + break; + } + + /* Options -l and --loops override default above */ argc = parse_options(argc, argv, options, bench_syscall_usage, 0); gettimeofday(&start, NULL); @@ -94,16 +105,9 @@ static int bench_syscall_common(int argc, const char **argv, int syscall) break; case __NR_fork: test_fork(); - /* Only loop 10000 times to save time */ - if (i == 10000) - loops = 10000; break; case __NR_execve: test_execve(); - /* Only loop 10000 times to save time */ - if (i == 10000) - loops = 10000; - break; default: break; } |
