diff options
Diffstat (limited to 'tools/perf/bench')
30 files changed, 1585 insertions, 299 deletions
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index 6b6155a8ad09..b558ab98719f 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -1,22 +1,26 @@ -perf-y += sched-messaging.o -perf-y += sched-pipe.o -perf-y += syscall.o -perf-y += mem-functions.o -perf-y += futex-hash.o -perf-y += futex-wake.o -perf-y += futex-wake-parallel.o -perf-y += futex-requeue.o -perf-y += futex-lock-pi.o -perf-y += epoll-wait.o -perf-y += epoll-ctl.o -perf-y += synthesize.o -perf-y += kallsyms-parse.o -perf-y += find-bit-bench.o -perf-y += inject-buildid.o -perf-y += evlist-open-close.o -perf-y += breakpoint.o +perf-bench-y += sched-messaging.o +perf-bench-y += sched-pipe.o +perf-bench-y += sched-seccomp-notify.o +perf-bench-y += syscall.o +perf-bench-y += mem-functions.o +perf-bench-y += futex.o +perf-bench-y += futex-hash.o +perf-bench-y += futex-wake.o +perf-bench-y += futex-wake-parallel.o +perf-bench-y += futex-requeue.o +perf-bench-y += futex-lock-pi.o +perf-bench-y += epoll-wait.o +perf-bench-y += epoll-ctl.o +perf-bench-y += synthesize.o +perf-bench-y += kallsyms-parse.o +perf-bench-y += find-bit-bench.o +perf-bench-y += inject-buildid.o +perf-bench-y += evlist-open-close.o +perf-bench-y += breakpoint.o +perf-bench-y += pmu-scan.o +perf-bench-y += uprobe.o -perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o -perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o +perf-bench-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o +perf-bench-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o -perf-$(CONFIG_NUMA) += numa.o +perf-bench-$(CONFIG_NUMA) += numa.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index a5d49b3b6a09..8519eb5a42fa 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -21,9 +21,14 @@ extern struct timeval bench__start, bench__end, bench__runtime; int bench_numa(int argc, const char **argv); int bench_sched_messaging(int argc, const char **argv); int bench_sched_pipe(int argc, const char **argv); +int 
bench_sched_seccomp_notify(int argc, const char **argv); int bench_syscall_basic(int argc, const char **argv); +int bench_syscall_getpgid(int argc, const char **argv); +int bench_syscall_fork(int argc, const char **argv); +int bench_syscall_execve(int argc, const char **argv); int bench_mem_memcpy(int argc, const char **argv); int bench_mem_memset(int argc, const char **argv); +int bench_mem_mmap(int argc, const char **argv); int bench_mem_find_bit(int argc, const char **argv); int bench_futex_hash(int argc, const char **argv); int bench_futex_wake(int argc, const char **argv); @@ -39,6 +44,12 @@ int bench_inject_build_id(int argc, const char **argv); int bench_evlist_open_close(int argc, const char **argv); int bench_breakpoint_thread(int argc, const char **argv); int bench_breakpoint_enable(int argc, const char **argv); +int bench_uprobe_baseline(int argc, const char **argv); +int bench_uprobe_empty(int argc, const char **argv); +int bench_uprobe_trace_printk(int argc, const char **argv); +int bench_uprobe_empty_ret(int argc, const char **argv); +int bench_uprobe_trace_printk_ret(int argc, const char **argv); +int bench_pmu_scan(int argc, const char **argv); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/breakpoint.c b/tools/perf/bench/breakpoint.c index 41385f89ffc7..dfd18f5db97d 100644 --- a/tools/perf/bench/breakpoint.c +++ b/tools/perf/bench/breakpoint.c @@ -47,6 +47,7 @@ struct breakpoint { static int breakpoint_setup(void *addr) { struct perf_event_attr attr = { .size = 0, }; + int fd; attr.type = PERF_TYPE_BREAKPOINT; attr.size = sizeof(attr); @@ -56,7 +57,12 @@ static int breakpoint_setup(void *addr) attr.bp_addr = (unsigned long)addr; attr.bp_type = HW_BREAKPOINT_RW; attr.bp_len = HW_BREAKPOINT_LEN_1; - return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0); + fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0); + + if (fd < 0) + fd = -errno; + + return fd; } static void *passive_thread(void 
*arg) @@ -122,8 +128,14 @@ int bench_breakpoint_thread(int argc, const char **argv) for (i = 0; i < thread_params.nbreakpoints; i++) { breakpoints[i].fd = breakpoint_setup(&breakpoints[i].watched); - if (breakpoints[i].fd == -1) + + if (breakpoints[i].fd < 0) { + if (breakpoints[i].fd == -ENODEV) { + printf("Skipping perf bench breakpoint thread: No hardware support\n"); + return 0; + } exit((perror("perf_event_open"), EXIT_FAILURE)); + } } gettimeofday(&start, NULL); for (i = 0; i < thread_params.nparallel; i++) { @@ -196,8 +208,14 @@ int bench_breakpoint_enable(int argc, const char **argv) exit(EXIT_FAILURE); } fd = breakpoint_setup(&watched); - if (fd == -1) + + if (fd < 0) { + if (fd == -ENODEV) { + printf("Skipping perf bench breakpoint enable: No hardware support\n"); + return 0; + } exit((perror("perf_event_open"), EXIT_FAILURE)); + } nthreads = enable_params.npassive + enable_params.nactive; threads = calloc(nthreads, sizeof(threads[0])); if (!threads) diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index 521d1ff97b06..d66d852b90e4 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -232,7 +232,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) pthread_attr_init(&thread_attr); - nrcpus = perf_cpu_map__nr(cpu); + nrcpus = cpu__max_cpu().cpu; cpuset = CPU_ALLOC(nrcpus); BUG_ON(!cpuset); size = CPU_ALLOC_SIZE(nrcpus); @@ -330,7 +330,7 @@ int bench_epoll_ctl(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) goto errmem; @@ -421,6 +421,11 @@ int bench_epoll_ctl(int argc, const char **argv) print_summary(); close(epollfd); + perf_cpu_map__put(cpu); + for (i = 0; i < nthreads; i++) + free(worker[i].fdmap); + + free(worker); return ret; errmem: err(EXIT_FAILURE, "calloc"); diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c 
index c1cdf03c075d..20fe4f72b4af 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -309,7 +309,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) pthread_attr_init(&thread_attr); - nrcpus = perf_cpu_map__nr(cpu); + nrcpus = cpu__max_cpu().cpu; cpuset = CPU_ALLOC(nrcpus); BUG_ON(!cpuset); size = CPU_ALLOC_SIZE(nrcpus); @@ -420,7 +420,12 @@ static int cmpworker(const void *p1, const void *p2) struct worker *w1 = (struct worker *) p1; struct worker *w2 = (struct worker *) p2; - return w1->tid > w2->tid; + + if (w1->tid > w2->tid) + return 1; + if (w1->tid < w2->tid) + return -1; + return 0; } int bench_epoll_wait(int argc, const char **argv) @@ -444,7 +449,7 @@ int bench_epoll_wait(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) goto errmem; @@ -549,6 +554,11 @@ int bench_epoll_wait(int argc, const char **argv) print_summary(); close(epollfd); + perf_cpu_map__put(cpu); + for (i = 0; i < nthreads; i++) + free(worker[i].fdmap); + + free(worker); return ret; errmem: err(EXIT_FAILURE, "calloc"); diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c index 5a27691469ed..faf9c34b4a5d 100644 --- a/tools/perf/bench/evlist-open-close.c +++ b/tools/perf/bench/evlist-open-close.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <errno.h> #include <inttypes.h> #include <stdio.h> #include <stdlib.h> @@ -46,25 +47,6 @@ static struct record_opts opts = { .ctl_fd_ack = -1, }; -static const struct option options[] = { - OPT_STRING('e', "event", &event_string, "event", "event selector. use 'perf list' to list available events"), - OPT_INTEGER('n', "nr-events", &nr_events, - "number of dummy events to create (default 1). 
If used with -e, it clones those events n times (1 = no change)"), - OPT_INTEGER('i', "iterations", &iterations, "Number of iterations used to compute average (default=100)"), - OPT_BOOLEAN('a', "all-cpus", &opts.target.system_wide, "system-wide collection from all CPUs"), - OPT_STRING('C', "cpu", &opts.target.cpu_list, "cpu", "list of cpus where to open events"), - OPT_STRING('p', "pid", &opts.target.pid, "pid", "record events on existing process id"), - OPT_STRING('t', "tid", &opts.target.tid, "tid", "record events on existing thread id"), - OPT_STRING('u', "uid", &opts.target.uid_str, "user", "user to profile"), - OPT_BOOLEAN(0, "per-thread", &opts.target.per_thread, "use per-thread mmaps"), - OPT_END() -}; - -static const char *const bench_usage[] = { - "perf bench internals evlist-open-close <options>", - NULL -}; - static int evlist__count_evsel_fds(struct evlist *evlist) { struct evsel *evsel; @@ -76,7 +58,7 @@ static int evlist__count_evsel_fds(struct evlist *evlist) return cnt; } -static struct evlist *bench__create_evlist(char *evstr) +static struct evlist *bench__create_evlist(char *evstr, const char *uid_str) { struct parse_events_error err; struct evlist *evlist = evlist__new(); @@ -97,6 +79,18 @@ static struct evlist *bench__create_evlist(char *evstr) goto out_delete_evlist; } parse_events_error__exit(&err); + if (uid_str) { + uid_t uid = parse_uid(uid_str); + + if (uid == UINT_MAX) { + pr_err("Invalid User: %s", uid_str); + ret = -EINVAL; + goto out_delete_evlist; + } + ret = parse_uid_filter(evlist, uid); + if (ret) + goto out_delete_evlist; + } ret = evlist__create_maps(evlist, &opts.target); if (ret < 0) { pr_err("Not enough memory to create thread/cpu maps\n"); @@ -136,10 +130,10 @@ static int bench__do_evlist_open_close(struct evlist *evlist) return 0; } -static int bench_evlist_open_close__run(char *evstr) +static int bench_evlist_open_close__run(char *evstr, const char *uid_str) { // used to print statistics only - struct evlist *evlist = 
bench__create_evlist(evstr); + struct evlist *evlist = bench__create_evlist(evstr, uid_str); double time_average, time_stddev; struct timeval start, end, diff; struct stats time_stats; @@ -161,7 +155,7 @@ static int bench_evlist_open_close__run(char *evstr) for (i = 0; i < iterations; i++) { pr_debug("Started iteration %d\n", i); - evlist = bench__create_evlist(evstr); + evlist = bench__create_evlist(evstr, uid_str); if (!evlist) return -ENOMEM; @@ -225,6 +219,30 @@ out_error: int bench_evlist_open_close(int argc, const char **argv) { + const char *uid_str = NULL; + const struct option options[] = { + OPT_STRING('e', "event", &event_string, "event", + "event selector. use 'perf list' to list available events"), + OPT_INTEGER('n', "nr-events", &nr_events, + "number of dummy events to create (default 1). If used with -e, it clones those events n times (1 = no change)"), + OPT_INTEGER('i', "iterations", &iterations, + "Number of iterations used to compute average (default=100)"), + OPT_BOOLEAN('a', "all-cpus", &opts.target.system_wide, + "system-wide collection from all CPUs"), + OPT_STRING('C', "cpu", &opts.target.cpu_list, "cpu", + "list of cpus where to open events"), + OPT_STRING('p', "pid", &opts.target.pid, "pid", + "record events on existing process id"), + OPT_STRING('t', "tid", &opts.target.tid, "tid", + "record events on existing thread id"), + OPT_STRING('u', "uid", &uid_str, "user", "user to profile"), + OPT_BOOLEAN(0, "per-thread", &opts.target.per_thread, "use per-thread mmaps"), + OPT_END() + }; + const char *const bench_usage[] = { + "perf bench internals evlist-open-close <options>", + NULL + }; char *evstr, errbuf[BUFSIZ]; int err; @@ -241,15 +259,8 @@ int bench_evlist_open_close(int argc, const char **argv) goto out; } - err = target__parse_uid(&opts.target); - if (err) { - target__strerror(&opts.target, err, errbuf, sizeof(errbuf)); - pr_err("%s", errbuf); - goto out; - } - - /* Enable ignoring missing threads when -u/-p option is defined. 
*/ - opts.ignore_missing_thread = opts.target.uid != UINT_MAX || opts.target.pid; + /* Enable ignoring missing threads when -p option is defined. */ + opts.ignore_missing_thread = opts.target.pid; evstr = bench__repeat_event_string(event_string, nr_events); if (!evstr) { @@ -257,7 +268,7 @@ int bench_evlist_open_close(int argc, const char **argv) goto out; } - err = bench_evlist_open_close__run(evstr); + err = bench_evlist_open_close__run(evstr, uid_str); free(evstr); out: diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c index d103c3136983..e697c20951bc 100644 --- a/tools/perf/bench/find-bit-bench.c +++ b/tools/perf/bench/find-bit-bench.c @@ -37,7 +37,7 @@ static noinline void workload(int val) accumulator++; } -#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__) +#if defined(__i386__) || defined(__x86_64__) static bool asm_test_bit(long nr, const unsigned long *addr) { bool oldbit; @@ -61,7 +61,6 @@ static int do_for_each_set_bit(unsigned int num_bits) double time_average, time_stddev; unsigned int bit, i, j; unsigned int set_bits, skip; - unsigned int old; init_stats(&fb_time_stats); init_stats(&tb_time_stats); @@ -73,7 +72,10 @@ static int do_for_each_set_bit(unsigned int num_bits) __set_bit(i, to_test); for (i = 0; i < outer_iterations; i++) { - old = accumulator; +#ifndef NDEBUG + unsigned int old = accumulator; +#endif + gettimeofday(&start, NULL); for (j = 0; j < inner_iterations; j++) { for_each_set_bit(bit, to_test, num_bits) @@ -85,7 +87,9 @@ static int do_for_each_set_bit(unsigned int num_bits) runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; update_stats(&fb_time_stats, runtime_us); +#ifndef NDEBUG old = accumulator; +#endif gettimeofday(&start, NULL); for (j = 0; j < inner_iterations; j++) { for (bit = 0; bit < num_bits; bit++) { diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 2005a3fa3026..7e29f04da744 100644 --- a/tools/perf/bench/futex-hash.c 
+++ b/tools/perf/bench/futex-hash.c @@ -21,6 +21,7 @@ #include <linux/zalloc.h> #include <sys/time.h> #include <sys/mman.h> +#include <sys/prctl.h> #include <perf/cpumap.h> #include "../util/mutex.h" @@ -50,9 +51,11 @@ struct worker { static struct bench_futex_parameters params = { .nfutexes = 1024, .runtime = 10, + .nbuckets = -1, }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"), OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"), OPT_UINTEGER('r', "runtime", &params.runtime, "Specify runtime (in seconds)"), OPT_UINTEGER('f', "futexes", &params.nfutexes, "Specify amount of futexes per threads"), @@ -118,6 +121,7 @@ static void print_summary(void) printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", !params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), (int)bench__runtime.tv_sec); + futex_print_nbuckets(&params); } int bench_futex_hash(int argc, const char **argv) @@ -138,7 +142,7 @@ int bench_futex_hash(int argc, const char **argv) exit(EXIT_FAILURE); } - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) goto errmem; @@ -161,6 +165,7 @@ int bench_futex_hash(int argc, const char **argv) if (!params.fshared) futex_flag = FUTEX_PRIVATE_FLAG; + futex_set_nbuckets_param(&params); printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n", getpid(), params.nthreads, params.nfutexes, params.fshared ? 
"shared":"private", params.runtime); @@ -174,7 +179,7 @@ int bench_futex_hash(int argc, const char **argv) pthread_attr_init(&thread_attr); gettimeofday(&bench__start, NULL); - nrcpus = perf_cpu_map__nr(cpu); + nrcpus = cpu__max_cpu().cpu; cpuset = CPU_ALLOC(nrcpus); BUG_ON(!cpuset); size = CPU_ALLOC_SIZE(nrcpus); diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 2d0417949727..40640b674427 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -41,10 +41,12 @@ static struct stats throughput_stats; static struct cond thread_parent, thread_worker; static struct bench_futex_parameters params = { + .nbuckets = -1, .runtime = 10, }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"), OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"), OPT_UINTEGER('r', "runtime", &params.runtime, "Specify runtime (in seconds)"), OPT_BOOLEAN( 'M', "multi", &params.multi, "Use multiple futexes"), @@ -67,6 +69,7 @@ static void print_summary(void) printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", !params.silent ? 
"\n" : "", avg, rel_stddev_stats(stddev, avg), (int)bench__runtime.tv_sec); + futex_print_nbuckets(&params); } static void toggle_done(int sig __maybe_unused, @@ -118,12 +121,11 @@ static void *workerfn(void *arg) return NULL; } -static void create_threads(struct worker *w, pthread_attr_t thread_attr, - struct perf_cpu_map *cpu) +static void create_threads(struct worker *w, struct perf_cpu_map *cpu) { cpu_set_t *cpuset; unsigned int i; - int nrcpus = perf_cpu_map__nr(cpu); + int nrcpus = cpu__max_cpu().cpu; size_t size; threads_starting = params.nthreads; @@ -133,6 +135,9 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr, size = CPU_ALLOC_SIZE(nrcpus); for (i = 0; i < params.nthreads; i++) { + pthread_attr_t thread_attr; + + pthread_attr_init(&thread_attr); worker[i].tid = i; if (params.multi) { @@ -154,6 +159,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr, CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); } + pthread_attr_destroy(&thread_attr); } CPU_FREE(cpuset); } @@ -163,14 +169,13 @@ int bench_futex_lock_pi(int argc, const char **argv) int ret = 0; unsigned int i; struct sigaction act; - pthread_attr_t thread_attr; struct perf_cpu_map *cpu; argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0); if (argc) goto err; - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "calloc"); @@ -201,13 +206,12 @@ int bench_futex_lock_pi(int argc, const char **argv) mutex_init(&thread_lock); cond_init(&thread_parent); cond_init(&thread_worker); + futex_set_nbuckets_param(&params); threads_starting = params.nthreads; - pthread_attr_init(&thread_attr); gettimeofday(&bench__start, NULL); - create_threads(worker, thread_attr, cpu); - pthread_attr_destroy(&thread_attr); + create_threads(worker, cpu); mutex_lock(&thread_lock); while (threads_starting) diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 
69ad896f556c..0748b0fd689e 
100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -42,6 +42,7 @@ static unsigned int threads_starting; static int futex_flag = 0; static struct bench_futex_parameters params = { + .nbuckets = -1, /* * How many tasks to requeue at a time. * Default to 1 in order to make the kernel work more. @@ -50,6 +51,7 @@ static struct bench_futex_parameters params = { }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"), OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"), OPT_UINTEGER('q', "nrequeue", &params.nrequeue, "Specify amount of threads to requeue at once"), OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"), @@ -77,6 +79,7 @@ static void print_summary(void) params.nthreads, requeuetime_avg / USEC_PER_MSEC, rel_stddev_stats(requeuetime_stddev, requeuetime_avg)); + futex_print_nbuckets(&params); } static void *workerfn(void *arg __maybe_unused) @@ -121,12 +124,11 @@ static void *workerfn(void *arg __maybe_unused) return NULL; } -static void block_threads(pthread_t *w, - pthread_attr_t thread_attr, struct perf_cpu_map *cpu) +static void block_threads(pthread_t *w, struct perf_cpu_map *cpu) { cpu_set_t *cpuset; unsigned int i; - int nrcpus = perf_cpu_map__nr(cpu); + int nrcpus = cpu__max_cpu().cpu; size_t size; threads_starting = params.nthreads; @@ -137,6 +139,9 @@ static void block_threads(pthread_t *w, /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { + pthread_attr_t thread_attr; + + pthread_attr_init(&thread_attr); CPU_ZERO_S(size, cpuset); CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); @@ -149,6 +154,7 @@ static void block_threads(pthread_t *w, CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); } + pthread_attr_destroy(&thread_attr); } CPU_FREE(cpuset); } @@ -165,14 +171,13 @@ int bench_futex_requeue(int argc, const char **argv) int ret 
= 0; unsigned int 
i, j; struct sigaction act; - pthread_attr_t thread_attr; struct perf_cpu_map *cpu; argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0); if (argc) goto err; - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "cpu_map__new"); @@ -202,6 +207,8 @@ int bench_futex_requeue(int argc, const char **argv) if (params.broadcast) params.nrequeue = params.nthreads; + futex_set_nbuckets_param(&params); + printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), " "%d at a time.\n\n", getpid(), params.nthreads, params.fshared ? "shared":"private", &futex1, @@ -209,7 +216,6 @@ int bench_futex_requeue(int argc, const char **argv) init_stats(&requeued_stats); init_stats(&requeuetime_stats); - pthread_attr_init(&thread_attr); mutex_init(&thread_lock); cond_init(&thread_parent); cond_init(&thread_worker); @@ -219,7 +225,7 @@ int bench_futex_requeue(int argc, const char **argv) struct timeval start, end, runtime; /* create, launch & block all threads */ - block_threads(worker, thread_attr, cpu); + block_threads(worker, cpu); /* make sure all threads are already blocked */ mutex_lock(&thread_lock); @@ -301,7 +307,6 @@ int bench_futex_requeue(int argc, const char **argv) cond_destroy(&thread_parent); cond_destroy(&thread_worker); mutex_destroy(&thread_lock); - pthread_attr_destroy(&thread_attr); print_summary(); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 6682e49d0ee0..6aede7c46b33 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -57,9 +57,12 @@ static struct stats waketime_stats, wakeup_stats; static unsigned int threads_starting; static int futex_flag = 0; -static struct bench_futex_parameters params; +static struct bench_futex_parameters params = { + .nbuckets = -1, +}; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash 
buckets"), 
OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"), OPT_UINTEGER('w', "nwakers", &params.nwakes, "Specify amount of waking threads"), OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"), @@ -95,10 +98,12 @@ static void *waking_workerfn(void *arg) return NULL; } -static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr) +static void wakeup_threads(struct thread_data *td) { unsigned int i; + pthread_attr_t thread_attr; + pthread_attr_init(&thread_attr); pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE); pthread_barrier_init(&barrier, NULL, params.nwakes + 1); @@ -122,6 +127,7 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr) err(EXIT_FAILURE, "pthread_join"); pthread_barrier_destroy(&barrier); + pthread_attr_destroy(&thread_attr); } static void *blocked_workerfn(void *arg __maybe_unused) @@ -142,12 +148,11 @@ static void *blocked_workerfn(void *arg __maybe_unused) return NULL; } -static void block_threads(pthread_t *w, pthread_attr_t thread_attr, - struct perf_cpu_map *cpu) +static void block_threads(pthread_t *w, struct perf_cpu_map *cpu) { cpu_set_t *cpuset; unsigned int i; - int nrcpus = perf_cpu_map__nr(cpu); + int nrcpus = cpu__max_cpu().cpu; size_t size; threads_starting = params.nthreads; @@ -158,6 +163,9 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr, /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { + pthread_attr_t thread_attr; + + pthread_attr_init(&thread_attr); CPU_ZERO_S(size, cpuset); CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); @@ -170,6 +178,7 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr, CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); } + pthread_attr_destroy(&thread_attr); } CPU_FREE(cpuset); } @@ -212,6 +221,7 @@ static void print_summary(void) params.nthreads, waketime_avg / USEC_PER_MSEC, 
rel_stddev_stats(waketime_stddev, waketime_avg)); + futex_print_nbuckets(&params); } @@ -238,7 +248,6 @@ int bench_futex_wake_parallel(int argc, const char **argv) int ret = 0; unsigned int i, j; struct sigaction act; - pthread_attr_t thread_attr; struct thread_data *waking_worker; struct perf_cpu_map *cpu; @@ -259,7 +268,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) err(EXIT_FAILURE, "mlockall"); } - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "calloc"); @@ -286,6 +295,8 @@ int bench_futex_wake_parallel(int argc, const char **argv) if (!params.fshared) futex_flag = FUTEX_PRIVATE_FLAG; + futex_set_nbuckets_param(&params); + printf("Run summary [PID %d]: blocking on %d threads (at [%s] " "futex %p), %d threads waking up %d at a time.\n\n", getpid(), params.nthreads, params.fshared ? "shared":"private", @@ -294,7 +305,6 @@ int bench_futex_wake_parallel(int argc, const char **argv) init_stats(&wakeup_stats); init_stats(&waketime_stats); - pthread_attr_init(&thread_attr); mutex_init(&thread_lock); cond_init(&thread_parent); cond_init(&thread_worker); @@ -305,7 +315,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) err(EXIT_FAILURE, "calloc"); /* create, launch & block all threads */ - block_threads(blocked_worker, thread_attr, cpu); + block_threads(blocked_worker, cpu); /* make sure all threads are already blocked */ mutex_lock(&thread_lock); @@ -314,10 +324,10 @@ int bench_futex_wake_parallel(int argc, const char **argv) cond_broadcast(&thread_worker); mutex_unlock(&thread_lock); - usleep(100000); + usleep(200000); /* Ok, all threads are patiently blocked, start waking folks up */ - wakeup_threads(waking_worker, thread_attr); + wakeup_threads(waking_worker); for (i = 0; i < params.nthreads; i++) { ret = pthread_join(blocked_worker[i], NULL); @@ -336,7 +346,6 @@ int bench_futex_wake_parallel(int argc, const char **argv) cond_destroy(&thread_parent); cond_destroy(&thread_worker); 
mutex_destroy(&thread_lock); - pthread_attr_destroy(&thread_attr); print_summary(); diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 9ecab6620a87..a31fc1563862 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -42,6 +42,7 @@ static unsigned int threads_starting; static int futex_flag = 0; static struct bench_futex_parameters params = { + .nbuckets = -1, /* * How many wakeups to do at a time. * Default to 1 in order to make the kernel work more. @@ -50,6 +51,7 @@ static struct bench_futex_parameters params = { }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", &params.nbuckets, "Specify amount of hash buckets"), OPT_UINTEGER('t', "threads", &params.nthreads, "Specify amount of threads"), OPT_UINTEGER('w', "nwakes", &params.nwakes, "Specify amount of threads to wake at once"), OPT_BOOLEAN( 's', "silent", &params.silent, "Silent mode: do not display data/details"), @@ -93,15 +95,15 @@ static void print_summary(void) params.nthreads, waketime_avg / USEC_PER_MSEC, rel_stddev_stats(waketime_stddev, waketime_avg)); + futex_print_nbuckets(&params); } -static void block_threads(pthread_t *w, - pthread_attr_t thread_attr, struct perf_cpu_map *cpu) +static void block_threads(pthread_t *w, struct perf_cpu_map *cpu) { cpu_set_t *cpuset; unsigned int i; size_t size; - int nrcpus = perf_cpu_map__nr(cpu); + int nrcpus = cpu__max_cpu().cpu; threads_starting = params.nthreads; cpuset = CPU_ALLOC(nrcpus); @@ -110,6 +112,9 @@ static void block_threads(pthread_t *w, /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { + pthread_attr_t thread_attr; + + pthread_attr_init(&thread_attr); CPU_ZERO_S(size, cpuset); CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); @@ -122,6 +127,7 @@ static void block_threads(pthread_t *w, CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); } + pthread_attr_destroy(&thread_attr); } CPU_FREE(cpuset); } @@ -138,7 +144,6 
@@ int 
bench_futex_wake(int argc, const char **argv) int ret = 0; unsigned int i, j; struct sigaction act; - pthread_attr_t thread_attr; struct perf_cpu_map *cpu; argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0); @@ -147,7 +152,7 @@ int bench_futex_wake(int argc, const char **argv) exit(EXIT_FAILURE); } - cpu = perf_cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) err(EXIT_FAILURE, "calloc"); @@ -178,7 +183,6 @@ int bench_futex_wake(int argc, const char **argv) init_stats(&wakeup_stats); init_stats(&waketime_stats); - pthread_attr_init(&thread_attr); mutex_init(&thread_lock); cond_init(&thread_parent); cond_init(&thread_worker); @@ -188,7 +192,7 @@ int bench_futex_wake(int argc, const char **argv) struct timeval start, end, runtime; /* create, launch & block all threads */ - block_threads(worker, thread_attr, cpu); + block_threads(worker, cpu); /* make sure all threads are already blocked */ mutex_lock(&thread_lock); @@ -228,7 +232,6 @@ int bench_futex_wake(int argc, const char **argv) cond_destroy(&thread_parent); cond_destroy(&thread_worker); mutex_destroy(&thread_lock); - pthread_attr_destroy(&thread_attr); print_summary(); diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c new file mode 100644 index 000000000000..1968c9d00b5b --- /dev/null +++ b/tools/perf/bench/futex.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <err.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/prctl.h> + +#include "futex.h" + +#ifndef PR_FUTEX_HASH +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define PR_FUTEX_HASH_GET_SLOTS 2 +#endif // PR_FUTEX_HASH + +void futex_set_nbuckets_param(struct bench_futex_parameters *params) +{ + int ret; + + if (params->nbuckets < 0) + return; + + ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, 0); + if (ret) { + printf("Requesting %d hash buckets failed: %d/%m\n", + params->nbuckets, ret); + err(EXIT_FAILURE, 
"prctl(PR_FUTEX_HASH)"); + } +} + +void futex_print_nbuckets(struct bench_futex_parameters *params) +{ + char *futex_hash_mode; + int ret; + + ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS); + if (params->nbuckets >= 0) { + if (ret != params->nbuckets) { + if (ret < 0) { + printf("Can't query number of buckets: %m\n"); + err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); + } + printf("Requested number of hash buckets does not currently used.\n"); + printf("Requested: %d in usage: %d\n", params->nbuckets, ret); + err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); + } + if (params->nbuckets == 0) + ret = asprintf(&futex_hash_mode, "Futex hashing: global hash"); + else + ret = asprintf(&futex_hash_mode, "Futex hashing: %d hash buckets", + params->nbuckets); + } else { + if (ret <= 0) { + ret = asprintf(&futex_hash_mode, "Futex hashing: global hash"); + } else { + ret = asprintf(&futex_hash_mode, "Futex hashing: auto resized to %d buckets", + ret); + } + } + if (ret < 0) + err(EXIT_FAILURE, "ENOMEM, futex_hash_mode"); + printf("%s\n", futex_hash_mode); + free(futex_hash_mode); +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index ebdc2b032afc..fcb72d682cf8 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -8,6 +8,7 @@ #ifndef _FUTEX_H #define _FUTEX_H +#include <stdbool.h> #include <unistd.h> #include <sys/syscall.h> #include <sys/types.h> @@ -25,6 +26,7 @@ struct bench_futex_parameters { unsigned int nfutexes; unsigned int nwakes; unsigned int nrequeue; + int nbuckets; }; /** @@ -143,4 +145,7 @@ futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, val, opflags); } +void futex_set_nbuckets_param(struct bench_futex_parameters *params); +void futex_print_nbuckets(struct bench_futex_parameters *params); + #endif /* _FUTEX_H */ diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index 4561bda0ce6a..aad572a78d7f 100644 --- a/tools/perf/bench/inject-buildid.c +++ 
b/tools/perf/bench/inject-buildid.c @@ -12,6 +12,7 @@ #include <linux/time64.h> #include <linux/list.h> #include <linux/err.h> +#include <linux/zalloc.h> #include <internal/lib.h> #include <subcmd/parse-options.h> @@ -51,7 +52,7 @@ struct bench_dso { static int nr_dsos; static struct bench_dso *dsos; -extern int cmd_inject(int argc, const char *argv[]); +extern int main(int argc, const char **argv); static const struct option options[] = { OPT_UINTEGER('i', "iterations", &iterations, @@ -79,7 +80,7 @@ static int add_dso(const char *fpath, const struct stat *sb __maybe_unused, int typeflag, struct FTW *ftwbuf __maybe_unused) { struct bench_dso *dso = &dsos[nr_dsos]; - struct build_id bid; + struct build_id bid = { .size = 0, }; if (typeflag == FTW_D || typeflag == FTW_SL) return 0; @@ -122,7 +123,7 @@ static void release_dso(void) for (i = 0; i < nr_dsos; i++) { struct bench_dso *dso = &dsos[i]; - free(dso->name); + zfree(&dso->name); } free(dsos); } @@ -293,7 +294,7 @@ static int setup_injection(struct bench_data *data, bool build_id_all) if (data->pid == 0) { const char **inject_argv; - int inject_argc = 2; + int inject_argc = 3; close(data->input_pipe[1]); close(data->output_pipe[0]); @@ -317,15 +318,16 @@ static int setup_injection(struct bench_data *data, bool build_id_all) if (inject_argv == NULL) exit(1); - inject_argv[0] = strdup("inject"); - inject_argv[1] = strdup("-b"); + inject_argv[0] = strdup("perf"); + inject_argv[1] = strdup("inject"); + inject_argv[2] = strdup("-b"); if (build_id_all) - inject_argv[2] = strdup("--buildid-all"); + inject_argv[3] = strdup("--buildid-all"); /* signal that we're ready to go */ close(ready_pipe[1]); - cmd_inject(inject_argc, inject_argv); + main(inject_argc, inject_argv); exit(0); } @@ -361,7 +363,7 @@ static int inject_build_id(struct bench_data *data, u64 *max_rss) return -1; for (i = 0; i < nr_mmaps; i++) { - int idx = rand() % (nr_dsos - 1); + int idx = rand() % nr_dsos; struct bench_dso *dso = &dsos[idx]; u64 
timestamp = rand() % 1000000; diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 19d45c377ac1..2908a3a796c9 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -22,27 +22,39 @@ #include <string.h> #include <unistd.h> #include <sys/time.h> +#include <sys/mman.h> #include <errno.h> #include <linux/time64.h> -#include <linux/zalloc.h> +#include <linux/log2.h> #define K 1024 +#define PAGE_SHIFT_4KB 12 +#define PAGE_SHIFT_2MB 21 +#define PAGE_SHIFT_1GB 30 + static const char *size_str = "1MB"; static const char *function_str = "all"; -static int nr_loops = 1; +static const char *page_size_str = "4KB"; +static const char *chunk_size_str = "0"; +static unsigned int nr_loops = 1; static bool use_cycles; static int cycles_fd; +static unsigned int seed; -static const struct option options[] = { +static const struct option bench_common_options[] = { OPT_STRING('s', "size", &size_str, "1MB", "Specify the size of the memory buffers. " "Available units: B, KB, MB, GB and TB (case insensitive)"), + OPT_STRING('p', "page", &page_size_str, "4KB", + "Specify page-size for mapping memory buffers. " + "Available sizes: 4KB, 2MB, 1GB (case insensitive)"), + OPT_STRING('f', "function", &function_str, "all", "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"), - OPT_INTEGER('l', "nr_loops", &nr_loops, + OPT_UINTEGER('l', "nr_loops", &nr_loops, "Specify the number of loops to run. (default: 1)"), OPT_BOOLEAN('c', "cycles", &use_cycles, @@ -51,15 +63,56 @@ static const struct option options[] = { OPT_END() }; +static const struct option bench_mem_options[] = { + OPT_STRING('k', "chunk", &chunk_size_str, "0", + "Specify the chunk-size for each invocation. 
" + "Available units: B, KB, MB, GB and TB (case insensitive)"), + OPT_PARENT(bench_common_options), + OPT_END() +}; + +union bench_clock { + u64 cycles; + struct timeval tv; +}; + +struct bench_params { + size_t size; + size_t size_total; + size_t chunk_size; + unsigned int nr_loops; + unsigned int page_shift; + unsigned int seed; +}; + +struct bench_mem_info { + const struct function *functions; + int (*do_op)(const struct function *r, struct bench_params *p, + void *src, void *dst, union bench_clock *rt); + const char *const *usage; + const struct option *options; + bool alloc_src; +}; + +typedef bool (*mem_init_t)(struct bench_mem_info *, struct bench_params *, + void **, void **); +typedef void (*mem_fini_t)(struct bench_mem_info *, struct bench_params *, + void **, void **); typedef void *(*memcpy_t)(void *, const void *, size_t); typedef void *(*memset_t)(void *, int, size_t); +typedef void (*mmap_op_t)(void *, size_t, unsigned int, bool); struct function { const char *name; const char *desc; - union { - memcpy_t memcpy; - memset_t memset; + struct { + mem_init_t init; + mem_fini_t fini; + union { + memcpy_t memcpy; + memset_t memset; + mmap_op_t mmap_op; + }; } fn; }; @@ -91,6 +144,34 @@ static u64 get_cycles(void) return clk; } +static void clock_get(union bench_clock *t) +{ + if (use_cycles) + t->cycles = get_cycles(); + else + BUG_ON(gettimeofday(&t->tv, NULL)); +} + +static union bench_clock clock_diff(union bench_clock *s, union bench_clock *e) +{ + union bench_clock t; + + if (use_cycles) + t.cycles = e->cycles - s->cycles; + else + timersub(&e->tv, &s->tv, &t.tv); + + return t; +} + +static void clock_accum(union bench_clock *a, union bench_clock *b) +{ + if (use_cycles) + a->cycles += b->cycles; + else + timeradd(&a->tv, &b->tv, &a->tv); +} + static double timeval2double(struct timeval *ts) { return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC; @@ -107,54 +188,40 @@ static double timeval2double(struct timeval *ts) printf(" %14lf 
GB/sec\n", x / K / K / K); \ } while (0) -struct bench_mem_info { - const struct function *functions; - u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst); - double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst); - const char *const *usage; - bool alloc_src; -}; - -static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total) +static void __bench_mem_function(struct bench_mem_info *info, struct bench_params *p, + int r_idx) { const struct function *r = &info->functions[r_idx]; double result_bps = 0.0; - u64 result_cycles = 0; - void *src = NULL, *dst = zalloc(size); + union bench_clock rt = { 0 }; + void *src = NULL, *dst = NULL; printf("# function '%s' (%s)\n", r->name, r->desc); - if (dst == NULL) - goto out_alloc_failed; - - if (info->alloc_src) { - src = zalloc(size); - if (src == NULL) - goto out_alloc_failed; - } + if (r->fn.init && r->fn.init(info, p, &src, &dst)) + goto out_init_failed; if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Copying %s bytes ...\n\n", size_str); - if (use_cycles) { - result_cycles = info->do_cycles(r, size, src, dst); - } else { - result_bps = info->do_gettimeofday(r, size, src, dst); - } + if (info->do_op(r, p, src, dst, &rt)) + goto out_test_failed; switch (bench_format) { case BENCH_FORMAT_DEFAULT: if (use_cycles) { - printf(" %14lf cycles/byte\n", (double)result_cycles/size_total); + printf(" %14lf cycles/byte\n", (double)rt.cycles/(double)p->size_total); } else { + result_bps = (double)p->size_total/timeval2double(&rt.tv); print_bps(result_bps); } break; case BENCH_FORMAT_SIMPLE: if (use_cycles) { - printf("%lf\n", (double)result_cycles/size_total); + printf("%lf\n", (double)rt.cycles/(double)p->size_total); } else { + result_bps = (double)p->size_total/timeval2double(&rt.tv); printf("%lf\n", result_bps); } break; @@ -164,22 +231,23 @@ static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t break; 
} +out_test_failed: out_free: - free(src); - free(dst); + if (r->fn.fini) r->fn.fini(info, p, &src, &dst); return; -out_alloc_failed: - printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str); +out_init_failed: + printf("# Memory allocation failed - maybe size (%s) %s?\n", size_str, + p->page_shift != PAGE_SHIFT_4KB ? "has insufficient hugepages" : "is too large"); goto out_free; } static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info) { int i; - size_t size; - double size_total; + struct bench_params p = { 0 }; + unsigned int page_size; - argc = parse_options(argc, argv, options, info->usage, 0); + argc = parse_options(argc, argv, info->options, info->usage, 0); if (use_cycles) { i = init_cycles(); @@ -189,17 +257,37 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * } } - size = (size_t)perf_atoll((char *)size_str); - size_total = (double)size * nr_loops; + p.nr_loops = nr_loops; + p.size = (size_t)perf_atoll((char *)size_str); - if ((s64)size <= 0) { + if ((s64)p.size <= 0) { fprintf(stderr, "Invalid size:%s\n", size_str); return 1; } + p.size_total = p.size * p.nr_loops; + + p.chunk_size = (size_t)perf_atoll((char *)chunk_size_str); + if ((s64)p.chunk_size < 0 || (s64)p.chunk_size > (s64)p.size) { + fprintf(stderr, "Invalid chunk_size:%s\n", chunk_size_str); + return 1; + } + if (!p.chunk_size) + p.chunk_size = p.size; + + page_size = (unsigned int)perf_atoll((char *)page_size_str); + if (page_size != (1 << PAGE_SHIFT_4KB) && + page_size != (1 << PAGE_SHIFT_2MB) && + page_size != (1 << PAGE_SHIFT_1GB)) { + fprintf(stderr, "Invalid page-size:%s\n", page_size_str); + return 1; + } + p.page_shift = ilog2(page_size); + + p.seed = seed; if (!strncmp(function_str, "all", 3)) { for (i = 0; info->functions[i].name; i++) - __bench_mem_function(info, i, size, size_total); + __bench_mem_function(info, &p, i); return 0; } @@ -218,7 +306,7 @@ static int bench_mem_common(int argc, const 
char **argv, struct bench_mem_info * return 1; } - __bench_mem_function(info, i, size, size_total); + __bench_mem_function(info, &p, i); return 0; } @@ -235,47 +323,81 @@ static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst) fn(dst, src, size); } -static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst) +static int do_memcpy(const struct function *r, struct bench_params *p, + void *src, void *dst, union bench_clock *rt) { - u64 cycle_start = 0ULL, cycle_end = 0ULL; + union bench_clock start, end; memcpy_t fn = r->fn.memcpy; - int i; - memcpy_prefault(fn, size, src, dst); + memcpy_prefault(fn, p->size, src, dst); + + clock_get(&start); + for (unsigned int i = 0; i < p->nr_loops; ++i) + for (size_t off = 0; off < p->size; off += p->chunk_size) + fn(dst + off, src + off, min(p->chunk_size, p->size - off)); + clock_get(&end); - cycle_start = get_cycles(); - for (i = 0; i < nr_loops; ++i) - fn(dst, src, size); - cycle_end = get_cycles(); + *rt = clock_diff(&start, &end); - return cycle_end - cycle_start; + return 0; } -static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst) +static void *bench_mmap(size_t size, bool populate, unsigned int page_shift) { - struct timeval tv_start, tv_end, tv_diff; - memcpy_t fn = r->fn.memcpy; - int i; + void *p; + int extra = populate ? MAP_POPULATE : 0; + + if (page_shift != PAGE_SHIFT_4KB) + extra |= MAP_HUGETLB | (page_shift << MAP_HUGE_SHIFT); + + p = mmap(NULL, size, PROT_READ|PROT_WRITE, + extra | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + + return p == MAP_FAILED ? 
NULL : p; +} + +static void bench_munmap(void *p, size_t size) +{ + if (p) + munmap(p, size); +} + +static bool mem_alloc(struct bench_mem_info *info, struct bench_params *p, + void **src, void **dst) +{ + bool failed; - memcpy_prefault(fn, size, src, dst); + *dst = bench_mmap(p->size, true, p->page_shift); + failed = *dst == NULL; - BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < nr_loops; ++i) - fn(dst, src, size); - BUG_ON(gettimeofday(&tv_end, NULL)); + if (info->alloc_src) { + *src = bench_mmap(p->size, true, p->page_shift); + failed = failed || *src == NULL; + } + + return failed; +} - timersub(&tv_end, &tv_start, &tv_diff); +static void mem_free(struct bench_mem_info *info __maybe_unused, + struct bench_params *p __maybe_unused, + void **src, void **dst) +{ + bench_munmap(*dst, p->size); + bench_munmap(*src, p->size); - return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); + *dst = *src = NULL; } struct function memcpy_functions[] = { { .name = "default", .desc = "Default memcpy() provided by glibc", + .fn.init = mem_alloc, + .fn.fini = mem_free, .fn.memcpy = memcpy }, #ifdef HAVE_ARCH_X86_64_SUPPORT -# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn}, +# define MEMCPY_FN(_fn, _init, _fini, _name, _desc) \ + {.name = _name, .desc = _desc, .fn.memcpy = _fn, .fn.init = _init, .fn.fini = _fini }, # include "mem-memcpy-x86-64-asm-def.h" # undef MEMCPY_FN #endif @@ -292,55 +414,36 @@ int bench_mem_memcpy(int argc, const char **argv) { struct bench_mem_info info = { .functions = memcpy_functions, - .do_cycles = do_memcpy_cycles, - .do_gettimeofday = do_memcpy_gettimeofday, + .do_op = do_memcpy, .usage = bench_mem_memcpy_usage, + .options = bench_mem_options, .alloc_src = true, }; return bench_mem_common(argc, argv, &info); } -static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst) -{ - u64 cycle_start = 0ULL, cycle_end = 0ULL; - memset_t fn = r->fn.memset; 
- int i; - - /* - * We prefault the freshly allocated memory range here, - * to not measure page fault overhead: - */ - fn(dst, -1, size); - - cycle_start = get_cycles(); - for (i = 0; i < nr_loops; ++i) - fn(dst, i, size); - cycle_end = get_cycles(); - - return cycle_end - cycle_start; -} - -static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst) +static int do_memset(const struct function *r, struct bench_params *p, + void *src __maybe_unused, void *dst, union bench_clock *rt) { - struct timeval tv_start, tv_end, tv_diff; + union bench_clock start, end; memset_t fn = r->fn.memset; - int i; /* * We prefault the freshly allocated memory range here, * to not measure page fault overhead: */ - fn(dst, -1, size); + fn(dst, -1, p->size); - BUG_ON(gettimeofday(&tv_start, NULL)); - for (i = 0; i < nr_loops; ++i) - fn(dst, i, size); - BUG_ON(gettimeofday(&tv_end, NULL)); + clock_get(&start); + for (unsigned int i = 0; i < p->nr_loops; ++i) + for (size_t off = 0; off < p->size; off += p->chunk_size) + fn(dst + off, i, min(p->chunk_size, p->size - off)); + clock_get(&end); - timersub(&tv_end, &tv_start, &tv_diff); + *rt = clock_diff(&start, &end); - return (double)(((double)size * nr_loops) / timeval2double(&tv_diff)); + return 0; } static const char * const bench_mem_memset_usage[] = { @@ -351,10 +454,13 @@ static const char * const bench_mem_memset_usage[] = { static const struct function memset_functions[] = { { .name = "default", .desc = "Default memset() provided by glibc", + .fn.init = mem_alloc, + .fn.fini = mem_free, .fn.memset = memset }, #ifdef HAVE_ARCH_X86_64_SUPPORT -# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn }, +# define MEMSET_FN(_fn, _init, _fini, _name, _desc) \ + {.name = _name, .desc = _desc, .fn.memset = _fn, .fn.init = _init, .fn.fini = _fini }, # include "mem-memset-x86-64-asm-def.h" # undef MEMSET_FN #endif @@ -366,9 +472,91 @@ int bench_mem_memset(int 
argc, const char **argv) { struct bench_mem_info info = { .functions = memset_functions, - .do_cycles = do_memset_cycles, - .do_gettimeofday = do_memset_gettimeofday, + .do_op = do_memset, .usage = bench_mem_memset_usage, + .options = bench_mem_options, + }; + + return bench_mem_common(argc, argv, &info); +} + +static void mmap_page_touch(void *dst, size_t size, unsigned int page_shift, bool random) +{ + unsigned long npages = size / (1 << page_shift); + unsigned long offset = 0, r = 0; + + for (unsigned long i = 0; i < npages; i++) { + if (random) + r = rand() % (1 << page_shift); + + *((char *)dst + offset + r) = *(char *)(dst + offset + r) + i; + offset += 1 << page_shift; + } +} + +static int do_mmap(const struct function *r, struct bench_params *p, + void *src __maybe_unused, void *dst __maybe_unused, + union bench_clock *accum) +{ + union bench_clock start, end, diff; + mmap_op_t fn = r->fn.mmap_op; + bool populate = strcmp(r->name, "populate") == 0; + + if (p->seed) + srand(p->seed); + + for (unsigned int i = 0; i < p->nr_loops; i++) { + clock_get(&start); + dst = bench_mmap(p->size, populate, p->page_shift); + if (!dst) + goto out; + + fn(dst, p->size, p->page_shift, p->seed); + clock_get(&end); + diff = clock_diff(&start, &end); + clock_accum(accum, &diff); + + bench_munmap(dst, p->size); + } + + return 0; +out: + printf("# Memory allocation failed - maybe size (%s) %s?\n", size_str, + p->page_shift != PAGE_SHIFT_4KB ? 
"has insufficient hugepages" : "is too large"); + return -1; +} + +static const char * const bench_mem_mmap_usage[] = { + "perf bench mem mmap <options>", + NULL +}; + +static const struct function mmap_functions[] = { + { .name = "demand", + .desc = "Demand loaded mmap()", + .fn.mmap_op = mmap_page_touch }, + + { .name = "populate", + .desc = "Eagerly populated mmap()", + .fn.mmap_op = mmap_page_touch }, + + { .name = NULL, } +}; + +int bench_mem_mmap(int argc, const char **argv) +{ + static const struct option bench_mmap_options[] = { + OPT_UINTEGER('r', "randomize", &seed, + "Seed to randomize page access offset."), + OPT_PARENT(bench_common_options), + OPT_END() + }; + + struct bench_mem_info info = { + .functions = mmap_functions, + .do_op = do_mmap, + .usage = bench_mem_mmap_usage, + .options = bench_mmap_options, }; return bench_mem_common(argc, argv, &info); diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h index 5bcaec5601a8..852e48cfd8fe 100644 --- a/tools/perf/bench/mem-memcpy-arch.h +++ b/tools/perf/bench/mem-memcpy-arch.h @@ -2,7 +2,7 @@ #ifdef HAVE_ARCH_X86_64_SUPPORT -#define MEMCPY_FN(fn, name, desc) \ +#define MEMCPY_FN(fn, init, fini, name, desc) \ void *fn(void *, const void *, size_t); #include "mem-memcpy-x86-64-asm-def.h" diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h index 50ae8bd58296..f43038f4448b 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h @@ -1,13 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ MEMCPY_FN(memcpy_orig, + mem_alloc, + mem_free, "x86-64-unrolled", "unrolled memcpy() in arch/x86/lib/memcpy_64.S") MEMCPY_FN(__memcpy, + mem_alloc, + mem_free, "x86-64-movsq", "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") - -MEMCPY_FN(memcpy_erms, - "x86-64-movsb", - "movsb-based memcpy() in arch/x86/lib/memcpy_64.S") diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S 
b/tools/perf/bench/mem-memcpy-x86-64-asm.S index 6eb45a2aa8db..1b9fef7efcdc 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -2,7 +2,7 @@ /* Various wrappers to make the kernel .S file build in user-space: */ -// memcpy_orig and memcpy_erms are being defined as SYM_L_LOCAL but we need it +// memcpy_orig is being defined as SYM_L_LOCAL but we need it #define SYM_FUNC_START_LOCAL(name) \ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #define memcpy MEMCPY /* don't hide glibc's memcpy() */ diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h index 53f45482663f..278c5da12d63 100644 --- a/tools/perf/bench/mem-memset-arch.h +++ b/tools/perf/bench/mem-memset-arch.h @@ -2,7 +2,7 @@ #ifdef HAVE_ARCH_X86_64_SUPPORT -#define MEMSET_FN(fn, name, desc) \ +#define MEMSET_FN(fn, init, fini, name, desc) \ void *fn(void *, int, size_t); #include "mem-memset-x86-64-asm-def.h" diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h index dac6d2b7c39b..80ad1b7ea770 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h @@ -1,13 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 */ MEMSET_FN(memset_orig, + mem_alloc, + mem_free, "x86-64-unrolled", "unrolled memset() in arch/x86/lib/memset_64.S") MEMSET_FN(__memset, + mem_alloc, + mem_free, "x86-64-stosq", "movsq-based memset() in arch/x86/lib/memset_64.S") - -MEMSET_FN(memset_erms, - "x86-64-stosb", - "movsb-based memset() in arch/x86/lib/memset_64.S") diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S index 6f093c483842..abd26c95f1aa 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm.S +++ b/tools/perf/bench/mem-memset-x86-64-asm.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -// memset_orig and memset_erms are being defined as SYM_L_LOCAL but we need it +// memset_orig is being defined as SYM_L_LOCAL 
but we need it #define SYM_FUNC_START_LOCAL(name) \ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #define memset MEMSET /* don't hide glibc's memset() */ diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 9717c6c17433..19be2aaf4dc0 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -27,6 +27,7 @@ #include <sys/resource.h> #include <sys/wait.h> #include <sys/prctl.h> +#include <sys/stat.h> #include <sys/types.h> #include <linux/kernel.h> #include <linux/time64.h> @@ -35,6 +36,7 @@ #include "../util/header.h" #include "../util/mutex.h" +#include <api/fs/fs.h> #include <numa.h> #include <numaif.h> @@ -533,6 +535,57 @@ static int parse_cpu_list(const char *arg) return 0; } +/* + * Check whether a CPU is online + * + * Returns: + * 1 -> if CPU is online + * 0 -> if CPU is offline + * -1 -> error case + */ +static int is_cpu_online(unsigned int cpu) +{ + char *str; + size_t strlen; + char buf[256]; + int status = -1; + struct stat statbuf; + + snprintf(buf, sizeof(buf), + "/sys/devices/system/cpu/cpu%d", cpu); + if (stat(buf, &statbuf) != 0) + return 0; + + /* + * Check if /sys/devices/system/cpu/cpux/online file + * exists. Some cases cpu0 won't have online file since + * it is not expected to be turned off generally. + * In kernels without CONFIG_HOTPLUG_CPU, this + * file won't exist + */ + snprintf(buf, sizeof(buf), + "/sys/devices/system/cpu/cpu%d/online", cpu); + if (stat(buf, &statbuf) != 0) + return 1; + + /* + * Read online file using sysfs__read_str. + * If read or open fails, return -1. 
+ * If read succeeds, return value from file + * which gets stored in "str" + */ + snprintf(buf, sizeof(buf), + "devices/system/cpu/cpu%d/online", cpu); + + if (sysfs__read_str(buf, &str, &strlen) < 0) + return status; + + status = atoi(str); + + free(str); + return status; +} + static int parse_setup_cpu_list(void) { struct thread_data *td; @@ -847,7 +900,7 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val if (g->p.data_rand_walk) { u32 lfsr = nr + loop + val; - int j; + long j; for (i = 0; i < words/1024; i++) { long start, end; diff --git a/tools/perf/bench/pmu-scan.c b/tools/perf/bench/pmu-scan.c new file mode 100644 index 000000000000..14a464ad8cea --- /dev/null +++ b/tools/perf/bench/pmu-scan.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Benchmark scanning sysfs files for PMU information. + * + * Copyright 2023 Google LLC. + */ +#include <errno.h> +#include <stdio.h> +#include "bench.h" +#include "util/debug.h" +#include "util/pmu.h" +#include "util/pmus.h" +#include "util/stat.h" +#include <linux/atomic.h> +#include <linux/err.h> +#include <linux/time64.h> +#include <subcmd/parse-options.h> + +static unsigned int iterations = 100; + +struct pmu_scan_result { + char *name; + int nr_aliases; + int nr_formats; + int nr_caps; + bool is_core; +}; + +static const struct option options[] = { + OPT_UINTEGER('i', "iterations", &iterations, + "Number of iterations used to compute average"), + OPT_END() +}; + +static const char *const bench_usage[] = { + "perf bench internals pmu-scan <options>", + NULL +}; + +static int nr_pmus; +static struct pmu_scan_result *results; + +static int save_result(void) +{ + struct perf_pmu *pmu = NULL; + struct list_head *list; + struct pmu_scan_result *r; + + while ((pmu = perf_pmus__scan(pmu)) != NULL) { + r = realloc(results, (nr_pmus + 1) * sizeof(*r)); + if (r == NULL) + return -ENOMEM; + + results = r; + r = results + nr_pmus; + + r->name = strdup(pmu->name); + r->is_core = 
pmu->is_core; + r->nr_caps = pmu->nr_caps; + + r->nr_aliases = perf_pmu__num_events(pmu); + + r->nr_formats = 0; + list_for_each(list, &pmu->format) + r->nr_formats++; + + pr_debug("pmu[%d] name=%s, nr_caps=%d, nr_aliases=%d, nr_formats=%d\n", + nr_pmus, r->name, r->nr_caps, r->nr_aliases, r->nr_formats); + nr_pmus++; + } + + perf_pmus__destroy(); + return 0; +} + +static int check_result(bool core_only) +{ + struct pmu_scan_result *r; + struct perf_pmu *pmu; + struct list_head *list; + int nr; + + for (int i = 0; i < nr_pmus; i++) { + r = &results[i]; + if (core_only && !r->is_core) + continue; + + pmu = perf_pmus__find(r->name); + if (pmu == NULL) { + pr_err("Cannot find PMU %s\n", r->name); + return -1; + } + + if (pmu->nr_caps != (u32)r->nr_caps) { + pr_err("Unmatched number of event caps in %s: expect %d vs got %d\n", + pmu->name, r->nr_caps, pmu->nr_caps); + return -1; + } + + nr = perf_pmu__num_events(pmu); + if (nr != r->nr_aliases) { + pr_err("Unmatched number of event aliases in %s: expect %d vs got %d\n", + pmu->name, r->nr_aliases, nr); + return -1; + } + + nr = 0; + list_for_each(list, &pmu->format) + nr++; + if (nr != r->nr_formats) { + pr_err("Unmatched number of event formats in %s: expect %d vs got %d\n", + pmu->name, r->nr_formats, nr); + return -1; + } + } + return 0; +} + +static void delete_result(void) +{ + for (int i = 0; i < nr_pmus; i++) + free(results[i].name); + free(results); + + results = NULL; + nr_pmus = 0; +} + +static int run_pmu_scan(void) +{ + struct stats stats; + struct timeval start, end, diff; + double time_average, time_stddev; + u64 runtime_us; + int ret; + + init_stats(&stats); + pr_info("Computing performance of sysfs PMU event scan for %u times\n", + iterations); + + if (save_result() < 0) { + pr_err("Failed to initialize PMU scan result\n"); + return -1; + } + + for (int j = 0; j < 2; j++) { + bool core_only = (j == 0); + + for (unsigned int i = 0; i < iterations; i++) { + gettimeofday(&start, NULL); + if (core_only) + 
perf_pmus__scan_core(NULL); + else + perf_pmus__scan(NULL); + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&stats, runtime_us); + + ret = check_result(core_only); + perf_pmus__destroy(); + if (ret < 0) + break; + } + time_average = avg_stats(&stats); + time_stddev = stddev_stats(&stats); + pr_info(" Average%s PMU scanning took: %.3f usec (+- %.3f usec)\n", + core_only ? " core" : "", time_average, time_stddev); + } + delete_result(); + return 0; +} + +int bench_pmu_scan(int argc, const char **argv) +{ + int err = 0; + + argc = parse_options(argc, argv, options, bench_usage, 0); + if (argc) { + usage_with_options(bench_usage, options); + exit(EXIT_FAILURE); + } + + err = run_pmu_scan(); + + return err; +} diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index 488f6e6ba1a5..93dcd9dba3d0 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -27,6 +27,7 @@ #include <poll.h> #include <limits.h> #include <err.h> +#include <linux/list.h> #include <linux/time64.h> #define DATASIZE 100 @@ -35,8 +36,12 @@ static bool use_pipes = false; static unsigned int nr_loops = 100; static bool thread_mode = false; static unsigned int num_groups = 10; +static unsigned int total_children = 0; +static struct list_head sender_contexts = LIST_HEAD_INIT(sender_contexts); +static struct list_head receiver_contexts = LIST_HEAD_INIT(receiver_contexts); struct sender_context { + struct list_head list; unsigned int num_fds; int ready_out; int wakefd; @@ -44,12 +49,20 @@ struct sender_context { }; struct receiver_context { + struct list_head list; unsigned int num_packets; int in_fds[2]; int ready_out; int wakefd; }; +union messaging_worker { + pthread_t thread; + pid_t pid; +}; + +static union messaging_worker *worker_tab; + static void fdpair(int fds[2]) { if (use_pipes) { @@ -93,7 +106,7 @@ static void *sender(struct sender_context 
*ctx) again: ret = write(ctx->out_fds[j], data + done, - sizeof(data)-done); + sizeof(data) - done); if (ret < 0) err(EXIT_FAILURE, "SENDER: write"); done += ret; @@ -134,30 +147,12 @@ again: return NULL; } -static pthread_t create_worker(void *ctx, void *(*func)(void *)) +static void create_thread_worker(union messaging_worker *worker, + void *ctx, void *(*func)(void *)) { pthread_attr_t attr; - pthread_t childid; int ret; - if (!thread_mode) { - /* process mode */ - /* Fork the receiver. */ - switch (fork()) { - case -1: - err(EXIT_FAILURE, "fork()"); - break; - case 0: - (*func) (ctx); - exit(0); - break; - default: - break; - } - - return (pthread_t)0; - } - if (pthread_attr_init(&attr) != 0) err(EXIT_FAILURE, "pthread_attr_init:"); @@ -166,14 +161,37 @@ static pthread_t create_worker(void *ctx, void *(*func)(void *)) err(EXIT_FAILURE, "pthread_attr_setstacksize"); #endif - ret = pthread_create(&childid, &attr, func, ctx); + ret = pthread_create(&worker->thread, &attr, func, ctx); if (ret != 0) err(EXIT_FAILURE, "pthread_create failed"); - return childid; + pthread_attr_destroy(&attr); +} + +static void create_process_worker(union messaging_worker *worker, + void *ctx, void *(*func)(void *)) +{ + /* Fork the receiver. 
*/ + worker->pid = fork(); + + if (worker->pid == -1) { + err(EXIT_FAILURE, "fork()"); + } else if (worker->pid == 0) { + (*func) (ctx); + exit(0); + } +} + +static void create_worker(union messaging_worker *worker, + void *ctx, void *(*func)(void *)) +{ + if (!thread_mode) + return create_process_worker(worker, ctx, func); + else + return create_thread_worker(worker, ctx, func); } -static void reap_worker(pthread_t id) +static void reap_worker(union messaging_worker *worker) { int proc_status; void *thread_status; @@ -184,23 +202,24 @@ static void reap_worker(pthread_t id) if (!WIFEXITED(proc_status)) exit(1); } else { - pthread_join(id, &thread_status); + pthread_join(worker->thread, &thread_status); } } /* One group of senders and receivers */ -static unsigned int group(pthread_t *pth, +static unsigned int group(union messaging_worker *worker, unsigned int num_fds, int ready_out, int wakefd) { unsigned int i; - struct sender_context *snd_ctx = malloc(sizeof(struct sender_context) - + num_fds * sizeof(int)); + struct sender_context *snd_ctx = malloc(sizeof(struct sender_context) + + num_fds * sizeof(int)); if (!snd_ctx) err(EXIT_FAILURE, "malloc()"); + list_add(&snd_ctx->list, &sender_contexts); for (i = 0; i < num_fds; i++) { int fds[2]; struct receiver_context *ctx = malloc(sizeof(*ctx)); @@ -208,6 +227,7 @@ static unsigned int group(pthread_t *pth, if (!ctx) err(EXIT_FAILURE, "malloc()"); + list_add(&ctx->list, &receiver_contexts); /* Create the pipe between client and server */ fdpair(fds); @@ -218,7 +238,7 @@ static unsigned int group(pthread_t *pth, ctx->ready_out = ready_out; ctx->wakefd = wakefd; - pth[i] = create_worker(ctx, (void *)receiver); + create_worker(worker + i, ctx, (void *)receiver); snd_ctx->out_fds[i] = fds[1]; if (!thread_mode) @@ -231,7 +251,7 @@ static unsigned int group(pthread_t *pth, snd_ctx->wakefd = wakefd; snd_ctx->num_fds = num_fds; - pth[num_fds+i] = create_worker(snd_ctx, (void *)sender); + create_worker(worker + num_fds + i, 
snd_ctx, (void *)sender); } /* Close the fds we have left */ @@ -243,6 +263,17 @@ static unsigned int group(pthread_t *pth, return num_fds * 2; } +static void sig_handler(int sig __maybe_unused) +{ + unsigned int i; + + /* + * When exit abnormally, kill all forked child processes. + */ + for (i = 0; i < total_children; i++) + kill(worker_tab[i].pid, SIGKILL); +} + static const struct option options[] = { OPT_BOOLEAN('p', "pipe", &use_pipes, "Use pipe() instead of socketpair()"), @@ -260,26 +291,30 @@ static const char * const bench_sched_message_usage[] = { int bench_sched_messaging(int argc, const char **argv) { - unsigned int i, total_children; + unsigned int i; struct timeval start, stop, diff; unsigned int num_fds = 20; int readyfds[2], wakefds[2]; char dummy; - pthread_t *pth_tab; + struct sender_context *pos, *n; argc = parse_options(argc, argv, options, bench_sched_message_usage, 0); - pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t)); - if (!pth_tab) + worker_tab = malloc(num_fds * 2 * num_groups * sizeof(union messaging_worker)); + if (!worker_tab) err(EXIT_FAILURE, "main:malloc()"); fdpair(readyfds); fdpair(wakefds); - total_children = 0; + if (!thread_mode) { + signal(SIGINT, sig_handler); + signal(SIGTERM, sig_handler); + } + for (i = 0; i < num_groups; i++) - total_children += group(pth_tab+total_children, num_fds, + total_children += group(worker_tab + total_children, num_fds, readyfds[1], wakefds[0]); /* Wait for everyone to be ready */ @@ -295,7 +330,7 @@ int bench_sched_messaging(int argc, const char **argv) /* Reap them all */ for (i = 0; i < total_children; i++) - reap_worker(pth_tab[i]); + reap_worker(worker_tab + i); gettimeofday(&stop, NULL); @@ -323,7 +358,14 @@ int bench_sched_messaging(int argc, const char **argv) break; } - free(pth_tab); - + free(worker_tab); + list_for_each_entry_safe(pos, n, &sender_contexts, list) { + list_del_init(&pos->list); + free(pos); + } + list_for_each_entry_safe(pos, n, &receiver_contexts, list) { 
+ list_del_init(&pos->list); + free(pos); + } return 0; } diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index a960e7a93aec..70139036d68f 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -10,7 +10,9 @@ * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> */ #include <subcmd/parse-options.h> +#include <api/fs/fs.h> #include "bench.h" +#include "util/cgroup.h" #include <unistd.h> #include <stdio.h> @@ -19,7 +21,9 @@ #include <sys/wait.h> #include <string.h> #include <errno.h> +#include <fcntl.h> #include <assert.h> +#include <sys/epoll.h> #include <sys/time.h> #include <sys/types.h> #include <sys/syscall.h> @@ -31,6 +35,9 @@ struct thread_data { int nr; int pipe_read; int pipe_write; + struct epoll_event epoll_ev; + int epoll_fd; + bool cgroup_failed; pthread_t pthread; }; @@ -40,9 +47,50 @@ static int loops = LOOPS_DEFAULT; /* Use processes by default: */ static bool threaded; +static bool nonblocking; +static char *cgrp_names[2]; +static struct cgroup *cgrps[2]; + +static int parse_two_cgroups(const struct option *opt __maybe_unused, + const char *str, int unset __maybe_unused) +{ + char *p = strdup(str); + char *q; + int ret = -1; + + if (p == NULL) { + fprintf(stderr, "memory allocation failure\n"); + return -1; + } + + q = strchr(p, ','); + if (q == NULL) { + fprintf(stderr, "it should have two cgroup names: %s\n", p); + goto out; + } + *q = '\0'; + + cgrp_names[0] = strdup(p); + cgrp_names[1] = strdup(q + 1); + + if (cgrp_names[0] == NULL || cgrp_names[1] == NULL) { + fprintf(stderr, "memory allocation failure\n"); + goto out; + } + ret = 0; + +out: + free(p); + return ret; +} + static const struct option options[] = { + OPT_BOOLEAN('n', "nonblocking", &nonblocking, "Use non-blocking operations"), OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), OPT_BOOLEAN('T', "threaded", &threaded, "Specify threads/process based task setup"), + OPT_CALLBACK('G', "cgroups", NULL, 
"SEND,RECV", + "Put sender and receivers in given cgroups", + parse_two_cgroups), OPT_END() }; @@ -51,24 +99,115 @@ static const char * const bench_sched_pipe_usage[] = { NULL }; +static int enter_cgroup(int nr) +{ + char buf[32]; + int fd, len, ret; + int saved_errno; + struct cgroup *cgrp; + pid_t pid; + + if (cgrp_names[nr] == NULL) + return 0; + + if (cgrps[nr] == NULL) { + cgrps[nr] = cgroup__new(cgrp_names[nr], /*do_open=*/true); + if (cgrps[nr] == NULL) + goto err; + } + cgrp = cgrps[nr]; + + if (threaded) + pid = syscall(__NR_gettid); + else + pid = getpid(); + + snprintf(buf, sizeof(buf), "%d\n", pid); + len = strlen(buf); + + /* try cgroup v2 interface first */ + if (threaded) + fd = openat(cgrp->fd, "cgroup.threads", O_WRONLY); + else + fd = openat(cgrp->fd, "cgroup.procs", O_WRONLY); + + /* try cgroup v1 if failed */ + if (fd < 0 && errno == ENOENT) + fd = openat(cgrp->fd, "tasks", O_WRONLY); + + if (fd < 0) + goto err; + + ret = write(fd, buf, len); + close(fd); + + if (ret != len) { + printf("Cannot enter to cgroup: %s\n", cgrp->name); + return -1; + } + return 0; + +err: + saved_errno = errno; + printf("Failed to open cgroup file in %s\n", cgrp_names[nr]); + + if (saved_errno == ENOENT) { + char mnt[PATH_MAX]; + + if (cgroupfs_find_mountpoint(mnt, sizeof(mnt), "perf_event") == 0) + printf(" Hint: create the cgroup first, like 'mkdir %s/%s'\n", + mnt, cgrp_names[nr]); + } else if (saved_errno == EACCES && geteuid() > 0) { + printf(" Hint: try to run as root\n"); + } + + return -1; +} + +static void exit_cgroup(int nr) +{ + cgroup__put(cgrps[nr]); + free(cgrp_names[nr]); +} + +static inline int read_pipe(struct thread_data *td) +{ + int ret, m; +retry: + if (nonblocking) { + ret = epoll_wait(td->epoll_fd, &td->epoll_ev, 1, -1); + if (ret < 0) + return ret; + } + ret = read(td->pipe_read, &m, sizeof(int)); + if (nonblocking && ret < 0 && errno == EWOULDBLOCK) + goto retry; + return ret; +} + static void *worker_thread(void *__tdata) { struct thread_data 
*td = __tdata; - int m = 0, i; - int ret; + int i, ret, m = 0; + + ret = enter_cgroup(td->nr); + if (ret < 0) { + td->cgroup_failed = true; + return NULL; + } + + if (nonblocking) { + td->epoll_ev.events = EPOLLIN; + td->epoll_fd = epoll_create(1); + BUG_ON(td->epoll_fd < 0); + BUG_ON(epoll_ctl(td->epoll_fd, EPOLL_CTL_ADD, td->pipe_read, &td->epoll_ev) < 0); + } for (i = 0; i < loops; i++) { - if (!td->nr) { - ret = read(td->pipe_read, &m, sizeof(int)); - BUG_ON(ret != sizeof(int)); - ret = write(td->pipe_write, &m, sizeof(int)); - BUG_ON(ret != sizeof(int)); - } else { - ret = write(td->pipe_write, &m, sizeof(int)); - BUG_ON(ret != sizeof(int)); - ret = read(td->pipe_read, &m, sizeof(int)); - BUG_ON(ret != sizeof(int)); - } + ret = write(td->pipe_write, &m, sizeof(int)); + BUG_ON(ret != sizeof(int)); + ret = read_pipe(td); + BUG_ON(ret != sizeof(int)); } return NULL; @@ -76,7 +215,8 @@ static void *worker_thread(void *__tdata) int bench_sched_pipe(int argc, const char **argv) { - struct thread_data threads[2], *td; + struct thread_data threads[2] = {}; + struct thread_data *td; int pipe_1[2], pipe_2[2]; struct timeval start, stop, diff; unsigned long long result_usec = 0; @@ -88,13 +228,16 @@ int bench_sched_pipe(int argc, const char **argv) * discarding returned value of read(), write() * causes error in building environment for perf */ - int __maybe_unused ret, wait_stat; + int __maybe_unused ret, wait_stat, flags = 0; pid_t pid, retpid __maybe_unused; argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0); - BUG_ON(pipe(pipe_1)); - BUG_ON(pipe(pipe_2)); + if (nonblocking) + flags |= O_NONBLOCK; + + BUG_ON(pipe2(pipe_1, flags)); + BUG_ON(pipe2(pipe_2, flags)); gettimeofday(&start, NULL); @@ -112,9 +255,7 @@ int bench_sched_pipe(int argc, const char **argv) } } - if (threaded) { - for (t = 0; t < nr_threads; t++) { td = threads + t; @@ -128,7 +269,6 @@ int bench_sched_pipe(int argc, const char **argv) ret = pthread_join(td->pthread, NULL); 
BUG_ON(ret); } - } else { pid = fork(); assert(pid >= 0); @@ -147,6 +287,12 @@ int bench_sched_pipe(int argc, const char **argv) gettimeofday(&stop, NULL); timersub(&stop, &start, &diff); + exit_cgroup(0); + exit_cgroup(1); + + if (threads[0].cgroup_failed || threads[1].cgroup_failed) + return 0; + switch (bench_format) { case BENCH_FORMAT_DEFAULT: printf("# Executed %d pipe operations between two %s\n\n", diff --git a/tools/perf/bench/sched-seccomp-notify.c b/tools/perf/bench/sched-seccomp-notify.c new file mode 100644 index 000000000000..269c1f4a6852 --- /dev/null +++ b/tools/perf/bench/sched-seccomp-notify.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <subcmd/parse-options.h> +#include "bench.h" + +#include <uapi/linux/filter.h> +#include <sys/types.h> +#include <sys/time.h> +#include <linux/unistd.h> +#include <sys/syscall.h> +#include <sys/ioctl.h> +#include <linux/time64.h> +#include <uapi/linux/seccomp.h> +#include <sys/prctl.h> + +#include <unistd.h> +#include <limits.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/wait.h> +#include <string.h> +#include <errno.h> +#include <err.h> +#include <inttypes.h> + +#define LOOPS_DEFAULT 1000000UL +static uint64_t loops = LOOPS_DEFAULT; +static bool sync_mode; + +static const struct option options[] = { + OPT_U64('l', "loop", &loops, "Specify number of loops"), + OPT_BOOLEAN('s', "sync-mode", &sync_mode, + "Enable the synchronous mode for seccomp notifications"), + OPT_END() +}; + +static const char * const bench_seccomp_usage[] = { + "perf bench sched secccomp-notify <options>", + NULL +}; + +static int seccomp(unsigned int op, unsigned int flags, void *args) +{ + return syscall(__NR_seccomp, op, flags, args); +} + +static int user_notif_syscall(int nr, unsigned int flags) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1), 
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog); +} + +#define USER_NOTIF_MAGIC INT_MAX +static void user_notification_sync_loop(int listener) +{ + struct seccomp_notif_resp resp; + struct seccomp_notif req; + uint64_t nr; + + for (nr = 0; nr < loops; nr++) { + memset(&req, 0, sizeof(req)); + if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req)) + err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_RECV failed"); + + if (req.data.nr != __NR_gettid) + errx(EXIT_FAILURE, "unexpected syscall: %d", req.data.nr); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + resp.flags = 0; + if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp)) + err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_SEND failed"); + } +} + +#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP +#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0) +#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64) +#endif +int bench_sched_seccomp_notify(int argc, const char **argv) +{ + struct timeval start, stop, diff; + unsigned long long result_usec = 0; + int status, listener; + pid_t pid; + long ret; + + argc = parse_options(argc, argv, options, bench_seccomp_usage, 0); + + gettimeofday(&start, NULL); + + prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + listener = user_notif_syscall(__NR_gettid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + if (listener < 0) + err(EXIT_FAILURE, "can't create a notification descriptor"); + + pid = fork(); + if (pid < 0) + err(EXIT_FAILURE, "fork"); + if (pid == 0) { + if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0)) + err(EXIT_FAILURE, "can't set the parent death signal"); + while (1) { + ret = syscall(__NR_gettid); + if (ret == USER_NOTIF_MAGIC) + continue; + break; + } + _exit(1); + } + + if (sync_mode) { + if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, + 
SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0)) + err(EXIT_FAILURE, + "can't set SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP"); + } + user_notification_sync_loop(listener); + + kill(pid, SIGKILL); + if (waitpid(pid, &status, 0) != pid) + err(EXIT_FAILURE, "waitpid(%d) failed", pid); + if (!WIFSIGNALED(status) || WTERMSIG(status) != SIGKILL) + errx(EXIT_FAILURE, "unexpected exit code: %d", status); + + gettimeofday(&stop, NULL); + timersub(&stop, &start, &diff); + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + printf("# Executed %" PRIu64 " system calls\n\n", + loops); + + result_usec = diff.tv_sec * USEC_PER_SEC; + result_usec += diff.tv_usec; + + printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", + (unsigned long) diff.tv_sec, + (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); + + printf(" %14lf usecs/op\n", + (double)result_usec / (double)loops); + printf(" %14d ops/sec\n", + (int)((double)loops / + ((double)result_usec / (double)USEC_PER_SEC))); + break; + + case BENCH_FORMAT_SIMPLE: + printf("%lu.%03lu\n", + (unsigned long) diff.tv_sec, + (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); + break; + + default: + /* reaching here is something disaster */ + fprintf(stderr, "Unknown format:%d\n", bench_format); + exit(1); + break; + } + + return 0; +} diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c index 7401ebbac100..265d49a913d9 100644 --- a/tools/perf/bench/synthesize.c +++ b/tools/perf/bench/synthesize.c @@ -6,6 +6,7 @@ * * Copyright 2019 Google LLC. 
*/ +#include <errno.h> #include <stdio.h> #include "bench.h" #include "../util/debug.h" @@ -49,7 +50,7 @@ static const char *const bench_usage[] = { static atomic_t event_count; -static int process_synthesized_event(struct perf_tool *tool __maybe_unused, +static int process_synthesized_event(const struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct perf_sample *sample __maybe_unused, struct machine *machine __maybe_unused) @@ -114,12 +115,16 @@ static int run_single_threaded(void) .pid = "self", }; struct perf_thread_map *threads; + struct perf_env host_env; int err; perf_set_singlethreaded(); - session = perf_session__new(NULL, NULL); + perf_env__init(&host_env); + session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL, + /*trace_event_repipe=*/false, &host_env); if (IS_ERR(session)) { pr_err("Session creation failed.\n"); + perf_env__exit(&host_env); return PTR_ERR(session); } threads = thread_map__new_by_pid(getpid()); @@ -144,6 +149,7 @@ err_out: perf_thread_map__put(threads); perf_session__delete(session); + perf_env__exit(&host_env); return err; } @@ -154,17 +160,21 @@ static int do_run_multi_threaded(struct target *target, u64 runtime_us; unsigned int i; double time_average, time_stddev, event_average, event_stddev; - int err; + int err = 0; struct stats time_stats, event_stats; struct perf_session *session; + struct perf_env host_env; + perf_env__init(&host_env); init_stats(&time_stats); init_stats(&event_stats); for (i = 0; i < multi_iterations; i++) { - session = perf_session__new(NULL, NULL); - if (IS_ERR(session)) - return PTR_ERR(session); - + session = __perf_session__new(/*data=*/NULL, /*tool=*/NULL, + /*trace_event_repipe=*/false, &host_env); + if (IS_ERR(session)) { + err = PTR_ERR(session); + goto err_out; + } atomic_set(&event_count, 0); gettimeofday(&start, NULL); err = __machine__synthesize_threads(&session->machines.host, @@ -175,7 +185,7 @@ static int do_run_multi_threaded(struct target *target, 
nr_threads_synthesize); if (err) { perf_session__delete(session); - return err; + goto err_out; } gettimeofday(&end, NULL); @@ -198,7 +208,9 @@ static int do_run_multi_threaded(struct target *target, printf(" Average time per event %.3f usec\n", time_average / event_average); - return 0; +err_out: + perf_env__exit(&host_env); + return err; } static int run_multi_threaded(void) diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c index 9b751016f4b6..e7dc216f717f 100644 --- a/tools/perf/bench/syscall.c +++ b/tools/perf/bench/syscall.c @@ -14,11 +14,15 @@ #include <sys/time.h> #include <sys/syscall.h> #include <sys/types.h> +#include <sys/wait.h> #include <unistd.h> #include <stdlib.h> -#define LOOPS_DEFAULT 10000000 -static int loops = LOOPS_DEFAULT; +#ifndef __NR_fork +#define __NR_fork -1 +#endif + +static int loops; static const struct option options[] = { OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), @@ -30,25 +34,108 @@ static const char * const bench_syscall_usage[] = { NULL }; -int bench_syscall_basic(int argc, const char **argv) +static void test_fork(void) +{ + pid_t pid = fork(); + + if (pid < 0) { + fprintf(stderr, "fork failed\n"); + exit(1); + } else if (pid == 0) { + exit(0); + } else { + if (waitpid(pid, NULL, 0) < 0) { + fprintf(stderr, "waitpid failed\n"); + exit(1); + } + } +} + +static void test_execve(void) +{ + const char *pathname = "/bin/true"; + char *const argv[] = { (char *)pathname, NULL }; + pid_t pid = fork(); + + if (pid < 0) { + fprintf(stderr, "fork failed\n"); + exit(1); + } else if (pid == 0) { + execve(pathname, argv, NULL); + fprintf(stderr, "execve /bin/true failed\n"); + exit(1); + } else { + if (waitpid(pid, NULL, 0) < 0) { + fprintf(stderr, "waitpid failed\n"); + exit(1); + } + } +} + +static int bench_syscall_common(int argc, const char **argv, int syscall) { struct timeval start, stop, diff; unsigned long long result_usec = 0; + const char *name = NULL; int i; + switch (syscall) { + case 
__NR_fork: + case __NR_execve: + /* Limit default loop to 10000 times to save time */ + loops = 10000; + break; + default: + loops = 10000000; + break; + } + + /* Options -l and --loops override default above */ argc = parse_options(argc, argv, options, bench_syscall_usage, 0); gettimeofday(&start, NULL); - for (i = 0; i < loops; i++) - getppid(); + for (i = 0; i < loops; i++) { + switch (syscall) { + case __NR_getppid: + getppid(); + break; + case __NR_getpgid: + getpgid(0); + break; + case __NR_fork: + test_fork(); + break; + case __NR_execve: + test_execve(); + default: + break; + } + } gettimeofday(&stop, NULL); timersub(&stop, &start, &diff); + switch (syscall) { + case __NR_getppid: + name = "getppid()"; + break; + case __NR_getpgid: + name = "getpgid()"; + break; + case __NR_fork: + name = "fork()"; + break; + case __NR_execve: + name = "execve()"; + break; + default: + break; + } + switch (bench_format) { case BENCH_FORMAT_DEFAULT: - printf("# Executed %'d getppid() calls\n", loops); + printf("# Executed %'d %s calls\n", loops, name); result_usec = diff.tv_sec * 1000000; result_usec += diff.tv_usec; @@ -79,3 +166,23 @@ int bench_syscall_basic(int argc, const char **argv) return 0; } + +int bench_syscall_basic(int argc, const char **argv) +{ + return bench_syscall_common(argc, argv, __NR_getppid); +} + +int bench_syscall_getpgid(int argc, const char **argv) +{ + return bench_syscall_common(argc, argv, __NR_getpgid); +} + +int bench_syscall_fork(int argc, const char **argv) +{ + return bench_syscall_common(argc, argv, __NR_fork); +} + +int bench_syscall_execve(int argc, const char **argv) +{ + return bench_syscall_common(argc, argv, __NR_execve); +} diff --git a/tools/perf/bench/uprobe.c b/tools/perf/bench/uprobe.c new file mode 100644 index 000000000000..0b90275862e1 --- /dev/null +++ b/tools/perf/bench/uprobe.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* + * uprobe.c + * + * uprobe benchmarks + * + * Copyright (C) 2023, 
Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + */ +#include "../perf.h" +#include "../util/util.h" +#include <subcmd/parse-options.h> +#include "../builtin.h" +#include "bench.h" +#include <linux/compiler.h> +#include <linux/time64.h> + +#include <inttypes.h> +#include <stdio.h> +#include <sys/time.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> +#include <stdlib.h> + +#define LOOPS_DEFAULT 1000 +static int loops = LOOPS_DEFAULT; + +enum bench_uprobe { + BENCH_UPROBE__BASELINE, + BENCH_UPROBE__EMPTY, + BENCH_UPROBE__TRACE_PRINTK, + BENCH_UPROBE__EMPTY_RET, + BENCH_UPROBE__TRACE_PRINTK_RET, +}; + +static const struct option options[] = { + OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), + OPT_END() +}; + +static const char * const bench_uprobe_usage[] = { + "perf bench uprobe <options>", + NULL +}; + +#ifdef HAVE_BPF_SKEL +#include "bpf_skel/bench_uprobe.skel.h" + +#define bench_uprobe__attach_uprobe(prog) \ + skel->links.prog = bpf_program__attach_uprobe_opts(/*prog=*/skel->progs.prog, \ + /*pid=*/-1, \ + /*binary_path=*/"libc.so.6", \ + /*func_offset=*/0, \ + /*opts=*/&uprobe_opts); \ + if (!skel->links.prog) { \ + err = -errno; \ + fprintf(stderr, "Failed to attach bench uprobe \"%s\": %s\n", #prog, strerror(errno)); \ + goto cleanup; \ + } + +struct bench_uprobe_bpf *skel; + +static int bench_uprobe__setup_bpf_skel(enum bench_uprobe bench) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + int err; + + /* Load and verify BPF application */ + skel = bench_uprobe_bpf__open(); + if (!skel) { + fprintf(stderr, "Failed to open and load uprobes bench BPF skeleton\n"); + return -1; + } + + err = bench_uprobe_bpf__load(skel); + if (err) { + fprintf(stderr, "Failed to load and verify BPF skeleton\n"); + goto cleanup; + } + + uprobe_opts.func_name = "usleep"; + switch (bench) { + case BENCH_UPROBE__BASELINE: break; + case BENCH_UPROBE__EMPTY: bench_uprobe__attach_uprobe(empty); break; + case BENCH_UPROBE__TRACE_PRINTK: 
bench_uprobe__attach_uprobe(trace_printk); break; + case BENCH_UPROBE__EMPTY_RET: bench_uprobe__attach_uprobe(empty_ret); break; + case BENCH_UPROBE__TRACE_PRINTK_RET: bench_uprobe__attach_uprobe(trace_printk_ret); break; + default: + fprintf(stderr, "Invalid bench: %d\n", bench); + goto cleanup; + } + + return err; +cleanup: + bench_uprobe_bpf__destroy(skel); + skel = NULL; + return err; +} + +static void bench_uprobe__teardown_bpf_skel(void) +{ + if (skel) { + bench_uprobe_bpf__destroy(skel); + skel = NULL; + } +} +#else +static int bench_uprobe__setup_bpf_skel(enum bench_uprobe bench __maybe_unused) { return 0; } +static void bench_uprobe__teardown_bpf_skel(void) {}; +#endif + +static int bench_uprobe_format__default_fprintf(const char *name, const char *unit, u64 diff, FILE *fp) +{ + static u64 baseline, previous; + s64 diff_to_baseline = diff - baseline, + diff_to_previous = diff - previous; + int printed = fprintf(fp, "# Executed %'d %s calls\n", loops, name); + + printed += fprintf(fp, " %14s: %'" PRIu64 " %ss", "Total time", diff, unit); + + if (baseline) { + printed += fprintf(fp, " %s%'" PRId64 " to baseline", diff_to_baseline > 0 ? "+" : "", diff_to_baseline); + + if (previous != baseline) + fprintf(stdout, " %s%'" PRId64 " to previous", diff_to_previous > 0 ? 
"+" : "", diff_to_previous); + } + + printed += fprintf(fp, "\n\n %'.3f %ss/op", (double)diff / (double)loops, unit); + + if (baseline) { + printed += fprintf(fp, " %'.3f %ss/op to baseline", (double)diff_to_baseline / (double)loops, unit); + + if (previous != baseline) + printed += fprintf(fp, " %'.3f %ss/op to previous", (double)diff_to_previous / (double)loops, unit); + } else { + baseline = diff; + } + + fputc('\n', fp); + + previous = diff; + + return printed + 1; +} + +static int bench_uprobe(int argc, const char **argv, enum bench_uprobe bench) +{ + const char *name = "usleep(1000)", *unit = "usec"; + struct timespec start, end; + u64 diff; + int i; + + argc = parse_options(argc, argv, options, bench_uprobe_usage, 0); + + if (bench != BENCH_UPROBE__BASELINE && bench_uprobe__setup_bpf_skel(bench) < 0) + return 0; + + clock_gettime(CLOCK_REALTIME, &start); + + for (i = 0; i < loops; i++) { + usleep(USEC_PER_MSEC); + } + + clock_gettime(CLOCK_REALTIME, &end); + + diff = end.tv_sec * NSEC_PER_SEC + end.tv_nsec - (start.tv_sec * NSEC_PER_SEC + start.tv_nsec); + diff /= NSEC_PER_USEC; + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + bench_uprobe_format__default_fprintf(name, unit, diff, stdout); + break; + + case BENCH_FORMAT_SIMPLE: + printf("%" PRIu64 "\n", diff); + break; + + default: + /* reaching here is something of a disaster */ + fprintf(stderr, "Unknown format:%d\n", bench_format); + exit(1); + } + + if (bench != BENCH_UPROBE__BASELINE) + bench_uprobe__teardown_bpf_skel(); + + return 0; +} + +int bench_uprobe_baseline(int argc, const char **argv) +{ + return bench_uprobe(argc, argv, BENCH_UPROBE__BASELINE); +} + +int bench_uprobe_empty(int argc, const char **argv) +{ + return bench_uprobe(argc, argv, BENCH_UPROBE__EMPTY); +} + +int bench_uprobe_trace_printk(int argc, const char **argv) +{ + return bench_uprobe(argc, argv, BENCH_UPROBE__TRACE_PRINTK); +} + +int bench_uprobe_empty_ret(int argc, const char **argv) +{ + return bench_uprobe(argc, 
argv, BENCH_UPROBE__EMPTY_RET); +} + +int bench_uprobe_trace_printk_ret(int argc, const char **argv) +{ + return bench_uprobe(argc, argv, BENCH_UPROBE__TRACE_PRINTK_RET); +} |
