diff options
Diffstat (limited to 'tools/perf/bench/epoll-wait.c')
| -rw-r--r-- | tools/perf/bench/epoll-wait.c | 104 |
1 files changed, 65 insertions, 39 deletions
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 5a11534e96a0..20fe4f72b4af 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#ifdef HAVE_EVENTFD +#ifdef HAVE_EVENTFD_SUPPORT /* * Copyright (C) 2018 Davidlohr Bueso. * @@ -17,7 +17,7 @@ * While the second model, enabled via --multiq option, uses multiple * queueing (which refers to one epoll instance per worker). For example, * short lived tcp connections in a high throughput httpd server will - * ditribute the accept()'ing connections across CPUs. In this case each + * distribute the accept()'ing connections across CPUs. In this case each * worker does a limited amount of processing. * * [queue A] ---> [worker] @@ -63,6 +63,7 @@ /* For the CLR_() macros */ #include <string.h> #include <pthread.h> +#include <unistd.h> #include <errno.h> #include <inttypes.h> @@ -75,11 +76,12 @@ #include <sys/epoll.h> #include <sys/eventfd.h> #include <sys/types.h> +#include <perf/cpumap.h> #include "../util/stat.h" +#include "../util/mutex.h" #include <subcmd/parse-options.h> #include "bench.h" -#include "cpumap.h" #include <err.h> @@ -88,7 +90,6 @@ static unsigned int nthreads = 0; static unsigned int nsecs = 8; -struct timeval start, end, runtime; static bool wdone, done, __verbose, randomize, nonblocking; /* @@ -109,10 +110,10 @@ static bool multiq; /* use an epoll instance per thread */ /* amount of fds to monitor, per thread */ static unsigned int nfds = 64; -static pthread_mutex_t thread_lock; +static struct mutex thread_lock; static unsigned int threads_starting; static struct stats throughput_stats; -static pthread_cond_t thread_parent, thread_worker; +static struct cond thread_parent, thread_worker; struct worker { int tid; @@ -189,16 +190,16 @@ static void *workerfn(void *arg) int to = nonblocking? 0 : -1; int efd = multiq ? w->epollfd : epollfd; - pthread_mutex_lock(&thread_lock); + mutex_lock(&thread_lock); threads_starting--; if (!threads_starting) - pthread_cond_signal(&thread_parent); - pthread_cond_wait(&thread_worker, &thread_lock); - pthread_mutex_unlock(&thread_lock); + cond_signal(&thread_parent); + cond_wait(&thread_worker, &thread_lock); + mutex_unlock(&thread_lock); do { /* - * Block undefinitely waiting for the IN event. + * Block indefinitely waiting for the IN event. * In order to stress the epoll_wait(2) syscall, * call it event per event, instead of a larger * batch (max)limit. @@ -274,8 +275,8 @@ static void toggle_done(int sig __maybe_unused, { /* inform all threads that we're done for the day */ done = true; - gettimeofday(&end, NULL); - timersub(&end, &start, &runtime); + gettimeofday(&bench__end, NULL); + timersub(&bench__end, &bench__start, &bench__runtime); } static void print_summary(void) @@ -285,15 +286,17 @@ static void print_summary(void) printf("\nAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", avg, rel_stddev_stats(stddev, avg), - (int) runtime.tv_sec); + (int)bench__runtime.tv_sec); } -static int do_threads(struct worker *worker, struct cpu_map *cpu) +static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) { pthread_attr_t thread_attr, *attrp = NULL; - cpu_set_t cpuset; + cpu_set_t *cpuset; unsigned int i, j; - int ret, events = EPOLLIN; + int ret = 0, events = EPOLLIN; + int nrcpus; + size_t size; if (oneshot) events |= EPOLLONESHOT; @@ -306,6 +309,11 @@ static int do_threads(struct worker *worker, struct cpu_map *cpu) if (!noaffinity) pthread_attr_init(&thread_attr); + nrcpus = cpu__max_cpu().cpu; + cpuset = CPU_ALLOC(nrcpus); + BUG_ON(!cpuset); + size = CPU_ALLOC_SIZE(nrcpus); + for (i = 0; i < nthreads; i++) { struct worker *w = &worker[i]; @@ -341,22 +349,28 @@ static int do_threads(struct worker *worker, struct cpu_map *cpu) } if (!noaffinity) { - CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_ZERO_S(size, cpuset); + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, + size, cpuset); - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); - if (ret) + ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + } attrp = &thread_attr; } ret = pthread_create(&w->thread, attrp, workerfn, (void *)(struct worker *) w); - if (ret) + if (ret) { + CPU_FREE(cpuset); err(EXIT_FAILURE, "pthread_create"); + } } + CPU_FREE(cpuset); if (!noaffinity) pthread_attr_destroy(&thread_attr); @@ -406,7 +420,12 @@ static int cmpworker(const void *p1, const void *p2) struct worker *w1 = (struct worker *) p1; struct worker *w2 = (struct worker *) p2; - return w1->tid > w2->tid; + + if (w1->tid > w2->tid) + return 1; + if (w1->tid < w2->tid) + return -1; + return 0; } int bench_epoll_wait(int argc, const char **argv) @@ -415,7 +434,7 @@ int bench_epoll_wait(int argc, const char **argv) struct sigaction act; unsigned int i; struct worker *worker = NULL; - struct cpu_map *cpu; + struct perf_cpu_map *cpu; pthread_t wthread; struct rlimit rl, prevrl; @@ -425,11 +444,12 @@ int bench_epoll_wait(int argc, const char **argv) exit(EXIT_FAILURE); } + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - cpu = cpu_map__new(NULL); + cpu = perf_cpu_map__new_online_cpus(); if (!cpu) goto errmem; @@ -451,7 +471,7 @@ int bench_epoll_wait(int argc, const char **argv) /* default to the number of CPUs and leave one for the writer pthread */ if (!nthreads) - nthreads = cpu->nr - 1; + nthreads = perf_cpu_map__nr(cpu) - 1; worker = calloc(nthreads, sizeof(*worker)); if (!worker) { @@ -471,21 +491,21 @@ int bench_epoll_wait(int argc, const char **argv) getpid(), nthreads, oneshot ? " (EPOLLONESHOT semantics)": "", nfds, nsecs); init_stats(&throughput_stats); - pthread_mutex_init(&thread_lock, NULL); - pthread_cond_init(&thread_parent, NULL); - pthread_cond_init(&thread_worker, NULL); + mutex_init(&thread_lock); + cond_init(&thread_parent); + cond_init(&thread_worker); threads_starting = nthreads; - gettimeofday(&start, NULL); + gettimeofday(&bench__start, NULL); do_threads(worker, cpu); - pthread_mutex_lock(&thread_lock); + mutex_lock(&thread_lock); while (threads_starting) - pthread_cond_wait(&thread_parent, &thread_lock); - pthread_cond_broadcast(&thread_worker); - pthread_mutex_unlock(&thread_lock); + cond_wait(&thread_parent, &thread_lock); + cond_broadcast(&thread_worker); + mutex_unlock(&thread_lock); /* * At this point the workers should be blocked waiting for read events @@ -508,16 +528,17 @@ int bench_epoll_wait(int argc, const char **argv) err(EXIT_FAILURE, "pthread_join"); /* cleanup & report results */ - pthread_cond_destroy(&thread_parent); - pthread_cond_destroy(&thread_worker); - pthread_mutex_destroy(&thread_lock); + cond_destroy(&thread_parent); + cond_destroy(&thread_worker); + mutex_destroy(&thread_lock); /* sort the array back before reporting */ if (randomize) qsort(worker, nthreads, sizeof(struct worker), cmpworker); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops/runtime.tv_sec; + unsigned long t = bench__runtime.tv_sec > 0 ? + worker[i].ops / bench__runtime.tv_sec : 0; update_stats(&throughput_stats, t); @@ -533,8 +554,13 @@ int bench_epoll_wait(int argc, const char **argv) print_summary(); close(epollfd); + perf_cpu_map__put(cpu); + for (i = 0; i < nthreads; i++) + free(worker[i].fdmap); + + free(worker); return ret; errmem: err(EXIT_FAILURE, "calloc"); } -#endif // HAVE_EVENTFD +#endif // HAVE_EVENTFD_SUPPORT |
