summaryrefslogtreecommitdiff
path: root/tools/perf/bench
diff options
context:
space:
mode:
author: Ian Rogers <irogers@google.com> 2020-04-02 08:43:53 -0700
committer: Arnaldo Carvalho de Melo <acme@redhat.com> 2020-04-16 12:19:12 -0300
commit: 2a4b51666af8bf0b67ccc2e53120bad27351917c (patch)
tree: 80b83a1a3bd37d5d10972be4f9d322fbc2cfaef6 /tools/perf/bench
parent: 1a2725f3ee5571cf07966f467b73a9941bcbacb8 (diff)
perf bench: Add event synthesis benchmark
Event synthesis may occur at the start or end (tail) of a perf command. In
system-wide mode it can scan every process in /proc, which may add seconds of
latency before event recording. Add a new benchmark that times how long event
synthesis takes with and without data synthesis.

An example execution looks like:

  $ perf bench internals synthesize
  # Running 'internals/synthesize' benchmark:
  Average synthesis took: 168.253800 usec
  Average data synthesis took: 208.104700 usec

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrey Zhizhikin <andrey.z@gmail.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lore.kernel.org/lkml/20200402154357.107873-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/bench')
-rw-r--r-- tools/perf/bench/Build 2
-rw-r--r-- tools/perf/bench/bench.h 2
-rw-r--r-- tools/perf/bench/synthesize.c 101
3 files changed, 103 insertions, 2 deletions
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index e4e321b6f883..042827385c87 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -6,9 +6,9 @@ perf-y += futex-wake.o
perf-y += futex-wake-parallel.o
perf-y += futex-requeue.o
perf-y += futex-lock-pi.o
-
perf-y += epoll-wait.o
perf-y += epoll-ctl.o
+perf-y += synthesize.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 4aa6de1aa67d..4d669c803237 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -41,9 +41,9 @@ int bench_futex_wake_parallel(int argc, const char **argv);
int bench_futex_requeue(int argc, const char **argv);
/* pi futexes */
int bench_futex_lock_pi(int argc, const char **argv);
-
int bench_epoll_wait(int argc, const char **argv);
int bench_epoll_ctl(int argc, const char **argv);
+int bench_synthesize(int argc, const char **argv);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
new file mode 100644
index 000000000000..6291257bc9c9
--- /dev/null
+++ b/tools/perf/bench/synthesize.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark synthesis of perf events such as at the start of a 'perf
+ * record'. Synthesis is done on the current process and the 'dummy' event
+ * handlers are invoked that support dump_trace but otherwise do nothing.
+ *
+ * Copyright 2019 Google LLC.
+ */
+#include <stdio.h>
+#include "bench.h"
+#include "../util/debug.h"
+#include "../util/session.h"
+#include "../util/synthetic-events.h"
+#include "../util/target.h"
+#include "../util/thread_map.h"
+#include "../util/tool.h"
+#include <linux/err.h>
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+
+/* Number of timed synthesis passes per measurement; set with -i/--iterations. */
+static unsigned int iterations = 10000;
+
+/* Command line options accepted by 'perf bench internals synthesize'. */
+static const struct option options[] = {
+	OPT_UINTEGER('i', "iterations", &iterations,
+		     "Number of iterations used to compute average"),
+	OPT_END()
+};
+
+/* NULL-terminated usage text handed to parse_options(). */
+static const char *const usage[] = {
+	"perf bench internals synthesize <options>",
+	NULL
+};
+
+
+/*
+ * Time 'iterations' back-to-back calls to machine__synthesize_threads() on
+ * the session's host machine and print the mean wall-clock cost of one call,
+ * in microseconds.
+ *
+ * @session:   session whose machines.host is handed to the synthesis call
+ * @threads:   thread map passed through to machine__synthesize_threads()
+ * @target:    target passed through to machine__synthesize_threads()
+ * @data_mmap: passed through to machine__synthesize_threads(); also selects
+ *             the "data " prefix in the printed report
+ *
+ * Returns 0 on success, -EINVAL when 'iterations' is zero (no average can be
+ * computed), or the first non-zero value returned by
+ * machine__synthesize_threads(), in which case nothing is printed.
+ */
+static int do_synthesize(struct perf_session *session,
+			 struct perf_thread_map *threads,
+			 struct target *target, bool data_mmap)
+{
+	const unsigned int nr_threads_synthesize = 1;
+	struct timeval start, end, diff;
+	u64 runtime_us;
+	unsigned int i;
+	double average;
+	int err;
+
+	/* With zero iterations the average below would be 0/0, i.e. NaN. */
+	if (iterations == 0) {
+		pr_err("Number of iterations must be greater than zero.\n");
+		return -EINVAL;
+	}
+
+	gettimeofday(&start, NULL);
+	for (i = 0; i < iterations; i++) {
+		err = machine__synthesize_threads(&session->machines.host,
+						  target, threads, data_mmap,
+						  nr_threads_synthesize);
+		if (err)
+			return err;
+	}
+	gettimeofday(&end, NULL);
+
+	timersub(&end, &start, &diff);
+
+	/*
+	 * Widen before multiplying: the product is otherwise computed in the
+	 * type of tv_sec, which can overflow where that type is 32 bits wide.
+	 */
+	runtime_us = (u64)diff.tv_sec * USEC_PER_SEC + (u64)diff.tv_usec;
+	average = (double)runtime_us / (double)iterations;
+	printf("Average %ssynthesis took: %f usec\n",
+	       data_mmap ? "data " : "", average);
+	return 0;
+}
+
+/*
+ * Entry point of the 'internals synthesize' benchmark.
+ *
+ * Parses the command line, creates a perf session and a thread map for the
+ * current process, then runs do_synthesize() twice: first with data_mmap
+ * false, then with data_mmap true. The second run is skipped if the first
+ * one fails.
+ *
+ * @argc: number of entries in @argv
+ * @argv: command line arguments, handed to parse_options()
+ *
+ * Returns 0 on success, PTR_ERR() of the session pointer when session
+ * creation fails, -ENOMEM when the thread map cannot be created, or the
+ * error returned by do_synthesize().
+ */
+int bench_synthesize(int argc, const char **argv)
+{
+	/*
+	 * Start from a zeroed tool so perf_tool__fill_defaults() never sees
+	 * indeterminate stack contents.
+	 * NOTE(review): presumably it only fills callbacks that are still
+	 * NULL - confirm. The tool is not handed to the session or to
+	 * do_synthesize() in this function.
+	 */
+	struct perf_tool tool = { 0 };
+	struct perf_session *session;
+	struct target target = {
+		.pid = "self",
+	};
+	struct perf_thread_map *threads;
+	int err;
+
+	argc = parse_options(argc, argv, options, usage, 0);
+
+	session = perf_session__new(NULL, false, NULL);
+	if (IS_ERR(session)) {
+		pr_err("Session creation failed.\n");
+		return PTR_ERR(session);
+	}
+
+	threads = thread_map__new_by_pid(getpid());
+	if (!threads) {
+		pr_err("Thread map creation failed.\n");
+		err = -ENOMEM;
+		goto err_session;
+	}
+	perf_tool__fill_defaults(&tool);
+
+	/* Measure plain synthesis first, then synthesis with data mmaps. */
+	err = do_synthesize(session, threads, &target, false);
+	if (!err)
+		err = do_synthesize(session, threads, &target, true);
+
+	/* threads is known to be non-NULL here; drop our reference. */
+	perf_thread_map__put(threads);
+err_session:
+	perf_session__delete(session);
+	return err;
+}