selftest/bpf/benchs: Add bpf_loop benchmark

Add benchmark to measure the throughput and latency of the bpf_loop call. Testing this on my dev machine on 1 thread, the data is as follows: nr_loops: 10 bpf_loop - throughput: 198.519 ± 0.155 M ops/s, latency: 5.037 ns/op nr_loops: 100 bpf_loop - throughput: 247.448 ± 0.305 M ops/s, latency: 4.041 ns/op nr_loops: 500 bpf_loop - throughput: 260.839 ± 0.380 M ops/s, latency: 3.834 ns/op nr_loops: 1000 bpf_loop - throughput: 262.806 ± 0.629 M ops/s, latency: 3.805 ns/op nr_loops: 5000 bpf_loop - throughput: 264.211 ± 1.508 M ops/s, latency: 3.785 ns/op nr_loops: 10000 bpf_loop - throughput: 265.366 ± 3.054 M ops/s, latency: 3.768 ns/op nr_loops: 50000 bpf_loop - throughput: 235.986 ± 20.205 M ops/s, latency: 4.238 ns/op nr_loops: 100000 bpf_loop - throughput: 264.482 ± 0.279 M ops/s, latency: 3.781 ns/op nr_loops: 500000 bpf_loop - throughput: 309.773 ± 87.713 M ops/s, latency: 3.228 ns/op nr_loops: 1000000 bpf_loop - throughput: 262.818 ± 4.143 M ops/s, latency: 3.805 ns/op >From this data, we can see that the latency per loop decreases as the number of loops increases. On this particular machine, each loop had an overhead of about ~4 ns, and we were able to run ~250 million loops per second. Signed-off-by: Joanne Koong <joannekoong@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Andrii Nakryiko <andrii@kernel.org> Link: https://lore.kernel.org/bpf/20211130030622.4131246-5-joannekoong@fb.com
author: Joanne Koong <joannekoong@fb.com> 2021-11-29 19:06:22 -0800
committer: Alexei Starovoitov <ast@kernel.org> 2021-11-30 10:56:28 -0800
commit: ec151037af4f56065d5b258af82f13dbbf279ebd (patch)
tree: 348a28f660d90113801e47f6685a6ee2afcfa8cf /tools/testing/selftests/bpf/benchs
parent: f6e659b7f97c76d0471d12bf274ea2a097cf3c5c (diff)
3 files changed, 135 insertions, 0 deletions
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
new file mode 100644
index 000000000000..d0a6572bfab6
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <argp.h>
+#include "bench.h"
+#include "bpf_loop_bench.skel.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+	struct bpf_loop_bench *skel;
+} ctx;
+
+static struct {
+	__u32 nr_loops;
+} args = {
+	.nr_loops = 10,
+};
+
+enum {
+	ARG_NR_LOOPS = 4000,
+};
+
+static const struct argp_option opts[] = {
+	{ "nr_loops", ARG_NR_LOOPS, "nr_loops", 0,
+		"Set number of loops for the bpf_loop helper"},
+	{},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	switch (key) {
+	case ARG_NR_LOOPS:
+		args.nr_loops = strtol(arg, NULL, 10);
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+
+	return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_bpf_loop_argp = {
+	.options = opts,
+	.parser = parse_arg,
+};
+
+static void validate(void)
+{
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+		exit(1);
+	}
+}
+
+static void *producer(void *input)
+{
+	while (true)
+		/* trigger the bpf program */
+		syscall(__NR_getpgid);
+
+	return NULL;
+}
+
+static void *consumer(void *input)
+{
+	return NULL;
+}
+
+static void measure(struct bench_res *res)
+{
+	res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+static void setup(void)
+{
+	struct bpf_link *link;
+
+	setup_libbpf();
+
+	ctx.skel = bpf_loop_bench__open_and_load();
+	if (!ctx.skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	link = bpf_program__attach(ctx.skel->progs.benchmark);
+	if (!link) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+
+	ctx.skel->bss->nr_loops = args.nr_loops;
+}
+
+const struct bench bench_bpf_loop = {
+	.name = "bpf-loop",
+	.validate = validate,
+	.setup = setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = ops_report_progress,
+	.report_final = ops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh
new file mode 100755
index 000000000000..d4f5f73b356b
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+for t in 1 4 8 12 16; do
+for i in 10 100 500 1000 5000 10000 50000 100000 500000 1000000; do
+subtitle "nr_loops: $i, nr_threads: $t"
+	summarize_ops "bpf_loop: " \
+	    "$($RUN_BENCH -p $t --nr_loops $i bpf-loop)"
+	printf "\n"
+done
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh
index 9a16be78b180..6c5e6023a69f 100644
--- a/tools/testing/selftests/bpf/benchs/run_common.sh
+++ b/tools/testing/selftests/bpf/benchs/run_common.sh
@@ -33,6 +33,14 @@ function percentage()
 	echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/"
 }
 
+function ops()
+{
+	echo -n "throughput: "
+	echo -n "$*" | sed -E "s/.*throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+	echo -n -e ", latency: "
+	echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/"
+}
+
 function total()
 {
 	echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
@@ -52,6 +60,13 @@ function summarize_percentage()
 	printf "%-20s %s%%\n" "$bench" "$(percentage $summary)"
 }
 
+function summarize_ops()
+{
+	bench="$1"
+	summary=$(echo $2 | tail -n1)
+	printf "%-20s %s\n" "$bench" "$(ops $summary)"
+}
+
 function summarize_total()
 {
 	bench="$1"
author	Joanne Koong <joannekoong@fb.com>	2021-11-29 19:06:22 -0800
committer	Alexei Starovoitov <ast@kernel.org>	2021-11-30 10:56:28 -0800
commit	ec151037af4f56065d5b258af82f13dbbf279ebd (patch)
tree	348a28f660d90113801e47f6685a6ee2afcfa8cf /tools/testing/selftests/bpf/benchs
parent	f6e659b7f97c76d0471d12bf274ea2a097cf3c5c (diff)