diff options
Diffstat (limited to 'tools/testing/selftests/bpf/benchs/bench_trigger.c')
| -rw-r--r-- | tools/testing/selftests/bpf/benchs/bench_trigger.c | 611 |
1 files changed, 611 insertions, 0 deletions
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c new file mode 100644 index 000000000000..34018fc3927f --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -0,0 +1,611 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define _GNU_SOURCE +#include <argp.h> +#include <unistd.h> +#include <stdint.h> +#include "bpf_util.h" +#include "bench.h" +#include "trigger_bench.skel.h" +#include "trace_helpers.h" + +#define MAX_TRIG_BATCH_ITERS 1000 + +static struct { + __u32 batch_iters; +} args = { + .batch_iters = 100, +}; + +enum { + ARG_TRIG_BATCH_ITERS = 7000, +}; + +static const struct argp_option opts[] = { + { "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0, + "Number of in-kernel iterations per one driver test run"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_TRIG_BATCH_ITERS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) { + fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n", + 1, MAX_TRIG_BATCH_ITERS); + argp_usage(state); + } + args.batch_iters = ret; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_trigger_batch_argp = { + .options = opts, + .parser = parse_arg, +}; + +/* adjust slot shift in inc_hits() if changing */ +#define MAX_BUCKETS 256 + +#pragma GCC diagnostic ignored "-Wattributes" + +/* BPF triggering benchmarks */ +static struct trigger_ctx { + struct trigger_bench *skel; + bool usermode_counters; + int driver_prog_fd; +} ctx; + +static struct counter base_hits[MAX_BUCKETS]; + +static __always_inline void inc_counter(struct counter *counters) +{ + static __thread int tid = 0; + unsigned slot; + + if (unlikely(tid == 0)) + tid = sys_gettid(); + + /* multiplicative hashing, it's fast */ + slot = 2654435769U * tid; + slot >>= 24; + + atomic_inc(&base_hits[slot].value); /* use highest byte as an index */ +} + +static long sum_and_reset_counters(struct counter *counters) +{ + int i; + long sum = 0; + + for (i = 0; i < MAX_BUCKETS; i++) + sum += atomic_swap(&counters[i].value, 0); + return sum; +} + +static void trigger_validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); + exit(1); + } +} + +static void *trigger_producer(void *input) +{ + if (ctx.usermode_counters) { + while (true) { + (void)syscall(__NR_getpgid); + inc_counter(base_hits); + } + } else { + while (true) + (void)syscall(__NR_getpgid); + } + return NULL; +} + +static void *trigger_producer_batch(void *input) +{ + int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver); + + while (true) + bpf_prog_test_run_opts(fd, NULL); + + return NULL; +} + +static void trigger_measure(struct bench_res *res) +{ + if (ctx.usermode_counters) + res->hits = sum_and_reset_counters(base_hits); + else + res->hits = sum_and_reset_counters(ctx.skel->bss->hits); +} + +static void setup_ctx(void) +{ + setup_libbpf(); + + ctx.skel = trigger_bench__open(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + /* default "driver" BPF program */ + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true); + + ctx.skel->rodata->batch_iters = args.batch_iters; +} + +static void load_ctx(void) +{ + int err; + + err = trigger_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } +} + +static void attach_bpf(struct bpf_program *prog) +{ + struct bpf_link *link; + + link = bpf_program__attach(prog); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void trigger_syscall_count_setup(void) +{ + ctx.usermode_counters = true; +} + +/* Batched, staying mostly in-kernel triggering setups */ +static void trigger_kernel_count_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); + bpf_program__set_autoload(ctx.skel->progs.trigger_kernel_count, true); + load_ctx(); + /* override driver program */ + ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_kernel_count); +} + +static void trigger_kprobe_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true); + load_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_kprobe); +} + +static void trigger_kretprobe_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true); + load_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_kretprobe); +} + +static void trigger_kprobe_multi_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true); + load_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi); +} + +static void trigger_kretprobe_multi_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true); + load_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi); +} + +static void trigger_fentry_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true); + load_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fentry); +} + +static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + char **syms = NULL; + size_t cnt = 0; + + /* Some recursive functions will be skipped in + * bpf_get_ksyms -> skip_entry, as they can introduce sufficient + * overhead. However, it's difficut to skip all the recursive + * functions for a debug kernel. + * + * So, don't run the kprobe-multi-all and kretprobe-multi-all on + * a debug kernel. + */ + if (bpf_get_ksyms(&syms, &cnt, true)) { + fprintf(stderr, "failed to get ksyms\n"); + exit(1); + } + + opts.syms = (const char **) syms; + opts.cnt = cnt; + opts.retprobe = kretprobe; + /* attach empty to all the kernel functions except bpf_get_numa_node_id. */ + if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) { + fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n"); + exit(1); + } +} + +static void trigger_kprobe_multi_all_setup(void) +{ + struct bpf_program *prog, *empty; + + setup_ctx(); + empty = ctx.skel->progs.bench_kprobe_multi_empty; + prog = ctx.skel->progs.bench_trigger_kprobe_multi; + bpf_program__set_autoload(empty, true); + bpf_program__set_autoload(prog, true); + load_ctx(); + + attach_ksyms_all(empty, false); + attach_bpf(prog); +} + +static void trigger_kretprobe_multi_all_setup(void) +{ + struct bpf_program *prog, *empty; + + setup_ctx(); + empty = ctx.skel->progs.bench_kretprobe_multi_empty; + prog = ctx.skel->progs.bench_trigger_kretprobe_multi; + bpf_program__set_autoload(empty, true); + bpf_program__set_autoload(prog, true); + load_ctx(); + + attach_ksyms_all(empty, true); + attach_bpf(prog); +} + +static void trigger_fexit_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true); + load_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fexit); +} + +static void trigger_fmodret_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true); + load_ctx(); + /* override driver program */ + ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); + attach_bpf(ctx.skel->progs.bench_trigger_fmodret); +} + +static void trigger_tp_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true); + load_ctx(); + /* override driver program */ + ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); + attach_bpf(ctx.skel->progs.bench_trigger_tp); +} + +static void trigger_rawtp_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true); + load_ctx(); + /* override driver program */ + ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); + attach_bpf(ctx.skel->progs.bench_trigger_rawtp); +} + +/* make sure call is not inlined and not avoided by compiler, so __weak and + * inline asm volatile in the body of the function + * + * There is a performance difference between uprobing at nop location vs other + * instructions. So use two different targets, one of which starts with nop + * and another doesn't. + * + * GCC doesn't generate stack setup preamble for these functions due to them + * having no input arguments and doing nothing in the body. + */ +__nocf_check __weak void uprobe_target_nop(void) +{ + asm volatile ("nop"); +} + +__weak void opaque_noop_func(void) +{ +} + +__nocf_check __weak int uprobe_target_push(void) +{ + /* overhead of function call is negligible compared to uprobe + * triggering, so this shouldn't affect benchmark results much + */ + opaque_noop_func(); + return 1; +} + +__nocf_check __weak void uprobe_target_ret(void) +{ + asm volatile (""); +} + +static void *uprobe_producer_count(void *input) +{ + while (true) { + uprobe_target_nop(); + inc_counter(base_hits); + } + return NULL; +} + +static void *uprobe_producer_nop(void *input) +{ + while (true) + uprobe_target_nop(); + return NULL; +} + +static void *uprobe_producer_push(void *input) +{ + while (true) + uprobe_target_push(); + return NULL; +} + +static void *uprobe_producer_ret(void *input) +{ + while (true) + uprobe_target_ret(); + return NULL; +} + +#ifdef __x86_64__ +__nocf_check __weak void uprobe_target_nop5(void) +{ + asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00"); +} + +static void *uprobe_producer_nop5(void *input) +{ + while (true) + uprobe_target_nop5(); + return NULL; +} +#endif + +static void usetup(bool use_retprobe, bool use_multi, void *target_addr) +{ + size_t uprobe_offset; + struct bpf_link *link; + int err; + + setup_libbpf(); + + ctx.skel = trigger_bench__open(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + if (use_multi) + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true); + else + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true); + + err = trigger_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } + + uprobe_offset = get_uprobe_offset(target_addr); + if (use_multi) { + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts, + .retprobe = use_retprobe, + .cnt = 1, + .offsets = &uprobe_offset, + ); + link = bpf_program__attach_uprobe_multi( + ctx.skel->progs.bench_trigger_uprobe_multi, + -1 /* all PIDs */, "/proc/self/exe", NULL, &opts); + ctx.skel->links.bench_trigger_uprobe_multi = link; + } else { + link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe, + use_retprobe, + -1 /* all PIDs */, + "/proc/self/exe", + uprobe_offset); + ctx.skel->links.bench_trigger_uprobe = link; + } + if (!link) { + fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe"); + exit(1); + } +} + +static void usermode_count_setup(void) +{ + ctx.usermode_counters = true; +} + +static void uprobe_nop_setup(void) +{ + usetup(false, false /* !use_multi */, &uprobe_target_nop); +} + +static void uretprobe_nop_setup(void) +{ + usetup(true, false /* !use_multi */, &uprobe_target_nop); +} + +static void uprobe_push_setup(void) +{ + usetup(false, false /* !use_multi */, &uprobe_target_push); +} + +static void uretprobe_push_setup(void) +{ + usetup(true, false /* !use_multi */, &uprobe_target_push); +} + +static void uprobe_ret_setup(void) +{ + usetup(false, false /* !use_multi */, &uprobe_target_ret); +} + +static void uretprobe_ret_setup(void) +{ + usetup(true, false /* !use_multi */, &uprobe_target_ret); +} + +static void uprobe_multi_nop_setup(void) +{ + usetup(false, true /* use_multi */, &uprobe_target_nop); +} + +static void uretprobe_multi_nop_setup(void) +{ + usetup(true, true /* use_multi */, &uprobe_target_nop); +} + +static void uprobe_multi_push_setup(void) +{ + usetup(false, true /* use_multi */, &uprobe_target_push); +} + +static void uretprobe_multi_push_setup(void) +{ + usetup(true, true /* use_multi */, &uprobe_target_push); +} + +static void uprobe_multi_ret_setup(void) +{ + usetup(false, true /* use_multi */, &uprobe_target_ret); +} + +static void uretprobe_multi_ret_setup(void) +{ + usetup(true, true /* use_multi */, &uprobe_target_ret); +} + +#ifdef __x86_64__ +static void uprobe_nop5_setup(void) +{ + usetup(false, false /* !use_multi */, &uprobe_target_nop5); +} + +static void uretprobe_nop5_setup(void) +{ + usetup(true, false /* !use_multi */, &uprobe_target_nop5); +} + +static void uprobe_multi_nop5_setup(void) +{ + usetup(false, true /* use_multi */, &uprobe_target_nop5); +} + +static void uretprobe_multi_nop5_setup(void) +{ + usetup(true, true /* use_multi */, &uprobe_target_nop5); +} +#endif + +const struct bench bench_trig_syscall_count = { + .name = "trig-syscall-count", + .validate = trigger_validate, + .setup = trigger_syscall_count_setup, + .producer_thread = trigger_producer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +/* batched (staying mostly in kernel) kprobe/fentry benchmarks */ +#define BENCH_TRIG_KERNEL(KIND, NAME) \ +const struct bench bench_trig_##KIND = { \ + .name = "trig-" NAME, \ + .setup = trigger_##KIND##_setup, \ + .producer_thread = trigger_producer_batch, \ + .measure = trigger_measure, \ + .report_progress = hits_drops_report_progress, \ + .report_final = hits_drops_report_final, \ + .argp = &bench_trigger_batch_argp, \ +} + +BENCH_TRIG_KERNEL(kernel_count, "kernel-count"); +BENCH_TRIG_KERNEL(kprobe, "kprobe"); +BENCH_TRIG_KERNEL(kretprobe, "kretprobe"); +BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi"); +BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi"); +BENCH_TRIG_KERNEL(fentry, "fentry"); +BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all"); +BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all"); +BENCH_TRIG_KERNEL(fexit, "fexit"); +BENCH_TRIG_KERNEL(fmodret, "fmodret"); +BENCH_TRIG_KERNEL(tp, "tp"); +BENCH_TRIG_KERNEL(rawtp, "rawtp"); + +/* uprobe benchmarks */ +#define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \ +const struct bench bench_trig_##KIND = { \ + .name = "trig-" NAME, \ + .validate = trigger_validate, \ + .setup = KIND##_setup, \ + .producer_thread = uprobe_producer_##PRODUCER, \ + .measure = trigger_measure, \ + .report_progress = hits_drops_report_progress, \ + .report_final = hits_drops_report_final, \ +} + +BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count"); +BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop"); +BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push"); +BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret"); +BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop"); +BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push"); +BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret"); +BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop"); +BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push"); +BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret"); +BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop"); +BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push"); +BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret"); +#ifdef __x86_64__ +BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5"); +BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5"); +BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5"); +BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5"); +#endif |
