Diffstat (limited to 'tools/testing/selftests/bpf')
97 files changed, 9309 insertions, 3674 deletions
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index be1ee7ba7ce0..19c1638e312a 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -23,6 +23,7 @@ test_tcpnotify_user test_libbpf xdping test_cpp +test_progs_verification_cert *.d *.subskel.h *.skel.h @@ -32,7 +33,6 @@ test_cpp /cpuv4 /host-tools /tools -/runqslower /bench /veristat /sign-file diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f00587d4ede6..b7030a6e2e76 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -46,6 +46,7 @@ endif CFLAGS += -g $(OPT_FLAGS) -rdynamic -std=gnu11 \ -Wall -Werror -fno-omit-frame-pointer \ + -Wno-unused-but-set-variable \ $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ -I$(TOOLSINCDIR) -I$(TOOLSARCHINCDIR) -I$(APIDIR) -I$(OUTPUT) @@ -98,14 +99,11 @@ TEST_GEN_PROGS += test_progs-cpuv4 TEST_INST_SUBDIRS += cpuv4 endif -TEST_GEN_FILES = test_tc_edt.bpf.o TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c) # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ test_lirc_mode2.sh \ - test_tc_tunnel.sh \ - test_tc_edt.sh \ test_xdping.sh \ test_bpftool_build.sh \ test_bpftool.sh \ @@ -127,7 +125,6 @@ TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS)) TEST_GEN_PROGS_EXTENDED = \ bench \ flow_dissector_load \ - runqslower \ test_cpp \ test_lirc_mode2_user \ veristat \ @@ -209,8 +206,6 @@ HOST_INCLUDE_DIR := $(INCLUDE_DIR) endif HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids -RUNQSLOWER_OUTPUT := $(BUILD_DIR)/runqslower/ - VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ ../../../../vmlinux \ @@ -232,7 +227,7 @@ $(notdir $(TEST_GEN_PROGS) $(TEST_KMODS) \ MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \ $(BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/bpftool \ $(HOST_BUILD_DIR)/resolve_btfids \ - $(RUNQSLOWER_OUTPUT) $(INCLUDE_DIR)) + $(INCLUDE_DIR)) $(MAKE_DIRS): $(call msg,MKDIR,,$@) $(Q)mkdir -p $@ @@ -304,17 +299,6 @@ TRUNNER_BPFTOOL := $(DEFAULT_BPFTOOL) USE_BOOTSTRAP := "bootstrap/" endif -$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ - OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \ - BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ - BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/ \ - BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \ - BPF_TARGET_ENDIAN=$(BPF_TARGET_ENDIAN) \ - EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \ - EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' && \ - cp $(RUNQSLOWER_OUTPUT)runqslower $@ - TEST_GEN_PROGS_EXTENDED += $(TRUNNER_BPFTOOL) $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ) @@ -453,7 +437,9 @@ BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(abspath $(OUTPUT)/../usr/include) \ -std=gnu11 \ -fno-strict-aliasing \ - -Wno-compare-distinct-pointer-types + -Wno-compare-distinct-pointer-types \ + -Wno-initializer-overrides \ + # # TODO: enable me -Wsign-compare CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) @@ -498,7 +484,8 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c \ core_kern.c core_kern_overflow.c test_ringbuf.c \ - test_ringbuf_n.c test_ringbuf_map_key.c 
test_ringbuf_write.c + test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c \ + test_ringbuf_overwrite.c LSKELS_SIGNED := fentry_test.c fexit_test.c atomics.c @@ -543,6 +530,8 @@ TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o, \ $$(notdir $$(wildcard $(TRUNNER_TESTS_DIR)/*.c))) TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \ $$(filter %.c,$(TRUNNER_EXTRA_SOURCES))) +TRUNNER_LIB_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \ + $$(filter %.c,$(TRUNNER_LIB_SOURCES))) TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES)) TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c)) @@ -686,6 +675,10 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@) $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ +$(TRUNNER_LIB_OBJS): $(TRUNNER_OUTPUT)/%.o:$(TOOLSDIR)/lib/%.c + $$(call msg,LIB-OBJ,$(TRUNNER_BINARY),$$@) + $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ + # non-flavored in-srctree builds receive special treatment, in particular, we # do not need to copy extra resources (see e.g. test_btf_dump_case()) $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) @@ -699,6 +692,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS) $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \ + $(TRUNNER_LIB_OBJS) \ $(RESOLVE_BTFIDS) \ $(TRUNNER_BPFTOOL) \ $(OUTPUT)/veristat \ @@ -721,7 +715,8 @@ $(VERIFICATION_CERT) $(PRIVATE_KEY): $(VERIFY_SIG_SETUP) $(Q)$(VERIFY_SIG_SETUP) genkey $(BUILD_DIR) $(VERIFY_SIG_HDR): $(VERIFICATION_CERT) - $(Q)xxd -i -n test_progs_verification_cert $< > $@ + $(Q)ln -fs $< test_progs_verification_cert && \ + xxd -i test_progs_verification_cert > $@ # Define test_progs test runner. TRUNNER_TESTS_DIR := prog_tests @@ -745,6 +740,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \ $(VERIFY_SIG_HDR) \ flow_dissector_load.h \ ip_check_defrag_frags.h +TRUNNER_LIB_SOURCES := find_bit.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ @@ -782,6 +778,7 @@ endif TRUNNER_TESTS_DIR := map_tests TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_maps.c +TRUNNER_LIB_SOURCES := TRUNNER_EXTRA_FILES := TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built) TRUNNER_BPF_CFLAGS := @@ -803,7 +800,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ # Include find_bit.c to compile xskxceiver. 
-EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c +EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c prog_tests/test_xsk.c prog_tests/test_xsk.h $(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ @@ -893,7 +890,8 @@ EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ $(addprefix $(OUTPUT)/,*.o *.d *.skel.h *.lskel.h *.subskel.h \ no_alu32 cpuv4 bpf_gcc \ liburandom_read.so) \ - $(OUTPUT)/FEATURE-DUMP.selftests + $(OUTPUT)/FEATURE-DUMP.selftests \ + test_progs_verification_cert .PHONY: docs docs-clean diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index e1ee979e6acc..01bdce692799 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -19,6 +19,8 @@ static struct { int ringbuf_sz; /* per-ringbuf, in bytes */ bool ringbuf_use_output; /* use slower output API */ int perfbuf_sz; /* per-CPU size, in pages */ + bool overwrite; + bool bench_producer; } args = { .back2back = false, .batch_cnt = 500, @@ -27,6 +29,8 @@ static struct { .ringbuf_sz = 512 * 1024, .ringbuf_use_output = false, .perfbuf_sz = 128, + .overwrite = false, + .bench_producer = false, }; enum { @@ -35,6 +39,8 @@ enum { ARG_RB_BATCH_CNT = 2002, ARG_RB_SAMPLED = 2003, ARG_RB_SAMPLE_RATE = 2004, + ARG_RB_OVERWRITE = 2005, + ARG_RB_BENCH_PRODUCER = 2006, }; static const struct argp_option opts[] = { @@ -43,6 +49,8 @@ static const struct argp_option opts[] = { { "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"}, { "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"}, { "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"}, + { "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"}, + { "rb-bench-producer", ARG_RB_BENCH_PRODUCER, NULL, 0, "Benchmark producer"}, {}, }; @@ -72,6 +80,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) argp_usage(state); } break; + case ARG_RB_OVERWRITE: + args.overwrite = true; + break; + case ARG_RB_BENCH_PRODUCER: + args.bench_producer = true; + break; default: return ARGP_ERR_UNKNOWN; } @@ -95,8 +109,33 @@ static inline void bufs_trigger_batch(void) static void bufs_validate(void) { - if (env.consumer_cnt != 1) { - fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n"); + if (args.bench_producer && strcmp(env.bench_name, "rb-libbpf")) { + fprintf(stderr, "--rb-bench-producer only works with rb-libbpf!\n"); + exit(1); + } + + if (args.overwrite && !args.bench_producer) { + fprintf(stderr, "overwrite mode only works with --rb-bench-producer for now!\n"); + exit(1); + } + + if (args.bench_producer && env.consumer_cnt != 0) { + fprintf(stderr, "no consumer is needed for --rb-bench-producer!\n"); + exit(1); + } + + if (args.bench_producer && args.back2back) { + fprintf(stderr, "back-to-back mode makes no sense for --rb-bench-producer!\n"); + exit(1); + } + + if (args.bench_producer && args.sampled) { + fprintf(stderr, "sampling mode makes no sense for --rb-bench-producer!\n"); + exit(1); + } + + if (!args.bench_producer && env.consumer_cnt != 1) { + fprintf(stderr, "benchmarks without --rb-bench-producer require exactly one consumer!\n"); exit(1); } @@ -128,12 +167,17 @@ static void ringbuf_libbpf_measure(struct bench_res *res) { struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx; - res->hits = 
atomic_swap(&buf_hits.value, 0); + if (args.bench_producer) + res->hits = atomic_swap(&ctx->skel->bss->hits, 0); + else + res->hits = atomic_swap(&buf_hits.value, 0); res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); } static struct ringbuf_bench *ringbuf_setup_skeleton(void) { + __u32 flags; + struct bpf_map *ringbuf; struct ringbuf_bench *skel; setup_libbpf(); @@ -146,12 +190,19 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void) skel->rodata->batch_cnt = args.batch_cnt; skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0; + skel->rodata->bench_producer = args.bench_producer; if (args.sampled) /* record data + header take 16 bytes */ skel->rodata->wakeup_data_size = args.sample_rate * 16; - bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz); + ringbuf = skel->maps.ringbuf; + if (args.overwrite) { + flags = bpf_map__map_flags(ringbuf) | BPF_F_RB_OVERWRITE; + bpf_map__set_map_flags(ringbuf, flags); + } + + bpf_map__set_max_entries(ringbuf, args.ringbuf_sz); if (ringbuf_bench__load(skel)) { fprintf(stderr, "failed to load skeleton\n"); @@ -171,10 +222,12 @@ static void ringbuf_libbpf_setup(void) { struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx; struct bpf_link *link; + int map_fd; ctx->skel = ringbuf_setup_skeleton(); - ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf), - buf_process_sample, NULL, NULL); + + map_fd = bpf_map__fd(ctx->skel->maps.ringbuf); + ctx->ringbuf = ring_buffer__new(map_fd, buf_process_sample, NULL, NULL); if (!ctx->ringbuf) { fprintf(stderr, "failed to create ringbuf\n"); exit(1); diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 1e2aff007c2a..34018fc3927f 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -180,10 +180,10 @@ static void trigger_kernel_count_setup(void) { setup_ctx(); bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); - bpf_program__set_autoload(ctx.skel->progs.trigger_count, true); + bpf_program__set_autoload(ctx.skel->progs.trigger_kernel_count, true); load_ctx(); /* override driver program */ - ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count); + ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_kernel_count); } static void trigger_kprobe_setup(void) diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh index 91e3567962ff..83e05e837871 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh @@ -49,3 +49,7 @@ for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)" done +header "Ringbuf, multi-producer contention in overwrite mode, no consumer" +for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do + summarize "rb-prod nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 --rb-overwrite --rb-bench-producer rb-libbpf)" +done diff --git a/tools/testing/selftests/bpf/bpf_arena_list.h b/tools/testing/selftests/bpf/bpf_arena_list.h index 85dbc3ea4da5..e16fa7d95fcf 100644 --- a/tools/testing/selftests/bpf/bpf_arena_list.h +++ b/tools/testing/selftests/bpf/bpf_arena_list.h @@ -64,14 +64,12 @@ static inline void list_add_head(arena_list_node_t *n, arena_list_head_t *h) static inline void __list_del(arena_list_node_t *n) { - arena_list_node_t *next = n->next, *tmp; + arena_list_node_t 
*next = n->next;
 	arena_list_node_t * __arena *pprev = n->pprev;
 
 	cast_user(next);
 	cast_kern(pprev);
-	tmp = *pprev;
-	cast_kern(tmp);
-	WRITE_ONCE(tmp, next);
+	WRITE_ONCE(*pprev, next);
 	if (next) {
 		cast_user(pprev);
 		cast_kern(next);
diff --git a/tools/testing/selftests/bpf/bpf_arena_strsearch.h b/tools/testing/selftests/bpf/bpf_arena_strsearch.h
new file mode 100644
index 000000000000..c1b6eaa905bb
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_strsearch.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include "bpf_arena_common.h"
+
+__noinline int bpf_arena_strlen(const char __arena *s __arg_arena)
+{
+	const char __arena *sc;
+
+	for (sc = s; *sc != '\0'; ++sc)
+		cond_break;
+	return sc - s;
+}
+
+/**
+ * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
+ * @pat: Shell-style pattern to match, e.g. "*.[ch]".
+ * @str: String to match. The pattern must match the entire string.
+ *
+ * Perform shell-style glob matching, returning true (1) if the match
+ * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0).
+ *
+ * Pattern metacharacters are ?, *, [ and \.
+ * (And, inside character classes, !, - and ].)
+ *
+ * This is a small and simple implementation intended for device blacklists
+ * where a string is matched against a number of patterns. Thus, it
+ * does not preprocess the patterns. It is non-recursive, and run-time
+ * is at most quadratic: strlen(@str)*strlen(@pat).
+ *
+ * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
+ * it takes 6 passes over the pattern before matching the string.
+ *
+ * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
+ * treat / or leading . specially; it isn't actually used for pathnames.
+ *
+ * Note that according to glob(7) (and unlike bash), character classes
+ * are complemented by a leading !; this does not support the regex-style
+ * [^a-z] syntax.
+ *
+ * An opening bracket without a matching close is matched literally.
+ */
+__noinline bool glob_match(char const __arena *pat __arg_arena, char const __arena *str __arg_arena)
+{
+	/*
+	 * Backtrack to previous * on mismatch and retry starting one
+	 * character later in the string. Because * matches all characters
+	 * (no exception for /), it can be easily proved that there's
+	 * never a need to backtrack multiple levels.
+	 */
+	char const __arena *back_pat = NULL, *back_str;
+
+	/*
+	 * Loop over each token (character or class) in pat, matching
+	 * it against the remaining unmatched tail of str. Return false
+	 * on mismatch, or true after matching the trailing nul bytes.
+	 */
+	for (;;) {
+		unsigned char c = *str++;
+		unsigned char d = *pat++;
+
+		switch (d) {
+		case '?':	/* Wildcard: anything but nul */
+			if (c == '\0')
+				return false;
+			break;
+		case '*':	/* Any-length wildcard */
+			if (*pat == '\0')	/* Optimize trailing * case */
+				return true;
+			back_pat = pat;
+			back_str = --str;	/* Allow zero-length match */
+			break;
+		case '[': {	/* Character class */
+			bool match = false, inverted = (*pat == '!');
+			char const __arena *class = pat + inverted;
+			unsigned char a = *class++;
+
+			/*
+			 * Iterate over each span in the character class.
+			 * A span is either a single character a, or a
+			 * range a-b. The first span may begin with ']'.
+ */ + do { + unsigned char b = a; + + if (a == '\0') /* Malformed */ + goto literal; + + if (class[0] == '-' && class[1] != ']') { + b = class[1]; + + if (b == '\0') + goto literal; + + class += 2; + /* Any special action if a > b? */ + } + match |= (a <= c && c <= b); + cond_break; + } while ((a = *class++) != ']'); + + if (match == inverted) + goto backtrack; + pat = class; + } + break; + case '\\': + d = *pat++; + __attribute__((__fallthrough__)); + default: /* Literal character */ +literal: + if (c == d) { + if (d == '\0') + return true; + break; + } +backtrack: + if (c == '\0' || !back_pat) + return false; /* No point continuing */ + /* Try again from last *, one character later in str. */ + pat = back_pat; + str = ++back_str; + break; + } + cond_break; + } + return false; +} diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 794d44d19c88..e0189254bb6e 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -28,8 +28,8 @@ extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags, * Either a direct pointer to the dynptr data or a pointer to the user-provided * buffer if unable to obtain a direct pointer */ -extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, - void *buffer, __u32 buffer__szk) __ksym __weak; +extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u64 offset, + void *buffer, __u64 buffer__szk) __ksym __weak; /* Description * Obtain a read-write pointer to the dynptr's data @@ -37,13 +37,13 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, * Either a direct pointer to the dynptr data or a pointer to the user-provided * buffer if unable to obtain a direct pointer */ -extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset, - void *buffer, __u32 buffer__szk) __ksym __weak; +extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u64 offset, void *buffer, + __u64 buffer__szk) __ksym __weak; -extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym __weak; +extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u64 start, __u64 end) __ksym __weak; extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak; extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak; -extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak; +extern __u64 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak; extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak; /* Description diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index f2a2fd236ca8..558839e3c185 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -126,3 +126,8 @@ CONFIG_XDP_SOCKETS=y CONFIG_XFRM_INTERFACE=y CONFIG_TCP_CONG_DCTCP=y CONFIG_TCP_CONG_BBR=y +CONFIG_INFINIBAND=y +CONFIG_SMC=y +CONFIG_SMC_HS_CTRL_BPF=y +CONFIG_DIBS=y +CONFIG_DIBS_LO=y
\ No newline at end of file
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index cdf7b6641444..0a6a5561bed3 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -97,7 +97,7 @@ int settimeo(int fd, int timeout_ms)
 int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
 		      const struct network_helper_opts *opts)
 {
-	int fd;
+	int on = 1, fd;
 
 	if (!opts)
 		opts = &default_opts;
@@ -111,6 +111,12 @@ int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t a
 	if (settimeo(fd, opts->timeout_ms))
 		goto error_close;
 
+	if (type == SOCK_STREAM &&
+	    setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))) {
+		log_err("Failed to enable SO_REUSEADDR");
+		goto error_close;
+	}
+
 	if (opts->post_socket_cb &&
 	    opts->post_socket_cb(fd, opts->cb_opts)) {
 		log_err("Failed to call post_socket_cb");
@@ -766,6 +772,50 @@ int send_recv_data(int lfd, int fd, uint32_t total_bytes)
 	return err;
 }
 
+int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd)
+{
+	int ifindex, ret;
+
+	if (!ASSERT_TRUE(ingress_fd >= 0 || egress_fd >= 0,
+			 "at least one program fd is valid"))
+		return -1;
+
+	ifindex = if_nametoindex(dev);
+	if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
+		return -1;
+
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
+			    .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
+			    .priority = 1, .prog_fd = ingress_fd);
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
+			    .priority = 1, .prog_fd = egress_fd);
+
+	ret = bpf_tc_hook_create(&hook);
+	if (!ASSERT_OK(ret, "create tc hook"))
+		return ret;
+
+	if (ingress_fd >= 0) {
+		hook.attach_point = BPF_TC_INGRESS;
+		ret = bpf_tc_attach(&hook, &opts1);
+		if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+			bpf_tc_hook_destroy(&hook);
+			return ret;
+		}
+	}
+
+	if (egress_fd >= 0) {
+		hook.attach_point = BPF_TC_EGRESS;
+		ret = bpf_tc_attach(&hook, &opts2);
+		if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+			bpf_tc_hook_destroy(&hook);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 #ifdef TRAFFIC_MONITOR
 struct tmonitor_ctx {
 	pcap_t *pcap;
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index ef208eefd571..79a010c88e11 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -255,6 +255,22 @@ struct tmonitor_ctx;
 
 typedef int (*tm_print_fn_t)(const char *format, va_list args);
 
+/**
+ * tc_prog_attach - attach BPF program(s) to an interface
+ *
+ * Takes file descriptors for at least one, and at most two, BPF
+ * programs, and attaches those programs to an interface's ingress,
+ * egress, or both.
+ *
+ * @dev: string containing the interface name
+ * @ingress_fd: file descriptor of the program to attach to interface ingress
+ * @egress_fd: file descriptor of the program to attach to interface egress
+ *
+ * Returns 0 on success, -1 if no valid file descriptor has been found, if
+ * the interface name is invalid or if an error occurred during attach.
+ */ +int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd); + #ifdef TRAFFIC_MONITOR struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name, const char *subtest_name); diff --git a/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c b/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c new file mode 100644 index 000000000000..f81a0c066505 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include "arena_strsearch.skel.h" + +static void test_arena_str(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct arena_strsearch *skel; + int ret; + + skel = arena_strsearch__open_and_load(); + if (!ASSERT_OK_PTR(skel, "arena_strsearch__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_strsearch), &opts); + ASSERT_OK(ret, "ret_add"); + ASSERT_OK(opts.retval, "retval"); + if (skel->bss->skip) { + printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__); + test__skip(); + } + arena_strsearch__destroy(skel); +} + +void test_arena_strsearch(void) +{ + if (test__start_subtest("arena_strsearch")) + test_arena_str(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c new file mode 100644 index 000000000000..d138cc7b1bda --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> + +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in6.h> +#include <linux/udp.h> +#include <linux/tcp.h> + +#include <sys/syscall.h> +#include <bpf/bpf.h> + +#include "bpf_gotox.skel.h" + +static void __test_run(struct bpf_program *prog, void *ctx_in, size_t ctx_size_in) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts, + .ctx_in = ctx_in, + .ctx_size_in = ctx_size_in, + ); + int err, prog_fd; + + prog_fd = bpf_program__fd(prog); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run_opts err"); +} + +static void __subtest(struct bpf_gotox *skel, void (*check)(struct bpf_gotox *)) +{ + if (skel->data->skip) + test__skip(); + else + check(skel); +} + +static void check_simple(struct bpf_gotox *skel, + struct bpf_program *prog, + __u64 ctx_in, + __u64 expected) +{ + skel->bss->ret_user = 0; + + __test_run(prog, &ctx_in, sizeof(ctx_in)); + + if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user")) + return; +} + +static void check_simple_fentry(struct bpf_gotox *skel, + struct bpf_program *prog, + __u64 ctx_in, + __u64 expected) +{ + skel->bss->in_user = ctx_in; + skel->bss->ret_user = 0; + + /* trigger */ + usleep(1); + + if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user")) + return; +} + +/* validate that for two loads of the same jump table libbpf generates only one map */ +static void check_one_map_two_jumps(struct bpf_gotox *skel) +{ + struct bpf_prog_info prog_info; + struct bpf_map_info map_info; + __u32 len; + __u32 map_ids[16]; + int prog_fd, map_fd; + int ret; + int i; + bool seen = false; + + memset(&prog_info, 0, sizeof(prog_info)); + prog_info.map_ids = (long)map_ids; + prog_info.nr_map_ids = ARRAY_SIZE(map_ids); + prog_fd = bpf_program__fd(skel->progs.one_map_two_jumps); + if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd(one_map_two_jumps)")) + return; + + len = sizeof(prog_info); + 
ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &len); + if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(prog_fd)")) + return; + + for (i = 0; i < prog_info.nr_map_ids; i++) { + map_fd = bpf_map_get_fd_by_id(map_ids[i]); + if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id")) + return; + + len = sizeof(map_info); + memset(&map_info, 0, len); + ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &len); + if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(map_fd)")) { + close(map_fd); + return; + } + + if (map_info.type == BPF_MAP_TYPE_INSN_ARRAY) { + if (!ASSERT_EQ(seen, false, "more than one INSN_ARRAY map")) { + close(map_fd); + return; + } + seen = true; + } + close(map_fd); + } + + ASSERT_EQ(seen, true, "no INSN_ARRAY map"); +} + +static void check_one_switch(struct bpf_gotox *skel) +{ + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {2, 3, 4, 5, 7, 19, 19}; + int i; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.one_switch, in[i], out[i]); +} + +static void check_one_switch_non_zero_sec_off(struct bpf_gotox *skel) +{ + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {2, 3, 4, 5, 7, 19, 19}; + int i; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.one_switch_non_zero_sec_off, in[i], out[i]); +} + +static void check_two_switches(struct bpf_gotox *skel) +{ + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {103, 104, 107, 205, 115, 1019, 1019}; + int i; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.two_switches, in[i], out[i]); +} + +static void check_big_jump_table(struct bpf_gotox *skel) +{ + __u64 in[] = {0, 11, 27, 31, 22, 45, 99}; + __u64 out[] = {2, 3, 4, 5, 19, 19, 19}; + int i; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.big_jump_table, in[i], out[i]); +} + +static void check_one_jump_two_maps(struct bpf_gotox *skel) +{ + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {12, 15, 7 , 15, 12, 15, 15}; + int i; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.one_jump_two_maps, in[i], out[i]); +} + +static void check_static_global(struct bpf_gotox *skel) +{ + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {2, 3, 4, 5, 7, 19, 19}; + int i; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.use_static_global1, in[i], out[i]); + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.use_static_global2, in[i], out[i]); +} + +static void check_nonstatic_global(struct bpf_gotox *skel) +{ + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {2, 3, 4, 5, 7, 19, 19}; + int i; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.use_nonstatic_global1, in[i], out[i]); + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple(skel, skel->progs.use_nonstatic_global2, in[i], out[i]); +} + +static void check_other_sec(struct bpf_gotox *skel) +{ + struct bpf_link *link; + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {2, 3, 4, 5, 7, 19, 19}; + int i; + + link = bpf_program__attach(skel->progs.simple_test_other_sec); + if (!ASSERT_OK_PTR(link, "link")) + return; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple_fentry(skel, skel->progs.simple_test_other_sec, in[i], out[i]); + + bpf_link__destroy(link); +} + +static void check_static_global_other_sec(struct bpf_gotox *skel) +{ + struct bpf_link *link; + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {2, 3, 4, 5, 7, 19, 19}; + int i; + + link = bpf_program__attach(skel->progs.use_static_global_other_sec); + if (!ASSERT_OK_PTR(link, 
"link")) + return; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple_fentry(skel, skel->progs.use_static_global_other_sec, in[i], out[i]); + + bpf_link__destroy(link); +} + +static void check_nonstatic_global_other_sec(struct bpf_gotox *skel) +{ + struct bpf_link *link; + __u64 in[] = {0, 1, 2, 3, 4, 5, 77}; + __u64 out[] = {2, 3, 4, 5, 7, 19, 19}; + int i; + + link = bpf_program__attach(skel->progs.use_nonstatic_global_other_sec); + if (!ASSERT_OK_PTR(link, "link")) + return; + + for (i = 0; i < ARRAY_SIZE(in); i++) + check_simple_fentry(skel, skel->progs.use_nonstatic_global_other_sec, in[i], out[i]); + + bpf_link__destroy(link); +} + +void test_bpf_gotox(void) +{ + struct bpf_gotox *skel; + int ret; + + skel = bpf_gotox__open(); + if (!ASSERT_NEQ(skel, NULL, "bpf_gotox__open")) + return; + + ret = bpf_gotox__load(skel); + if (!ASSERT_OK(ret, "bpf_gotox__load")) + return; + + skel->bss->pid = getpid(); + + if (test__start_subtest("one-switch")) + __subtest(skel, check_one_switch); + + if (test__start_subtest("one-switch-non-zero-sec-offset")) + __subtest(skel, check_one_switch_non_zero_sec_off); + + if (test__start_subtest("two-switches")) + __subtest(skel, check_two_switches); + + if (test__start_subtest("big-jump-table")) + __subtest(skel, check_big_jump_table); + + if (test__start_subtest("static-global")) + __subtest(skel, check_static_global); + + if (test__start_subtest("nonstatic-global")) + __subtest(skel, check_nonstatic_global); + + if (test__start_subtest("other-sec")) + __subtest(skel, check_other_sec); + + if (test__start_subtest("static-global-other-sec")) + __subtest(skel, check_static_global_other_sec); + + if (test__start_subtest("nonstatic-global-other-sec")) + __subtest(skel, check_nonstatic_global_other_sec); + + if (test__start_subtest("one-jump-two-maps")) + __subtest(skel, check_one_jump_two_maps); + + if (test__start_subtest("one-map-two-jumps")) + __subtest(skel, check_one_map_two_jumps); + + bpf_gotox__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c new file mode 100644 index 000000000000..269870bec941 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c @@ -0,0 +1,504 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <bpf/bpf.h> +#include <test_progs.h> + +#ifdef __x86_64__ +static int map_create(__u32 map_type, __u32 max_entries) +{ + const char *map_name = "insn_array"; + __u32 key_size = 4; + __u32 value_size = sizeof(struct bpf_insn_array_value); + + return bpf_map_create(map_type, map_name, key_size, value_size, max_entries, NULL); +} + +static int prog_load(struct bpf_insn *insns, __u32 insn_cnt, int *fd_array, __u32 fd_array_cnt) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts); + + opts.fd_array = fd_array; + opts.fd_array_cnt = fd_array_cnt; + + return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, &opts); +} + +static void __check_success(struct bpf_insn *insns, __u32 insn_cnt, __u32 *map_in, __u32 *map_out) +{ + struct bpf_insn_array_value val = {}; + int prog_fd = -1, map_fd, i; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, insn_cnt); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + for (i = 0; i < insn_cnt; i++) { + val.orig_off = map_in[i]; + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem")) + goto cleanup; + } + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, insn_cnt, &map_fd, 1); + if 
(!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)")) + goto cleanup; + + for (i = 0; i < insn_cnt; i++) { + char buf[64]; + + if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem")) + goto cleanup; + + snprintf(buf, sizeof(buf), "val.xlated_off should be equal map_out[%d]", i); + ASSERT_EQ(val.xlated_off, map_out[i], buf); + } + +cleanup: + close(prog_fd); + close(map_fd); +} + +/* + * Load a program, which will not be anyhow mangled by the verifier. Add an + * insn_array map pointing to every instruction. Check that it hasn't changed + * after the program load. + */ +static void check_one_to_one_mapping(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 4), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + __u32 map_in[] = {0, 1, 2, 3, 4, 5}; + __u32 map_out[] = {0, 1, 2, 3, 4, 5}; + + __check_success(insns, ARRAY_SIZE(insns), map_in, map_out); +} + +/* + * Load a program with two patches (get jiffies, for simplicity). Add an + * insn_array map pointing to every instruction. Check how it was changed + * after the program load. + */ +static void check_simple(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + __u32 map_in[] = {0, 1, 2, 3, 4, 5}; + __u32 map_out[] = {0, 1, 4, 5, 8, 9}; + + __check_success(insns, ARRAY_SIZE(insns), map_in, map_out); +} + +/* + * Verifier can delete code in two cases: nops & dead code. From insn + * array's point of view, the two cases are the same, so test using + * the simplest method: by loading some nops + */ +static void check_deletions(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + __u32 map_in[] = {0, 1, 2, 3, 4, 5}; + __u32 map_out[] = {0, -1, 1, -1, 2, 3}; + + __check_success(insns, ARRAY_SIZE(insns), map_in, map_out); +} + +/* + * Same test as check_deletions, but also add code which adds instructions + */ +static void check_deletions_with_functions(void) +{ + struct bpf_insn insns[] = { + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */ + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }; + __u32 map_in[] = { 0, 1, 2, 3, 4, 5, /* func */ 6, 7, 8, 9, 10}; + __u32 map_out[] = {-1, 0, -1, 3, 4, 5, /* func */ -1, 6, -1, 9, 10}; + + __check_success(insns, ARRAY_SIZE(insns), map_in, map_out); +} + +/* + * Try to load a program with a map which points to outside of the program + */ +static void check_out_of_bounds_index(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 4), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, map_fd; + struct bpf_insn_array_value val = {}; + int key; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1); + 
if (!ASSERT_GE(map_fd, 0, "map_create"))
+		return;
+
+	key = 0;
+	val.orig_off = ARRAY_SIZE(insns); /* too big */
+	if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+		goto cleanup;
+
+	prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+	if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) {
+		close(prog_fd);
+		goto cleanup;
+	}
+
+cleanup:
+	close(map_fd);
+}
+
+/*
+ * Try to load a program with a map which points to the middle of a 16-byte insn
+ */
+static void check_mid_insn_index(void)
+{
+	struct bpf_insn insns[] = {
+		BPF_LD_IMM64(BPF_REG_0, 0), /* 2 x 8 */
+		BPF_EXIT_INSN(),
+	};
+	int prog_fd, map_fd;
+	struct bpf_insn_array_value val = {};
+	int key;
+
+	map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+	if (!ASSERT_GE(map_fd, 0, "map_create"))
+		return;
+
+	key = 0;
+	val.orig_off = 1; /* middle of 16-byte instruction */
+	if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+		goto cleanup;
+
+	prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+	if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) {
+		close(prog_fd);
+		goto cleanup;
+	}
+
+cleanup:
+	close(map_fd);
+}
+
+static void check_incorrect_index(void)
+{
+	check_out_of_bounds_index();
+	check_mid_insn_index();
+}
+
+static int set_bpf_jit_harden(char *level)
+{
+	char old_level;
+	int err = -1;
+	int fd = -1;
+
+	fd = open("/proc/sys/net/core/bpf_jit_harden", O_RDWR | O_NONBLOCK);
+	if (fd < 0) {
+		ASSERT_FAIL("open .../bpf_jit_harden returned %d (errno=%d)", fd, errno);
+		return -1;
+	}
+
+	err = read(fd, &old_level, 1);
+	if (err != 1) {
+		ASSERT_FAIL("read from .../bpf_jit_harden returned %d (errno=%d)", err, errno);
+		err = -1;
+		goto end;
+	}
+
+	lseek(fd, 0, SEEK_SET);
+
+	err = write(fd, level, 1);
+	if (err != 1) {
+		ASSERT_FAIL("write to .../bpf_jit_harden returned %d (errno=%d)", err, errno);
+		err = -1;
+		goto end;
+	}
+
+	err = 0;
+	*level = old_level;
+end:
+	if (fd >= 0)
+		close(fd);
+	return err;
+}
+
+static void check_blindness(void)
+{
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 4),
+		BPF_MOV64_IMM(BPF_REG_0, 3),
+		BPF_MOV64_IMM(BPF_REG_0, 2),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	};
+	int prog_fd = -1, map_fd;
+	struct bpf_insn_array_value val = {};
+	char bpf_jit_harden = '@'; /* non-existing value */
+	int i;
+
+	map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+	if (!ASSERT_GE(map_fd, 0, "map_create"))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(insns); i++) {
+		val.orig_off = i;
+		if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+			goto cleanup;
+	}
+
+	if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+		goto cleanup;
+
+	bpf_jit_harden = '2';
+	if (set_bpf_jit_harden(&bpf_jit_harden)) {
+		bpf_jit_harden = '@'; /* open, read or write failed => no write was done */
+		goto cleanup;
+	}
+
+	prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+	if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(insns); i++) {
+		char fmt[32];
+
+		if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+			goto cleanup;
+
+		snprintf(fmt, sizeof(fmt), "val should equal 3*%d", i);
+		ASSERT_EQ(val.xlated_off, i * 3, fmt);
+	}
+
+cleanup:
+	/*
restore the old one */ + if (bpf_jit_harden != '@') + set_bpf_jit_harden(&bpf_jit_harden); + + close(prog_fd); + close(map_fd); +} + +/* Once map was initialized, it should be frozen */ +static void check_load_unfrozen_map(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd = -1, map_fd; + struct bpf_insn_array_value val = {}; + int i; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns)); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + val.orig_off = i; + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem")) + goto cleanup; + } + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) + goto cleanup; + + /* correctness: now freeze the map, the program should load fine */ + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem")) + goto cleanup; + + ASSERT_EQ(val.xlated_off, i, "val should be equal i"); + } + +cleanup: + close(prog_fd); + close(map_fd); +} + +/* Map can be used only by one BPF program */ +static void check_no_map_reuse(void) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd = -1, map_fd, extra_fd = -1; + struct bpf_insn_array_value val = {}; + int i; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns)); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + val.orig_off = i; + if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem")) + goto cleanup; + } + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(insns); i++) { + if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem")) + goto cleanup; + + ASSERT_EQ(val.xlated_off, i, "val should be equal i"); + } + + extra_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1); + if (!ASSERT_EQ(extra_fd, -EBUSY, "program should have been rejected (extra_fd != -EBUSY)")) + goto cleanup; + + /* correctness: check that prog is still loadable without fd_array */ + extra_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0); + if (!ASSERT_GE(extra_fd, 0, "bpf(BPF_PROG_LOAD): expected no error")) + goto cleanup; + +cleanup: + close(extra_fd); + close(prog_fd); + close(map_fd); +} + +static void check_bpf_no_lookup(void) +{ + struct bpf_insn insns[] = { + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }; + int prog_fd = -1, map_fd; + + map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1); + if (!ASSERT_GE(map_fd, 0, "map_create")) + return; + + insns[0].imm = map_fd; + + if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze")) + goto cleanup; + + prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0); + if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != 
-EINVAL)")) + goto cleanup; + + /* correctness: check that prog is still loadable with normal map */ + close(map_fd); + map_fd = map_create(BPF_MAP_TYPE_ARRAY, 1); + insns[0].imm = map_fd; + prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0); + if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)")) + goto cleanup; + +cleanup: + close(prog_fd); + close(map_fd); +} + +static void check_bpf_side(void) +{ + check_bpf_no_lookup(); +} + +static void __test_bpf_insn_array(void) +{ + /* Test if offsets are adjusted properly */ + + if (test__start_subtest("one2one")) + check_one_to_one_mapping(); + + if (test__start_subtest("simple")) + check_simple(); + + if (test__start_subtest("deletions")) + check_deletions(); + + if (test__start_subtest("deletions-with-functions")) + check_deletions_with_functions(); + + if (test__start_subtest("blindness")) + check_blindness(); + + /* Check all kinds of operations and related restrictions */ + + if (test__start_subtest("incorrect-index")) + check_incorrect_index(); + + if (test__start_subtest("load-unfrozen-map")) + check_load_unfrozen_map(); + + if (test__start_subtest("no-map-reuse")) + check_no_map_reuse(); + + if (test__start_subtest("bpf-side-ops")) + check_bpf_side(); +} +#else +static void __test_bpf_insn_array(void) +{ + test__skip(); +} +#endif + +void test_bpf_insn_array(void) +{ + __test_bpf_insn_array(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 8a9ba4292109..054ecb6b1e9f 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -7496,6 +7496,71 @@ static struct btf_dedup_test dedup_tests[] = { }, }, { + .descr = "dedup: recursive typedef", + /* + * This test simulates a recursive typedef, which in GO is defined as such: + * + * type Foo func() Foo + * + * In BTF terms, this is represented as a TYPEDEF referencing + * a FUNC_PROTO that returns the same TYPEDEF. 
+ */ + .input = { + .raw_types = { + /* + * [1] typedef Foo -> func() Foo + * [2] func_proto() -> Foo + * [3] typedef Foo -> func() Foo + * [4] func_proto() -> Foo + */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 2), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 0), /* [2] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 4), /* [3] */ + BTF_FUNC_PROTO_ENC(3, 0), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0Foo"), + }, + .expect = { + .raw_types = { + BTF_TYPEDEF_ENC(NAME_NTH(1), 2), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 0), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0Foo"), + }, +}, +{ + .descr = "dedup: typedef", + /* + * // CU 1: + * typedef int foo; + * + * // CU 2: + * typedef int foo; + */ + .input = { + .raw_types = { + /* CU 1 */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */ + /* CU 2 */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [3] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 3), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0foo"), + }, +}, +{ .descr = "dedup: typedef tags", .input = { .raw_types = { diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c index 3696fb9a05ed..2d47cad50a51 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c @@ -12,11 +12,45 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args) vfprintf(ctx, fmt, args); } +/* Write raw BTF to file, return number of bytes written or negative errno */ +static ssize_t btf_raw_write(struct btf *btf, char *file) +{ + ssize_t written = 0; + const void *data; + __u32 size = 0; + int fd, ret; + + fd = mkstemp(file); + if (!ASSERT_GE(fd, 0, "create_file")) + return -errno; + + data = btf__raw_data(btf, &size); + if (!ASSERT_OK_PTR(data, "btf__raw_data")) { + close(fd); + return -EINVAL; + } + while (written < size) { + ret = write(fd, data + written, size - written); + if (!ASSERT_GE(ret, 0, "write succeeded")) { + close(fd); + return -errno; + } + written += ret; + } + close(fd); + return written; +} + static void __test_btf_split(bool multi) { + char multisplit_btf_file[] = "/tmp/test_btf_multisplit.XXXXXX"; + char split_btf_file[] = "/tmp/test_btf_split.XXXXXX"; + char base_btf_file[] = "/tmp/test_btf_base.XXXXXX"; + ssize_t multisplit_btf_sz = 0, split_btf_sz = 0, base_btf_sz = 0; struct btf_dump *d = NULL; - const struct btf_type *t; - struct btf *btf1, *btf2, *btf3 = NULL; + const struct btf_type *t, *ot; + struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL; + struct btf *btf4 = NULL, *btf5 = NULL, *btf6 = NULL; int str_off, i, err; btf1 = btf__new_empty(); @@ -123,6 +157,45 @@ static void __test_btf_split(bool multi) " int uf2;\n" "};\n\n", "c_dump"); + /* write base, split BTFs to files and ensure parsing succeeds */ + base_btf_sz = btf_raw_write(btf1, base_btf_file); + if (base_btf_sz < 0) + goto cleanup; + split_btf_sz = btf_raw_write(btf2, split_btf_file); + if (split_btf_sz < 0) + goto cleanup; + btf4 = btf__parse(base_btf_file, NULL); + if (!ASSERT_OK_PTR(btf4, "parse_base")) + goto cleanup; + btf5 = btf__parse_split(split_btf_file, btf4); + if (!ASSERT_OK_PTR(btf5, "parse_split")) + goto cleanup; + if (multi) { + multisplit_btf_sz = btf_raw_write(btf3, multisplit_btf_file); + if (multisplit_btf_sz < 0) + goto cleanup; + btf6 = btf__parse_split(multisplit_btf_file, 
btf5); + if (!ASSERT_OK_PTR(btf6, "parse_multisplit")) + goto cleanup; + } else { + btf6 = btf5; + } + + if (!ASSERT_EQ(btf__type_cnt(btf3), btf__type_cnt(btf6), "cmp_type_cnt")) + goto cleanup; + + /* compare parsed to original BTF */ + for (i = 1; i < btf__type_cnt(btf6); i++) { + t = btf__type_by_id(btf6, i); + if (!ASSERT_OK_PTR(t, "type_in_parsed_btf")) + goto cleanup; + ot = btf__type_by_id(btf3, i); + if (!ASSERT_OK_PTR(ot, "type_in_orig_btf")) + goto cleanup; + if (!ASSERT_EQ(memcmp(t, ot, sizeof(*ot)), 0, "cmp_parsed_orig_btf")) + goto cleanup; + } + cleanup: if (dump_buf_file) fclose(dump_buf_file); @@ -132,6 +205,16 @@ cleanup: btf__free(btf2); if (btf2 != btf3) btf__free(btf3); + btf__free(btf4); + btf__free(btf5); + if (btf5 != btf6) + btf__free(btf6); + if (base_btf_sz > 0) + unlink(base_btf_file); + if (split_btf_sz > 0) + unlink(split_btf_file); + if (multisplit_btf_sz > 0) + unlink(multisplit_btf_file); } void test_btf_split(void) diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c index 2a9a30650350..65b4512967e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -153,6 +153,26 @@ static void test_check_mtu_run_tc(struct test_check_mtu *skel, ASSERT_EQ(mtu_result, mtu_expect, "MTU-compare-user"); } +static void test_chk_segs_flag(struct test_check_mtu *skel, __u32 mtu) +{ + int err, prog_fd = bpf_program__fd(skel->progs.tc_chk_segs_flag); + struct __sk_buff skb = { + .gso_size = 10, + }; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .ctx_in = &skb, + .ctx_size_in = sizeof(skb), + ); + + /* Lower the mtu to test the BPF_MTU_CHK_SEGS */ + SYS_NOFAIL("ip link set dev lo mtu 10"); + err = bpf_prog_test_run_opts(prog_fd, &topts); + SYS_NOFAIL("ip link set dev lo mtu %u", mtu); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, BPF_OK, "retval"); +} static void test_check_mtu_tc(__u32 mtu, __u32 ifindex) { @@ -177,11 +197,12 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex) test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu); test_check_mtu_run_tc(skel, skel->progs.tc_input_len, mtu); test_check_mtu_run_tc(skel, skel->progs.tc_input_len_exceed, mtu); + test_chk_segs_flag(skel, mtu); cleanup: test_check_mtu__destroy(skel); } -void serial_test_check_mtu(void) +void test_ns_check_mtu(void) { int mtu_lo; diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index 34b59f6baca1..7488a7606e6a 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -22,79 +22,37 @@ static int duration = 0; -struct addr_port { - in_port_t port; - union { - struct in_addr in_addr; - struct in6_addr in6_addr; - }; -}; - -struct tuple { - int family; - struct addr_port src; - struct addr_port dst; -}; - -static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap) -{ - const struct sockaddr_in6 *in6; - const struct sockaddr_in *in; - - switch (sa->sa_family) { - case AF_INET: - in = (const struct sockaddr_in *)sa; - ap->in_addr = in->sin_addr; - ap->port = in->sin_port; - return true; - - case AF_INET6: - in6 = (const struct sockaddr_in6 *)sa; - ap->in6_addr = in6->sin6_addr; - ap->port = in6->sin6_port; - return true; - - default: - return false; - } -} -static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type, - int 
*server, int *conn, struct tuple *tuple) +static bool set_up_conn(const struct sockaddr_storage *addr, socklen_t len, int type, + int *server, int *conn, + struct sockaddr_storage *src, + struct sockaddr_storage *dst) { struct sockaddr_storage ss; socklen_t slen = sizeof(ss); - struct sockaddr *sa = (struct sockaddr *)&ss; - *server = start_server_addr(type, (struct sockaddr_storage *)addr, len, NULL); + *server = start_server_addr(type, addr, len, NULL); if (*server < 0) return false; - if (CHECK_FAIL(getsockname(*server, sa, &slen))) + if (CHECK_FAIL(getsockname(*server, (struct sockaddr *)&ss, &slen))) goto close_server; - *conn = connect_to_addr(type, (struct sockaddr_storage *)sa, slen, NULL); + *conn = connect_to_addr(type, &ss, slen, NULL); if (*conn < 0) goto close_server; /* We want to simulate packets arriving at conn, so we have to * swap src and dst. */ - slen = sizeof(ss); - if (CHECK_FAIL(getsockname(*conn, sa, &slen))) - goto close_conn; - - if (CHECK_FAIL(!fill_addr_port(sa, &tuple->dst))) + slen = sizeof(*dst); + if (CHECK_FAIL(getsockname(*conn, (struct sockaddr *)dst, &slen))) goto close_conn; - slen = sizeof(ss); - if (CHECK_FAIL(getpeername(*conn, sa, &slen))) + slen = sizeof(*src); + if (CHECK_FAIL(getpeername(*conn, (struct sockaddr *)src, &slen))) goto close_conn; - if (CHECK_FAIL(!fill_addr_port(sa, &tuple->src))) - goto close_conn; - - tuple->family = ss.ss_family; return true; close_conn: @@ -110,17 +68,16 @@ static socklen_t prepare_addr(struct sockaddr_storage *addr, int family) { struct sockaddr_in *addr4; struct sockaddr_in6 *addr6; + memset(addr, 0, sizeof(*addr)); switch (family) { case AF_INET: addr4 = (struct sockaddr_in *)addr; - memset(addr4, 0, sizeof(*addr4)); addr4->sin_family = family; addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); return sizeof(*addr4); case AF_INET6: addr6 = (struct sockaddr_in6 *)addr; - memset(addr6, 0, sizeof(*addr6)); addr6->sin6_family = family; addr6->sin6_addr = in6addr_loopback; return sizeof(*addr6); @@ -242,9 +199,15 @@ static void encap_init(encap_headers_t *encap, uint8_t hop_count, uint8_t proto) } static size_t build_input(const struct test_cfg *test, void *const buf, - const struct tuple *tuple) + const struct sockaddr_storage *src, + const struct sockaddr_storage *dst) { - in_port_t sport = tuple->src.port; + struct sockaddr_in6 *src_in6 = (struct sockaddr_in6 *)src; + struct sockaddr_in6 *dst_in6 = (struct sockaddr_in6 *)dst; + struct sockaddr_in *src_in = (struct sockaddr_in *)src; + struct sockaddr_in *dst_in = (struct sockaddr_in *)dst; + sa_family_t family = src->ss_family; + in_port_t sport, dport; encap_headers_t encap; struct iphdr ip; struct ipv6hdr ipv6; @@ -254,8 +217,11 @@ static size_t build_input(const struct test_cfg *test, void *const buf, uint8_t *p = buf; int proto; + sport = (family == AF_INET) ? src_in->sin_port : src_in6->sin6_port; + dport = (family == AF_INET) ? dst_in->sin_port : dst_in6->sin6_port; + proto = IPPROTO_IPIP; - if (tuple->family == AF_INET6) + if (family == AF_INET6) proto = IPPROTO_IPV6; encap_init(&encap, test->hops == ONE_HOP ? 
1 : 0, proto); @@ -270,15 +236,15 @@ static size_t build_input(const struct test_cfg *test, void *const buf, if (test->type == UDP) proto = IPPROTO_UDP; - switch (tuple->family) { + switch (family) { case AF_INET: ip = (struct iphdr){ .ihl = 5, .version = 4, .ttl = IPDEFTTL, .protocol = proto, - .saddr = tuple->src.in_addr.s_addr, - .daddr = tuple->dst.in_addr.s_addr, + .saddr = src_in->sin_addr.s_addr, + .daddr = dst_in->sin_addr.s_addr, }; p = mempcpy(p, &ip, sizeof(ip)); break; @@ -287,8 +253,8 @@ static size_t build_input(const struct test_cfg *test, void *const buf, .version = 6, .hop_limit = IPDEFTTL, .nexthdr = proto, - .saddr = tuple->src.in6_addr, - .daddr = tuple->dst.in6_addr, + .saddr = src_in6->sin6_addr, + .daddr = dst_in6->sin6_addr, }; p = mempcpy(p, &ipv6, sizeof(ipv6)); break; @@ -303,18 +269,16 @@ static size_t build_input(const struct test_cfg *test, void *const buf, case TCP: tcp = (struct tcphdr){ .source = sport, - .dest = tuple->dst.port, + .dest = dport, + .syn = (test->flags == SYN), + .ack = (test->flags == ACK), }; - if (test->flags == SYN) - tcp.syn = true; - if (test->flags == ACK) - tcp.ack = true; p = mempcpy(p, &tcp, sizeof(tcp)); break; case UDP: udp = (struct udphdr){ .source = sport, - .dest = tuple->dst.port, + .dest = dport, }; p = mempcpy(p, &udp, sizeof(udp)); break; @@ -339,27 +303,26 @@ static void test_cls_redirect_common(struct bpf_program *prog) LIBBPF_OPTS(bpf_test_run_opts, tattr); int families[] = { AF_INET, AF_INET6 }; struct sockaddr_storage ss; - struct sockaddr *addr; socklen_t slen; int i, j, err, prog_fd; int servers[__NR_KIND][ARRAY_SIZE(families)] = {}; int conns[__NR_KIND][ARRAY_SIZE(families)] = {}; - struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)]; + struct sockaddr_storage srcs[__NR_KIND][ARRAY_SIZE(families)]; + struct sockaddr_storage dsts[__NR_KIND][ARRAY_SIZE(families)]; - addr = (struct sockaddr *)&ss; for (i = 0; i < ARRAY_SIZE(families); i++) { slen = prepare_addr(&ss, families[i]); if (CHECK_FAIL(!slen)) goto cleanup; - if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_DGRAM, + if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_DGRAM, &servers[UDP][i], &conns[UDP][i], - &tuples[UDP][i]))) + &srcs[UDP][i], &dsts[UDP][i]))) goto cleanup; - if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_STREAM, + if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_STREAM, &servers[TCP][i], &conns[TCP][i], - &tuples[TCP][i]))) + &srcs[TCP][i], &dsts[TCP][i]))) goto cleanup; } @@ -368,11 +331,12 @@ static void test_cls_redirect_common(struct bpf_program *prog) struct test_cfg *test = &tests[i]; for (j = 0; j < ARRAY_SIZE(families); j++) { - struct tuple *tuple = &tuples[test->type][j]; + struct sockaddr_storage *src = &srcs[test->type][j]; + struct sockaddr_storage *dst = &dsts[test->type][j]; char input[256]; char tmp[256]; - test_str(tmp, sizeof(tmp), test, tuple->family); + test_str(tmp, sizeof(tmp), test, families[j]); if (!test__start_subtest(tmp)) continue; @@ -380,7 +344,7 @@ static void test_cls_redirect_common(struct bpf_program *prog) tattr.data_size_out = sizeof(tmp); tattr.data_in = input; - tattr.data_size_in = build_input(test, input, tuple); + tattr.data_size_in = build_input(test, input, src, dst); if (CHECK_FAIL(!tattr.data_size_in)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/file_reader.c b/tools/testing/selftests/bpf/prog_tests/file_reader.c new file mode 100644 index 000000000000..5cde32b35da4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/file_reader.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 +/* 
Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include <network_helpers.h> +#include "file_reader.skel.h" +#include "file_reader_fail.skel.h" +#include <dlfcn.h> +#include <sys/mman.h> + +const char *user_ptr = "hello world"; +char file_contents[256000]; + +void *get_executable_base_addr(void) +{ + Dl_info info; + + if (!dladdr((void *)&get_executable_base_addr, &info)) { + fprintf(stderr, "dladdr failed\n"); + return NULL; + } + + return info.dli_fbase; +} + +static int initialize_file_contents(void) +{ + int fd, page_sz = sysconf(_SC_PAGESIZE); + ssize_t n = 0, cur, off; + void *addr; + + fd = open("/proc/self/exe", O_RDONLY); + if (!ASSERT_OK_FD(fd, "Open /proc/self/exe\n")) + return 1; + + do { + cur = read(fd, file_contents + n, sizeof(file_contents) - n); + if (!ASSERT_GT(cur, 0, "read success")) + break; + n += cur; + } while (n < sizeof(file_contents)); + + close(fd); + + if (!ASSERT_EQ(n, sizeof(file_contents), "Read /proc/self/exe\n")) + return 1; + + addr = get_executable_base_addr(); + if (!ASSERT_NEQ(addr, NULL, "get executable address")) + return 1; + + /* page-align base file address */ + addr = (void *)((unsigned long)addr & ~(page_sz - 1)); + + /* + * Page out range 0..512K, use 0..256K for positive tests and + * 256K..512K for negative tests expecting page faults + */ + for (off = 0; off < sizeof(file_contents) * 2; off += page_sz) { + if (!ASSERT_OK(madvise(addr + off, page_sz, MADV_PAGEOUT), + "madvise pageout")) + return errno; + } + + return 0; +} + +static void run_test(const char *prog_name) +{ + struct file_reader *skel; + struct bpf_program *prog; + int err, fd; + + err = initialize_file_contents(); + if (!ASSERT_OK(err, "initialize file contents")) + return; + + skel = file_reader__open(); + if (!ASSERT_OK_PTR(skel, "file_reader__open")) + return; + + bpf_object__for_each_program(prog, skel->obj) { + bpf_program__set_autoload(prog, strcmp(bpf_program__name(prog), prog_name) == 0); + } + + memcpy(skel->bss->user_buf, file_contents, sizeof(file_contents)); + skel->bss->pid = getpid(); + + err = file_reader__load(skel); + if (!ASSERT_OK(err, "file_reader__load")) + goto cleanup; + + err = file_reader__attach(skel); + if (!ASSERT_OK(err, "file_reader__attach")) + goto cleanup; + + fd = open("/proc/self/exe", O_RDONLY); + if (fd >= 0) + close(fd); + + ASSERT_EQ(skel->bss->err, 0, "err"); + ASSERT_EQ(skel->bss->run_success, 1, "run_success"); +cleanup: + file_reader__destroy(skel); +} + +void test_file_reader(void) +{ + if (test__start_subtest("on_open_expect_fault")) + run_test("on_open_expect_fault"); + + if (test__start_subtest("on_open_validate_file_read")) + run_test("on_open_validate_file_read"); + + if (test__start_subtest("negative")) + RUN_TESTS(file_reader_fail); +} diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c index 2bc85f4814f4..d0b405eb2966 100644 --- a/tools/testing/selftests/bpf/prog_tests/htab_update.c +++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c @@ -15,17 +15,17 @@ struct htab_update_ctx { static void test_reenter_update(void) { struct htab_update *skel; - unsigned int key, value; + void *value = NULL; + unsigned int key, value_size; int err; skel = htab_update__open(); if (!ASSERT_OK_PTR(skel, "htab_update__open")) return; - /* lookup_elem_raw() may be inlined and find_kernel_btf_id() will return -ESRCH */ - bpf_program__set_autoload(skel->progs.lookup_elem_raw, true); + 
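The key trick in initialize_file_contents() above is madvise(MADV_PAGEOUT): the executable's resident pages are evicted so that a later access, here the kernel-side file read triggered by the BPF program, has to fault them back in (or hit a genuine fault in the negative-test range). A self-contained sketch of the page-out pattern (MADV_PAGEOUT needs Linux 5.4+):

#include <sys/mman.h>
#include <unistd.h>

/* Page out [addr, addr + len), one page at a time.
 * addr must be page-aligned; on failure errno is set by madvise().
 */
static int pageout_range(void *addr, size_t len)
{
	long page_sz = sysconf(_SC_PAGESIZE);
	size_t off;

	for (off = 0; off < len; off += page_sz) {
		if (madvise((char *)addr + off, page_sz, MADV_PAGEOUT))
			return -1;
	}
	return 0;
}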
bpf_program__set_autoload(skel->progs.bpf_obj_free_fields, true); err = htab_update__load(skel); - if (!ASSERT_TRUE(!err || err == -ESRCH, "htab_update__load") || err) + if (!ASSERT_TRUE(!err, "htab_update__load") || err) goto out; skel->bss->pid = getpid(); @@ -33,14 +33,33 @@ static void test_reenter_update(void) if (!ASSERT_OK(err, "htab_update__attach")) goto out; - /* Will trigger the reentrancy of bpf_map_update_elem() */ + value_size = bpf_map__value_size(skel->maps.htab); + + value = calloc(1, value_size); + if (!ASSERT_OK_PTR(value, "calloc value")) + goto out; + /* + * First update: plain insert. This should NOT trigger the re-entrancy + * path, because there is no old element to free yet. + */ key = 0; - value = 0; - err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, &value, 0); - if (!ASSERT_OK(err, "add element")) + err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY); + if (!ASSERT_OK(err, "first update (insert)")) + goto out; + + /* + * Second update: replace existing element with same key and trigger + * the reentrancy of bpf_map_update_elem(). + * check_and_free_fields() calls bpf_obj_free_fields() on the old + * value, which is where fentry program runs and performs a nested + * bpf_map_update_elem(), triggering -EDEADLK. + */ + memset(value, 0, value_size); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY); + if (!ASSERT_OK(err, "second update (replace)")) goto out; - ASSERT_EQ(skel->bss->update_err, -EBUSY, "no reentrancy"); + ASSERT_EQ(skel->bss->update_err, -EDEADLK, "no reentrancy"); out: htab_update__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c index 1de14b111931..6e35e13c2022 100644 --- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c @@ -57,7 +57,8 @@ static void subtest_kmem_cache_iter_check_slabinfo(struct kmem_cache_iter *skel) if (!ASSERT_OK(ret, "kmem_cache_lookup")) break; - ASSERT_STREQ(r.name, name, "kmem_cache_name"); + ASSERT_STRNEQ(r.name, name, sizeof(r.name) - 1, + "kmem_cache_name"); ASSERT_EQ(r.obj_size, objsize, "kmem_cache_objsize"); seen++; diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c index bc24f83339d6..0a7ef770c487 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c @@ -15,6 +15,10 @@ static void check_good_sample(struct test_perf_branches *skel) int pbe_size = sizeof(struct perf_branch_entry); int duration = 0; + if (CHECK(!skel->bss->run_cnt, "invalid run_cnt", + "checked sample validity before prog run")) + return; + if (CHECK(!skel->bss->valid, "output not valid", "no valid sample from prog")) return; @@ -45,6 +49,10 @@ static void check_bad_sample(struct test_perf_branches *skel) int written_stack = skel->bss->written_stack_out; int duration = 0; + if (CHECK(!skel->bss->run_cnt, "invalid run_cnt", + "checked sample validity before prog run")) + return; + if (CHECK(!skel->bss->valid, "output not valid", "no valid sample from prog")) return; @@ -83,8 +91,12 @@ static void test_perf_branches_common(int perf_fd, err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err)) goto out_destroy; - /* spin the loop for a while (random high number) */ - for (i = 0; i < 1000000; ++i) + + /* Spin 
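The reworked htab_update flow relies on the BPF side re-entering bpf_map_update_elem() from inside bpf_obj_free_fields(). A sketch of what that program plausibly looks like; the real source is progs/htab_update.c, and the value layout below (a bpf_timer field, so the replaced element has something to free) is an assumption:

// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct htab_val {
	struct bpf_timer timer;	/* assumed: any field bpf_obj_free_fields() must release */
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 2);
	__type(key, __u32);
	__type(value, struct htab_val);
} htab SEC(".maps");

int pid;
int update_err;

/* Runs while the outer bpf_map_update_elem() frees the replaced value;
 * the nested update hits the already-held bucket and fails, and user
 * space asserts the recorded error is -EDEADLK.
 */
SEC("fentry/bpf_obj_free_fields")
int BPF_PROG(bpf_obj_free_fields)
{
	struct htab_val val = {};
	__u32 key = 0;

	if ((bpf_get_current_pid_tgid() >> 32) != pid)
		return 0;
	update_err = bpf_map_update_elem(&htab, &key, &val, BPF_ANY);
	return 0;
}

char _license[] SEC("license") = "GPL";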
the loop for a while by using a high iteration count, and by + * checking whether the specific run count marker has been explicitly + * incremented at least once by the backing perf_event BPF program. + */ + for (i = 0; i < 100000000 && !*(volatile int *)&skel->bss->run_cnt; ++i) ++j; test_perf_branches__detach(skel); @@ -116,11 +128,11 @@ static void test_perf_branches_hw(void) pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); /* - * Some setups don't support branch records (virtual machines, !x86), - * so skip test in this case. + * Some setups don't support LBR (virtual machines, !x86, AMD Milan Zen + * 3 which only supports BRS), so skip test in this case. */ if (pfd < 0) { - if (errno == ENOENT || errno == EOPNOTSUPP) { + if (errno == ENOENT || errno == EOPNOTSUPP || errno == EINVAL) { printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n", __func__); test__skip(); diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c index c9f855e5da24..246eb259c08a 100644 --- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -28,6 +28,7 @@ static void test_success(void) bpf_program__set_autoload(skel->progs.two_regions, true); bpf_program__set_autoload(skel->progs.non_sleepable_1, true); bpf_program__set_autoload(skel->progs.non_sleepable_2, true); + bpf_program__set_autoload(skel->progs.nested_rcu_region, true); bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true); bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog, true); bpf_program__set_autoload(skel->progs.rcu_read_lock_global_subprog, true); @@ -78,7 +79,8 @@ static const char * const inproper_region_tests[] = { "non_sleepable_rcu_mismatch", "inproper_sleepable_helper", "inproper_sleepable_kfunc", - "nested_rcu_region", + "nested_rcu_region_unbalanced_1", + "nested_rcu_region_unbalanced_2", "rcu_read_lock_global_subprog_lock", "rcu_read_lock_global_subprog_unlock", "rcu_read_lock_sleepable_helper_global_subprog", diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c index d6bd5e16e637..d2c0542716a8 100644 --- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c @@ -44,3 +44,59 @@ void test_refcounted_kptr_wrong_owner(void) ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval"); refcounted_kptr__destroy(skel); } + +void test_percpu_hash_refcounted_kptr_refcount_leak(void) +{ + struct refcounted_kptr *skel; + int cpu_nr, fd, err, key = 0; + struct bpf_map *map; + size_t values_sz; + u64 *values; + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + + cpu_nr = libbpf_num_possible_cpus(); + if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus")) + return; + + values = calloc(cpu_nr, sizeof(u64)); + if (!ASSERT_OK_PTR(values, "calloc values")) + return; + + skel = refcounted_kptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) { + free(values); + return; + } + + values_sz = cpu_nr * sizeof(u64); + memset(values, 0, values_sz); + + map = skel->maps.percpu_hash; + err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0); + if (!ASSERT_OK(err, "bpf_map__update_elem")) + goto out; + + fd = bpf_program__fd(skel->progs.percpu_hash_refcount_leak); + err = bpf_prog_test_run_opts(fd, &opts); + if (!ASSERT_OK(err, 
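For context, the branch records being tested are requested through perf_event_attr roughly as below; this is the setup whose ENOENT/EOPNOTSUPP/EINVAL failures the test now maps to a skip (field values are illustrative, not lifted from the patch):

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Open a cycles event on CPU 0 that samples the branch stack; hardware
 * without LBR (VMs, !x86, AMD Zen 3 with BRS only) rejects this at
 * perf_event_open() time.
 */
static int open_branch_stack_event(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.freq = 1;
	attr.sample_freq = 1000;
	attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER |
				  PERF_SAMPLE_BRANCH_ANY;

	return syscall(__NR_perf_event_open, &attr, -1 /* pid */,
		       0 /* cpu */, -1 /* group fd */, PERF_FLAG_FD_CLOEXEC);
}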
"bpf_prog_test_run_opts")) + goto out; + if (!ASSERT_EQ(opts.retval, 2, "opts.retval")) + goto out; + + err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0); + if (!ASSERT_OK(err, "bpf_map__update_elem")) + goto out; + + fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount); + err = bpf_prog_test_run_opts(fd, &opts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + ASSERT_EQ(opts.retval, 1, "opts.retval"); + +out: + refcounted_kptr__destroy(skel); + free(values); +} diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c index 8c6c2043a432..f0a8c828f8f1 100644 --- a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c @@ -110,8 +110,8 @@ void serial_test_res_spin_lock_stress(void) ASSERT_OK(load_module("bpf_test_rqspinlock.ko", false), "load module AA"); sleep(5); unload_module("bpf_test_rqspinlock", false); - - ASSERT_OK(load_module_params("bpf_test_rqspinlock.ko", "test_ab=1", false), "load module ABBA"); - sleep(5); - unload_module("bpf_test_rqspinlock", false); + /* + * Insert bpf_test_rqspinlock.ko manually with test_mode=[1|2] to test + * other cases (ABBA, ABBCCA). + */ } diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index d1e4cb28a72c..64520684d2cb 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -17,6 +17,7 @@ #include "test_ringbuf_n.lskel.h" #include "test_ringbuf_map_key.lskel.h" #include "test_ringbuf_write.lskel.h" +#include "test_ringbuf_overwrite.lskel.h" #define EDONE 7777 @@ -497,6 +498,68 @@ cleanup: test_ringbuf_map_key_lskel__destroy(skel_map_key); } +static void ringbuf_overwrite_mode_subtest(void) +{ + unsigned long size, len1, len2, len3, len4, len5; + unsigned long expect_avail_data, expect_prod_pos, expect_over_pos; + struct test_ringbuf_overwrite_lskel *skel; + int page_size = getpagesize(); + int err; + + skel = test_ringbuf_overwrite_lskel__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + size = page_size; + len1 = page_size / 2; + len2 = page_size / 4; + len3 = size - len1 - len2 - BPF_RINGBUF_HDR_SZ * 3; + len4 = len3 - 8; + len5 = len3; /* retry with len3 */ + + skel->maps.ringbuf.max_entries = size; + skel->rodata->LEN1 = len1; + skel->rodata->LEN2 = len2; + skel->rodata->LEN3 = len3; + skel->rodata->LEN4 = len4; + skel->rodata->LEN5 = len5; + + skel->bss->pid = getpid(); + + err = test_ringbuf_overwrite_lskel__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + err = test_ringbuf_overwrite_lskel__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup; + + syscall(__NR_getpgid); + + ASSERT_EQ(skel->bss->reserve1_fail, 0, "reserve 1"); + ASSERT_EQ(skel->bss->reserve2_fail, 0, "reserve 2"); + ASSERT_EQ(skel->bss->reserve3_fail, 1, "reserve 3"); + ASSERT_EQ(skel->bss->reserve4_fail, 0, "reserve 4"); + ASSERT_EQ(skel->bss->reserve5_fail, 0, "reserve 5"); + + ASSERT_EQ(skel->bss->ring_size, size, "check_ring_size"); + + expect_avail_data = len2 + len4 + len5 + 3 * BPF_RINGBUF_HDR_SZ; + ASSERT_EQ(skel->bss->avail_data, expect_avail_data, "check_avail_size"); + + ASSERT_EQ(skel->bss->cons_pos, 0, "check_cons_pos"); + + expect_prod_pos = len1 + len2 + len4 + len5 + 4 * BPF_RINGBUF_HDR_SZ; + ASSERT_EQ(skel->bss->prod_pos, expect_prod_pos, "check_prod_pos"); + + expect_over_pos = len1 + BPF_RINGBUF_HDR_SZ; + 
ASSERT_EQ(skel->bss->over_pos, expect_over_pos, "check_over_pos"); + + test_ringbuf_overwrite_lskel__detach(skel); +cleanup: + test_ringbuf_overwrite_lskel__destroy(skel); +} + void test_ringbuf(void) { if (test__start_subtest("ringbuf")) @@ -507,4 +570,6 @@ void test_ringbuf(void) ringbuf_map_key_subtest(); if (test__start_subtest("ringbuf_write")) ringbuf_write_subtest(); + if (test__start_subtest("ringbuf_overwrite_mode")) + ringbuf_overwrite_mode_subtest(); } diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 036d4760d2c1..3dbcc091f16c 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -41,11 +41,7 @@ static struct bpf_object *obj; static __u32 index_zero; static int epfd; -static union sa46 { - struct sockaddr_in6 v6; - struct sockaddr_in v4; - sa_family_t family; -} srv_sa; +static struct sockaddr_storage srv_sa; #define RET_IF(condition, tag, format...) ({ \ if (CHECK_FAIL(condition)) { \ @@ -135,24 +131,24 @@ static int prepare_bpf_obj(void) return 0; } -static void sa46_init_loopback(union sa46 *sa, sa_family_t family) +static void ss_init_loopback(struct sockaddr_storage *sa, sa_family_t family) { memset(sa, 0, sizeof(*sa)); - sa->family = family; - if (sa->family == AF_INET6) - sa->v6.sin6_addr = in6addr_loopback; + sa->ss_family = family; + if (sa->ss_family == AF_INET6) + ((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_loopback; else - sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + ((struct sockaddr_in *)sa)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); } -static void sa46_init_inany(union sa46 *sa, sa_family_t family) +static void ss_init_inany(struct sockaddr_storage *sa, sa_family_t family) { memset(sa, 0, sizeof(*sa)); - sa->family = family; - if (sa->family == AF_INET6) - sa->v6.sin6_addr = in6addr_any; + sa->ss_family = family; + if (sa->ss_family == AF_INET6) + ((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_any; else - sa->v4.sin_addr.s_addr = INADDR_ANY; + ((struct sockaddr_in *)sa)->sin_addr.s_addr = INADDR_ANY; } static int read_int_sysctl(const char *sysctl) @@ -228,7 +224,7 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd, int cli_fd) { struct data_check expected = {}, result; - union sa46 cli_sa; + struct sockaddr_storage cli_sa; socklen_t addrlen; int err; @@ -251,26 +247,32 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd, } if (family == AF_INET6) { + struct sockaddr_in6 *srv_v6 = (struct sockaddr_in6 *)&srv_sa; + struct sockaddr_in6 *cli_v6 = (struct sockaddr_in6 *)&cli_sa; + expected.eth_protocol = htons(ETH_P_IPV6); - expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] && - !srv_sa.v6.sin6_addr.s6_addr32[2] && - !srv_sa.v6.sin6_addr.s6_addr32[1] && - !srv_sa.v6.sin6_addr.s6_addr32[0]; + expected.bind_inany = !srv_v6->sin6_addr.s6_addr32[3] && + !srv_v6->sin6_addr.s6_addr32[2] && + !srv_v6->sin6_addr.s6_addr32[1] && + !srv_v6->sin6_addr.s6_addr32[0]; - memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32, - sizeof(cli_sa.v6.sin6_addr)); + memcpy(&expected.skb_addrs[0], cli_v6->sin6_addr.s6_addr32, + sizeof(cli_v6->sin6_addr)); memcpy(&expected.skb_addrs[4], &in6addr_loopback, sizeof(in6addr_loopback)); - expected.skb_ports[0] = cli_sa.v6.sin6_port; - expected.skb_ports[1] = srv_sa.v6.sin6_port; + expected.skb_ports[0] = cli_v6->sin6_port; + expected.skb_ports[1] = srv_v6->sin6_port; } else { + struct sockaddr_in 
*srv_v4 = (struct sockaddr_in *)&srv_sa; + struct sockaddr_in *cli_v4 = (struct sockaddr_in *)&cli_sa; + expected.eth_protocol = htons(ETH_P_IP); - expected.bind_inany = !srv_sa.v4.sin_addr.s_addr; + expected.bind_inany = !srv_v4->sin_addr.s_addr; - expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr; + expected.skb_addrs[0] = cli_v4->sin_addr.s_addr; expected.skb_addrs[1] = htonl(INADDR_LOOPBACK); - expected.skb_ports[0] = cli_sa.v4.sin_port; - expected.skb_ports[1] = srv_sa.v4.sin_port; + expected.skb_ports[0] = cli_v4->sin_port; + expected.skb_ports[1] = srv_v4->sin_port; } if (memcmp(&result, &expected, offsetof(struct data_check, @@ -364,16 +366,15 @@ static void check_results(void) static int send_data(int type, sa_family_t family, void *data, size_t len, enum result expected) { - union sa46 cli_sa; + struct sockaddr_storage cli_sa; int fd, err; fd = socket(family, type, 0); RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno); - sa46_init_loopback(&cli_sa, family); + ss_init_loopback(&cli_sa, family); err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa)); RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno); - err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa, sizeof(srv_sa)); RET_ERR(err != len && expected >= PASS, @@ -589,9 +590,9 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany) socklen_t addrlen; if (inany) - sa46_init_inany(&srv_sa, family); + ss_init_inany(&srv_sa, family); else - sa46_init_loopback(&srv_sa, family); + ss_init_loopback(&srv_sa, family); addrlen = sizeof(srv_sa); /* diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 1702aa592c2c..7ac4d5a488aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -206,6 +206,11 @@ destroy_skel: skel_open_load_failure: close(pipe_c2p[0]); close(pipe_p2c[1]); + /* + * Child is either about to exit cleanly or stuck in case of errors. + * Nudge it to exit. + */ + kill(pid, SIGKILL); wait(NULL); } diff --git a/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c new file mode 100644 index 000000000000..e4940583924b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include <test_progs.h> +#include "sk_bypass_prot_mem.skel.h" +#include "network_helpers.h" + +#define NR_PAGES 32 +#define NR_SOCKETS 2 +#define BUF_TOTAL (NR_PAGES * 4096 / NR_SOCKETS) +#define BUF_SINGLE 1024 +#define NR_SEND (BUF_TOTAL / BUF_SINGLE) + +struct test_case { + char name[8]; + int family; + int type; + int (*create_sockets)(struct test_case *test_case, int sk[], int len); + long (*get_memory_allocated)(struct test_case *test_case, struct sk_bypass_prot_mem *skel); +}; + +static int tcp_create_sockets(struct test_case *test_case, int sk[], int len) +{ + int server, i, err = 0; + + server = start_server(test_case->family, test_case->type, NULL, 0, 0); + if (!ASSERT_GE(server, 0, "start_server_str")) + return server; + + /* Keep for-loop so we can change NR_SOCKETS easily. 
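The sizing constants at the top of the new sk_bypass_prot_mem test are chosen so the send loop provably crosses NR_PAGES of protocol memory; spelled out (worked example):

/* BUF_TOTAL = NR_PAGES * 4096 / NR_SOCKETS = 32 * 4096 / 2 = 65536
 * NR_SEND   = BUF_TOTAL / BUF_SINGLE       = 65536 / 1024   = 64
 *
 * check_bypass() sends NR_SEND * BUF_SINGLE = 64 KiB on each of the
 * NR_SOCKETS sockets, i.e. 128 KiB total = exactly NR_PAGES (32) pages
 * of payload, so accounted memory (payload plus skb overhead) exceeds
 * NR_PAGES whenever bypass is off.
 */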
*/ + for (i = 0; i < len; i += 2) { + sk[i] = connect_to_fd(server, 0); + if (sk[i] < 0) { + ASSERT_GE(sk[i], 0, "connect_to_fd"); + err = sk[i]; + break; + } + + sk[i + 1] = accept(server, NULL, NULL); + if (sk[i + 1] < 0) { + ASSERT_GE(sk[i + 1], 0, "accept"); + err = sk[i + 1]; + break; + } + } + + close(server); + + return err; +} + +static int udp_create_sockets(struct test_case *test_case, int sk[], int len) +{ + int i, j, err, rcvbuf = BUF_TOTAL; + + /* Keep for-loop so we can change NR_SOCKETS easily. */ + for (i = 0; i < len; i += 2) { + sk[i] = start_server(test_case->family, test_case->type, NULL, 0, 0); + if (sk[i] < 0) { + ASSERT_GE(sk[i], 0, "start_server"); + return sk[i]; + } + + sk[i + 1] = connect_to_fd(sk[i], 0); + if (sk[i + 1] < 0) { + ASSERT_GE(sk[i + 1], 0, "connect_to_fd"); + return sk[i + 1]; + } + + err = connect_fd_to_fd(sk[i], sk[i + 1], 0); + if (err) { + ASSERT_EQ(err, 0, "connect_fd_to_fd"); + return err; + } + + for (j = 0; j < 2; j++) { + err = setsockopt(sk[i + j], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int)); + if (err) { + ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)"); + return err; + } + } + } + + return 0; +} + +static long get_memory_allocated(struct test_case *test_case, + bool *activated, long *memory_allocated) +{ + int sk; + + *activated = true; + + /* AF_INET and AF_INET6 share the same memory_allocated. + * tcp_init_sock() is called by AF_INET and AF_INET6, + * but udp_lib_init_sock() is inline. + */ + sk = socket(AF_INET, test_case->type, 0); + if (!ASSERT_GE(sk, 0, "get_memory_allocated")) + return -1; + + close(sk); + + return *memory_allocated; +} + +static long tcp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel) +{ + return get_memory_allocated(test_case, + &skel->bss->tcp_activated, + &skel->bss->tcp_memory_allocated); +} + +static long udp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel) +{ + return get_memory_allocated(test_case, + &skel->bss->udp_activated, + &skel->bss->udp_memory_allocated); +} + +static int check_bypass(struct test_case *test_case, + struct sk_bypass_prot_mem *skel, bool bypass) +{ + char buf[BUF_SINGLE] = {}; + long memory_allocated[2]; + int sk[NR_SOCKETS]; + int err, i, j; + + for (i = 0; i < ARRAY_SIZE(sk); i++) + sk[i] = -1; + + err = test_case->create_sockets(test_case, sk, ARRAY_SIZE(sk)); + if (err) + goto close; + + memory_allocated[0] = test_case->get_memory_allocated(test_case, skel); + + /* allocate pages >= NR_PAGES */ + for (i = 0; i < ARRAY_SIZE(sk); i++) { + for (j = 0; j < NR_SEND; j++) { + int bytes = send(sk[i], buf, sizeof(buf), 0); + + /* Avoid too noisy logs when something failed. */ + if (bytes != sizeof(buf)) { + ASSERT_EQ(bytes, sizeof(buf), "send"); + if (bytes < 0) { + err = bytes; + goto drain; + } + } + } + } + + memory_allocated[1] = test_case->get_memory_allocated(test_case, skel); + + if (bypass) + ASSERT_LE(memory_allocated[1], memory_allocated[0] + 10, "bypass"); + else + ASSERT_GT(memory_allocated[1], memory_allocated[0] + NR_PAGES, "no bypass"); + +drain: + if (test_case->type == SOCK_DGRAM) { + /* UDP starts purging sk->sk_receive_queue after one RCU + * grace period, then udp_memory_allocated goes down, + * so drain the queue before close(). 
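get_memory_allocated() publishes a fresh counter reading by creating and closing a throwaway socket, which fires the skeleton's fentry probe. A heavily hedged sketch of the BPF side; the real program is progs/sk_bypass_prot_mem.c, the hook choice and field access here are assumptions, and the nr_cpus field set later in run_test() suggests the real probe also folds in per-CPU reserves:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

bool tcp_activated;
long tcp_memory_allocated;

/* tcp_init_sock() runs for both AF_INET and AF_INET6, so one hook
 * covers both families; snapshot the protocol-wide counter there.
 */
SEC("fentry/tcp_init_sock")
int BPF_PROG(sample_tcp_memory, struct sock *sk)
{
	if (tcp_activated)
		tcp_memory_allocated = sk->sk_prot->memory_allocated->counter;
	return 0;
}

char _license[] SEC("license") = "GPL";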
+ */ + for (i = 0; i < ARRAY_SIZE(sk); i++) { + for (j = 0; j < NR_SEND; j++) { + int bytes = recv(sk[i], buf, 1, MSG_DONTWAIT | MSG_TRUNC); + + if (bytes == sizeof(buf)) + continue; + if (bytes != -1 || errno != EAGAIN) + PRINT_FAIL("bytes: %d, errno: %s\n", bytes, strerror(errno)); + break; + } + } + } + +close: + for (i = 0; i < ARRAY_SIZE(sk); i++) { + if (sk[i] < 0) + break; + + close(sk[i]); + } + + return err; +} + +static void run_test(struct test_case *test_case) +{ + struct sk_bypass_prot_mem *skel; + struct nstoken *nstoken; + int cgroup, err; + + skel = sk_bypass_prot_mem__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + skel->bss->nr_cpus = libbpf_num_possible_cpus(); + + err = sk_bypass_prot_mem__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto destroy_skel; + + cgroup = test__join_cgroup("/sk_bypass_prot_mem"); + if (!ASSERT_GE(cgroup, 0, "join_cgroup")) + goto destroy_skel; + + err = make_netns("sk_bypass_prot_mem"); + if (!ASSERT_EQ(err, 0, "make_netns")) + goto close_cgroup; + + nstoken = open_netns("sk_bypass_prot_mem"); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto remove_netns; + + err = check_bypass(test_case, skel, false); + if (!ASSERT_EQ(err, 0, "test_bypass(false)")) + goto close_netns; + + err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "1"); + if (!ASSERT_EQ(err, 0, "write_sysctl(1)")) + goto close_netns; + + err = check_bypass(test_case, skel, true); + if (!ASSERT_EQ(err, 0, "test_bypass(true by sysctl)")) + goto close_netns; + + err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "0"); + if (!ASSERT_EQ(err, 0, "write_sysctl(0)")) + goto close_netns; + + skel->links.sock_create = bpf_program__attach_cgroup(skel->progs.sock_create, cgroup); + if (!ASSERT_OK_PTR(skel->links.sock_create, "attach_cgroup(sock_create)")) + goto close_netns; + + err = check_bypass(test_case, skel, true); + ASSERT_EQ(err, 0, "test_bypass(true by bpf)"); + +close_netns: + close_netns(nstoken); +remove_netns: + remove_netns("sk_bypass_prot_mem"); +close_cgroup: + close(cgroup); +destroy_skel: + sk_bypass_prot_mem__destroy(skel); +} + +static struct test_case test_cases[] = { + { + .name = "TCP ", + .family = AF_INET, + .type = SOCK_STREAM, + .create_sockets = tcp_create_sockets, + .get_memory_allocated = tcp_get_memory_allocated, + }, + { + .name = "UDP ", + .family = AF_INET, + .type = SOCK_DGRAM, + .create_sockets = udp_create_sockets, + .get_memory_allocated = udp_get_memory_allocated, + }, + { + .name = "TCPv6", + .family = AF_INET6, + .type = SOCK_STREAM, + .create_sockets = tcp_create_sockets, + .get_memory_allocated = tcp_get_memory_allocated, + }, + { + .name = "UDPv6", + .family = AF_INET6, + .type = SOCK_DGRAM, + .create_sockets = udp_create_sockets, + .get_memory_allocated = udp_get_memory_allocated, + }, +}; + +void serial_test_sk_bypass_prot_mem(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + if (test__start_subtest(test_cases[i].name)) + run_test(&test_cases[i]); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c index 4d66fad3c8bd..0f3bf594e7a5 100644 --- a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c +++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c @@ -20,7 +20,9 @@ static const char * const test_cases[] = { "strcspn_str", "strcspn_reject", "strstr", + "strcasestr", "strnstr", + "strncasestr", }; void run_too_long_tests(void) diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c 
b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c new file mode 100644 index 000000000000..de22734abc4d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c @@ -0,0 +1,390 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <linux/genetlink.h> +#include "network_helpers.h" +#include "bpf_smc.skel.h" + +#ifndef IPPROTO_SMC +#define IPPROTO_SMC 256 +#endif + +#define CLIENT_IP "127.0.0.1" +#define SERVER_IP "127.0.1.0" +#define SERVER_IP_VIA_RISK_PATH "127.0.2.0" + +#define SERVICE_1 80 +#define SERVICE_2 443 +#define SERVICE_3 8443 + +#define TEST_NS "bpf_smc_netns" + +static struct netns_obj *test_netns; + +struct smc_policy_ip_key { + __u32 sip; + __u32 dip; +}; + +struct smc_policy_ip_value { + __u8 mode; +}; + +#if defined(__s390x__) +/* s390x has default seid */ +static bool setup_ueid(void) { return true; } +static void cleanup_ueid(void) {} +#else +enum { + SMC_NETLINK_ADD_UEID = 10, + SMC_NETLINK_REMOVE_UEID +}; + +enum { + SMC_NLA_EID_TABLE_UNSPEC, + SMC_NLA_EID_TABLE_ENTRY, /* string */ +}; + +struct msgtemplate { + struct nlmsghdr n; + struct genlmsghdr g; + char buf[1024]; +}; + +#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) +#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) +#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) +#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN) + +#define SMC_GENL_FAMILY_NAME "SMC_GEN_NETLINK" +#define SMC_BPFTEST_UEID "SMC-BPFTEST-UEID" + +static uint16_t smc_nl_family_id = -1; + +static int send_cmd(int fd, __u16 nlmsg_type, __u32 nlmsg_pid, + __u16 nlmsg_flags, __u8 genl_cmd, __u16 nla_type, + void *nla_data, int nla_len) +{ + struct nlattr *na; + struct sockaddr_nl nladdr; + int r, buflen; + char *buf; + + struct msgtemplate msg = {0}; + + msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); + msg.n.nlmsg_type = nlmsg_type; + msg.n.nlmsg_flags = nlmsg_flags; + msg.n.nlmsg_seq = 0; + msg.n.nlmsg_pid = nlmsg_pid; + msg.g.cmd = genl_cmd; + msg.g.version = 1; + na = (struct nlattr *)GENLMSG_DATA(&msg); + na->nla_type = nla_type; + na->nla_len = nla_len + 1 + NLA_HDRLEN; + memcpy(NLA_DATA(na), nla_data, nla_len); + msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); + + buf = (char *)&msg; + buflen = msg.n.nlmsg_len; + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + + while ((r = sendto(fd, buf, buflen, 0, (struct sockaddr *)&nladdr, + sizeof(nladdr))) < buflen) { + if (r > 0) { + buf += r; + buflen -= r; + } else if (errno != EAGAIN) { + return -1; + } + } + return 0; +} + +static bool get_smc_nl_family_id(void) +{ + struct sockaddr_nl nl_src; + struct msgtemplate msg; + struct nlattr *nl; + int fd, ret; + pid_t pid; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (!ASSERT_OK_FD(fd, "nl_family socket")) + return false; + + pid = getpid(); + + memset(&nl_src, 0, sizeof(nl_src)); + nl_src.nl_family = AF_NETLINK; + nl_src.nl_pid = pid; + + ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src)); + if (!ASSERT_OK(ret, "nl_family bind")) + goto fail; + + ret = send_cmd(fd, GENL_ID_CTRL, pid, + NLM_F_REQUEST, CTRL_CMD_GETFAMILY, + CTRL_ATTR_FAMILY_NAME, (void *)SMC_GENL_FAMILY_NAME, + strlen(SMC_GENL_FAMILY_NAME)); + if (!ASSERT_OK(ret, "nl_family query")) + goto fail; + + ret = recv(fd, &msg, sizeof(msg), 0); + if (!ASSERT_FALSE(msg.n.nlmsg_type == NLMSG_ERROR || ret < 0 || + !NLMSG_OK(&msg.n, ret), "nl_family response")) + goto fail; + + nl = (struct nlattr *)GENLMSG_DATA(&msg); + nl = (struct nlattr *)((char *)nl + 
NLA_ALIGN(nl->nla_len)); + if (!ASSERT_EQ(nl->nla_type, CTRL_ATTR_FAMILY_ID, "nl_family nla type")) + goto fail; + + smc_nl_family_id = *(uint16_t *)NLA_DATA(nl); + close(fd); + return true; +fail: + close(fd); + return false; +} + +static bool smc_ueid(int op) +{ + struct sockaddr_nl nl_src; + struct msgtemplate msg; + struct nlmsgerr *err; + char test_ueid[32]; + int fd, ret; + pid_t pid; + + /* UEID required */ + memset(test_ueid, '\x20', sizeof(test_ueid)); + memcpy(test_ueid, SMC_BPFTEST_UEID, strlen(SMC_BPFTEST_UEID)); + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (!ASSERT_OK_FD(fd, "ueid socket")) + return false; + + pid = getpid(); + memset(&nl_src, 0, sizeof(nl_src)); + nl_src.nl_family = AF_NETLINK; + nl_src.nl_pid = pid; + + ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src)); + if (!ASSERT_OK(ret, "ueid bind")) + goto fail; + + ret = send_cmd(fd, smc_nl_family_id, pid, + NLM_F_REQUEST | NLM_F_ACK, op, SMC_NLA_EID_TABLE_ENTRY, + (void *)test_ueid, sizeof(test_ueid)); + if (!ASSERT_OK(ret, "ueid cmd")) + goto fail; + + ret = recv(fd, &msg, sizeof(msg), 0); + if (!ASSERT_FALSE(ret < 0 || + !NLMSG_OK(&msg.n, ret), "ueid response")) + goto fail; + + if (msg.n.nlmsg_type == NLMSG_ERROR) { + err = NLMSG_DATA(&msg); + switch (op) { + case SMC_NETLINK_REMOVE_UEID: + if (!ASSERT_FALSE((err->error && err->error != -ENOENT), + "ueid remove")) + goto fail; + break; + case SMC_NETLINK_ADD_UEID: + if (!ASSERT_OK(err->error, "ueid add")) + goto fail; + break; + default: + break; + } + } + close(fd); + return true; +fail: + close(fd); + return false; +} + +static bool setup_ueid(void) +{ + /* get smc nl id */ + if (!get_smc_nl_family_id()) + return false; + /* clear old ueid for bpftest */ + smc_ueid(SMC_NETLINK_REMOVE_UEID); + /* smc-loopback required ueid */ + return smc_ueid(SMC_NETLINK_ADD_UEID); +} + +static void cleanup_ueid(void) +{ + smc_ueid(SMC_NETLINK_REMOVE_UEID); +} +#endif /* __s390x__ */ + +static bool setup_netns(void) +{ + test_netns = netns_new(TEST_NS, true); + if (!ASSERT_OK_PTR(test_netns, "open net namespace")) + goto fail_netns; + + SYS(fail_ip, "ip addr add 127.0.1.0/8 dev lo"); + SYS(fail_ip, "ip addr add 127.0.2.0/8 dev lo"); + + return true; +fail_ip: + netns_free(test_netns); +fail_netns: + return false; +} + +static void cleanup_netns(void) +{ + netns_free(test_netns); +} + +static bool setup_smc(void) +{ + if (!setup_ueid()) + return false; + + if (!setup_netns()) + goto fail_netns; + + return true; +fail_netns: + cleanup_ueid(); + return false; +} + +static int set_client_addr_cb(int fd, void *opts) +{ + const char *src = (const char *)opts; + struct sockaddr_in localaddr; + + localaddr.sin_family = AF_INET; + localaddr.sin_port = htons(0); + localaddr.sin_addr.s_addr = inet_addr(src); + return !ASSERT_OK(bind(fd, &localaddr, sizeof(localaddr)), "client bind"); +} + +static void run_link(const char *src, const char *dst, int port) +{ + struct network_helper_opts opts = {0}; + int server, client; + + server = start_server_str(AF_INET, SOCK_STREAM, dst, port, NULL); + if (!ASSERT_OK_FD(server, "start service_1")) + return; + + opts.proto = IPPROTO_TCP; + opts.post_socket_cb = set_client_addr_cb; + opts.cb_opts = (void *)src; + + client = connect_to_fd_opts(server, &opts); + if (!ASSERT_OK_FD(client, "start connect")) + goto fail_client; + + close(client); +fail_client: + close(server); +} + +static void block_link(int map_fd, const char *src, const char *dst) +{ + struct smc_policy_ip_value val = { .mode = /* block */ 0 }; + struct 
smc_policy_ip_key key = {
+		.sip = inet_addr(src),
+		.dip = inet_addr(dst),
+	};
+
+	bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
+}
+
+/*
+ * This test describes a real-life service topology as follows:
+ *
+ *                                 +-------------> service_1
+ *              link 1             |      |
+ *     +--------------------> server      | link 2
+ *     |                           |      V
+ *     |                           +-------------> service_2
+ *     |        link 3
+ *  client -------------------> server_via_unsafe_path -> service_3
+ *
+ * Among them,
+ * 1. link-1 is very suitable for using SMC.
+ * 2. link-2 is not suitable for using SMC, because it mostly carries
+ *    short-lived connections.
+ * 3. link-3 is also not suitable for using SMC, because the RDMA link is
+ *    unavailable and needs to go through a long timeout before it can fall
+ *    back to TCP.
+ * To achieve this goal, we use a customized SMC IP strategy via smc_hs_ctrl.
+ */
+static void test_topo(void)
+{
+	struct bpf_smc *skel;
+	int rc, map_fd;
+
+	skel = bpf_smc__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_smc__open_and_load"))
+		return;
+
+	rc = bpf_smc__attach(skel);
+	if (!ASSERT_OK(rc, "bpf_smc__attach"))
+		goto fail;
+
+	map_fd = bpf_map__fd(skel->maps.smc_policy_ip);
+	if (!ASSERT_OK_FD(map_fd, "bpf_map__fd"))
+		goto fail;
+
+	/* Mock the process of transparent replacement: the protocol is
+	 * switched to IPPROTO_SMC according to this setting via
+	 * fmod_ret/update_socket_protocol.
+	 */
+	write_sysctl("/proc/sys/net/smc/hs_ctrl", "linkcheck");
+
+	/* Configure the IP strategy */
+	block_link(map_fd, CLIENT_IP, SERVER_IP_VIA_RISK_PATH);
+	block_link(map_fd, SERVER_IP, SERVER_IP);
+
+	/* should go with smc */
+	run_link(CLIENT_IP, SERVER_IP, SERVICE_1);
+	/* should go with smc fallback */
+	run_link(SERVER_IP, SERVER_IP, SERVICE_2);
+
+	ASSERT_EQ(skel->bss->smc_cnt, 2, "smc count");
+	ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
+
+	/* should go with smc */
+	run_link(CLIENT_IP, SERVER_IP, SERVICE_2);
+
+	ASSERT_EQ(skel->bss->smc_cnt, 3, "smc count");
+	ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
+
+	/* should go with smc fallback */
+	run_link(CLIENT_IP, SERVER_IP_VIA_RISK_PATH, SERVICE_3);
+
+	ASSERT_EQ(skel->bss->smc_cnt, 4, "smc count");
+	ASSERT_EQ(skel->bss->fallback_cnt, 2, "fallback count");
+
+fail:
+	bpf_smc__destroy(skel);
+}
+
+void test_bpf_smc(void)
+{
+	if (!setup_smc()) {
+		printf("setup for smc test failed, test SKIP\n");
+		test__skip();
+		return;
+	}
+
+	if (test__start_subtest("topo"))
+		test_topo();
+
+	cleanup_ueid();
+	cleanup_netns();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index 2a27f3714f5c..bdc4fc06bc5a 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -139,7 +139,7 @@ static void test_lsm_tailcall(void)
 	if (CHECK_FAIL(!err))
 		goto close_prog;
 
-	prog_fd = bpf_program__fd(skel->progs.lsm_file_alloc_security_prog);
+	prog_fd = bpf_program__fd(skel->progs.lsm_kernfs_init_security_prog);
 	if (CHECK_FAIL(prog_fd < 0))
 		goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c b/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c
new file mode 100644
index 000000000000..462512fb191f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * BPF-based flow shaping
+ *
+ * The test brings up two veths in two isolated namespaces, attaches a flow
+ * shaping program to one of them, and ensures that the measured maximum
+
* value matches the rate set in the BPF shapers. + */ + +#include <asm-generic/socket.h> +#include <stdio.h> +#include <unistd.h> +#include <fcntl.h> +#include <math.h> +#include <sys/time.h> +#include <sys/socket.h> +#include <bpf/libbpf.h> +#include <pthread.h> +#include "test_progs.h" +#include "network_helpers.h" +#include "test_tc_edt.skel.h" + +#define SERVER_NS "tc-edt-server-ns" +#define CLIENT_NS "tc-edt-client-ns" +#define IP4_ADDR_VETH1 "192.168.1.1" +#define IP4_ADDR_VETH2 "192.168.1.2" +#define IP4_ADDR_VETH2_HEX 0xC0A80102 + +#define TIMEOUT_MS 2000 +#define TEST_PORT 9000 +#define TARGET_RATE_MBPS 5.0 +#define TX_BYTES_COUNT (1 * 1000 * 1000) +#define RATE_ERROR_PERCENT 2.0 + +struct connection { + int server_listen_fd; + int server_conn_fd; + int client_conn_fd; +}; + +static int setup(struct test_tc_edt *skel) +{ + struct nstoken *nstoken_client, *nstoken_server; + int ret; + + if (!ASSERT_OK(make_netns(CLIENT_NS), "create client ns")) + goto fail; + if (!ASSERT_OK(make_netns(SERVER_NS), "create server ns")) + goto fail_delete_client_ns; + + nstoken_client = open_netns(CLIENT_NS); + if (!ASSERT_OK_PTR(nstoken_client, "open client ns")) + goto fail_delete_server_ns; + SYS(fail_close_client_ns, "ip link add veth1 type veth peer name %s", + "veth2 netns " SERVER_NS); + SYS(fail_close_client_ns, "ip -4 addr add " IP4_ADDR_VETH1 "/24 dev veth1"); + SYS(fail_close_client_ns, "ip link set veth1 up"); + + nstoken_server = open_netns(SERVER_NS); + if (!ASSERT_OK_PTR(nstoken_server, "enter server ns")) + goto fail_close_client_ns; + SYS(fail_close_server_ns, "ip -4 addr add " IP4_ADDR_VETH2 "/24 dev veth2"); + SYS(fail_close_server_ns, "ip link set veth2 up"); + SYS(fail_close_server_ns, "tc qdisc add dev veth2 root fq"); + ret = tc_prog_attach("veth2", -1, bpf_program__fd(skel->progs.tc_prog)); + if (!ASSERT_OK(ret, "attach bpf prog")) + goto fail_close_server_ns; + skel->bss->target_rate = TARGET_RATE_MBPS * 1000 * 1000; + close_netns(nstoken_server); + close_netns(nstoken_client); + + return 0; + +fail_close_server_ns: + close_netns(nstoken_server); +fail_close_client_ns: + close_netns(nstoken_client); +fail_delete_server_ns: + remove_netns(SERVER_NS); +fail_delete_client_ns: + remove_netns(CLIENT_NS); +fail: + return -1; +} + +static void cleanup(void) +{ + remove_netns(CLIENT_NS); + remove_netns(SERVER_NS); +} + +static void run_test(void) +{ + int server_fd, client_fd, err; + double rate_mbps, rate_error; + struct nstoken *nstoken; + __u64 ts_start, ts_end; + + nstoken = open_netns(SERVER_NS); + if (!ASSERT_OK_PTR(nstoken, "open server ns")) + return; + server_fd = start_server(AF_INET, SOCK_STREAM, IP4_ADDR_VETH2, + TEST_PORT, TIMEOUT_MS); + if (!ASSERT_OK_FD(server_fd, "start server")) + return; + + close_netns(nstoken); + nstoken = open_netns(CLIENT_NS); + if (!ASSERT_OK_PTR(nstoken, "open client ns")) + return; + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_OK_FD(client_fd, "connect client")) + return; + + ts_start = get_time_ns(); + err = send_recv_data(server_fd, client_fd, TX_BYTES_COUNT); + ts_end = get_time_ns(); + close_netns(nstoken); + ASSERT_OK(err, "send_recv_data"); + + rate_mbps = TX_BYTES_COUNT / ((ts_end - ts_start) / 1000.0); + rate_error = + fabs((rate_mbps - TARGET_RATE_MBPS) * 100.0 / TARGET_RATE_MBPS); + + ASSERT_LE(rate_error, RATE_ERROR_PERCENT, + "rate error is lower than threshold"); +} + +void test_tc_edt(void) +{ + struct test_tc_edt *skel; + + skel = test_tc_edt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open and load")) + 
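A quick units check on run_test() above, assuming get_time_ns() returns nanoseconds as the name suggests (worked example, not test code):

/* rate_mbps = TX_BYTES_COUNT / ((ts_end - ts_start) / 1000.0)
 *           = bytes / microseconds = megabytes per second.
 *
 * At the 5 MB/s target, 1,000,000 bytes take 0.2 s (2e8 ns), giving
 * 1e6 / 2e5 = 5.0, i.e. within RATE_ERROR_PERCENT of TARGET_RATE_MBPS.
 * This is consistent with the shaper's target_rate being programmed in
 * bytes per second (TARGET_RATE_MBPS * 1000 * 1000).
 */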
return; + + if (!ASSERT_OK(setup(skel), "global setup")) + return; + + run_test(); + + cleanup(); + test_tc_edt__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c new file mode 100644 index 000000000000..0fe0a8f62486 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c @@ -0,0 +1,714 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * End-to-end eBPF tunnel test suite + * The file tests BPF network tunnels implementation. For each tunnel + * type, the test validates that: + * - basic communication can first be established between the two veths + * - when adding a BPF-based encapsulation on client egress, it now fails + * to communicate with the server + * - when adding a kernel-based decapsulation on server ingress, client + * can now connect + * - when replacing the kernel-based decapsulation with a BPF-based one, + * the client can still connect + */ + +#include <stdio.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/socket.h> +#include <bpf/libbpf.h> + +#include "test_progs.h" +#include "network_helpers.h" +#include "test_tc_tunnel.skel.h" + +#define SERVER_NS "tc-tunnel-server-ns" +#define CLIENT_NS "tc-tunnel-client-ns" +#define MAC_ADDR_VETH1 "00:11:22:33:44:55" +#define IP4_ADDR_VETH1 "192.168.1.1" +#define IP6_ADDR_VETH1 "fd::1" +#define MAC_ADDR_VETH2 "66:77:88:99:AA:BB" +#define IP4_ADDR_VETH2 "192.168.1.2" +#define IP6_ADDR_VETH2 "fd::2" + +#define TEST_NAME_MAX_LEN 64 +#define PROG_NAME_MAX_LEN 64 +#define TUNNEL_ARGS_MAX_LEN 128 +#define BUFFER_LEN 2000 +#define DEFAULT_TEST_DATA_SIZE 100 +#define GSO_TEST_DATA_SIZE BUFFER_LEN + +#define TIMEOUT_MS 1000 +#define TEST_PORT 8000 +#define UDP_PORT 5555 +#define MPLS_UDP_PORT 6635 +#define FOU_MPLS_PROTO 137 +#define VXLAN_ID 1 +#define VXLAN_PORT 8472 +#define MPLS_TABLE_ENTRIES_COUNT 65536 + +static char tx_buffer[BUFFER_LEN], rx_buffer[BUFFER_LEN]; + +struct subtest_cfg { + char *ebpf_tun_type; + char *iproute_tun_type; + char *mac_tun_type; + int ipproto; + void (*extra_decap_mod_args_cb)(struct subtest_cfg *cfg, char *dst); + bool tunnel_need_veth_mac; + bool configure_fou_rx_port; + char *tmode; + bool expect_kern_decap_failure; + bool configure_mpls; + bool test_gso; + char *tunnel_client_addr; + char *tunnel_server_addr; + char name[TEST_NAME_MAX_LEN]; + char *server_addr; + int client_egress_prog_fd; + int server_ingress_prog_fd; + char extra_decap_mod_args[TUNNEL_ARGS_MAX_LEN]; + int server_fd; +}; + +struct connection { + int client_fd; + int server_fd; +}; + +static int build_subtest_name(struct subtest_cfg *cfg, char *dst, size_t size) +{ + int ret; + + ret = snprintf(dst, size, "%s_%s", cfg->ebpf_tun_type, + cfg->mac_tun_type); + + return ret < 0 ? 
ret : 0; +} + +static int set_subtest_progs(struct subtest_cfg *cfg, struct test_tc_tunnel *skel) +{ + char prog_name[PROG_NAME_MAX_LEN]; + struct bpf_program *prog; + int ret; + + ret = snprintf(prog_name, PROG_NAME_MAX_LEN, "__encap_"); + if (ret < 0) + return ret; + ret = build_subtest_name(cfg, prog_name + ret, PROG_NAME_MAX_LEN - ret); + if (ret < 0) + return ret; + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!prog) + return -1; + + cfg->client_egress_prog_fd = bpf_program__fd(prog); + cfg->server_ingress_prog_fd = bpf_program__fd(skel->progs.decap_f); + return 0; +} + +static void set_subtest_addresses(struct subtest_cfg *cfg) +{ + if (cfg->ipproto == 6) + cfg->server_addr = IP6_ADDR_VETH2; + else + cfg->server_addr = IP4_ADDR_VETH2; + + /* Some specific tunnel types need specific addressing, it then + * has been already set in the configuration table. Otherwise, + * deduce the relevant addressing from the ipproto + */ + if (cfg->tunnel_client_addr && cfg->tunnel_server_addr) + return; + + if (cfg->ipproto == 6) { + cfg->tunnel_client_addr = IP6_ADDR_VETH1; + cfg->tunnel_server_addr = IP6_ADDR_VETH2; + } else { + cfg->tunnel_client_addr = IP4_ADDR_VETH1; + cfg->tunnel_server_addr = IP4_ADDR_VETH2; + } +} + +static int run_server(struct subtest_cfg *cfg) +{ + int family = cfg->ipproto == 6 ? AF_INET6 : AF_INET; + struct nstoken *nstoken; + struct network_helper_opts opts = { + .timeout_ms = TIMEOUT_MS + }; + + nstoken = open_netns(SERVER_NS); + if (!ASSERT_OK_PTR(nstoken, "open server ns")) + return -1; + + cfg->server_fd = start_server_str(family, SOCK_STREAM, cfg->server_addr, + TEST_PORT, &opts); + close_netns(nstoken); + if (!ASSERT_OK_FD(cfg->server_fd, "start server")) + return -1; + + return 0; +} + +static int check_server_rx_data(struct subtest_cfg *cfg, + struct connection *conn, int len) +{ + int err; + + memset(rx_buffer, 0, BUFFER_LEN); + err = recv(conn->server_fd, rx_buffer, len, 0); + if (!ASSERT_EQ(err, len, "check rx data len")) + return 1; + if (!ASSERT_MEMEQ(tx_buffer, rx_buffer, len, "check received data")) + return 1; + return 0; +} + +static struct connection *connect_client_to_server(struct subtest_cfg *cfg) +{ + struct network_helper_opts opts = {.timeout_ms = 500}; + int family = cfg->ipproto == 6 ? 
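The name plumbing in set_subtest_progs() above is easiest to see on a concrete entry from the subtests_cfg table at the end of the file (worked example):

/* { .ebpf_tun_type = "gre", .mac_tun_type = "eth" }:
 *
 *   build_subtest_name() -> "gre_eth"          (subtest name)
 *   set_subtest_progs()  -> "__encap_gre_eth"  (client egress prog;
 *                           snprintf("__encap_") returns 8, so the
 *                           suffix lands at prog_name + 8)
 *   server ingress prog  -> always skel->progs.decap_f
 */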
AF_INET6 : AF_INET; + struct connection *conn = NULL; + int client_fd, server_fd; + + conn = malloc(sizeof(struct connection)); + if (!conn) + return conn; + + client_fd = connect_to_addr_str(family, SOCK_STREAM, cfg->server_addr, + TEST_PORT, &opts); + + if (client_fd < 0) { + free(conn); + return NULL; + } + + server_fd = accept(cfg->server_fd, NULL, NULL); + if (server_fd < 0) { + close(client_fd); + free(conn); + return NULL; + } + + conn->server_fd = server_fd; + conn->client_fd = client_fd; + + return conn; +} + +static void disconnect_client_from_server(struct subtest_cfg *cfg, + struct connection *conn) +{ + close(conn->server_fd); + close(conn->client_fd); + free(conn); +} + +static int send_and_test_data(struct subtest_cfg *cfg, bool must_succeed) +{ + struct connection *conn; + int err, res = -1; + + conn = connect_client_to_server(cfg); + if (!must_succeed && !ASSERT_ERR_PTR(conn, "connection that must fail")) + goto end; + else if (!must_succeed) + return 0; + + if (!ASSERT_OK_PTR(conn, "connection that must succeed")) + return -1; + + err = send(conn->client_fd, tx_buffer, DEFAULT_TEST_DATA_SIZE, 0); + if (!ASSERT_EQ(err, DEFAULT_TEST_DATA_SIZE, "send data from client")) + goto end; + if (check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE)) + goto end; + + if (!cfg->test_gso) { + res = 0; + goto end; + } + + err = send(conn->client_fd, tx_buffer, GSO_TEST_DATA_SIZE, 0); + if (!ASSERT_EQ(err, GSO_TEST_DATA_SIZE, "send (large) data from client")) + goto end; + if (check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE)) + goto end; + + res = 0; +end: + disconnect_client_from_server(cfg, conn); + return res; +} + +static void vxlan_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst) +{ + snprintf(dst, TUNNEL_ARGS_MAX_LEN, "id %d dstport %d udp6zerocsumrx", + VXLAN_ID, VXLAN_PORT); +} + +static void udp_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst) +{ + bool is_mpls = !strcmp(cfg->mac_tun_type, "mpls"); + + snprintf(dst, TUNNEL_ARGS_MAX_LEN, + "encap fou encap-sport auto encap-dport %d", + is_mpls ? MPLS_UDP_PORT : UDP_PORT); +} + +static int configure_fou_rx_port(struct subtest_cfg *cfg, bool add) +{ + bool is_mpls = strcmp(cfg->mac_tun_type, "mpls") == 0; + int fou_proto; + + if (is_mpls) + fou_proto = FOU_MPLS_PROTO; + else + fou_proto = cfg->ipproto == 6 ? 41 : 4; + + SYS(fail, "ip fou %s port %d ipproto %d%s", add ? "add" : "del", + is_mpls ? MPLS_UDP_PORT : UDP_PORT, fou_proto, + cfg->ipproto == 6 ? 
" -6" : ""); + + return 0; +fail: + return 1; +} + +static int add_fou_rx_port(struct subtest_cfg *cfg) +{ + return configure_fou_rx_port(cfg, true); +} + +static int del_fou_rx_port(struct subtest_cfg *cfg) +{ + return configure_fou_rx_port(cfg, false); +} + +static int update_tunnel_intf_addr(struct subtest_cfg *cfg) +{ + SYS(fail, "ip link set dev testtun0 address " MAC_ADDR_VETH2); + return 0; +fail: + return -1; +} + +static int configure_kernel_for_mpls(struct subtest_cfg *cfg) +{ + SYS(fail, "sysctl -qw net.mpls.platform_labels=%d", + MPLS_TABLE_ENTRIES_COUNT); + SYS(fail, "ip -f mpls route add 1000 dev lo"); + SYS(fail, "ip link set lo up"); + SYS(fail, "sysctl -qw net.mpls.conf.testtun0.input=1"); + SYS(fail, "sysctl -qw net.ipv4.conf.lo.rp_filter=0"); + return 0; +fail: + return -1; +} + +static int configure_encapsulation(struct subtest_cfg *cfg) +{ + int ret; + + ret = tc_prog_attach("veth1", -1, cfg->client_egress_prog_fd); + + return ret; +} + +static int configure_kernel_decapsulation(struct subtest_cfg *cfg) +{ + struct nstoken *nstoken = open_netns(SERVER_NS); + int ret = -1; + + if (!ASSERT_OK_PTR(nstoken, "open server ns")) + return ret; + + if (cfg->configure_fou_rx_port && + !ASSERT_OK(add_fou_rx_port(cfg), "configure FOU RX port")) + goto fail; + SYS(fail, "ip link add name testtun0 type %s %s remote %s local %s %s", + cfg->iproute_tun_type, cfg->tmode ? cfg->tmode : "", + cfg->tunnel_client_addr, cfg->tunnel_server_addr, + cfg->extra_decap_mod_args); + if (cfg->tunnel_need_veth_mac && + !ASSERT_OK(update_tunnel_intf_addr(cfg), "update testtun0 mac")) + goto fail; + if (cfg->configure_mpls && + (!ASSERT_OK(configure_kernel_for_mpls(cfg), + "configure MPLS decap"))) + goto fail; + SYS(fail, "sysctl -qw net.ipv4.conf.all.rp_filter=0"); + SYS(fail, "sysctl -qw net.ipv4.conf.testtun0.rp_filter=0"); + SYS(fail, "ip link set dev testtun0 up"); + + ret = 0; +fail: + close_netns(nstoken); + return ret; +} + +static void remove_kernel_decapsulation(struct subtest_cfg *cfg) +{ + SYS_NOFAIL("ip link del testtun0"); + if (cfg->configure_mpls) + SYS_NOFAIL("ip -f mpls route del 1000 dev lo"); + if (cfg->configure_fou_rx_port) + del_fou_rx_port(cfg); +} + +static int configure_ebpf_decapsulation(struct subtest_cfg *cfg) +{ + struct nstoken *nstoken = open_netns(SERVER_NS); + int ret = -1; + + if (!ASSERT_OK_PTR(nstoken, "open server ns")) + return ret; + + if (!cfg->expect_kern_decap_failure) + SYS(fail, "ip link del testtun0"); + + if (!ASSERT_OK(tc_prog_attach("veth2", cfg->server_ingress_prog_fd, -1), + "attach_program")) + goto fail; + + ret = 0; +fail: + close_netns(nstoken); + return ret; +} + +static void run_test(struct subtest_cfg *cfg) +{ + struct nstoken *nstoken; + + if (!ASSERT_OK(run_server(cfg), "run server")) + return; + + nstoken = open_netns(CLIENT_NS); + if (!ASSERT_OK_PTR(nstoken, "open client ns")) + goto fail; + + /* Basic communication must work */ + if (!ASSERT_OK(send_and_test_data(cfg, true), "connect without any encap")) + goto fail; + + /* Attach encapsulation program to client */ + if (!ASSERT_OK(configure_encapsulation(cfg), "configure encapsulation")) + goto fail; + + /* If supported, insert kernel decap module, connection must succeed */ + if (!cfg->expect_kern_decap_failure) { + if (!ASSERT_OK(configure_kernel_decapsulation(cfg), + "configure kernel decapsulation")) + goto fail; + if (!ASSERT_OK(send_and_test_data(cfg, true), + "connect with encap prog and kern decap")) + goto fail; + } + + /* Replace kernel decapsulation with BPF decapsulation, 
test must pass */
+	if (!ASSERT_OK(configure_ebpf_decapsulation(cfg), "configure ebpf decapsulation"))
+		goto fail;
+	ASSERT_OK(send_and_test_data(cfg, true), "connect with encap and decap progs");
+
+fail:
+	close_netns(nstoken);
+	close(cfg->server_fd);
+}
+
+static int setup(void)
+{
+	struct nstoken *nstoken_client, *nstoken_server;
+	int fd, err;
+
+	fd = open("/dev/urandom", O_RDONLY);
+	if (!ASSERT_OK_FD(fd, "open urandom"))
+		goto fail;
+	err = read(fd, tx_buffer, BUFFER_LEN);
+	close(fd);
+
+	if (!ASSERT_EQ(err, BUFFER_LEN, "read random bytes"))
+		goto fail;
+
+	/* Configure the testing network */
+	if (!ASSERT_OK(make_netns(CLIENT_NS), "create client ns") ||
+	    !ASSERT_OK(make_netns(SERVER_NS), "create server ns"))
+		goto fail;
+
+	nstoken_client = open_netns(CLIENT_NS);
+	if (!ASSERT_OK_PTR(nstoken_client, "open client ns"))
+		goto fail_delete_ns;
+	SYS(fail_close_ns_client, "ip link add %s type veth peer name %s",
+	    "veth1 mtu 1500 netns " CLIENT_NS " address " MAC_ADDR_VETH1,
+	    "veth2 mtu 1500 netns " SERVER_NS " address " MAC_ADDR_VETH2);
+	SYS(fail_close_ns_client, "ethtool -K veth1 tso off");
+	SYS(fail_close_ns_client, "ip link set veth1 up");
+	nstoken_server = open_netns(SERVER_NS);
+	if (!ASSERT_OK_PTR(nstoken_server, "open server ns"))
+		goto fail_close_ns_client;
+	SYS(fail_close_ns_server, "ip link set veth2 up");
+
+	close_netns(nstoken_server);
+	close_netns(nstoken_client);
+	return 0;
+
+fail_close_ns_server:
+	close_netns(nstoken_server);
+fail_close_ns_client:
+	close_netns(nstoken_client);
+fail_delete_ns:
+	SYS_NOFAIL("ip netns del " CLIENT_NS);
+	SYS_NOFAIL("ip netns del " SERVER_NS);
+fail:
+	return -1;
+}
+
+static int subtest_setup(struct test_tc_tunnel *skel, struct subtest_cfg *cfg)
+{
+	struct nstoken *nstoken_client, *nstoken_server;
+	int ret = -1;
+
+	set_subtest_addresses(cfg);
+	if (!ASSERT_OK(set_subtest_progs(cfg, skel),
+		       "find subtest progs"))
+		goto fail;
+	if (cfg->extra_decap_mod_args_cb)
+		cfg->extra_decap_mod_args_cb(cfg, cfg->extra_decap_mod_args);
+
+	nstoken_client = open_netns(CLIENT_NS);
+	if (!ASSERT_OK_PTR(nstoken_client, "open client ns"))
+		goto fail;
+	SYS(fail_close_client_ns,
+	    "ip -4 addr add " IP4_ADDR_VETH1 "/24 dev veth1");
+	SYS(fail_close_client_ns, "ip -4 route flush table main");
+	SYS(fail_close_client_ns,
+	    "ip -4 route add " IP4_ADDR_VETH2 " mtu 1450 dev veth1");
+	SYS(fail_close_client_ns,
+	    "ip -6 addr add " IP6_ADDR_VETH1 "/64 dev veth1 nodad");
+	SYS(fail_close_client_ns, "ip -6 route flush table main");
+	SYS(fail_close_client_ns,
+	    "ip -6 route add " IP6_ADDR_VETH2 " mtu 1430 dev veth1");
+	nstoken_server = open_netns(SERVER_NS);
+	if (!ASSERT_OK_PTR(nstoken_server, "open server ns"))
+		goto fail_close_client_ns;
+	SYS(fail_close_server_ns,
+	    "ip -4 addr add " IP4_ADDR_VETH2 "/24 dev veth2");
+	SYS(fail_close_server_ns,
+	    "ip -6 addr add " IP6_ADDR_VETH2 "/64 dev veth2 nodad");
+
+	ret = 0;
+
+fail_close_server_ns:
+	close_netns(nstoken_server);
+fail_close_client_ns:
+	close_netns(nstoken_client);
+fail:
+	return ret;
+}
+
+
+static void subtest_cleanup(struct subtest_cfg *cfg)
+{
+	struct nstoken *nstoken;
+
+	nstoken = open_netns(CLIENT_NS);
+	if (ASSERT_OK_PTR(nstoken, "open client ns")) {
+		SYS_NOFAIL("tc qdisc delete dev veth1 parent ffff:fff1");
+		SYS_NOFAIL("ip a flush veth1");
+		close_netns(nstoken);
+	}
+	nstoken = open_netns(SERVER_NS);
+	if (ASSERT_OK_PTR(nstoken, "open server ns")) {
+		SYS_NOFAIL("tc qdisc delete dev veth2 parent ffff:fff1");
+		SYS_NOFAIL("ip a flush veth2");
+		if
(!cfg->expect_kern_decap_failure) + remove_kernel_decapsulation(cfg); + close_netns(nstoken); + } +} + +static void cleanup(void) +{ + remove_netns(CLIENT_NS); + remove_netns(SERVER_NS); +} + +static struct subtest_cfg subtests_cfg[] = { + { + .ebpf_tun_type = "ipip", + .mac_tun_type = "none", + .iproute_tun_type = "ipip", + .ipproto = 4, + }, + { + .ebpf_tun_type = "ipip6", + .mac_tun_type = "none", + .iproute_tun_type = "ip6tnl", + .ipproto = 4, + .tunnel_client_addr = IP6_ADDR_VETH1, + .tunnel_server_addr = IP6_ADDR_VETH2, + }, + { + .ebpf_tun_type = "ip6tnl", + .iproute_tun_type = "ip6tnl", + .mac_tun_type = "none", + .ipproto = 6, + }, + { + .mac_tun_type = "none", + .ebpf_tun_type = "sit", + .iproute_tun_type = "sit", + .ipproto = 6, + .tunnel_client_addr = IP4_ADDR_VETH1, + .tunnel_server_addr = IP4_ADDR_VETH2, + }, + { + .ebpf_tun_type = "vxlan", + .mac_tun_type = "eth", + .iproute_tun_type = "vxlan", + .ipproto = 4, + .extra_decap_mod_args_cb = vxlan_decap_mod_args_cb, + .tunnel_need_veth_mac = true + }, + { + .ebpf_tun_type = "ip6vxlan", + .mac_tun_type = "eth", + .iproute_tun_type = "vxlan", + .ipproto = 6, + .extra_decap_mod_args_cb = vxlan_decap_mod_args_cb, + .tunnel_need_veth_mac = true + }, + { + .ebpf_tun_type = "gre", + .mac_tun_type = "none", + .iproute_tun_type = "gre", + .ipproto = 4, + .test_gso = true + }, + { + .ebpf_tun_type = "gre", + .mac_tun_type = "eth", + .iproute_tun_type = "gretap", + .ipproto = 4, + .tunnel_need_veth_mac = true, + .test_gso = true + }, + { + .ebpf_tun_type = "gre", + .mac_tun_type = "mpls", + .iproute_tun_type = "gre", + .ipproto = 4, + .configure_mpls = true, + .test_gso = true + }, + { + .ebpf_tun_type = "ip6gre", + .mac_tun_type = "none", + .iproute_tun_type = "ip6gre", + .ipproto = 6, + .test_gso = true, + }, + { + .ebpf_tun_type = "ip6gre", + .mac_tun_type = "eth", + .iproute_tun_type = "ip6gretap", + .ipproto = 6, + .tunnel_need_veth_mac = true, + .test_gso = true + }, + { + .ebpf_tun_type = "ip6gre", + .mac_tun_type = "mpls", + .iproute_tun_type = "ip6gre", + .ipproto = 6, + .configure_mpls = true, + .test_gso = true + }, + { + .ebpf_tun_type = "udp", + .mac_tun_type = "none", + .iproute_tun_type = "ipip", + .ipproto = 4, + .extra_decap_mod_args_cb = udp_decap_mod_args_cb, + .configure_fou_rx_port = true, + .test_gso = true + }, + { + .ebpf_tun_type = "udp", + .mac_tun_type = "eth", + .iproute_tun_type = "ipip", + .ipproto = 4, + .extra_decap_mod_args_cb = udp_decap_mod_args_cb, + .configure_fou_rx_port = true, + .expect_kern_decap_failure = true, + .test_gso = true + }, + { + .ebpf_tun_type = "udp", + .mac_tun_type = "mpls", + .iproute_tun_type = "ipip", + .ipproto = 4, + .extra_decap_mod_args_cb = udp_decap_mod_args_cb, + .configure_fou_rx_port = true, + .tmode = "mode any ttl 255", + .configure_mpls = true, + .test_gso = true + }, + { + .ebpf_tun_type = "ip6udp", + .mac_tun_type = "none", + .iproute_tun_type = "ip6tnl", + .ipproto = 6, + .extra_decap_mod_args_cb = udp_decap_mod_args_cb, + .configure_fou_rx_port = true, + .test_gso = true + }, + { + .ebpf_tun_type = "ip6udp", + .mac_tun_type = "eth", + .iproute_tun_type = "ip6tnl", + .ipproto = 6, + .extra_decap_mod_args_cb = udp_decap_mod_args_cb, + .configure_fou_rx_port = true, + .expect_kern_decap_failure = true, + .test_gso = true + }, + { + .ebpf_tun_type = "ip6udp", + .mac_tun_type = "mpls", + .iproute_tun_type = "ip6tnl", + .ipproto = 6, + .extra_decap_mod_args_cb = udp_decap_mod_args_cb, + .configure_fou_rx_port = true, + .tmode = "mode any ttl 255", + 
.expect_kern_decap_failure = true, + .test_gso = true + }, +}; + +void test_tc_tunnel(void) +{ + struct test_tc_tunnel *skel; + struct subtest_cfg *cfg; + int i, ret; + + skel = test_tc_tunnel__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open and load")) + return; + + if (!ASSERT_OK(setup(), "global setup")) + return; + + for (i = 0; i < ARRAY_SIZE(subtests_cfg); i++) { + cfg = &subtests_cfg[i]; + ret = build_subtest_name(cfg, cfg->name, TEST_NAME_MAX_LEN); + if (ret < 0 || !test__start_subtest(cfg->name)) + continue; + if (subtest_setup(skel, cfg) == 0) + run_test(cfg); + subtest_cleanup(cfg); + } + cleanup(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index bae0e9de277d..eb9309931272 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -534,85 +534,6 @@ static void ping6_dev1(void) close_netns(nstoken); } -static int attach_tc_prog(int ifindex, int igr_fd, int egr_fd) -{ - DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex, - .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS); - DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, - .priority = 1, .prog_fd = igr_fd); - DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, - .priority = 1, .prog_fd = egr_fd); - int ret; - - ret = bpf_tc_hook_create(&hook); - if (!ASSERT_OK(ret, "create tc hook")) - return ret; - - if (igr_fd >= 0) { - hook.attach_point = BPF_TC_INGRESS; - ret = bpf_tc_attach(&hook, &opts1); - if (!ASSERT_OK(ret, "bpf_tc_attach")) { - bpf_tc_hook_destroy(&hook); - return ret; - } - } - - if (egr_fd >= 0) { - hook.attach_point = BPF_TC_EGRESS; - ret = bpf_tc_attach(&hook, &opts2); - if (!ASSERT_OK(ret, "bpf_tc_attach")) { - bpf_tc_hook_destroy(&hook); - return ret; - } - } - - return 0; -} - -static int generic_attach(const char *dev, int igr_fd, int egr_fd) -{ - int ifindex; - - if (!ASSERT_OK_FD(igr_fd, "check ingress fd")) - return -1; - if (!ASSERT_OK_FD(egr_fd, "check egress fd")) - return -1; - - ifindex = if_nametoindex(dev); - if (!ASSERT_NEQ(ifindex, 0, "get ifindex")) - return -1; - - return attach_tc_prog(ifindex, igr_fd, egr_fd); -} - -static int generic_attach_igr(const char *dev, int igr_fd) -{ - int ifindex; - - if (!ASSERT_OK_FD(igr_fd, "check ingress fd")) - return -1; - - ifindex = if_nametoindex(dev); - if (!ASSERT_NEQ(ifindex, 0, "get ifindex")) - return -1; - - return attach_tc_prog(ifindex, igr_fd, -1); -} - -static int generic_attach_egr(const char *dev, int egr_fd) -{ - int ifindex; - - if (!ASSERT_OK_FD(egr_fd, "check egress fd")) - return -1; - - ifindex = if_nametoindex(dev); - if (!ASSERT_NEQ(ifindex, 0, "get ifindex")) - return -1; - - return attach_tc_prog(ifindex, -1, egr_fd); -} - static void test_vxlan_tunnel(void) { struct test_tunnel_kern *skel = NULL; @@ -635,12 +556,12 @@ static void test_vxlan_tunnel(void) goto done; get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src); set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src); - if (generic_attach(VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) + if (tc_prog_attach(VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) goto done; /* load and attach bpf prog to veth dev tc hook point */ set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst); - if (generic_attach_igr("veth1", set_dst_prog_fd)) + if (tc_prog_attach("veth1", set_dst_prog_fd, -1)) goto done; /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ @@ -648,7 +569,7 
@@ static void test_vxlan_tunnel(void) if (!ASSERT_OK_PTR(nstoken, "setns src")) goto done; set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst); - if (generic_attach_egr(VXLAN_TUNL_DEV0, set_dst_prog_fd)) + if (tc_prog_attach(VXLAN_TUNL_DEV0, -1, set_dst_prog_fd)) goto done; close_netns(nstoken); @@ -695,7 +616,7 @@ static void test_ip6vxlan_tunnel(void) goto done; get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src); set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src); - if (generic_attach(IP6VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) + if (tc_prog_attach(IP6VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) goto done; /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ @@ -703,7 +624,7 @@ static void test_ip6vxlan_tunnel(void) if (!ASSERT_OK_PTR(nstoken, "setns src")) goto done; set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst); - if (generic_attach_egr(IP6VXLAN_TUNL_DEV0, set_dst_prog_fd)) + if (tc_prog_attach(IP6VXLAN_TUNL_DEV0, -1, set_dst_prog_fd)) goto done; close_netns(nstoken); @@ -764,7 +685,7 @@ static void test_ipip_tunnel(enum ipip_encap encap) skel->progs.ipip_set_tunnel); } - if (generic_attach(IPIP_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) + if (tc_prog_attach(IPIP_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd)) goto done; ping_dev0(); @@ -797,7 +718,7 @@ static void test_xfrm_tunnel(void) /* attach tc prog to tunnel dev */ tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state); - if (generic_attach_igr("veth1", tc_prog_fd)) + if (tc_prog_attach("veth1", tc_prog_fd, -1)) goto done; /* attach xdp prog to tunnel dev */ @@ -870,7 +791,7 @@ static void test_gre_tunnel(enum gre_test test) if (!ASSERT_OK(err, "add tunnel")) goto done; - if (generic_attach(GRE_TUNL_DEV1, get_fd, set_fd)) + if (tc_prog_attach(GRE_TUNL_DEV1, get_fd, set_fd)) goto done; ping_dev0(); @@ -911,7 +832,7 @@ static void test_ip6gre_tunnel(enum ip6gre_test test) set_fd = bpf_program__fd(skel->progs.ip6gretap_set_tunnel); get_fd = bpf_program__fd(skel->progs.ip6gretap_get_tunnel); - if (generic_attach(IP6GRE_TUNL_DEV1, get_fd, set_fd)) + if (tc_prog_attach(IP6GRE_TUNL_DEV1, get_fd, set_fd)) goto done; ping6_veth0(); @@ -954,7 +875,7 @@ static void test_erspan_tunnel(enum erspan_test test) set_fd = bpf_program__fd(skel->progs.erspan_set_tunnel); get_fd = bpf_program__fd(skel->progs.erspan_get_tunnel); - if (generic_attach(ERSPAN_TUNL_DEV1, get_fd, set_fd)) + if (tc_prog_attach(ERSPAN_TUNL_DEV1, get_fd, set_fd)) goto done; ping_dev0(); @@ -990,7 +911,7 @@ static void test_ip6erspan_tunnel(enum erspan_test test) set_fd = bpf_program__fd(skel->progs.ip4ip6erspan_set_tunnel); get_fd = bpf_program__fd(skel->progs.ip4ip6erspan_get_tunnel); - if (generic_attach(IP6ERSPAN_TUNL_DEV1, get_fd, set_fd)) + if (tc_prog_attach(IP6ERSPAN_TUNL_DEV1, get_fd, set_fd)) goto done; ping6_veth0(); @@ -1017,7 +938,7 @@ static void test_geneve_tunnel(void) set_fd = bpf_program__fd(skel->progs.geneve_set_tunnel); get_fd = bpf_program__fd(skel->progs.geneve_get_tunnel); - if (generic_attach(GENEVE_TUNL_DEV1, get_fd, set_fd)) + if (tc_prog_attach(GENEVE_TUNL_DEV1, get_fd, set_fd)) goto done; ping_dev0(); @@ -1044,7 +965,7 @@ static void test_ip6geneve_tunnel(void) set_fd = bpf_program__fd(skel->progs.ip6geneve_set_tunnel); get_fd = bpf_program__fd(skel->progs.ip6geneve_get_tunnel); - if (generic_attach(IP6GENEVE_TUNL_DEV1, get_fd, set_fd)) + if (tc_prog_attach(IP6GENEVE_TUNL_DEV1, get_fd, set_fd)) goto done; ping_dev0(); @@ -1083,7 
+1004,7 @@ static void test_ip6tnl_tunnel(enum ip6tnl_test test) get_fd = bpf_program__fd(skel->progs.ip6ip6_get_tunnel); break; } - if (generic_attach(IP6TNL_TUNL_DEV1, get_fd, set_fd)) + if (tc_prog_attach(IP6TNL_TUNL_DEV1, get_fd, set_fd)) goto done; ping6_veth0(); diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c new file mode 100644 index 000000000000..5af28f359cfd --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c @@ -0,0 +1,2596 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <bpf/bpf.h> +#include <errno.h> +#include <linux/bitmap.h> +#include <linux/if_link.h> +#include <linux/mman.h> +#include <linux/netdev.h> +#include <poll.h> +#include <pthread.h> +#include <signal.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <unistd.h> + +#include "network_helpers.h" +#include "test_xsk.h" +#include "xsk_xdp_common.h" +#include "xsk_xdp_progs.skel.h" + +#define DEFAULT_BATCH_SIZE 64 +#define MIN_PKT_SIZE 64 +#define MAX_ETH_JUMBO_SIZE 9000 +#define MAX_INTERFACES 2 +#define MAX_TEARDOWN_ITER 10 +#define MAX_TX_BUDGET_DEFAULT 32 +#define PKT_DUMP_NB_TO_PRINT 16 +/* Just to align the data in the packet */ +#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) +#define POLL_TMOUT 1000 +#define THREAD_TMOUT 3 +#define UMEM_HEADROOM_TEST_SIZE 128 +#define XSK_DESC__INVALID_OPTION (0xffff) +#define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1) +#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024) +#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024) + +static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00}; + +bool opt_verbose; +pthread_barrier_t barr; +pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; + +int pkts_in_flight; + +/* Each 32-bit payload word consists of a packet sequence number in the upper + * 16 bits and an intra-packet data sequence number in the lower 16 bits. So the 3rd packet's + * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0. + */ +static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size) +{ + u32 *ptr = (u32 *)dest, i; + + start /= sizeof(*ptr); + size /= sizeof(*ptr); + for (i = 0; i < size; i++) + ptr[i] = htonl(pkt_nb << 16 | (i + start)); +} + +static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr) +{ + memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN); + memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN); + eth_hdr->h_proto = htons(ETH_P_LOOPBACK); +} + +static bool is_umem_valid(struct ifobject *ifobj) +{ + return !!ifobj->umem->umem; +} + +static u32 mode_to_xdp_flags(enum test_mode mode) +{ + return (mode == TEST_MODE_SKB) ?
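+ /* TEST_MODE_SKB selects the generic XDP path; other modes attach in native driver mode */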
XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; +} + +static u64 umem_size(struct xsk_umem_info *umem) +{ + return umem->num_frames * umem->frame_size; +} + +int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer, + u64 size) +{ + struct xsk_umem_config cfg = { + .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, + .frame_size = umem->frame_size, + .frame_headroom = umem->frame_headroom, + .flags = XSK_UMEM__DEFAULT_FLAGS + }; + int ret; + + if (umem->fill_size) + cfg.fill_size = umem->fill_size; + + if (umem->comp_size) + cfg.comp_size = umem->comp_size; + + if (umem->unaligned_mode) + cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG; + + ret = xsk_umem__create(&umem->umem, buffer, size, + &umem->fq, &umem->cq, &cfg); + if (ret) + return ret; + + umem->buffer = buffer; + if (ifobj->shared_umem && ifobj->rx_on) { + umem->base_addr = umem_size(umem); + umem->next_buffer = umem_size(umem); + } + + return 0; +} + +static u64 umem_alloc_buffer(struct xsk_umem_info *umem) +{ + u64 addr; + + addr = umem->next_buffer; + umem->next_buffer += umem->frame_size; + if (umem->next_buffer >= umem->base_addr + umem_size(umem)) + umem->next_buffer = umem->base_addr; + + return addr; +} + +static void umem_reset_alloc(struct xsk_umem_info *umem) +{ + umem->next_buffer = 0; +} + +static int enable_busy_poll(struct xsk_socket_info *xsk) +{ + int sock_opt; + + sock_opt = 1; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + return -errno; + + sock_opt = 20; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + return -errno; + + sock_opt = xsk->batch_size; + if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET, + (void *)&sock_opt, sizeof(sock_opt)) < 0) + return -errno; + + return 0; +} + +int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, + struct ifobject *ifobject, bool shared) +{ + struct xsk_socket_config cfg = {}; + struct xsk_ring_cons *rxr; + struct xsk_ring_prod *txr; + + xsk->umem = umem; + cfg.rx_size = xsk->rxqsize; + cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg.bind_flags = ifobject->bind_flags; + if (shared) + cfg.bind_flags |= XDP_SHARED_UMEM; + if (ifobject->mtu > MAX_ETH_PKT_SIZE) + cfg.bind_flags |= XDP_USE_SG; + if (umem->comp_size) + cfg.tx_size = umem->comp_size; + if (umem->fill_size) + cfg.rx_size = umem->fill_size; + + txr = ifobject->tx_on ? &xsk->tx : NULL; + rxr = ifobject->rx_on ? 
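+ /* only create the rings this side actually drives; unused ring pointers stay NULL */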
&xsk->rx : NULL; + return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg); +} + +#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags" +static unsigned int get_max_skb_frags(void) +{ + unsigned int max_skb_frags = 0; + FILE *file; + + file = fopen(MAX_SKB_FRAGS_PATH, "r"); + if (!file) { + ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH); + return 0; + } + + if (fscanf(file, "%u", &max_skb_frags) != 1) + ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH); + + fclose(file); + return max_skb_frags; +} + +static int set_ring_size(struct ifobject *ifobj) +{ + int ret; + u32 ctr = 0; + + while (ctr++ < SOCK_RECONF_CTR) { + ret = set_hw_ring_size(ifobj->ifname, &ifobj->ring); + if (!ret) + break; + + /* Retry if it fails */ + if (ctr >= SOCK_RECONF_CTR || errno != EBUSY) + return -errno; + + usleep(USLEEP_MAX); + } + + return ret; +} + +int hw_ring_size_reset(struct ifobject *ifobj) +{ + ifobj->ring.tx_pending = ifobj->set_ring.default_tx; + ifobj->ring.rx_pending = ifobj->set_ring.default_rx; + return set_ring_size(ifobj); +} + +static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, + struct ifobject *ifobj_rx) +{ + u32 i, j; + + for (i = 0; i < MAX_INTERFACES; i++) { + struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx; + + ifobj->xsk = &ifobj->xsk_arr[0]; + ifobj->use_poll = false; + ifobj->use_fill_ring = true; + ifobj->release_rx = true; + ifobj->validation_func = NULL; + ifobj->use_metadata = false; + + if (i == 0) { + ifobj->rx_on = false; + ifobj->tx_on = true; + } else { + ifobj->rx_on = true; + ifobj->tx_on = false; + } + + memset(ifobj->umem, 0, sizeof(*ifobj->umem)); + ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS; + ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; + + for (j = 0; j < MAX_SOCKETS; j++) { + memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); + ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; + ifobj->xsk_arr[j].batch_size = DEFAULT_BATCH_SIZE; + if (i == 0) + ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default; + else + ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default; + + memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN); + memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN); + ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0); + ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1); + } + } + + if (ifobj_tx->hw_ring_size_supp) + hw_ring_size_reset(ifobj_tx); + + test->ifobj_tx = ifobj_tx; + test->ifobj_rx = ifobj_rx; + test->current_step = 0; + test->total_steps = 1; + test->nb_sockets = 1; + test->fail = false; + test->set_ring = false; + test->adjust_tail = false; + test->adjust_tail_support = false; + test->mtu = MAX_ETH_PKT_SIZE; + test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog; + test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk; + test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog; + test->xskmap_tx = ifobj_tx->xdp_progs->maps.xsk; +} + +void test_init(struct test_spec *test, struct ifobject *ifobj_tx, + struct ifobject *ifobj_rx, enum test_mode mode, + const struct test_spec *test_to_run) +{ + struct pkt_stream *tx_pkt_stream; + struct pkt_stream *rx_pkt_stream; + u32 i; + + tx_pkt_stream = test->tx_pkt_stream_default; + rx_pkt_stream = test->rx_pkt_stream_default; + memset(test, 0, sizeof(*test)); + test->tx_pkt_stream_default = tx_pkt_stream; + test->rx_pkt_stream_default = rx_pkt_stream; + + for (i = 0; i < MAX_INTERFACES; i++) { + struct ifobject *ifobj = i ? 
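+ /* pass 0 sets the bind flags for the TX object, pass 1 for the RX object */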
ifobj_rx : ifobj_tx; + + ifobj->bind_flags = XDP_USE_NEED_WAKEUP; + if (mode == TEST_MODE_ZC) + ifobj->bind_flags |= XDP_ZEROCOPY; + else + ifobj->bind_flags |= XDP_COPY; + } + + memcpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE); + test->test_func = test_to_run->test_func; + test->mode = mode; + __test_spec_init(test, ifobj_tx, ifobj_rx); +} + +static void test_spec_reset(struct test_spec *test) +{ + __test_spec_init(test, test->ifobj_tx, test->ifobj_rx); +} + +static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx, + struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx, + struct bpf_map *xskmap_tx) +{ + test->xdp_prog_rx = xdp_prog_rx; + test->xdp_prog_tx = xdp_prog_tx; + test->xskmap_rx = xskmap_rx; + test->xskmap_tx = xskmap_tx; +} + +static int test_spec_set_mtu(struct test_spec *test, int mtu) +{ + int err; + + if (test->ifobj_rx->mtu != mtu) { + err = xsk_set_mtu(test->ifobj_rx->ifindex, mtu); + if (err) + return err; + test->ifobj_rx->mtu = mtu; + } + if (test->ifobj_tx->mtu != mtu) { + err = xsk_set_mtu(test->ifobj_tx->ifindex, mtu); + if (err) + return err; + test->ifobj_tx->mtu = mtu; + } + + return 0; +} + +void pkt_stream_reset(struct pkt_stream *pkt_stream) +{ + if (pkt_stream) { + pkt_stream->current_pkt_nb = 0; + pkt_stream->nb_rx_pkts = 0; + } +} + +static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream) +{ + if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) + return NULL; + + return &pkt_stream->pkts[pkt_stream->current_pkt_nb++]; +} + +static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent) +{ + while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { + (*pkts_sent)++; + if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid) + return &pkt_stream->pkts[pkt_stream->current_pkt_nb++]; + pkt_stream->current_pkt_nb++; + } + return NULL; +} + +void pkt_stream_delete(struct pkt_stream *pkt_stream) +{ + free(pkt_stream->pkts); + free(pkt_stream); +} + +void pkt_stream_restore_default(struct test_spec *test) +{ + struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream; + struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream; + + if (tx_pkt_stream != test->tx_pkt_stream_default) { + pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream); + test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default; + } + + if (rx_pkt_stream != test->rx_pkt_stream_default) { + pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream); + test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default; + } +} + +static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts) +{ + struct pkt_stream *pkt_stream; + + pkt_stream = calloc(1, sizeof(*pkt_stream)); + if (!pkt_stream) + return NULL; + + pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts)); + if (!pkt_stream->pkts) { + free(pkt_stream); + return NULL; + } + + pkt_stream->nb_pkts = nb_pkts; + return pkt_stream; +} + +static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt) +{ + u32 nb_frags = 1, next_frag; + + if (!pkt) + return 1; + + if (!pkt_stream->verbatim) { + if (!pkt->valid || !pkt->len) + return 1; + return ceil_u32(pkt->len, frame_size); + } + + /* Search for the end of the packet in verbatim mode */ + if (!pkt_continues(pkt->options)) + return nb_frags; + + next_frag = pkt_stream->current_pkt_nb; + pkt++; + while (next_frag++ < pkt_stream->nb_pkts) { + nb_frags++; + if (!pkt_continues(pkt->options) || !pkt->valid) + break; + pkt++; + } + return nb_frags; +} + +static bool 
set_pkt_valid(int offset, u32 len) +{ + return len <= MAX_ETH_JUMBO_SIZE; +} + +static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len) +{ + pkt->offset = offset; + pkt->len = len; + pkt->valid = set_pkt_valid(offset, len); +} + +static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len) +{ + bool prev_pkt_valid = pkt->valid; + + pkt_set(pkt_stream, pkt, offset, len); + pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid; +} + +static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) +{ + return ceil_u32(len, umem->frame_size) * umem->frame_size; +} + +static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off) +{ + struct pkt_stream *pkt_stream; + u32 i; + + pkt_stream = __pkt_stream_alloc(nb_pkts); + if (!pkt_stream) + return NULL; + + pkt_stream->nb_pkts = nb_pkts; + pkt_stream->max_pkt_len = pkt_len; + for (i = 0; i < nb_pkts; i++) { + struct pkt *pkt = &pkt_stream->pkts[i]; + + pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len); + pkt->pkt_nb = nb_start + i * nb_off; + } + + return pkt_stream; +} + +struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len) +{ + return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1); +} + +static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream) +{ + return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len); +} + +static int pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len) +{ + ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); + + if (!ifobj->xsk->pkt_stream) + return -ENOMEM; + + return 0; +} + +static int pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) +{ + int ret; + + ret = pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len); + if (ret) + return ret; + + return pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len); +} + +static int __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len, + int offset) +{ + struct pkt_stream *pkt_stream; + u32 i; + + pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream); + if (!pkt_stream) + return -ENOMEM; + + for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2) + pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len); + + ifobj->xsk->pkt_stream = pkt_stream; + + return 0; +} + +static int pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset) +{ + int ret = __pkt_stream_replace_half(test->ifobj_tx, pkt_len, offset); + + if (ret) + return ret; + + return __pkt_stream_replace_half(test->ifobj_rx, pkt_len, offset); +} + +static int pkt_stream_receive_half(struct test_spec *test) +{ + struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream; + u32 i; + + if (test->ifobj_rx->xsk->pkt_stream != test->rx_pkt_stream_default) + /* Packet stream has already been replaced so we have to release this one. 
+ * The newly created one will be freed by the restore_default() at the + * end of the test + */ + pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream); + + test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts, + pkt_stream->pkts[0].len); + if (!test->ifobj_rx->xsk->pkt_stream) + return -ENOMEM; + + pkt_stream = test->ifobj_rx->xsk->pkt_stream; + for (i = 1; i < pkt_stream->nb_pkts; i += 2) + pkt_stream->pkts[i].valid = false; + + pkt_stream->nb_valid_entries /= 2; + + return 0; +} + +static int pkt_stream_even_odd_sequence(struct test_spec *test) +{ + struct pkt_stream *pkt_stream; + u32 i; + + for (i = 0; i < test->nb_sockets; i++) { + pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream; + pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2, + pkt_stream->pkts[0].len, i, 2); + if (!pkt_stream) + return -ENOMEM; + test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream; + + pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream; + pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2, + pkt_stream->pkts[0].len, i, 2); + if (!pkt_stream) + return -ENOMEM; + test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream; + } + + return 0; +} + +static void release_even_odd_sequence(struct test_spec *test) +{ + struct pkt_stream *later_free_tx = test->ifobj_tx->xsk->pkt_stream; + struct pkt_stream *later_free_rx = test->ifobj_rx->xsk->pkt_stream; + int i; + + for (i = 0; i < test->nb_sockets; i++) { + /* later_free_{rx/tx} will be freed by restore_default() */ + if (test->ifobj_tx->xsk_arr[i].pkt_stream != later_free_tx) + pkt_stream_delete(test->ifobj_tx->xsk_arr[i].pkt_stream); + if (test->ifobj_rx->xsk_arr[i].pkt_stream != later_free_rx) + pkt_stream_delete(test->ifobj_rx->xsk_arr[i].pkt_stream); + } + +} + +static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem) +{ + if (!pkt->valid) + return pkt->offset; + return pkt->offset + umem_alloc_buffer(umem); +} + +static void pkt_stream_cancel(struct pkt_stream *pkt_stream) +{ + pkt_stream->current_pkt_nb--; +} + +static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len, + u32 pkt_nb, u32 bytes_written) +{ + void *data = xsk_umem__get_data(umem->buffer, addr); + + if (len < MIN_PKT_SIZE) + return; + + if (!bytes_written) { + gen_eth_hdr(xsk, data); + + len -= PKT_HDR_SIZE; + data += PKT_HDR_SIZE; + } else { + bytes_written -= PKT_HDR_SIZE; + } + + write_payload(data, pkt_nb, bytes_written, len); +} + +static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames, + u32 nb_frames, bool verbatim) +{ + u32 i, len = 0, pkt_nb = 0, payload = 0; + struct pkt_stream *pkt_stream; + + pkt_stream = __pkt_stream_alloc(nb_frames); + if (!pkt_stream) + return NULL; + + for (i = 0; i < nb_frames; i++) { + struct pkt *pkt = &pkt_stream->pkts[pkt_nb]; + struct pkt *frame = &frames[i]; + + pkt->offset = frame->offset; + if (verbatim) { + *pkt = *frame; + pkt->pkt_nb = payload; + if (!frame->valid || !pkt_continues(frame->options)) + payload++; + } else { + if (frame->valid) + len += frame->len; + if (frame->valid && pkt_continues(frame->options)) + continue; + + pkt->pkt_nb = pkt_nb; + pkt->len = len; + pkt->valid = frame->valid; + pkt->options = 0; + + len = 0; + } + + print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n", + pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb); + + if (pkt->valid && pkt->len > pkt_stream->max_pkt_len) + pkt_stream->max_pkt_len = pkt->len; + + if (pkt->valid) + pkt_stream->nb_valid_entries++; + + 
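+ /* advance to the next stream entry; merged (non-verbatim) continuing frags were skipped via continue above */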
pkt_nb++; + } + + pkt_stream->nb_pkts = pkt_nb; + pkt_stream->verbatim = verbatim; + return pkt_stream; +} + +static int pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts) +{ + struct pkt_stream *pkt_stream; + + pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true); + if (!pkt_stream) + return -ENOMEM; + test->ifobj_tx->xsk->pkt_stream = pkt_stream; + + pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false); + if (!pkt_stream) + return -ENOMEM; + test->ifobj_rx->xsk->pkt_stream = pkt_stream; + + return 0; +} + +static void pkt_print_data(u32 *data, u32 cnt) +{ + u32 i; + + for (i = 0; i < cnt; i++) { + u32 seqnum, pkt_nb; + + seqnum = ntohl(*data) & 0xffff; + pkt_nb = ntohl(*data) >> 16; + ksft_print_msg("%u:%u ", pkt_nb, seqnum); + data++; + } +} + +static void pkt_dump(void *pkt, u32 len, bool eth_header) +{ + struct ethhdr *ethhdr = pkt; + u32 i, *data; + + if (eth_header) { + /* extract L2 frame */ + ksft_print_msg("DEBUG>> L2: dst mac: "); + for (i = 0; i < ETH_ALEN; i++) + ksft_print_msg("%02X", ethhdr->h_dest[i]); + + ksft_print_msg("\nDEBUG>> L2: src mac: "); + for (i = 0; i < ETH_ALEN; i++) + ksft_print_msg("%02X", ethhdr->h_source[i]); + + data = pkt + PKT_HDR_SIZE; + } else { + data = pkt; + } + + /* extract L5 frame */ + ksft_print_msg("\nDEBUG>> L5: seqnum: "); + pkt_print_data(data, PKT_DUMP_NB_TO_PRINT); + ksft_print_msg("...."); + if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) { + ksft_print_msg("\n.... "); + pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT, + PKT_DUMP_NB_TO_PRINT); + } + ksft_print_msg("\n---------------------------------------\n"); +} + +static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr) +{ + u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom; + u32 offset = addr % umem->frame_size, expected_offset; + int pkt_offset = pkt->valid ? pkt->offset : 0; + + if (!umem->unaligned_mode) + pkt_offset = 0; + + expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; + + if (offset == expected_offset) + return true; + + ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset); + return false; +} + +static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) +{ + void *data = xsk_umem__get_data(buffer, addr); + struct xdp_info *meta = data - sizeof(struct xdp_info); + + if (meta->count != pkt->pkt_nb) { + ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n", + __func__, pkt->pkt_nb, + (unsigned long long)meta->count); + return false; + } + + return true; +} + +static int is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx, bool *supported) +{ + struct bpf_map *data_map; + int adjust_value = 0; + int key = 0; + int ret; + + data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss"); + if (!data_map || !bpf_map__is_internal(data_map)) { + ksft_print_msg("Error: could not find bss section of XDP program\n"); + return -EINVAL; + } + + ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &key, &adjust_value); + if (ret) { + ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", ret); + return ret; + } + + /* Set the 'adjust_value' variable to -EOPNOTSUPP in the XDP program if the adjust_tail + * helper is not supported. Skip the adjust_tail test case in this scenario.
+ */ + *supported = adjust_value != -EOPNOTSUPP; + + return 0; +} + +static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb, + u32 bytes_processed) +{ + u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum; + void *data = xsk_umem__get_data(umem->buffer, addr); + + addr -= umem->base_addr; + + if (addr >= umem->num_frames * umem->frame_size || + addr + len > umem->num_frames * umem->frame_size) { + ksft_print_msg("Frag invalid addr: %llx len: %u\n", + (unsigned long long)addr, len); + return false; + } + if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) { + ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", + (unsigned long long)addr, len); + return false; + } + + pkt_data = data; + if (!bytes_processed) { + pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data); + len -= PKT_HDR_SIZE; + } else { + bytes_processed -= PKT_HDR_SIZE; + } + + expected_seqnum = bytes_processed / sizeof(*pkt_data); + seqnum = ntohl(*pkt_data) & 0xffff; + pkt_nb = ntohl(*pkt_data) >> 16; + + if (expected_pkt_nb != pkt_nb) { + ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n", + __func__, expected_pkt_nb, pkt_nb); + goto error; + } + if (expected_seqnum != seqnum) { + ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n", + __func__, expected_seqnum, seqnum); + goto error; + } + + words_to_end = len / sizeof(*pkt_data) - 1; + pkt_data += words_to_end; + seqnum = ntohl(*pkt_data) & 0xffff; + expected_seqnum += words_to_end; + if (expected_seqnum != seqnum) { + ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n", + __func__, expected_seqnum, seqnum); + goto error; + } + + return true; + +error: + pkt_dump(data, len, !bytes_processed); + return false; +} + +static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) +{ + if (pkt->len != len) { + ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n", + __func__, pkt->len, len); + pkt_dump(xsk_umem__get_data(buffer, addr), len, true); + return false; + } + + return true; +} + +static u32 load_value(u32 *counter) +{ + return __atomic_load_n(counter, __ATOMIC_ACQUIRE); +} + +static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret) +{ + u32 max_budget = MAX_TX_BUDGET_DEFAULT; + u32 cons, ready_to_send; + int delta; + + cons = load_value(xsk->tx.consumer); + ready_to_send = load_value(xsk->tx.producer) - cons; + *ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + + delta = load_value(xsk->tx.consumer) - cons; + /* By default, xsk should consume exactly @max_budget descs in one + * send here, where the max-budget limit of the while loop in + * __xsk_generic_xmit() is hit. Make sure that the number of descs + * to be sent is larger than @max_budget, or else tx.consumer will + * be updated in time by xskq_cons_peek_desc(), which hides the + * issue we are trying to verify.
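+ * As an illustration: with 64 descs queued and the default budget of
+ * 32, one sendto() should advance tx.consumer by exactly 32.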
+ */ + if (ready_to_send > max_budget && delta != max_budget) + return false; + + return true; +} + +int kick_tx(struct xsk_socket_info *xsk) +{ + int ret; + + if (xsk->check_consumer) { + if (!kick_tx_with_check(xsk, &ret)) + return TEST_FAILURE; + } else { + ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + } + if (ret >= 0) + return TEST_PASS; + if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) { + usleep(100); + return TEST_PASS; + } + return TEST_FAILURE; +} + +int kick_rx(struct xsk_socket_info *xsk) +{ + int ret; + + ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); + if (ret < 0) + return TEST_FAILURE; + + return TEST_PASS; +} + +static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) +{ + unsigned int rcvd; + u32 idx; + int ret; + + if (xsk_ring_prod__needs_wakeup(&xsk->tx)) { + ret = kick_tx(xsk); + if (ret) + return TEST_FAILURE; + } + + rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); + if (rcvd) { + if (rcvd > xsk->outstanding_tx) { + u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1); + + ksft_print_msg("[%s] Too many packets completed\n", __func__); + ksft_print_msg("Last completion address: %llx\n", + (unsigned long long)addr); + return TEST_FAILURE; + } + + xsk_ring_cons__release(&xsk->umem->cq, rcvd); + xsk->outstanding_tx -= rcvd; + } + + return TEST_PASS; +} + +static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk) +{ + u32 frags_processed = 0, nb_frags = 0, pkt_len = 0; + u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0; + struct pkt_stream *pkt_stream = xsk->pkt_stream; + struct ifobject *ifobj = test->ifobj_rx; + struct xsk_umem_info *umem = xsk->umem; + struct pollfd fds = { }; + struct pkt *pkt; + u64 first_addr = 0; + int ret; + + fds.fd = xsk_socket__fd(xsk->xsk); + fds.events = POLLIN; + + ret = kick_rx(xsk); + if (ret) + return TEST_FAILURE; + + if (ifobj->use_poll) { + ret = poll(&fds, 1, POLL_TMOUT); + if (ret < 0) + return TEST_FAILURE; + + if (!ret) { + if (!is_umem_valid(test->ifobj_tx)) + return TEST_PASS; + + ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__); + return TEST_CONTINUE; + } + + if (!(fds.revents & POLLIN)) + return TEST_CONTINUE; + } + + rcvd = xsk_ring_cons__peek(&xsk->rx, xsk->batch_size, &idx_rx); + if (!rcvd) + return TEST_CONTINUE; + + if (ifobj->use_fill_ring) { + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + while (ret != rcvd) { + if (xsk_ring_prod__needs_wakeup(&umem->fq)) { + ret = poll(&fds, 1, POLL_TMOUT); + if (ret < 0) + return TEST_FAILURE; + } + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); + } + } + + while (frags_processed < rcvd) { + const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); + u64 addr = desc->addr, orig; + + orig = xsk_umem__extract_addr(addr); + addr = xsk_umem__add_offset_to_addr(addr); + + if (!nb_frags) { + pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); + if (!pkt) { + ksft_print_msg("[%s] received too many packets addr: %lx len %u\n", + __func__, addr, desc->len); + return TEST_FAILURE; + } + } + + print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n", + addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid); + + if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) || + !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata && + !is_metadata_correct(pkt, umem->buffer, addr))) + return TEST_FAILURE; + + if (!nb_frags++) + first_addr = addr; + frags_processed++; + 
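+ /* accumulate frag lengths; the packet is complete once XDP_PKT_CONTD is no longer set */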
pkt_len += desc->len; + if (ifobj->use_fill_ring) + *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; + + if (pkt_continues(desc->options)) + continue; + + /* The complete packet has been received */ + if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) || + !is_offset_correct(umem, pkt, addr)) + return TEST_FAILURE; + + pkt_stream->nb_rx_pkts++; + nb_frags = 0; + pkt_len = 0; + } + + if (nb_frags) { + /* In the middle of a packet. Start over from beginning of packet. */ + idx_rx -= nb_frags; + xsk_ring_cons__cancel(&xsk->rx, nb_frags); + if (ifobj->use_fill_ring) { + idx_fq -= nb_frags; + xsk_ring_prod__cancel(&umem->fq, nb_frags); + } + frags_processed -= nb_frags; + } + + if (ifobj->use_fill_ring) + xsk_ring_prod__submit(&umem->fq, frags_processed); + if (ifobj->release_rx) + xsk_ring_cons__release(&xsk->rx, frags_processed); + + pthread_mutex_lock(&pacing_mutex); + pkts_in_flight -= pkts_sent; + pthread_mutex_unlock(&pacing_mutex); + pkts_sent = 0; + + return TEST_CONTINUE; +} + +bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num, + unsigned long *bitmap) +{ + struct pkt_stream *pkt_stream = xsk->pkt_stream; + + if (!pkt_stream) { + __set_bit(sock_num, bitmap); + return false; + } + + if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) { + __set_bit(sock_num, bitmap); + if (bitmap_full(bitmap, test->nb_sockets)) + return true; + } + + return false; +} + +static int receive_pkts(struct test_spec *test) +{ + struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; + DECLARE_BITMAP(bitmap, test->nb_sockets); + struct xsk_socket_info *xsk; + u32 sock_num = 0; + int res, ret; + + bitmap_zero(bitmap, test->nb_sockets); + + ret = gettimeofday(&tv_now, NULL); + if (ret) + return TEST_FAILURE; + + timeradd(&tv_now, &tv_timeout, &tv_end); + + while (1) { + xsk = &test->ifobj_rx->xsk_arr[sock_num]; + + if ((all_packets_received(test, xsk, sock_num, bitmap))) + break; + + res = __receive_pkts(test, xsk); + if (!(res == TEST_PASS || res == TEST_CONTINUE)) + return res; + + ret = gettimeofday(&tv_now, NULL); + if (ret) + return TEST_FAILURE; + + if (timercmp(&tv_now, &tv_end, >)) { + ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__); + return TEST_FAILURE; + } + sock_num = (sock_num + 1) % test->nb_sockets; + } + + return TEST_PASS; +} + +static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout) +{ + u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len; + struct pkt_stream *pkt_stream = xsk->pkt_stream; + struct xsk_umem_info *umem = ifobject->umem; + bool use_poll = ifobject->use_poll; + struct pollfd fds = { }; + int ret; + + buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len); + /* pkts_in_flight might be negative if many invalid packets are sent */ + if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) / + buffer_len)) { + ret = kick_tx(xsk); + if (ret) + return TEST_FAILURE; + return TEST_CONTINUE; + } + + fds.fd = xsk_socket__fd(xsk->xsk); + fds.events = POLLOUT; + + while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) { + if (use_poll) { + ret = poll(&fds, 1, POLL_TMOUT); + if (timeout) { + if (ret < 0) { + ksft_print_msg("ERROR: [%s] Poll error %d\n", + __func__, errno); + return TEST_FAILURE; + } + if (ret == 0) + return TEST_PASS; + break; + } + if (ret <= 0) { + ksft_print_msg("ERROR: [%s] Poll error %d\n", + __func__, errno); + return TEST_FAILURE; + } + } + + complete_pkts(xsk, xsk->batch_size); + } + + 
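+ /* fill the reserved TX slots, emitting one descriptor per fragment of each packet */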
for (i = 0; i < xsk->batch_size; i++) { + struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream); + u32 nb_frags_left, nb_frags, bytes_written = 0; + + if (!pkt) + break; + + nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt); + if (nb_frags > xsk->batch_size - i) { + pkt_stream_cancel(pkt_stream); + xsk_ring_prod__cancel(&xsk->tx, xsk->batch_size - i); + break; + } + nb_frags_left = nb_frags; + + while (nb_frags_left--) { + struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); + + tx_desc->addr = pkt_get_addr(pkt, ifobject->umem); + if (pkt_stream->verbatim) { + tx_desc->len = pkt->len; + tx_desc->options = pkt->options; + } else if (nb_frags_left) { + tx_desc->len = umem->frame_size; + tx_desc->options = XDP_PKT_CONTD; + } else { + tx_desc->len = pkt->len - bytes_written; + tx_desc->options = 0; + } + if (pkt->valid) + pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb, + bytes_written); + bytes_written += tx_desc->len; + + print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n", + tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb); + + if (nb_frags_left) { + i++; + if (pkt_stream->verbatim) + pkt = pkt_stream_get_next_tx_pkt(pkt_stream); + } + } + + if (pkt && pkt->valid) { + valid_pkts++; + valid_frags += nb_frags; + } + } + + pthread_mutex_lock(&pacing_mutex); + pkts_in_flight += valid_pkts; + pthread_mutex_unlock(&pacing_mutex); + + xsk_ring_prod__submit(&xsk->tx, i); + xsk->outstanding_tx += valid_frags; + + if (use_poll) { + ret = poll(&fds, 1, POLL_TMOUT); + if (ret <= 0) { + if (ret == 0 && timeout) + return TEST_PASS; + + ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret); + return TEST_FAILURE; + } + } + + if (!timeout) { + if (complete_pkts(xsk, i)) + return TEST_FAILURE; + + usleep(10); + return TEST_PASS; + } + + return TEST_CONTINUE; +} + +static int wait_for_tx_completion(struct xsk_socket_info *xsk) +{ + struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; + int ret; + + ret = gettimeofday(&tv_now, NULL); + if (ret) + return TEST_FAILURE; + timeradd(&tv_now, &tv_timeout, &tv_end); + + while (xsk->outstanding_tx) { + ret = gettimeofday(&tv_now, NULL); + if (ret) + return TEST_FAILURE; + if (timercmp(&tv_now, &tv_end, >)) { + ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__); + return TEST_FAILURE; + } + + complete_pkts(xsk, xsk->batch_size); + } + + return TEST_PASS; +} + +bool all_packets_sent(struct test_spec *test, unsigned long *bitmap) +{ + return bitmap_full(bitmap, test->nb_sockets); +} + +static int send_pkts(struct test_spec *test, struct ifobject *ifobject) +{ + bool timeout = !is_umem_valid(test->ifobj_rx); + DECLARE_BITMAP(bitmap, test->nb_sockets); + u32 i, ret; + + bitmap_zero(bitmap, test->nb_sockets); + + while (!(all_packets_sent(test, bitmap))) { + for (i = 0; i < test->nb_sockets; i++) { + struct pkt_stream *pkt_stream; + + pkt_stream = ifobject->xsk_arr[i].pkt_stream; + if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) { + __set_bit(i, bitmap); + continue; + } + ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout); + if (ret == TEST_CONTINUE && !test->fail) + continue; + + if ((ret || test->fail) && !timeout) + return TEST_FAILURE; + + if (ret == TEST_PASS && timeout) + return ret; + + ret = wait_for_tx_completion(&ifobject->xsk_arr[i]); + if (ret) + return TEST_FAILURE; + } + } + + return TEST_PASS; +} + +static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats) +{ + int fd = xsk_socket__fd(xsk), err; + 
socklen_t optlen, expected_len; + + optlen = sizeof(*stats); + err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen); + if (err) { + ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n", + __func__, -err, strerror(-err)); + return TEST_FAILURE; + } + + expected_len = sizeof(struct xdp_statistics); + if (optlen != expected_len) { + ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n", + __func__, expected_len, optlen); + return TEST_FAILURE; + } + + return TEST_PASS; +} + +static int validate_rx_dropped(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + struct xdp_statistics stats; + int err; + + err = kick_rx(ifobject->xsk); + if (err) + return TEST_FAILURE; + + err = get_xsk_stats(xsk, &stats); + if (err) + return TEST_FAILURE; + + /* The receiver calls getsockopt after receiving the last (valid) + * packet which is not the final packet sent in this test (valid and + * invalid packets are sent in alternating fashion with the final + * packet being invalid). Since the last packet may or may not have + * been dropped already, both outcomes must be allowed. + */ + if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 || + stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1) + return TEST_PASS; + + return TEST_FAILURE; +} + +static int validate_rx_full(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + struct xdp_statistics stats; + int err; + + usleep(1000); + err = kick_rx(ifobject->xsk); + if (err) + return TEST_FAILURE; + + err = get_xsk_stats(xsk, &stats); + if (err) + return TEST_FAILURE; + + if (stats.rx_ring_full) + return TEST_PASS; + + return TEST_FAILURE; +} + +static int validate_fill_empty(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + struct xdp_statistics stats; + int err; + + usleep(1000); + err = kick_rx(ifobject->xsk); + if (err) + return TEST_FAILURE; + + err = get_xsk_stats(xsk, &stats); + if (err) + return TEST_FAILURE; + + if (stats.rx_fill_ring_empty_descs) + return TEST_PASS; + + return TEST_FAILURE; +} + +static int validate_tx_invalid_descs(struct ifobject *ifobject) +{ + struct xsk_socket *xsk = ifobject->xsk->xsk; + int fd = xsk_socket__fd(xsk); + struct xdp_statistics stats; + socklen_t optlen; + int err; + + optlen = sizeof(stats); + err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); + if (err) { + ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n", + __func__, -err, strerror(-err)); + return TEST_FAILURE; + } + + if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) { + ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n", + __func__, + (unsigned long long)stats.tx_invalid_descs, + ifobject->xsk->pkt_stream->nb_pkts / 2); + return TEST_FAILURE; + } + + return TEST_PASS; +} + +static int xsk_configure(struct test_spec *test, struct ifobject *ifobject, + struct xsk_umem_info *umem, bool tx) +{ + int i, ret; + + for (i = 0; i < test->nb_sockets; i++) { + bool shared = (ifobject->shared_umem && tx) ?
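+ /* with a shared umem the TX side always sets XDP_SHARED_UMEM; otherwise only sockets after the first share */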
true : !!i; + u32 ctr = 0; + + while (ctr++ < SOCK_RECONF_CTR) { + ret = xsk_configure_socket(&ifobject->xsk_arr[i], umem, + ifobject, shared); + if (!ret) + break; + + /* Retry if it fails as xsk_socket__create() is asynchronous */ + if (ctr >= SOCK_RECONF_CTR) + return ret; + usleep(USLEEP_MAX); + } + if (ifobject->busy_poll) { + ret = enable_busy_poll(&ifobject->xsk_arr[i]); + if (ret) + return ret; + } + } + + return 0; +} + +static int thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobject) +{ + int ret = xsk_configure(test, ifobject, test->ifobj_rx->umem, true); + + if (ret) + return ret; + ifobject->xsk = &ifobject->xsk_arr[0]; + ifobject->xskmap = test->ifobj_rx->xskmap; + memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info)); + ifobject->umem->base_addr = 0; + + return 0; +} + +static int xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, + bool fill_up) +{ + u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM; + u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts; + int ret; + + if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) + buffers_to_fill = umem->num_frames; + else + buffers_to_fill = umem->fill_size; + + ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx); + if (ret != buffers_to_fill) + return -ENOSPC; + + while (filled < buffers_to_fill) { + struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts); + u64 addr; + u32 i; + + for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) { + if (!pkt) { + if (!fill_up) + break; + addr = filled * umem->frame_size + umem->base_addr; + } else if (pkt->offset >= 0) { + addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem); + } else { + addr = pkt->offset + umem_alloc_buffer(umem); + } + + *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; + if (++filled >= buffers_to_fill) + break; + } + } + xsk_ring_prod__submit(&umem->fq, filled); + xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled); + + pkt_stream_reset(pkt_stream); + umem_reset_alloc(umem); + + return 0; +} + +static int thread_common_ops(struct test_spec *test, struct ifobject *ifobject) +{ + LIBBPF_OPTS(bpf_xdp_query_opts, opts); + int mmap_flags; + u64 umem_sz; + void *bufs; + int ret; + u32 i; + + umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; + mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; + + if (ifobject->umem->unaligned_mode) + mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB; + + if (ifobject->shared_umem) + umem_sz *= 2; + + bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); + if (bufs == MAP_FAILED) + return -errno; + + ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz); + if (ret) + return ret; + + ret = xsk_configure(test, ifobject, ifobject->umem, false); + if (ret) + return ret; + + ifobject->xsk = &ifobject->xsk_arr[0]; + + if (!ifobject->rx_on) + return 0; + + ret = xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream, + ifobject->use_fill_ring); + if (ret) + return ret; + + for (i = 0; i < test->nb_sockets; i++) { + ifobject->xsk = &ifobject->xsk_arr[i]; + ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i); + if (ret) + return ret; + } + + return 0; +} + +void *worker_testapp_validate_tx(void *arg) +{ + struct test_spec *test = (struct test_spec *)arg; + struct ifobject *ifobject = test->ifobj_tx; + int err; + + if (test->current_step == 1) { + if (!ifobject->shared_umem) { + if (thread_common_ops(test, ifobject)) { + test->fail = true; + 
pthread_exit(NULL); + } + } else { + if (thread_common_ops_tx(test, ifobject)) { + test->fail = true; + pthread_exit(NULL); + } + } + } + + err = send_pkts(test, ifobject); + + if (!err && ifobject->validation_func) + err = ifobject->validation_func(ifobject); + if (err) + test->fail = true; + + pthread_exit(NULL); +} + +void *worker_testapp_validate_rx(void *arg) +{ + struct test_spec *test = (struct test_spec *)arg; + struct ifobject *ifobject = test->ifobj_rx; + int err; + + if (test->current_step == 1) { + err = thread_common_ops(test, ifobject); + } else { + xsk_clear_xskmap(ifobject->xskmap); + err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0); + if (err) + ksft_print_msg("Error: Failed to update xskmap, error %s\n", + strerror(-err)); + } + + pthread_barrier_wait(&barr); + + /* We leave only now in case of error to avoid getting stuck in the barrier */ + if (err) { + test->fail = true; + pthread_exit(NULL); + } + + err = receive_pkts(test); + + if (!err && ifobject->validation_func) + err = ifobject->validation_func(ifobject); + + if (err) { + if (!test->adjust_tail) { + test->fail = true; + } else { + bool supported; + + if (is_adjust_tail_supported(ifobject->xdp_progs, &supported)) + test->fail = true; + else if (!supported) + test->adjust_tail_support = false; + else + test->fail = true; + } + } + + pthread_exit(NULL); +} + +static void testapp_clean_xsk_umem(struct ifobject *ifobj) +{ + u64 umem_sz = ifobj->umem->num_frames * ifobj->umem->frame_size; + + if (ifobj->shared_umem) + umem_sz *= 2; + + umem_sz = ceil_u64(umem_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE; + xsk_umem__delete(ifobj->umem->umem); + munmap(ifobj->umem->buffer, umem_sz); +} + +static void handler(int signum) +{ + pthread_exit(NULL); +} + +static bool xdp_prog_changed_rx(struct test_spec *test) +{ + struct ifobject *ifobj = test->ifobj_rx; + + return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode; +} + +static bool xdp_prog_changed_tx(struct test_spec *test) +{ + struct ifobject *ifobj = test->ifobj_tx; + + return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode; +} + +static int xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog, + struct bpf_map *xskmap, enum test_mode mode) +{ + int err; + + xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode)); + err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode)); + if (err) { + ksft_print_msg("Error attaching XDP program\n"); + return err; + } + + if (ifobj->mode != mode && (mode == TEST_MODE_DRV || mode == TEST_MODE_ZC)) + if (!xsk_is_in_mode(ifobj->ifindex, XDP_FLAGS_DRV_MODE)) { + ksft_print_msg("ERROR: XDP prog not in DRV mode\n"); + return -EINVAL; + } + + ifobj->xdp_prog = xdp_prog; + ifobj->xskmap = xskmap; + ifobj->mode = mode; + + return 0; +} + +static int xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx, + struct ifobject *ifobj_tx) +{ + int err = 0; + + if (xdp_prog_changed_rx(test)) { + err = xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode); + if (err) + return err; + } + + if (!ifobj_tx || ifobj_tx->shared_umem) + return 0; + + if (xdp_prog_changed_tx(test)) + err = xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode); + + return err; +} + +static void clean_sockets(struct test_spec *test, struct ifobject *ifobj) +{ + u32 i; + + if (!ifobj || !test) + return; + + for (i = 0; i < test->nb_sockets; i++) + xsk_socket__delete(ifobj->xsk_arr[i].xsk); +} + +static void clean_umem(struct 
test_spec *test, struct ifobject *ifobj1, struct ifobject *ifobj2) +{ + if (!ifobj1) + return; + + testapp_clean_xsk_umem(ifobj1); + if (ifobj2 && !ifobj2->shared_umem) + testapp_clean_xsk_umem(ifobj2); +} + +static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1, + struct ifobject *ifobj2) +{ + pthread_t t0, t1; + int err; + + if (test->mtu > MAX_ETH_PKT_SIZE) { + if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp || + (ifobj2 && !ifobj2->multi_buff_zc_supp))) { + ksft_print_msg("Multi buffer for zero-copy not supported.\n"); + return TEST_SKIP; + } + if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp || + (ifobj2 && !ifobj2->multi_buff_supp))) { + ksft_print_msg("Multi buffer not supported.\n"); + return TEST_SKIP; + } + } + err = test_spec_set_mtu(test, test->mtu); + if (err) { + ksft_print_msg("Error, could not set mtu.\n"); + return TEST_FAILURE; + } + + if (ifobj2) { + if (pthread_barrier_init(&barr, NULL, 2)) + return TEST_FAILURE; + pkt_stream_reset(ifobj2->xsk->pkt_stream); + } + + test->current_step++; + pkt_stream_reset(ifobj1->xsk->pkt_stream); + pkts_in_flight = 0; + + signal(SIGUSR1, handler); + /* Spawn RX thread */ + pthread_create(&t0, NULL, ifobj1->func_ptr, test); + + if (ifobj2) { + pthread_barrier_wait(&barr); + if (pthread_barrier_destroy(&barr)) { + pthread_kill(t0, SIGUSR1); + clean_sockets(test, ifobj1); + clean_umem(test, ifobj1, NULL); + return TEST_FAILURE; + } + + /* Spawn TX thread */ + pthread_create(&t1, NULL, ifobj2->func_ptr, test); + + pthread_join(t1, NULL); + } + + if (!ifobj2) + pthread_kill(t0, SIGUSR1); + else + pthread_join(t0, NULL); + + if (test->total_steps == test->current_step || test->fail) { + clean_sockets(test, ifobj1); + clean_sockets(test, ifobj2); + clean_umem(test, ifobj1, ifobj2); + } + + if (test->fail) + return TEST_FAILURE; + + return TEST_PASS; +} + +static int testapp_validate_traffic(struct test_spec *test) +{ + struct ifobject *ifobj_rx = test->ifobj_rx; + struct ifobject *ifobj_tx = test->ifobj_tx; + + if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) || + (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) { + ksft_print_msg("No huge pages present.\n"); + return TEST_SKIP; + } + + if (test->set_ring) { + if (ifobj_tx->hw_ring_size_supp) { + if (set_ring_size(ifobj_tx)) { + ksft_print_msg("Failed to change HW ring size.\n"); + return TEST_FAILURE; + } + } else { + ksft_print_msg("Changing HW ring size not supported.\n"); + return TEST_SKIP; + } + } + + if (xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx)) + return TEST_FAILURE; + return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx); +} + +static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj) +{ + return __testapp_validate_traffic(test, ifobj, NULL); +} + +int testapp_teardown(struct test_spec *test) +{ + int i; + + for (i = 0; i < MAX_TEARDOWN_ITER; i++) { + if (testapp_validate_traffic(test)) + return TEST_FAILURE; + test_spec_reset(test); + } + + return TEST_PASS; +} + +static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) +{ + thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr; + struct ifobject *tmp_ifobj = (*ifobj1); + + (*ifobj1)->func_ptr = (*ifobj2)->func_ptr; + (*ifobj2)->func_ptr = tmp_func_ptr; + + *ifobj1 = *ifobj2; + *ifobj2 = tmp_ifobj; +} + +int testapp_bidirectional(struct test_spec *test) +{ + int res; + + test->ifobj_tx->rx_on = true; + test->ifobj_rx->tx_on = true; + test->total_steps = 2; + if
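+ /* step 1: run traffic in the default TX->RX direction before swapping */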
(testapp_validate_traffic(test)) + return TEST_FAILURE; + + print_verbose("Switching Tx/Rx direction\n"); + swap_directions(&test->ifobj_rx, &test->ifobj_tx); + res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx); + + swap_directions(&test->ifobj_rx, &test->ifobj_tx); + return res; +} + +static int swap_xsk_resources(struct test_spec *test) +{ + int ret; + + test->ifobj_tx->xsk_arr[0].pkt_stream = NULL; + test->ifobj_rx->xsk_arr[0].pkt_stream = NULL; + test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default; + test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default; + test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1]; + test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1]; + + ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0); + if (ret) + return TEST_FAILURE; + + return TEST_PASS; +} + +int testapp_xdp_prog_cleanup(struct test_spec *test) +{ + test->total_steps = 2; + test->nb_sockets = 2; + if (testapp_validate_traffic(test)) + return TEST_FAILURE; + + if (swap_xsk_resources(test)) { + clean_sockets(test, test->ifobj_rx); + clean_sockets(test, test->ifobj_tx); + clean_umem(test, test->ifobj_rx, test->ifobj_tx); + return TEST_FAILURE; + } + + return testapp_validate_traffic(test); +} + +int testapp_headroom(struct test_spec *test) +{ + test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE; + return testapp_validate_traffic(test); +} + +int testapp_stats_rx_dropped(struct test_spec *test) +{ + if (test->mode == TEST_MODE_ZC) { + ksft_print_msg("Can not run RX_DROPPED test for ZC mode\n"); + return TEST_SKIP; + } + + if (pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0)) + return TEST_FAILURE; + test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - + XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3; + if (pkt_stream_receive_half(test)) + return TEST_FAILURE; + test->ifobj_rx->validation_func = validate_rx_dropped; + return testapp_validate_traffic(test); +} + +int testapp_stats_tx_invalid_descs(struct test_spec *test) +{ + if (pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0)) + return TEST_FAILURE; + test->ifobj_tx->validation_func = validate_tx_invalid_descs; + return testapp_validate_traffic(test); +} + +int testapp_stats_rx_full(struct test_spec *test) +{ + if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE)) + return TEST_FAILURE; + test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + + test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; + test->ifobj_rx->release_rx = false; + test->ifobj_rx->validation_func = validate_rx_full; + return testapp_validate_traffic(test); +} + +int testapp_stats_fill_empty(struct test_spec *test) +{ + if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE)) + return TEST_FAILURE; + test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); + + test->ifobj_rx->use_fill_ring = false; + test->ifobj_rx->validation_func = validate_fill_empty; + return testapp_validate_traffic(test); +} + +int testapp_send_receive_unaligned(struct test_spec *test) +{ + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + /* Let half of the packets straddle a 4K buffer boundary */ + if (pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2)) + return TEST_FAILURE; + + return testapp_validate_traffic(test); +} + +int testapp_send_receive_unaligned_mb(struct test_spec *test) +{ + test->mtu 
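+ /* jumbo MTU so each packet spans several umem frames (multi-buffer) */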
= MAX_ETH_JUMBO_SIZE; + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE)) + return TEST_FAILURE; + return testapp_validate_traffic(test); +} + +int testapp_single_pkt(struct test_spec *test) +{ + struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}}; + + if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts))) + return TEST_FAILURE; + return testapp_validate_traffic(test); +} + +int testapp_send_receive_mb(struct test_spec *test) +{ + test->mtu = MAX_ETH_JUMBO_SIZE; + if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE)) + return TEST_FAILURE; + + return testapp_validate_traffic(test); +} + +int testapp_invalid_desc_mb(struct test_spec *test) +{ + struct xsk_umem_info *umem = test->ifobj_tx->umem; + u64 umem_size = umem->num_frames * umem->frame_size; + struct pkt pkts[] = { + /* Valid packet for synch to start with */ + {0, MIN_PKT_SIZE, 0, true, 0}, + /* Zero frame len is not legal */ + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {0, 0, 0, false, 0}, + /* Invalid address in the second frame */ + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + /* Invalid len in the middle */ + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + /* Invalid options in the middle */ + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION}, + /* Transmit 2 frags, receive 3 */ + {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD}, + {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0}, + /* Middle frame crosses chunk boundary with small length */ + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0}, + /* Valid packet for synch so that something is received */ + {0, MIN_PKT_SIZE, 0, true, 0}}; + + if (umem->unaligned_mode) { + /* Crossing a chunk boundary allowed */ + pkts[12].valid = true; + pkts[13].valid = true; + } + + test->mtu = MAX_ETH_JUMBO_SIZE; + if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts))) + return TEST_FAILURE; + return testapp_validate_traffic(test); +} + +int testapp_invalid_desc(struct test_spec *test) +{ + struct xsk_umem_info *umem = test->ifobj_tx->umem; + u64 umem_size = umem->num_frames * umem->frame_size; + struct pkt pkts[] = { + /* Zero packet address allowed */ + {0, MIN_PKT_SIZE, 0, true}, + /* Allowed packet */ + {0, MIN_PKT_SIZE, 0, true}, + /* Straddling the start of umem */ + {-2, MIN_PKT_SIZE, 0, false}, + /* Packet too large */ + {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, + /* Up to end of umem allowed */ + {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true}, + /* After umem ends */ + {umem_size, MIN_PKT_SIZE, 0, false}, + /* Straddle the end of umem */ + {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false}, + /* Straddle a 4K boundary */ + {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false}, + /* Straddle a 2K boundary */ + {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true}, + /* Valid packet for synch so that something is received */ + {0, MIN_PKT_SIZE, 0, true}}; + + if (umem->unaligned_mode) { + /* Crossing a page boundary allowed */ + pkts[7].valid = true; + } + if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { + /* Crossing a 2K frame size boundary not allowed */ 
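+ /* With MIN_PKT_SIZE == 64 (see test_xsk.h), pkts[8] spans addresses 0x7e0..0x820: that fits inside one default 4K frame, but crosses a frame boundary once frame_size is halved to 2048. */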
+ pkts[8].valid = false; + } + + if (test->ifobj_tx->shared_umem) { + pkts[4].offset += umem_size; + pkts[5].offset += umem_size; + pkts[6].offset += umem_size; + } + + if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts))) + return TEST_FAILURE; + return testapp_validate_traffic(test); +} + +int testapp_xdp_drop(struct test_spec *test) +{ + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop, + skel_rx->maps.xsk, skel_tx->maps.xsk); + + if (pkt_stream_receive_half(test)) + return TEST_FAILURE; + return testapp_validate_traffic(test); +} + +int testapp_xdp_metadata_copy(struct test_spec *test) +{ + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata, + skel_tx->progs.xsk_xdp_populate_metadata, + skel_rx->maps.xsk, skel_tx->maps.xsk); + test->ifobj_rx->use_metadata = true; + + skel_rx->bss->count = 0; + + return testapp_validate_traffic(test); +} + +int testapp_xdp_shared_umem(struct test_spec *test) +{ + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + int ret; + + test->total_steps = 1; + test->nb_sockets = 2; + + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem, + skel_tx->progs.xsk_xdp_shared_umem, + skel_rx->maps.xsk, skel_tx->maps.xsk); + + if (pkt_stream_even_odd_sequence(test)) + return TEST_FAILURE; + + ret = testapp_validate_traffic(test); + + release_even_odd_sequence(test); + + return ret; +} + +int testapp_poll_txq_tmout(struct test_spec *test) +{ + test->ifobj_tx->use_poll = true; + /* Create an invalid frame by setting both the umem frame_size and the pkt length to 2048 */ + test->ifobj_tx->umem->frame_size = 2048; + if (pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048)) + return TEST_FAILURE; + return testapp_validate_traffic_single_thread(test, test->ifobj_tx); +} + +int testapp_poll_rxq_tmout(struct test_spec *test) +{ + test->ifobj_rx->use_poll = true; + return testapp_validate_traffic_single_thread(test, test->ifobj_rx); +} + +int testapp_too_many_frags(struct test_spec *test) +{ + struct pkt *pkts; + u32 max_frags, i; + int ret = TEST_FAILURE; + + if (test->mode == TEST_MODE_ZC) { + max_frags = test->ifobj_tx->xdp_zc_max_segs; + } else { + max_frags = get_max_skb_frags(); + if (!max_frags) { + ksft_print_msg("Can't get MAX_SKB_FRAGS from system, using default (17)\n"); + max_frags = 17; + } + max_frags += 1; + } + + pkts = calloc(2 * max_frags + 2, sizeof(struct pkt)); + if (!pkts) + return TEST_FAILURE; + + test->mtu = MAX_ETH_JUMBO_SIZE; + + /* Valid packet for synch */ + pkts[0].len = MIN_PKT_SIZE; + pkts[0].valid = true; + + /* One valid packet with the max number of frags */ + for (i = 1; i < max_frags + 1; i++) { + pkts[i].len = MIN_PKT_SIZE; + pkts[i].options = XDP_PKT_CONTD; + pkts[i].valid = true; + } + pkts[max_frags].options = 0; + + /* An invalid packet with the max number of frags that still signals packet + * continuation on the last frag + */ + for (i = max_frags + 1; i < 2 * max_frags + 1; i++) { + pkts[i].len = MIN_PKT_SIZE; + pkts[i].options = XDP_PKT_CONTD; + pkts[i].valid = false; + } + + /* Valid packet for synch */ + pkts[2 * max_frags + 1].len = MIN_PKT_SIZE; + pkts[2 * max_frags + 1].valid = true; + + if (pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2)) { + free(pkts); +
return TEST_FAILURE; + } + + ret = testapp_validate_traffic(test); + free(pkts); + return ret; +} + +static int xsk_load_xdp_programs(struct ifobject *ifobj) +{ + ifobj->xdp_progs = xsk_xdp_progs__open_and_load(); + if (libbpf_get_error(ifobj->xdp_progs)) + return libbpf_get_error(ifobj->xdp_progs); + + return 0; +} + +/* Simple test */ +static bool hugepages_present(void) +{ + size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE; + void *bufs; + + bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB); + if (bufs == MAP_FAILED) + return false; + + mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE; + munmap(bufs, mmap_sz); + return true; +} + +int init_iface(struct ifobject *ifobj, thread_func_t func_ptr) +{ + LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); + int err; + + ifobj->func_ptr = func_ptr; + + err = xsk_load_xdp_programs(ifobj); + if (err) { + ksft_print_msg("Error loading XDP program\n"); + return err; + } + + if (hugepages_present()) + ifobj->unaligned_supp = true; + + err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts); + if (err) { + ksft_print_msg("Error querying XDP capabilities\n"); + return err; + } + if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG) + ifobj->multi_buff_supp = true; + if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) { + if (query_opts.xdp_zc_max_segs > 1) { + ifobj->multi_buff_zc_supp = true; + ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs; + } else { + ifobj->xdp_zc_max_segs = 0; + } + } + + return 0; +} + +int testapp_send_receive(struct test_spec *test) +{ + return testapp_validate_traffic(test); +} + +int testapp_send_receive_2k_frame(struct test_spec *test) +{ + test->ifobj_tx->umem->frame_size = 2048; + test->ifobj_rx->umem->frame_size = 2048; + if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE)) + return TEST_FAILURE; + return testapp_validate_traffic(test); +} + +int testapp_poll_rx(struct test_spec *test) +{ + test->ifobj_rx->use_poll = true; + return testapp_validate_traffic(test); +} + +int testapp_poll_tx(struct test_spec *test) +{ + test->ifobj_tx->use_poll = true; + return testapp_validate_traffic(test); +} + +int testapp_aligned_inv_desc(struct test_spec *test) +{ + return testapp_invalid_desc(test); +} + +int testapp_aligned_inv_desc_2k_frame(struct test_spec *test) +{ + test->ifobj_tx->umem->frame_size = 2048; + test->ifobj_rx->umem->frame_size = 2048; + return testapp_invalid_desc(test); +} + +int testapp_unaligned_inv_desc(struct test_spec *test) +{ + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + return testapp_invalid_desc(test); +} + +int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test) +{ + u64 page_size, umem_size; + + /* Odd frame size so the UMEM doesn't end near a page boundary. */ + test->ifobj_tx->umem->frame_size = 4001; + test->ifobj_rx->umem->frame_size = 4001; + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + /* This test exercises descriptors that straddle the end of + * the UMEM but not a page.
+ */ + page_size = sysconf(_SC_PAGESIZE); + umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; + assert(umem_size % page_size > MIN_PKT_SIZE); + assert(umem_size % page_size < page_size - MIN_PKT_SIZE); + + return testapp_invalid_desc(test); +} + +int testapp_aligned_inv_desc_mb(struct test_spec *test) +{ + return testapp_invalid_desc_mb(test); +} + +int testapp_unaligned_inv_desc_mb(struct test_spec *test) +{ + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + return testapp_invalid_desc_mb(test); +} + +int testapp_xdp_metadata(struct test_spec *test) +{ + return testapp_xdp_metadata_copy(test); +} + +int testapp_xdp_metadata_mb(struct test_spec *test) +{ + test->mtu = MAX_ETH_JUMBO_SIZE; + return testapp_xdp_metadata_copy(test); +} + +int testapp_hw_sw_min_ring_size(struct test_spec *test) +{ + int ret; + + test->set_ring = true; + test->total_steps = 2; + test->ifobj_tx->ring.tx_pending = DEFAULT_BATCH_SIZE; + test->ifobj_tx->ring.rx_pending = DEFAULT_BATCH_SIZE * 2; + test->ifobj_tx->xsk->batch_size = 1; + test->ifobj_rx->xsk->batch_size = 1; + ret = testapp_validate_traffic(test); + if (ret) + return ret; + + /* Set batch size to hw_ring_size - 1 */ + test->ifobj_tx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1; + test->ifobj_rx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1; + return testapp_validate_traffic(test); +} + +int testapp_hw_sw_max_ring_size(struct test_spec *test) +{ + u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4; + int ret; + + test->set_ring = true; + test->total_steps = 2; + test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending; + test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending; + test->ifobj_rx->umem->num_frames = max_descs; + test->ifobj_rx->umem->fill_size = max_descs; + test->ifobj_rx->umem->comp_size = max_descs; + test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + + ret = testapp_validate_traffic(test); + if (ret) + return ret; + + /* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when + * updating the Rx HW tail register. 
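+ * For illustration (assuming an ice NIC that reports tx_max_pending == 8160), the resulting batch_size of 8152 is a multiple of 8, so the truncated tail writes still advance past every completed batch.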
+ */ + test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8; + test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8; + if (pkt_stream_replace(test, max_descs, MIN_PKT_SIZE)) { + clean_sockets(test, test->ifobj_tx); + clean_sockets(test, test->ifobj_rx); + clean_umem(test, test->ifobj_rx, test->ifobj_tx); + return TEST_FAILURE; + } + + return testapp_validate_traffic(test); +} + +static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value) +{ + struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; + struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; + + test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail, + skel_tx->progs.xsk_xdp_adjust_tail, + skel_rx->maps.xsk, skel_tx->maps.xsk); + + skel_rx->bss->adjust_value = adjust_value; + + return testapp_validate_traffic(test); +} + +static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len) +{ + int ret; + + test->adjust_tail_support = true; + test->adjust_tail = true; + test->total_steps = 1; + + ret = pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len); + if (ret) + return TEST_FAILURE; + + ret = pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value); + if (ret) + return TEST_FAILURE; + + ret = testapp_xdp_adjust_tail(test, value); + if (ret) + return ret; + + if (!test->adjust_tail_support) { + ksft_print_msg("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n", + mode_string(test), busy_poll_string(test)); + return TEST_SKIP; + } + + return 0; +} + +int testapp_adjust_tail_shrink(struct test_spec *test) +{ + /* Shrink by 4 bytes for testing purposes */ + return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2); +} + +int testapp_adjust_tail_shrink_mb(struct test_spec *test) +{ + test->mtu = MAX_ETH_JUMBO_SIZE; + /* Shrink by the frag size */ + return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2); +} + +int testapp_adjust_tail_grow(struct test_spec *test) +{ + /* Grow by 4 bytes for testing purposes */ + return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2); +} + +int testapp_adjust_tail_grow_mb(struct test_spec *test) +{ + test->mtu = MAX_ETH_JUMBO_SIZE; + /* Grow by (frag_size - last_frag_size) - 1 to stay inside the last fragment */ + return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1, + XSK_UMEM__LARGE_FRAME_SIZE * 2); +} + +int testapp_tx_queue_consumer(struct test_spec *test) +{ + int nr_packets; + + if (test->mode == TEST_MODE_ZC) { + ksft_print_msg("Can not run TX_QUEUE_CONSUMER test for ZC mode\n"); + return TEST_SKIP; + } + + nr_packets = MAX_TX_BUDGET_DEFAULT + 1; + if (pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE)) + return TEST_FAILURE; + test->ifobj_tx->xsk->batch_size = nr_packets; + test->ifobj_tx->xsk->check_consumer = true; + + return testapp_validate_traffic(test); +} + +struct ifobject *ifobject_create(void) +{ + struct ifobject *ifobj; + + ifobj = calloc(1, sizeof(struct ifobject)); + if (!ifobj) + return NULL; + + ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr)); + if (!ifobj->xsk_arr) + goto out_xsk_arr; + + ifobj->umem = calloc(1, sizeof(*ifobj->umem)); + if (!ifobj->umem) + goto out_umem; + + return ifobj; + +out_umem: + free(ifobj->xsk_arr); +out_xsk_arr: + free(ifobj); + return NULL; +} + +void ifobject_delete(struct ifobject *ifobj) +{ + free(ifobj->umem); + free(ifobj->xsk_arr); + free(ifobj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.h
b/tools/testing/selftests/bpf/prog_tests/test_xsk.h new file mode 100644 index 000000000000..8fc78a057de0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.h @@ -0,0 +1,298 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef TEST_XSK_H_ +#define TEST_XSK_H_ + +#include <linux/ethtool.h> +#include <linux/if_xdp.h> + +#include "../kselftest.h" +#include "xsk.h" + +#ifndef SO_PREFER_BUSY_POLL +#define SO_PREFER_BUSY_POLL 69 +#endif + +#ifndef SO_BUSY_POLL_BUDGET +#define SO_BUSY_POLL_BUDGET 70 +#endif + +#define TEST_PASS 0 +#define TEST_FAILURE -1 +#define TEST_CONTINUE 1 +#define TEST_SKIP 2 + +#define DEFAULT_PKT_CNT (4 * 1024) +#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4) +#define HUGEPAGE_SIZE (2 * 1024 * 1024) +#define MIN_PKT_SIZE 64 +#define MAX_ETH_PKT_SIZE 1518 +#define MAX_INTERFACE_NAME_CHARS 16 +#define MAX_TEST_NAME_SIZE 48 +#define SOCK_RECONF_CTR 10 +#define USLEEP_MAX 10000 + +extern bool opt_verbose; +#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0) + + +static inline u32 ceil_u32(u32 a, u32 b) +{ + return (a + b - 1) / b; +} + +static inline u64 ceil_u64(u64 a, u64 b) +{ + return (a + b - 1) / b; +} + +/* Simple test */ +enum test_mode { + TEST_MODE_SKB, + TEST_MODE_DRV, + TEST_MODE_ZC, + TEST_MODE_ALL +}; + +struct ifobject; +struct test_spec; +typedef int (*validation_func_t)(struct ifobject *ifobj); +typedef void *(*thread_func_t)(void *arg); +typedef int (*test_func_t)(struct test_spec *test); + +struct xsk_socket_info { + struct xsk_ring_cons rx; + struct xsk_ring_prod tx; + struct xsk_umem_info *umem; + struct xsk_socket *xsk; + struct pkt_stream *pkt_stream; + u32 outstanding_tx; + u32 rxqsize; + u32 batch_size; + u8 dst_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; + bool check_consumer; +}; + +int kick_rx(struct xsk_socket_info *xsk); +int kick_tx(struct xsk_socket_info *xsk); + +struct xsk_umem_info { + struct xsk_ring_prod fq; + struct xsk_ring_cons cq; + struct xsk_umem *umem; + u64 next_buffer; + u32 num_frames; + u32 frame_headroom; + void *buffer; + u32 frame_size; + u32 base_addr; + u32 fill_size; + u32 comp_size; + bool unaligned_mode; +}; + +struct set_hw_ring { + u32 default_tx; + u32 default_rx; +}; + +int hw_ring_size_reset(struct ifobject *ifobj); + +struct ifobject { + char ifname[MAX_INTERFACE_NAME_CHARS]; + struct xsk_socket_info *xsk; + struct xsk_socket_info *xsk_arr; + struct xsk_umem_info *umem; + thread_func_t func_ptr; + validation_func_t validation_func; + struct xsk_xdp_progs *xdp_progs; + struct bpf_map *xskmap; + struct bpf_program *xdp_prog; + struct ethtool_ringparam ring; + struct set_hw_ring set_ring; + enum test_mode mode; + int ifindex; + int mtu; + u32 bind_flags; + u32 xdp_zc_max_segs; + bool tx_on; + bool rx_on; + bool use_poll; + bool busy_poll; + bool use_fill_ring; + bool release_rx; + bool shared_umem; + bool use_metadata; + bool unaligned_supp; + bool multi_buff_supp; + bool multi_buff_zc_supp; + bool hw_ring_size_supp; +}; +struct ifobject *ifobject_create(void); +void ifobject_delete(struct ifobject *ifobj); +int init_iface(struct ifobject *ifobj, thread_func_t func_ptr); + +int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer, u64 size); +int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, + struct ifobject *ifobject, bool shared); + + +struct pkt { + int offset; + u32 len; + u32 pkt_nb; + bool valid; + u16 options; +}; + +struct pkt_stream { + u32 nb_pkts; + u32 current_pkt_nb; + struct pkt *pkts; + u32 
max_pkt_len; + u32 nb_rx_pkts; + u32 nb_valid_entries; + bool verbatim; +}; + +static inline bool pkt_continues(u32 options) +{ + return options & XDP_PKT_CONTD; +} + +struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len); +void pkt_stream_delete(struct pkt_stream *pkt_stream); +void pkt_stream_reset(struct pkt_stream *pkt_stream); +void pkt_stream_restore_default(struct test_spec *test); + +struct test_spec { + struct ifobject *ifobj_tx; + struct ifobject *ifobj_rx; + struct pkt_stream *tx_pkt_stream_default; + struct pkt_stream *rx_pkt_stream_default; + struct bpf_program *xdp_prog_rx; + struct bpf_program *xdp_prog_tx; + struct bpf_map *xskmap_rx; + struct bpf_map *xskmap_tx; + test_func_t test_func; + int mtu; + u16 total_steps; + u16 current_step; + u16 nb_sockets; + bool fail; + bool set_ring; + bool adjust_tail; + bool adjust_tail_support; + enum test_mode mode; + char name[MAX_TEST_NAME_SIZE]; +}; + +#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : "" +static inline char *mode_string(struct test_spec *test) +{ + switch (test->mode) { + case TEST_MODE_SKB: + return "SKB"; + case TEST_MODE_DRV: + return "DRV"; + case TEST_MODE_ZC: + return "ZC"; + default: + return "BOGUS"; + } +} + +void test_init(struct test_spec *test, struct ifobject *ifobj_tx, + struct ifobject *ifobj_rx, enum test_mode mode, + const struct test_spec *test_to_run); + +int testapp_adjust_tail_grow(struct test_spec *test); +int testapp_adjust_tail_grow_mb(struct test_spec *test); +int testapp_adjust_tail_shrink(struct test_spec *test); +int testapp_adjust_tail_shrink_mb(struct test_spec *test); +int testapp_aligned_inv_desc(struct test_spec *test); +int testapp_aligned_inv_desc_2k_frame(struct test_spec *test); +int testapp_aligned_inv_desc_mb(struct test_spec *test); +int testapp_bidirectional(struct test_spec *test); +int testapp_headroom(struct test_spec *test); +int testapp_hw_sw_max_ring_size(struct test_spec *test); +int testapp_hw_sw_min_ring_size(struct test_spec *test); +int testapp_poll_rx(struct test_spec *test); +int testapp_poll_rxq_tmout(struct test_spec *test); +int testapp_poll_tx(struct test_spec *test); +int testapp_poll_txq_tmout(struct test_spec *test); +int testapp_send_receive(struct test_spec *test); +int testapp_send_receive_2k_frame(struct test_spec *test); +int testapp_send_receive_mb(struct test_spec *test); +int testapp_send_receive_unaligned(struct test_spec *test); +int testapp_send_receive_unaligned_mb(struct test_spec *test); +int testapp_single_pkt(struct test_spec *test); +int testapp_stats_fill_empty(struct test_spec *test); +int testapp_stats_rx_dropped(struct test_spec *test); +int testapp_stats_tx_invalid_descs(struct test_spec *test); +int testapp_stats_rx_full(struct test_spec *test); +int testapp_teardown(struct test_spec *test); +int testapp_too_many_frags(struct test_spec *test); +int testapp_tx_queue_consumer(struct test_spec *test); +int testapp_unaligned_inv_desc(struct test_spec *test); +int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test); +int testapp_unaligned_inv_desc_mb(struct test_spec *test); +int testapp_xdp_drop(struct test_spec *test); +int testapp_xdp_metadata(struct test_spec *test); +int testapp_xdp_metadata_mb(struct test_spec *test); +int testapp_xdp_prog_cleanup(struct test_spec *test); +int testapp_xdp_shared_umem(struct test_spec *test); + +void *worker_testapp_validate_rx(void *arg); +void *worker_testapp_validate_tx(void *arg); + +static const struct test_spec tests[] = { + {.name = "SEND_RECEIVE", 
.test_func = testapp_send_receive}, + {.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame}, + {.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt}, + {.name = "POLL_RX", .test_func = testapp_poll_rx}, + {.name = "POLL_TX", .test_func = testapp_poll_tx}, + {.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout}, + {.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout}, + {.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc}, + {.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame}, + {.name = "UMEM_HEADROOM", .test_func = testapp_headroom}, + {.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional}, + {.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped}, + {.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs}, + {.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full}, + {.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty}, + {.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup}, + {.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop}, + {.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem}, + {.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata}, + {.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb}, + {.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb}, + {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags}, + {.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink}, + {.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer}, + }; + +static const struct test_spec ci_skip_tests[] = { + /* Flaky tests */ + {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb}, + {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow}, + {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb}, + {.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb}, + /* Tests with huge page dependency */ + {.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned}, + {.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc}, + {.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE", + .test_func = testapp_unaligned_inv_desc_4001_frame}, + {.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS", + .test_func = testapp_send_receive_unaligned_mb}, + {.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb}, + /* Test with HW ring size dependency */ + {.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size}, + {.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size}, + /* Too long test */ + {.name = "TEARDOWN", .test_func = testapp_teardown}, +}; + + +#endif /* TEST_XSK_H_ */ diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 28e81161e6fc..4b4b081b46cc 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -7,6 +7,7 @@ #include "verifier_arena.skel.h" #include "verifier_arena_large.skel.h" #include "verifier_array_access.skel.h" +#include "verifier_async_cb_context.skel.h" #include "verifier_basic_stack.skel.h" #include "verifier_bitfield_write.skel.h" #include "verifier_bounds.skel.h" @@ -34,6 +35,7 @@ #include "verifier_global_subprogs.skel.h" #include "verifier_global_ptr_args.skel.h" #include "verifier_gotol.skel.h" 
+#include "verifier_gotox.skel.h" #include "verifier_helper_access_var_len.skel.h" #include "verifier_helper_packet_access.skel.h" #include "verifier_helper_restricted.skel.h" @@ -172,6 +174,7 @@ void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); } void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); } void test_verifier_global_ptr_args(void) { RUN(verifier_global_ptr_args); } void test_verifier_gotol(void) { RUN(verifier_gotol); } +void test_verifier_gotox(void) { RUN(verifier_gotox); } void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); } void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); } void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); } @@ -280,6 +283,7 @@ void test_verifier_array_access(void) verifier_array_access__elf_bytes, init_array_access_maps); } +void test_verifier_async_cb_context(void) { RUN(verifier_async_cb_context); } static int init_value_ptr_arith_maps(struct bpf_object *obj) { diff --git a/tools/testing/selftests/bpf/prog_tests/wq.c b/tools/testing/selftests/bpf/prog_tests/wq.c index 99e438fe12ac..15c67d23128b 100644 --- a/tools/testing/selftests/bpf/prog_tests/wq.c +++ b/tools/testing/selftests/bpf/prog_tests/wq.c @@ -38,3 +38,59 @@ void serial_test_failures_wq(void) { RUN_TESTS(wq_failures); } + +static void test_failure_map_no_btf(void) +{ + struct wq *skel = NULL; + char log[8192]; + const struct bpf_insn *insns; + size_t insn_cnt; + int ret, err, map_fd; + LIBBPF_OPTS(bpf_prog_load_opts, opts, .log_size = sizeof(log), .log_buf = log, + .log_level = 2); + + skel = wq__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + err = bpf_object__prepare(skel->obj); + if (!ASSERT_OK(err, "skel__prepare")) + goto out; + + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "map_no_btf", sizeof(__u32), sizeof(__u64), 100, + NULL); + if (!ASSERT_GT(map_fd, -1, "map create")) + goto out; + + err = bpf_map__reuse_fd(skel->maps.array, map_fd); + if (!ASSERT_OK(err, "map reuse fd")) { + close(map_fd); + goto out; + } + + insns = bpf_program__insns(skel->progs.test_map_no_btf); + if (!ASSERT_OK_PTR(insns, "insns ptr")) + goto out; + + insn_cnt = bpf_program__insn_cnt(skel->progs.test_map_no_btf); + if (!ASSERT_GT(insn_cnt, 0u, "insn cnt")) + goto out; + + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); + if (!ASSERT_LT(ret, 0, "prog load failed")) { + if (ret > 0) + close(ret); + goto out; + } + + ASSERT_HAS_SUBSTR(log, "map 'map_no_btf' has to have BTF in order to use bpf_wq", + "log complains no map BTF"); +out: + wq__destroy(skel); +} + +void test_wq_custom(void) +{ + if (test__start_subtest("test_failure_map_no_btf")) + test_failure_map_no_btf(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 178292d1251a..ee94c281888a 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -124,10 +124,10 @@ static int send_test_packet(int ifindex) int n, sock = -1; __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; - /* The ethernet header is not relevant for this test and doesn't need to - * be meaningful. 
- */ - struct ethhdr eth = { 0 }; + /* We use the Ethernet header only to identify the test packet */ + struct ethhdr eth = { + .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF }, + }; memcpy(packet, ð, sizeof(eth)); memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN); @@ -160,8 +160,16 @@ static int write_test_packet(int tap_fd) __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; int n; - /* The ethernet header doesn't need to be valid for this test */ - memset(packet, 0, sizeof(struct ethhdr)); + /* The Ethernet header is mostly not relevant. We use it to identify the + * test packet and some BPF helpers we exercise expect to operate on + * Ethernet frames carrying IP packets. Pretend that's the case. + */ + struct ethhdr eth = { + .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF }, + .h_proto = htons(ETH_P_IP), + }; + + memcpy(packet, ð, sizeof(eth)); memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN); n = write(tap_fd, packet, sizeof(packet)); @@ -171,31 +179,19 @@ static int write_test_packet(int tap_fd) return 0; } -static void assert_test_result(const struct bpf_map *result_map) -{ - int err; - __u32 map_key = 0; - __u8 map_value[TEST_PAYLOAD_LEN]; - - err = bpf_map__lookup_elem(result_map, &map_key, sizeof(map_key), - &map_value, TEST_PAYLOAD_LEN, BPF_ANY); - if (!ASSERT_OK(err, "lookup test_result")) - return; - - ASSERT_MEMEQ(&map_value, &test_payload, TEST_PAYLOAD_LEN, - "test_result map contains test payload"); -} - -static bool clear_test_result(struct bpf_map *result_map) +static void dump_err_stream(const struct bpf_program *prog) { - const __u8 v[sizeof(test_payload)] = {}; - const __u32 k = 0; - int err; - - err = bpf_map__update_elem(result_map, &k, sizeof(k), v, sizeof(v), BPF_ANY); - ASSERT_OK(err, "update test_result"); + char buf[512]; + int ret; - return err == 0; + ret = 0; + do { + ret = bpf_prog_stream_read(bpf_program__fd(prog), + BPF_STREAM_STDERR, buf, sizeof(buf), + NULL); + if (ret > 0) + fwrite(buf, sizeof(buf[0]), ret, stderr); + } while (ret > 0); } void test_xdp_context_veth(void) @@ -270,11 +266,14 @@ void test_xdp_context_veth(void) if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx")) goto close; + skel->bss->test_pass = false; + ret = send_test_packet(tx_ifindex); if (!ASSERT_OK(ret, "send_test_packet")) goto close; - assert_test_result(skel->maps.test_result); + if (!ASSERT_TRUE(skel->bss->test_pass, "test_pass")) + dump_err_stream(tc_prog); close: close_netns(nstoken); @@ -286,7 +285,7 @@ close: static void test_tuntap(struct bpf_program *xdp_prog, struct bpf_program *tc_prio_1_prog, struct bpf_program *tc_prio_2_prog, - struct bpf_map *result_map) + bool *test_pass) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); @@ -295,8 +294,7 @@ static void test_tuntap(struct bpf_program *xdp_prog, int tap_ifindex; int ret; - if (!clear_test_result(result_map)) - return; + *test_pass = false; ns = netns_new(TAP_NETNS, true); if (!ASSERT_OK_PTR(ns, "create and open ns")) @@ -340,7 +338,8 @@ static void test_tuntap(struct bpf_program *xdp_prog, if (!ASSERT_OK(ret, "write_test_packet")) goto close; - assert_test_result(result_map); + if (!ASSERT_TRUE(*test_pass, "test_pass")) + dump_err_stream(tc_prio_2_prog ? 
: tc_prio_1_prog); close: if (tap_fd >= 0) @@ -411,7 +410,8 @@ static void test_tuntap_mirred(struct bpf_program *xdp_prog, if (!ASSERT_OK(ret, "write_test_packet")) goto close; - ASSERT_TRUE(*test_pass, "test_pass"); + if (!ASSERT_TRUE(*test_pass, "test_pass")) + dump_err_stream(tc_prog); close: if (tap_fd >= 0) @@ -431,61 +431,82 @@ void test_xdp_context_tuntap(void) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls, NULL, /* tc prio 2 */ - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_read")) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_read, NULL, /* tc prio 2 */ - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_slice")) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_slice, NULL, /* tc prio 2 */ - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_write")) test_tuntap(skel->progs.ing_xdp_zalloc_meta, skel->progs.ing_cls_dynptr_write, skel->progs.ing_cls_dynptr_read, - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_slice_rdwr")) test_tuntap(skel->progs.ing_xdp_zalloc_meta, skel->progs.ing_cls_dynptr_slice_rdwr, skel->progs.ing_cls_dynptr_slice, - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_offset")) test_tuntap(skel->progs.ing_xdp_zalloc_meta, skel->progs.ing_cls_dynptr_offset_wr, skel->progs.ing_cls_dynptr_offset_rd, - skel->maps.test_result); + &skel->bss->test_pass); if (test__start_subtest("dynptr_offset_oob")) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_offset_oob, skel->progs.ing_cls, - skel->maps.test_result); - if (test__start_subtest("clone_data_meta_empty_on_data_write")) + &skel->bss->test_pass); + if (test__start_subtest("clone_data_meta_survives_data_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_data_meta_empty_on_data_write, + skel->progs.clone_data_meta_survives_data_write, &skel->bss->test_pass); - if (test__start_subtest("clone_data_meta_empty_on_meta_write")) + if (test__start_subtest("clone_data_meta_survives_meta_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_data_meta_empty_on_meta_write, + skel->progs.clone_data_meta_survives_meta_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_empty_on_data_slice_write")) + if (test__start_subtest("clone_meta_dynptr_survives_data_slice_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_empty_on_data_slice_write, + skel->progs.clone_meta_dynptr_survives_data_slice_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_empty_on_meta_slice_write")) + if (test__start_subtest("clone_meta_dynptr_survives_meta_slice_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_empty_on_meta_slice_write, + skel->progs.clone_meta_dynptr_survives_meta_slice_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_rdonly_before_data_dynptr_write")) + if (test__start_subtest("clone_meta_dynptr_rw_before_data_dynptr_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_rdonly_before_data_dynptr_write, + skel->progs.clone_meta_dynptr_rw_before_data_dynptr_write, &skel->bss->test_pass); - if (test__start_subtest("clone_dynptr_rdonly_before_meta_dynptr_write")) + if (test__start_subtest("clone_meta_dynptr_rw_before_meta_dynptr_write")) test_tuntap_mirred(skel->progs.ing_xdp, - skel->progs.clone_dynptr_rdonly_before_meta_dynptr_write, + 
skel->progs.clone_meta_dynptr_rw_before_meta_dynptr_write, + &skel->bss->test_pass); + /* Tests for BPF helpers which touch headroom */ + if (test__start_subtest("helper_skb_vlan_push_pop")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_vlan_push_pop, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); + if (test__start_subtest("helper_skb_adjust_room")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_adjust_room, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); + if (test__start_subtest("helper_skb_change_head_tail")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_change_head_tail, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); + if (test__start_subtest("helper_skb_change_proto")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.helper_skb_change_proto, + NULL, /* tc prio 2 */ + &skel->bss->test_pass); test_xdp_meta__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/xsk.c b/tools/testing/selftests/bpf/prog_tests/xsk.c new file mode 100644 index 000000000000..dd4c35c0e428 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xsk.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <net/if.h> +#include <stdarg.h> + +#include "network_helpers.h" +#include "test_progs.h" +#include "test_xsk.h" +#include "xsk_xdp_progs.skel.h" + +#define VETH_RX "veth0" +#define VETH_TX "veth1" +#define MTU 1500 + +int setup_veth(bool busy_poll) +{ + SYS(fail, + "ip link add %s numtxqueues 4 numrxqueues 4 type veth peer name %s numtxqueues 4 numrxqueues 4", + VETH_RX, VETH_TX); + SYS(fail, "sysctl -wq net.ipv6.conf.%s.disable_ipv6=1", VETH_RX); + SYS(fail, "sysctl -wq net.ipv6.conf.%s.disable_ipv6=1", VETH_TX); + + if (busy_poll) { + SYS(fail, "echo 2 > /sys/class/net/%s/napi_defer_hard_irqs", VETH_RX); + SYS(fail, "echo 200000 > /sys/class/net/%s/gro_flush_timeout", VETH_RX); + SYS(fail, "echo 2 > /sys/class/net/%s/napi_defer_hard_irqs", VETH_TX); + SYS(fail, "echo 200000 > /sys/class/net/%s/gro_flush_timeout", VETH_TX); + } + + SYS(fail, "ip link set %s mtu %d", VETH_RX, MTU); + SYS(fail, "ip link set %s mtu %d", VETH_TX, MTU); + SYS(fail, "ip link set %s up", VETH_RX); + SYS(fail, "ip link set %s up", VETH_TX); + + return 0; + +fail: + return -1; +} + +void delete_veth(void) +{ + SYS_NOFAIL("ip link del %s", VETH_RX); + SYS_NOFAIL("ip link del %s", VETH_TX); +} + +int configure_ifobj(struct ifobject *tx, struct ifobject *rx) +{ + rx->ifindex = if_nametoindex(VETH_RX); + if (!ASSERT_OK_FD(rx->ifindex, "get RX ifindex")) + return -1; + + tx->ifindex = if_nametoindex(VETH_TX); + if (!ASSERT_OK_FD(tx->ifindex, "get TX ifindex")) + return -1; + + tx->shared_umem = false; + rx->shared_umem = false; + + return 0; +} + +static void test_xsk(const struct test_spec *test_to_run, enum test_mode mode) +{ + struct ifobject *ifobj_tx, *ifobj_rx; + struct test_spec test; + int ret; + + ifobj_tx = ifobject_create(); + if (!ASSERT_OK_PTR(ifobj_tx, "create ifobj_tx")) + return; + + ifobj_rx = ifobject_create(); + if (!ASSERT_OK_PTR(ifobj_rx, "create ifobj_rx")) + goto delete_tx; + + if (!ASSERT_OK(configure_ifobj(ifobj_tx, ifobj_rx), "configure ifobj")) + goto delete_rx; + + ret = get_hw_ring_size(ifobj_tx->ifname, &ifobj_tx->ring); + if (!ret) { + ifobj_tx->hw_ring_size_supp = true; + ifobj_tx->set_ring.default_tx = ifobj_tx->ring.tx_pending; + ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending; + } + + if (!ASSERT_OK(init_iface(ifobj_rx, worker_testapp_validate_rx), "init RX")) + goto delete_rx; + if (!ASSERT_OK(init_iface(ifobj_tx,
worker_testapp_validate_tx), "init TX")) + goto delete_rx; + + test_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]); + + test.tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE); + if (!ASSERT_OK_PTR(test.tx_pkt_stream_default, "TX pkt generation")) + goto delete_rx; + test.rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE); + if (!ASSERT_OK_PTR(test.rx_pkt_stream_default, "RX pkt generation")) + goto delete_rx; + + test_init(&test, ifobj_tx, ifobj_rx, mode, test_to_run); + ret = test.test_func(&test); + if (ret != TEST_SKIP) + ASSERT_OK(ret, "Run test"); + pkt_stream_restore_default(&test); + + if (ifobj_tx->hw_ring_size_supp) + hw_ring_size_reset(ifobj_tx); + + pkt_stream_delete(test.tx_pkt_stream_default); + pkt_stream_delete(test.rx_pkt_stream_default); + xsk_xdp_progs__destroy(ifobj_tx->xdp_progs); + xsk_xdp_progs__destroy(ifobj_rx->xdp_progs); + +delete_rx: + ifobject_delete(ifobj_rx); +delete_tx: + ifobject_delete(ifobj_tx); +} + +void test_ns_xsk_skb(void) +{ + int i; + + if (!ASSERT_OK(setup_veth(false), "setup veth")) + return; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (test__start_subtest(tests[i].name)) + test_xsk(&tests[i], TEST_MODE_SKB); + } + + delete_veth(); +} + +void test_ns_xsk_drv(void) +{ + int i; + + if (!ASSERT_OK(setup_veth(false), "setup veth")) + return; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (test__start_subtest(tests[i].name)) + test_xsk(&tests[i], TEST_MODE_DRV); + } + + delete_veth(); +} diff --git a/tools/testing/selftests/bpf/progs/arena_strsearch.c b/tools/testing/selftests/bpf/progs/arena_strsearch.c new file mode 100644 index 000000000000..ef6b76658f7f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/arena_strsearch.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include "bpf_experimental.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, 100); /* number of pages */ +} arena SEC(".maps"); + +#include "bpf_arena_strsearch.h" + +struct glob_test { + char const __arena *pat, *str; + bool expected; +}; + +static bool test(char const __arena *pat, char const __arena *str, bool expected) +{ + bool match = glob_match(pat, str); + bool success = match == expected; + + /* bpf_printk("glob_match %s %s res %d ok %d", pat, str, match, success); */ + return success; +} + +/* + * The tests are all jammed together in one array to make it simpler + * to place that array in the arena. The obvious + * "array of structures containing char *" has no way to force the + * pointed-to strings to be in a particular section. + * + * Anyway, a test consists of: + * 1. Expected glob_match result: '1' or '0'. + * 2. Pattern to match: null-terminated string + * 3. String to match against: null-terminated string + * + * The list of tests is terminated with a final '\0' instead of + * a glob_match result character.
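+ * + * For example, a hypothetical entry "1" "a?c\0" "abc\0" would encode: expect glob_match("a?c", "abc") to return true.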
+ */ +static const char __arena glob_tests[] = + /* Some basic tests */ + "1" "a\0" "a\0" + "0" "a\0" "b\0" + "0" "a\0" "aa\0" + "0" "a\0" "\0" + "1" "\0" "\0" + "0" "\0" "a\0" + /* Simple character class tests */ + "1" "[a]\0" "a\0" + "0" "[a]\0" "b\0" + "0" "[!a]\0" "a\0" + "1" "[!a]\0" "b\0" + "1" "[ab]\0" "a\0" + "1" "[ab]\0" "b\0" + "0" "[ab]\0" "c\0" + "1" "[!ab]\0" "c\0" + "1" "[a-c]\0" "b\0" + "0" "[a-c]\0" "d\0" + /* Corner cases in character class parsing */ + "1" "[a-c-e-g]\0" "-\0" + "0" "[a-c-e-g]\0" "d\0" + "1" "[a-c-e-g]\0" "f\0" + "1" "[]a-ceg-ik[]\0" "a\0" + "1" "[]a-ceg-ik[]\0" "]\0" + "1" "[]a-ceg-ik[]\0" "[\0" + "1" "[]a-ceg-ik[]\0" "h\0" + "0" "[]a-ceg-ik[]\0" "f\0" + "0" "[!]a-ceg-ik[]\0" "h\0" + "0" "[!]a-ceg-ik[]\0" "]\0" + "1" "[!]a-ceg-ik[]\0" "f\0" + /* Simple wild cards */ + "1" "?\0" "a\0" + "0" "?\0" "aa\0" + "0" "??\0" "a\0" + "1" "?x?\0" "axb\0" + "0" "?x?\0" "abx\0" + "0" "?x?\0" "xab\0" + /* Asterisk wild cards (backtracking) */ + "0" "*??\0" "a\0" + "1" "*??\0" "ab\0" + "1" "*??\0" "abc\0" + "1" "*??\0" "abcd\0" + "0" "??*\0" "a\0" + "1" "??*\0" "ab\0" + "1" "??*\0" "abc\0" + "1" "??*\0" "abcd\0" + "0" "?*?\0" "a\0" + "1" "?*?\0" "ab\0" + "1" "?*?\0" "abc\0" + "1" "?*?\0" "abcd\0" + "1" "*b\0" "b\0" + "1" "*b\0" "ab\0" + "0" "*b\0" "ba\0" + "1" "*b\0" "bb\0" + "1" "*b\0" "abb\0" + "1" "*b\0" "bab\0" + "1" "*bc\0" "abbc\0" + "1" "*bc\0" "bc\0" + "1" "*bc\0" "bbc\0" + "1" "*bc\0" "bcbc\0" + /* Multiple asterisks (complex backtracking) */ + "1" "*ac*\0" "abacadaeafag\0" + "1" "*ac*ae*ag*\0" "abacadaeafag\0" + "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0" + "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0" + "1" "*abcd*\0" "abcabcabcabcdefg\0" + "1" "*ab*cd*\0" "abcabcabcabcdefg\0" + "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0" + "0" "*abcd*\0" "abcabcabcabcefg\0" + "0" "*ab*cd*\0" "abcabcabcabcefg\0"; + +bool skip = false; + +SEC("syscall") +int arena_strsearch(void *ctx) +{ + unsigned successes = 0; + unsigned n = 0; + char const __arena *p = glob_tests; + + /* + * Tests are jammed together in a string. The first byte is '1' + * or '0' to indicate the expected outcome, or '\0' to indicate the + * end of the tests. Then come two null-terminated strings: the + * pattern and the string to match it against. + */ + while (*p) { + bool expected = *p++ & 1; + char const __arena *pat = p; + + cond_break; + p += bpf_arena_strlen(p) + 1; + successes += test(pat, p, expected); + p += bpf_arena_strlen(p) + 1; + n++; + } + + n -= successes; + /* bpf_printk("glob: %u self-tests passed, %u failed\n", successes, n); */ + + return n ? -1 : 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c index 4e51785e7606..9af19dfe4e80 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c @@ -22,10 +22,6 @@ #define TCP_PACING_CA_RATIO (120) #define TCP_REORDERING (12) -#define min(a, b) ((a) < (b) ? (a) : (b)) -#define max(a, b) ((a) > (b) ? 
(a) : (b)) -#define after(seq2, seq1) before(seq1, seq2) - extern void cubictcp_init(struct sock *sk) __ksym; extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym; extern __u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym; @@ -34,11 +30,6 @@ extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym; extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym; extern void cubictcp_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym; -static bool before(__u32 seq1, __u32 seq2) -{ - return (__s32)(seq1-seq2) < 0; -} - static __u64 div64_u64(__u64 dividend, __u64 divisor) { return dividend / divisor; diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index f089faa97ae6..46fb2b37d3a7 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -20,13 +20,6 @@ char _license[] SEC("license") = "GPL"; #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) -#define min(a, b) ((a) < (b) ? (a) : (b)) -#define max(a, b) ((a) > (b) ? (a) : (b)) -static bool before(__u32 seq1, __u32 seq2) -{ - return (__s32)(seq1-seq2) < 0; -} -#define after(seq2, seq1) before(seq1, seq2) extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym; extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym; diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index 32c511bcd60b..1cc83140849f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -13,16 +13,10 @@ #ifndef EBUSY #define EBUSY 16 #endif -#define min(a, b) ((a) < (b) ? (a) : (b)) -#define max(a, b) ((a) > (b) ? (a) : (b)) #define min_not_zero(x, y) ({ \ typeof(x) __x = (x); \ typeof(y) __y = (y); \ __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) -static bool before(__u32 seq1, __u32 seq2) -{ - return (__s32)(seq1-seq2) < 0; -} char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_gotox.c b/tools/testing/selftests/bpf/progs/bpf_gotox.c new file mode 100644 index 000000000000..216c71b94c64 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_gotox.c @@ -0,0 +1,448 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" + +__u64 in_user; +__u64 ret_user; + +int pid; + +/* + * Skip all the tests if the compiler doesn't support indirect jumps. + * + * If the tests are skipped, all functions below are compiled as + * dummies, so that the skeleton looks the same and the userspace + * program needs no checks other than whether data->skip is set. + */ +#ifdef __BPF_FEATURE_GOTOX +__u64 skip SEC(".data") = 0; +#else +__u64 skip = 1; +#endif + +struct simple_ctx { + __u64 x; +}; + +#ifdef __BPF_FEATURE_GOTOX +__u64 some_var; + +/* + * This function adds code that the verifier replaces with a varying + * number of instructions, which puts extra stress on the insn_array + * maps that track the targets of the indirect jumps.
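+ * (For instance, bpf_jiffies64() below is a helper the verifier may inline, so the patched program length differs across kernels and configs, shifting the instruction offsets the jump tables have to track.)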
+ */ +static __always_inline void adjust_insns(__u64 x) +{ + some_var ^= x + bpf_jiffies64(); +} + +SEC("syscall") +int one_switch(struct simple_ctx *ctx) +{ + switch (ctx->x) { + case 0: + adjust_insns(ctx->x + 1); + ret_user = 2; + break; + case 1: + adjust_insns(ctx->x + 7); + ret_user = 3; + break; + case 2: + adjust_insns(ctx->x + 9); + ret_user = 4; + break; + case 3: + adjust_insns(ctx->x + 11); + ret_user = 5; + break; + case 4: + adjust_insns(ctx->x + 17); + ret_user = 7; + break; + default: + adjust_insns(ctx->x + 177); + ret_user = 19; + break; + } + + return 0; +} + +SEC("syscall") +int one_switch_non_zero_sec_off(struct simple_ctx *ctx) +{ + switch (ctx->x) { + case 0: + adjust_insns(ctx->x + 1); + ret_user = 2; + break; + case 1: + adjust_insns(ctx->x + 7); + ret_user = 3; + break; + case 2: + adjust_insns(ctx->x + 9); + ret_user = 4; + break; + case 3: + adjust_insns(ctx->x + 11); + ret_user = 5; + break; + case 4: + adjust_insns(ctx->x + 17); + ret_user = 7; + break; + default: + adjust_insns(ctx->x + 177); + ret_user = 19; + break; + } + + return 0; +} + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int simple_test_other_sec(struct pt_regs *ctx) +{ + __u64 x = in_user; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + switch (x) { + case 0: + adjust_insns(x + 1); + ret_user = 2; + break; + case 1: + adjust_insns(x + 7); + ret_user = 3; + break; + case 2: + adjust_insns(x + 9); + ret_user = 4; + break; + case 3: + adjust_insns(x + 11); + ret_user = 5; + break; + case 4: + adjust_insns(x + 17); + ret_user = 7; + break; + default: + adjust_insns(x + 177); + ret_user = 19; + break; + } + + return 0; +} + +SEC("syscall") +int two_switches(struct simple_ctx *ctx) +{ + switch (ctx->x) { + case 0: + adjust_insns(ctx->x + 1); + ret_user = 2; + break; + case 1: + adjust_insns(ctx->x + 7); + ret_user = 3; + break; + case 2: + adjust_insns(ctx->x + 9); + ret_user = 4; + break; + case 3: + adjust_insns(ctx->x + 11); + ret_user = 5; + break; + case 4: + adjust_insns(ctx->x + 17); + ret_user = 7; + break; + default: + adjust_insns(ctx->x + 177); + ret_user = 19; + break; + } + + switch (ctx->x + !!ret_user) { + case 1: + adjust_insns(ctx->x + 7); + ret_user = 103; + break; + case 2: + adjust_insns(ctx->x + 9); + ret_user = 104; + break; + case 3: + adjust_insns(ctx->x + 11); + ret_user = 107; + break; + case 4: + adjust_insns(ctx->x + 11); + ret_user = 205; + break; + case 5: + adjust_insns(ctx->x + 11); + ret_user = 115; + break; + default: + adjust_insns(ctx->x + 177); + ret_user = 1019; + break; + } + + return 0; +} + +SEC("syscall") +int big_jump_table(struct simple_ctx *ctx __attribute__((unused))) +{ + const void *const jt[256] = { + [0 ... 
255] = &&default_label, + [0] = &&l0, + [11] = &&l11, + [27] = &&l27, + [31] = &&l31, + }; + + goto *jt[ctx->x & 0xff]; + +l0: + adjust_insns(ctx->x + 1); + ret_user = 2; + return 0; + +l11: + adjust_insns(ctx->x + 7); + ret_user = 3; + return 0; + +l27: + adjust_insns(ctx->x + 9); + ret_user = 4; + return 0; + +l31: + adjust_insns(ctx->x + 11); + ret_user = 5; + return 0; + +default_label: + adjust_insns(ctx->x + 177); + ret_user = 19; + return 0; +} + +SEC("syscall") +int one_jump_two_maps(struct simple_ctx *ctx __attribute__((unused))) +{ + __label__ l1, l2, l3, l4; + void *jt1[2] = { &&l1, &&l2 }; + void *jt2[2] = { &&l3, &&l4 }; + unsigned int a = ctx->x % 2; + unsigned int b = (ctx->x / 2) % 2; + volatile int ret = 0; + + if (!(a < 2 && b < 2)) + return 19; + + if (ctx->x % 2) + goto *jt1[a]; + else + goto *jt2[b]; + + l1: ret += 1; + l2: ret += 3; + l3: ret += 5; + l4: ret += 7; + + ret_user = ret; + return ret; +} + +SEC("syscall") +int one_map_two_jumps(struct simple_ctx *ctx __attribute__((unused))) +{ + __label__ l1, l2, l3; + void *jt[3] = { &&l1, &&l2, &&l3 }; + unsigned int a = (ctx->x >> 2) & 1; + unsigned int b = (ctx->x >> 3) & 1; + volatile int ret = 0; + + if (ctx->x % 2) + goto *jt[a]; + + if (ctx->x % 3) + goto *jt[a + b]; + + l1: ret += 3; + l2: ret += 5; + l3: ret += 7; + + ret_user = ret; + return ret; +} + +/* Just to introduce some non-zero offsets in .text */ +static __noinline int f0(volatile struct simple_ctx *ctx __arg_ctx) +{ + if (ctx) + return 1; + else + return 13; +} + +SEC("syscall") int f1(struct simple_ctx *ctx) +{ + ret_user = 0; + return f0(ctx); +} + +static __noinline int __static_global(__u64 x) +{ + switch (x) { + case 0: + adjust_insns(x + 1); + ret_user = 2; + break; + case 1: + adjust_insns(x + 7); + ret_user = 3; + break; + case 2: + adjust_insns(x + 9); + ret_user = 4; + break; + case 3: + adjust_insns(x + 11); + ret_user = 5; + break; + case 4: + adjust_insns(x + 17); + ret_user = 7; + break; + default: + adjust_insns(x + 177); + ret_user = 19; + break; + } + + return 0; +} + +SEC("syscall") +int use_static_global1(struct simple_ctx *ctx) +{ + ret_user = 0; + return __static_global(ctx->x); +} + +SEC("syscall") +int use_static_global2(struct simple_ctx *ctx) +{ + ret_user = 0; + adjust_insns(ctx->x + 1); + return __static_global(ctx->x); +} + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int use_static_global_other_sec(void *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + return __static_global(in_user); +} + +__noinline int __nonstatic_global(__u64 x) +{ + switch (x) { + case 0: + adjust_insns(x + 1); + ret_user = 2; + break; + case 1: + adjust_insns(x + 7); + ret_user = 3; + break; + case 2: + adjust_insns(x + 9); + ret_user = 4; + break; + case 3: + adjust_insns(x + 11); + ret_user = 5; + break; + case 4: + adjust_insns(x + 17); + ret_user = 7; + break; + default: + adjust_insns(x + 177); + ret_user = 19; + break; + } + + return 0; +} + +SEC("syscall") +int use_nonstatic_global1(struct simple_ctx *ctx) +{ + ret_user = 0; + return __nonstatic_global(ctx->x); +} + +SEC("syscall") +int use_nonstatic_global2(struct simple_ctx *ctx) +{ + ret_user = 0; + adjust_insns(ctx->x + 1); + return __nonstatic_global(ctx->x); +} + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int use_nonstatic_global_other_sec(void *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + return __nonstatic_global(in_user); +} + +#else /* __BPF_FEATURE_GOTOX */ + +#define SKIP_TEST(TEST_NAME) \ + SEC("syscall") int TEST_NAME(void *ctx) \ 
+ { \ + return 0; \ + } + +SKIP_TEST(one_switch); +SKIP_TEST(one_switch_non_zero_sec_off); +SKIP_TEST(simple_test_other_sec); +SKIP_TEST(two_switches); +SKIP_TEST(big_jump_table); +SKIP_TEST(one_jump_two_maps); +SKIP_TEST(one_map_two_jumps); +SKIP_TEST(use_static_global1); +SKIP_TEST(use_static_global2); +SKIP_TEST(use_static_global_other_sec); +SKIP_TEST(use_nonstatic_global1); +SKIP_TEST(use_nonstatic_global2); +SKIP_TEST(use_nonstatic_global_other_sec); + +#endif /* __BPF_FEATURE_GOTOX */ + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c index 774d4dbe8189..a8aa5a71d846 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c @@ -18,23 +18,10 @@ unsigned short reuse_listen_hport = 0; unsigned short listen_hport = 0; -char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic"; +const char cubic_cc[] = "bpf_cubic"; char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp"; bool random_retry = false; -static bool tcp_cc_eq(const char *a, const char *b) -{ - int i; - - for (i = 0; i < TCP_CA_NAME_MAX; i++) { - if (a[i] != b[i]) - return false; - if (!a[i]) - break; - } - - return true; -} SEC("iter/tcp") int change_tcp_cc(struct bpf_iter__tcp *ctx) @@ -58,7 +45,7 @@ int change_tcp_cc(struct bpf_iter__tcp *ctx) cur_cc, sizeof(cur_cc))) return 0; - if (!tcp_cc_eq(cur_cc, cubic_cc)) + if (bpf_strncmp(cur_cc, TCP_CA_NAME_MAX, cubic_cc)) return 0; if (random_retry && bpf_get_prandom_u32() % 4 == 1) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c index 164640db3a29..b1e509b231cd 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -99,13 +99,13 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp, icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; - timer_expires = icsk->icsk_retransmit_timer.expires; + timer_expires = sp->tcp_retransmit_timer.expires; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = icsk->icsk_retransmit_timer.expires; - } else if (timer_pending(&sp->sk_timer)) { + timer_expires = sp->tcp_retransmit_timer.expires; + } else if (timer_pending(&icsk->icsk_keepalive_timer)) { timer_active = 2; - timer_expires = sp->sk_timer.expires; + timer_expires = icsk->icsk_keepalive_timer.expires; } else { timer_active = 0; timer_expires = bpf_jiffies64(); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c index 591c703f5032..dbc7166aee91 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c @@ -99,13 +99,13 @@ static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp, icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; - timer_expires = icsk->icsk_retransmit_timer.expires; + timer_expires = sp->tcp_retransmit_timer.expires; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = icsk->icsk_retransmit_timer.expires; - } else if (timer_pending(&sp->sk_timer)) { + timer_expires = sp->tcp_retransmit_timer.expires; + } else if (timer_pending(&icsk->icsk_keepalive_timer)) { timer_active = 2; - timer_expires = sp->sk_timer.expires; + timer_expires = 
icsk->icsk_keepalive_timer.expires; } else { timer_active = 0; timer_expires = bpf_jiffies64(); diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index a7a1a684eed1..c9bfbe1bafc1 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -126,6 +126,9 @@ * Several __arch_* annotations could be specified at once. * When test case is not run on current arch it is marked as skipped. * __caps_unpriv Specify the capabilities that should be set when running the test. + * + * __linear_size Specify the size of the linear area of non-linear skbs, or + * 0 for linear skbs. */ #define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg))) #define __not_msg(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg=" XSTR(__COUNTER__) "=" msg))) @@ -159,6 +162,7 @@ #define __stderr_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr_unpriv=" XSTR(__COUNTER__) "=" msg))) #define __stdout(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout=" XSTR(__COUNTER__) "=" msg))) #define __stdout_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout_unpriv=" XSTR(__COUNTER__) "=" msg))) +#define __linear_size(sz) __attribute__((btf_decl_tag("comment:test_linear_size=" XSTR(sz)))) /* Define common capabilities tested using __caps_unpriv */ #define CAP_NET_ADMIN 12 diff --git a/tools/testing/selftests/bpf/progs/bpf_smc.c b/tools/testing/selftests/bpf/progs/bpf_smc.c new file mode 100644 index 000000000000..70d8b08f5914 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_smc.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_tracing_net.h" + +char _license[] SEC("license") = "GPL"; + +enum { + BPF_SMC_LISTEN = 10, +}; + +struct smc_sock___local { + struct sock sk; + struct smc_sock *listen_smc; + bool use_fallback; +} __attribute__((preserve_access_index)); + +int smc_cnt = 0; +int fallback_cnt = 0; + +SEC("fentry/smc_release") +int BPF_PROG(bpf_smc_release, struct socket *sock) +{ + /* only count from one side (client) */ + if (sock->sk->__sk_common.skc_state == BPF_SMC_LISTEN) + return 0; + smc_cnt++; + return 0; +} + +SEC("fentry/smc_switch_to_fallback") +int BPF_PROG(bpf_smc_switch_to_fallback, struct smc_sock___local *smc) +{ + /* only count from one side (client) */ + if (smc && !smc->listen_smc) + fallback_cnt++; + return 0; +} + +/* go with default value if no strat was found */ +bool default_ip_strat_value = true; + +struct smc_policy_ip_key { + __u32 sip; + __u32 dip; +}; + +struct smc_policy_ip_value { + __u8 mode; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct smc_policy_ip_key)); + __uint(value_size, sizeof(struct smc_policy_ip_value)); + __uint(max_entries, 128); + __uint(map_flags, BPF_F_NO_PREALLOC); +} smc_policy_ip SEC(".maps"); + +static bool smc_check(__u32 src, __u32 dst) +{ + struct smc_policy_ip_value *value; + struct smc_policy_ip_key key = { + .sip = src, + .dip = dst, + }; + + value = bpf_map_lookup_elem(&smc_policy_ip, &key); + return value ? 
value->mode : default_ip_strat_value; +} + +SEC("fmod_ret/update_socket_protocol") +int BPF_PROG(smc_run, int family, int type, int protocol) +{ + struct task_struct *task; + + if (family != AF_INET && family != AF_INET6) + return protocol; + + if ((type & 0xf) != SOCK_STREAM) + return protocol; + + if (protocol != 0 && protocol != IPPROTO_TCP) + return protocol; + + task = bpf_get_current_task_btf(); + /* Prevent from affecting other tests */ + if (!task || !task->nsproxy->net_ns->smc.hs_ctrl) + return protocol; + + return IPPROTO_SMC; +} + +SEC("struct_ops") +int BPF_PROG(bpf_smc_set_tcp_option_cond, const struct tcp_sock *tp, + struct inet_request_sock *ireq) +{ + return smc_check(ireq->req.__req_common.skc_daddr, + ireq->req.__req_common.skc_rcv_saddr); +} + +SEC("struct_ops") +int BPF_PROG(bpf_smc_set_tcp_option, struct tcp_sock *tp) +{ + return smc_check(tp->inet_conn.icsk_inet.sk.__sk_common.skc_rcv_saddr, + tp->inet_conn.icsk_inet.sk.__sk_common.skc_daddr); +} + +SEC(".struct_ops") +struct smc_hs_ctrl linkcheck = { + .name = "linkcheck", + .syn_option = (void *)bpf_smc_set_tcp_option, + .synack_option = (void *)bpf_smc_set_tcp_option_cond, +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 17db400f0e0d..d8dacef37c16 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -146,6 +146,20 @@ #define tcp_jiffies32 ((__u32)bpf_jiffies64()) +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif +#ifndef max +#define max(a, b) ((a) > (b) ? (a) : (b)) +#endif + +static inline bool before(__u32 seq1, __u32 seq2) +{ + return (__s32)(seq1 - seq2) < 0; +} + +#define after(seq2, seq1) before(seq1, seq2) + static inline struct inet_connection_sock *inet_csk(const struct sock *sk) { return (struct inet_connection_sock *)sk; diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index 9e9ebf27b878..9d158cfad981 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -34,6 +34,9 @@ #define SOL_TCP 6 #endif +const char reno[] = "reno"; +const char cubic[] = "cubic"; + __attribute__ ((noinline)) __weak int do_bind(struct bpf_sock_addr *ctx) { @@ -50,35 +53,27 @@ int do_bind(struct bpf_sock_addr *ctx) } static __inline int verify_cc(struct bpf_sock_addr *ctx, - char expected[TCP_CA_NAME_MAX]) + const char expected[]) { char buf[TCP_CA_NAME_MAX]; - int i; if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf))) return 1; - for (i = 0; i < TCP_CA_NAME_MAX; i++) { - if (buf[i] != expected[i]) - return 1; - if (buf[i] == 0) - break; - } + if (bpf_strncmp(buf, TCP_CA_NAME_MAX, expected)) + return 1; return 0; } static __inline int set_cc(struct bpf_sock_addr *ctx) { - char reno[TCP_CA_NAME_MAX] = "reno"; - char cubic[TCP_CA_NAME_MAX] = "cubic"; - - if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno))) + if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, (void *)reno, sizeof(reno))) return 1; if (verify_cc(ctx, reno)) return 1; - if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic))) + if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, (void *)cubic, sizeof(cubic))) return 1; if (verify_cc(ctx, cubic)) return 1; diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 127dea342e5a..e0d672d93adf 100644 --- 
a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -914,8 +914,8 @@ void *user_ptr; char expected_str[384]; __u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257}; -typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u32 off, - u32 size, const void *unsafe_ptr); +typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u64 off, + u64 size, const void *unsafe_ptr); /* Returns the offset just before the end of the maximum sized xdp fragment. * Any write larger than 32 bytes will be split between 2 fragments. @@ -1106,16 +1106,16 @@ int test_copy_from_user_str_dynptr(void *ctx) return 0; } -static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u32 off, - u32 size, const void *unsafe_ptr) +static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u64 off, + u64 size, const void *unsafe_ptr) { struct task_struct *task = bpf_get_current_task_btf(); return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task); } -static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u32 off, - u32 size, const void *unsafe_ptr) +static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u64 off, + u64 size, const void *unsafe_ptr) { struct task_struct *task = bpf_get_current_task_btf(); diff --git a/tools/testing/selftests/bpf/progs/file_reader.c b/tools/testing/selftests/bpf/progs/file_reader.c new file mode 100644 index 000000000000..4d756b623557 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/file_reader.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <string.h> +#include <stdbool.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "errno.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} arrmap SEC(".maps"); + +struct elem { + struct file *file; + struct bpf_task_work tw; +}; + +char user_buf[256000]; +char tmp_buf[256000]; + +int pid = 0; +int err, run_success = 0; + +static int validate_file_read(struct file *file); +static int task_work_callback(struct bpf_map *map, void *key, void *value); + +SEC("lsm/file_open") +int on_open_expect_fault(void *c) +{ + struct bpf_dynptr dynptr; + struct file *file; + int local_err = 1; + __u32 user_buf_sz = sizeof(user_buf); + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + file = bpf_get_task_exe_file(bpf_get_current_task_btf()); + if (!file) + return 0; + + if (bpf_dynptr_from_file(file, 0, &dynptr)) + goto out; + + local_err = bpf_dynptr_read(tmp_buf, user_buf_sz, &dynptr, user_buf_sz, 0); + if (local_err == -EFAULT) { /* Expect page fault */ + local_err = 0; + run_success = 1; + } +out: + bpf_dynptr_file_discard(&dynptr); + if (local_err) + err = local_err; + bpf_put_file(file); + return 0; +} + +SEC("lsm/file_open") +int on_open_validate_file_read(void *c) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct elem *work; + int key = 0; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + work = bpf_map_lookup_elem(&arrmap, &key); + if (!work) { + err = 1; + return 0; + } + bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, task_work_callback, NULL); + return 0; +} + +/* Called in a sleepable context, read 256K bytes, cross check with user space read data */ +static int task_work_callback(struct bpf_map *map, void *key, void *value) +{ + 
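/* task_work callbacks run in process context, where page faults can be
+	 * serviced, so the file-backed dynptr reads below are allowed to sleep;
+	 * contrast with on_open_expect_fault() above, where the same read from
+	 * a non-sleepable LSM hook is expected to fail with -EFAULT.
+	 */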
struct task_struct *task = bpf_get_current_task_btf(); + struct file *file = bpf_get_task_exe_file(task); + + if (!file) + return 0; + + err = validate_file_read(file); + if (!err) + run_success = 1; + bpf_put_file(file); + return 0; +} + +static int verify_dynptr_read(struct bpf_dynptr *ptr, u32 off, char *user_buf, u32 len) +{ + int i; + + if (bpf_dynptr_read(tmp_buf, len, ptr, off, 0)) + return 1; + + /* Verify file contents read from BPF is the same as the one read from userspace */ + bpf_for(i, 0, len) + { + if (tmp_buf[i] != user_buf[i]) + return 1; + } + return 0; +} + +static int validate_file_read(struct file *file) +{ + struct bpf_dynptr dynptr; + int loc_err = 1, off; + __u32 user_buf_sz = sizeof(user_buf); + + if (bpf_dynptr_from_file(file, 0, &dynptr)) + goto cleanup; + + loc_err = verify_dynptr_read(&dynptr, 0, user_buf, user_buf_sz); + off = 1; + loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off); + off = user_buf_sz - 1; + loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off); + /* Read file with random offset and length */ + off = 4097; + loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, 100); + + /* Adjust dynptr, verify read */ + loc_err = loc_err ?: bpf_dynptr_adjust(&dynptr, off, off + 1); + loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 1); + /* Can't read more than 1 byte */ + loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 2) == 0; + /* Can't read with far offset */ + loc_err = loc_err ?: verify_dynptr_read(&dynptr, 1, user_buf + off, 1) == 0; + +cleanup: + bpf_dynptr_file_discard(&dynptr); + return loc_err; +} diff --git a/tools/testing/selftests/bpf/progs/file_reader_fail.c b/tools/testing/selftests/bpf/progs/file_reader_fail.c new file mode 100644 index 000000000000..32fe28ed2439 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/file_reader_fail.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <string.h> +#include <stdbool.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +int err; +void *user_ptr; + +SEC("lsm/file_open") +__failure +__msg("Unreleased reference id=") +int on_nanosleep_unreleased_ref(void *ctx) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct file *file = bpf_get_task_exe_file(task); + struct bpf_dynptr dynptr; + + if (!file) + return 0; + + err = bpf_dynptr_from_file(file, 0, &dynptr); + return err ? 
1 : 0; +} + +SEC("xdp") +__failure +__msg("Expected a dynptr of type file as arg #0") +int xdp_wrong_dynptr_type(struct xdp_md *xdp) +{ + struct bpf_dynptr dynptr; + + bpf_dynptr_from_xdp(xdp, 0, &dynptr); + bpf_dynptr_file_discard(&dynptr); + return 0; +} + +SEC("xdp") +__failure +__msg("Expected an initialized dynptr as arg #0") +int xdp_no_dynptr_type(struct xdp_md *xdp) +{ + struct bpf_dynptr dynptr; + + bpf_dynptr_file_discard(&dynptr); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/htab_update.c b/tools/testing/selftests/bpf/progs/htab_update.c index 7481bb30b29b..195d3b2fba00 100644 --- a/tools/testing/selftests/bpf/progs/htab_update.c +++ b/tools/testing/selftests/bpf/progs/htab_update.c @@ -6,24 +6,31 @@ char _license[] SEC("license") = "GPL"; +/* Map value type: has BTF-managed field (bpf_timer) */ +struct val { + struct bpf_timer t; + __u64 payload; +}; + struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 1); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(__u32)); + __type(key, __u32); + __type(value, struct val); } htab SEC(".maps"); int pid = 0; int update_err = 0; -SEC("?fentry/lookup_elem_raw") -int lookup_elem_raw(void *ctx) +SEC("?fentry/bpf_obj_free_fields") +int bpf_obj_free_fields(void *ctx) { - __u32 key = 0, value = 1; + __u32 key = 0; + struct val value = { .payload = 1 }; if ((bpf_get_current_pid_tgid() >> 32) != pid) return 0; - update_err = bpf_map_update_elem(&htab, &key, &value, 0); + update_err = bpf_map_update_elem(&htab, &key, &value, BPF_ANY); return 0; } diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c index 645b2c9f7867..0e87ad1ebcfa 100644 --- a/tools/testing/selftests/bpf/progs/ip_check_defrag.c +++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c @@ -12,11 +12,6 @@ #define IP_OFFSET 0x1FFF #define NEXTHDR_FRAGMENT 44 -extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, - struct bpf_dynptr *ptr__uninit) __ksym; -extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, - void *buffer, uint32_t buffer__sz) __ksym; - volatile int shootdowns = 0; static bool is_frag_v4(struct iphdr *iph) diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index 0c13b7409947..7de173daf27b 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -89,14 +89,16 @@ SEC("lsm/file_mprotect") int BPF_PROG(test_int_hook, struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot, int ret) { - if (ret != 0) + struct mm_struct *mm = vma->vm_mm; + + if (ret != 0 || !mm) return ret; __s32 pid = bpf_get_current_pid_tgid() >> 32; int is_stack = 0; - is_stack = (vma->vm_start <= vma->vm_mm->start_stack && - vma->vm_end >= vma->vm_mm->start_stack); + is_stack = (vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack); if (is_stack && monitored_pid == pid) { mprotect_count++; diff --git a/tools/testing/selftests/bpf/progs/lsm_tailcall.c b/tools/testing/selftests/bpf/progs/lsm_tailcall.c index 49c075ce2d4c..6e7e58051e64 100644 --- a/tools/testing/selftests/bpf/progs/lsm_tailcall.c +++ b/tools/testing/selftests/bpf/progs/lsm_tailcall.c @@ -20,14 +20,14 @@ int lsm_file_permission_prog(void *ctx) return 0; } -SEC("lsm/file_alloc_security") -int lsm_file_alloc_security_prog(void *ctx) +SEC("lsm/kernfs_init_security") +int lsm_kernfs_init_security_prog(void *ctx) { return 0; } -SEC("lsm/file_alloc_security") -int 
lsm_file_alloc_security_entry(void *ctx) +SEC("lsm/kernfs_init_security") +int lsm_kernfs_init_security_entry(void *ctx) { bpf_tail_call_static(ctx, &jmp_table, 0); return 0; diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 3a868a199349..d70c28824bbe 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -278,6 +278,46 @@ out: return 0; } +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int nested_rcu_region_unbalanced_1(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* nested rcu read lock regions */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: + bpf_rcu_read_unlock(); + bpf_rcu_read_unlock(); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep") +int nested_rcu_region_unbalanced_2(void *ctx) +{ + struct task_struct *task, *real_parent; + + /* nested rcu read lock regions */ + task = bpf_get_current_task_btf(); + bpf_rcu_read_lock(); + bpf_rcu_read_lock(); + bpf_rcu_read_lock(); + real_parent = task->real_parent; + if (!real_parent) + goto out; + (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); +out: + bpf_rcu_read_unlock(); + bpf_rcu_read_unlock(); + return 0; +} + SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") int task_trusted_non_rcuptr(void *ctx) { diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c index 893a4fdb4b6e..1aca85d86aeb 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c @@ -568,4 +568,64 @@ err_out: return 0; } +private(kptr_ref) u64 ref; + +static int probe_read_refcount(void) +{ + u32 refcount; + + bpf_probe_read_kernel(&refcount, sizeof(refcount), (void *) ref); + return refcount; +} + +static int __insert_in_list(struct bpf_list_head *head, struct bpf_spin_lock *lock, + struct node_data __kptr **node) +{ + struct node_data *node_new, *node_ref, *node_old; + + node_new = bpf_obj_new(typeof(*node_new)); + if (!node_new) + return -1; + + node_ref = bpf_refcount_acquire(node_new); + node_old = bpf_kptr_xchg(node, node_new); + if (node_old) { + bpf_obj_drop(node_old); + bpf_obj_drop(node_ref); + return -2; + } + + bpf_spin_lock(lock); + bpf_list_push_front(head, &node_ref->l); + ref = (u64)(void *) &node_ref->ref; + bpf_spin_unlock(lock); + return probe_read_refcount(); +} + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} percpu_hash SEC(".maps"); + +SEC("tc") +int percpu_hash_refcount_leak(void *ctx) +{ + struct map_value *v; + int key = 0; + + v = bpf_map_lookup_elem(&percpu_hash, &key); + if (!v) + return 0; + + return __insert_in_list(&head, &lock, &v->node); +} + +SEC("tc") +int check_percpu_hash_refcount(void *ctx) +{ + return probe_read_refcount(); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c index 6a468496f539..d96c7d1e8fc2 100644 --- a/tools/testing/selftests/bpf/progs/ringbuf_bench.c +++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2020 Facebook +#include <stdbool.h> #include <linux/bpf.h> #include 
<stdint.h> #include <bpf/bpf_helpers.h> @@ -14,9 +15,11 @@ struct { const volatile int batch_cnt = 0; const volatile long use_output = 0; +const volatile bool bench_producer = false; long sample_val = 42; long dropped __attribute__((aligned(128))) = 0; +long hits __attribute__((aligned(128))) = 0; const volatile long wakeup_data_size = 0; @@ -24,6 +27,9 @@ static __always_inline long get_flags() { long sz; + if (bench_producer) + return BPF_RB_NO_WAKEUP; + if (!wakeup_data_size) return 0; @@ -47,6 +53,8 @@ int bench_ringbuf(void *ctx) *sample = sample_val; flags = get_flags(); bpf_ringbuf_submit(sample, flags); + if (bench_producer) + __sync_add_and_fetch(&hits, 1); } } } else { @@ -55,6 +63,9 @@ int bench_ringbuf(void *ctx) if (bpf_ringbuf_output(&ringbuf, &sample_val, sizeof(sample_val), flags)) __sync_add_and_fetch(&dropped, 1); + else if (bench_producer) + __sync_add_and_fetch(&hits, 1); + } } return 0; diff --git a/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c new file mode 100644 index 000000000000..09a00d11ffcc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <errno.h> + +extern int tcp_memory_per_cpu_fw_alloc __ksym; +extern int udp_memory_per_cpu_fw_alloc __ksym; + +int nr_cpus; +bool tcp_activated, udp_activated; +long tcp_memory_allocated, udp_memory_allocated; + +struct sk_prot { + long *memory_allocated; + int *memory_per_cpu_fw_alloc; +}; + +static int drain_memory_per_cpu_fw_alloc(__u32 i, struct sk_prot *sk_prot_ctx) +{ + int *memory_per_cpu_fw_alloc; + + memory_per_cpu_fw_alloc = bpf_per_cpu_ptr(sk_prot_ctx->memory_per_cpu_fw_alloc, i); + if (memory_per_cpu_fw_alloc) + *sk_prot_ctx->memory_allocated += *memory_per_cpu_fw_alloc; + + return 0; +} + +static long get_memory_allocated(struct sock *_sk, int *memory_per_cpu_fw_alloc) +{ + struct sock *sk = bpf_core_cast(_sk, struct sock); + struct sk_prot sk_prot_ctx; + long memory_allocated; + + /* net_aligned_data.{tcp,udp}_memory_allocated was not available. 
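* Reconstruct the total by hand instead: take the protocol's global
+	 * atomic counter and add every CPU's forward-alloc cache, i.e.
+	 * total = memory_allocated + sum_cpu(*memory_per_cpu_fw_alloc),
+	 * which is exactly what the bpf_loop() below accumulates.
+	 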
*/ + memory_allocated = sk->__sk_common.skc_prot->memory_allocated->counter; + + sk_prot_ctx.memory_allocated = &memory_allocated; + sk_prot_ctx.memory_per_cpu_fw_alloc = memory_per_cpu_fw_alloc; + + bpf_loop(nr_cpus, drain_memory_per_cpu_fw_alloc, &sk_prot_ctx, 0); + + return memory_allocated; +} + +static void fentry_init_sock(struct sock *sk, bool *activated, + long *memory_allocated, int *memory_per_cpu_fw_alloc) +{ + if (!*activated) + return; + + *memory_allocated = get_memory_allocated(sk, memory_per_cpu_fw_alloc); + *activated = false; +} + +SEC("fentry/tcp_init_sock") +int BPF_PROG(fentry_tcp_init_sock, struct sock *sk) +{ + fentry_init_sock(sk, &tcp_activated, + &tcp_memory_allocated, &tcp_memory_per_cpu_fw_alloc); + return 0; +} + +SEC("fentry/udp_init_sock") +int BPF_PROG(fentry_udp_init_sock, struct sock *sk) +{ + fentry_init_sock(sk, &udp_activated, + &udp_memory_allocated, &udp_memory_per_cpu_fw_alloc); + return 0; +} + +SEC("cgroup/sock_create") +int sock_create(struct bpf_sock *ctx) +{ + int err, val = 1; + + err = bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM, + &val, sizeof(val)); + if (err) + goto err; + + val = 0; + + err = bpf_getsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM, + &val, sizeof(val)); + if (err) + goto err; + + if (val != 1) { + err = -EINVAL; + goto err; + } + + return 1; + +err: + bpf_set_retval(err); + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c index 99d72c68f76a..826e6b6aff7e 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c @@ -45,8 +45,12 @@ SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null1(void *ctx) { return SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null2(void *ctx) { return bpf_strcspn("hello", NULL); } SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null1(void *ctx) { return bpf_strstr(NULL, "hello"); } SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null2(void *ctx) { return bpf_strstr("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcasestr_null1(void *ctx) { return bpf_strcasestr(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcasestr_null2(void *ctx) { return bpf_strcasestr("hello", NULL); } SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null1(void *ctx) { return bpf_strnstr(NULL, "hello", 1); } SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return bpf_strnstr("hello", NULL, 1); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strncasestr_null1(void *ctx) { return bpf_strncasestr(NULL, "hello", 1); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strncasestr_null2(void *ctx) { return bpf_strncasestr("hello", NULL, 1); } /* Passing userspace ptr to string kfuncs */ SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); } @@ -65,8 +69,12 @@ SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr1(void *ctx) { re SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr2(void *ctx) { return bpf_strcspn("hello", user_ptr); } SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr1(void *ctx) { return bpf_strstr(user_ptr, "hello"); } SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr2(void *ctx) { return bpf_strstr("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcasestr_user_ptr1(void *ctx) 
{ return bpf_strcasestr(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcasestr_user_ptr2(void *ctx) { return bpf_strcasestr("hello", user_ptr); } SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr1(void *ctx) { return bpf_strnstr(user_ptr, "hello", 1); } SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { return bpf_strnstr("hello", user_ptr, 1); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strncasestr_user_ptr1(void *ctx) { return bpf_strncasestr(user_ptr, "hello", 1); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strncasestr_user_ptr2(void *ctx) { return bpf_strncasestr("hello", user_ptr, 1); } #endif /* __TARGET_ARCH_s390 */ @@ -87,7 +95,11 @@ SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault1(void *ctx) { return SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault2(void *ctx) { return bpf_strcspn("hello", invalid_kern_ptr); } SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault1(void *ctx) { return bpf_strstr(invalid_kern_ptr, "hello"); } SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault2(void *ctx) { return bpf_strstr("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strcasestr_pagefault1(void *ctx) { return bpf_strcasestr(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strcasestr_pagefault2(void *ctx) { return bpf_strcasestr("hello", invalid_kern_ptr); } SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault1(void *ctx) { return bpf_strnstr(invalid_kern_ptr, "hello", 1); } SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault2(void *ctx) { return bpf_strnstr("hello", invalid_kern_ptr, 1); } +SEC("syscall") __retval(-EFAULT) int test_strncasestr_pagefault1(void *ctx) { return bpf_strncasestr(invalid_kern_ptr, "hello", 1); } +SEC("syscall") __retval(-EFAULT) int test_strncasestr_pagefault2(void *ctx) { return bpf_strncasestr("hello", invalid_kern_ptr, 1); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c index e41cc5601994..05e1da1f250f 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c @@ -19,6 +19,8 @@ SEC("syscall") int test_strspn_accept_too_long(void *ctx) { return bpf_strspn("b SEC("syscall") int test_strcspn_str_too_long(void *ctx) { return bpf_strcspn(long_str, "b"); } SEC("syscall") int test_strcspn_reject_too_long(void *ctx) { return bpf_strcspn("b", long_str); } SEC("syscall") int test_strstr_too_long(void *ctx) { return bpf_strstr(long_str, "hello"); } +SEC("syscall") int test_strcasestr_too_long(void *ctx) { return bpf_strcasestr(long_str, "hello"); } SEC("syscall") int test_strnstr_too_long(void *ctx) { return bpf_strnstr(long_str, "hello", sizeof(long_str)); } +SEC("syscall") int test_strncasestr_too_long(void *ctx) { return bpf_strncasestr(long_str, "hello", sizeof(long_str)); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c index 2e3498e37b9c..a8513964516b 100644 --- a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c @@ -33,8 +33,11 @@ __test(11) int test_strnlen(void *ctx) { return bpf_strnlen(str, 12); } __test(5) int test_strspn(void *ctx) { return bpf_strspn(str, "ehlo"); } __test(2) int 
test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); } __test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); } +__test(6) int test_strcasestr_found(void *ctx) { return bpf_strcasestr(str, "woRLD"); } __test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); } +__test(-ENOENT) int test_strcasestr_notfound(void *ctx) { return bpf_strcasestr(str, "hi"); } __test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); } +__test(0) int test_strcasestr_empty(void *ctx) { return bpf_strcasestr(str, ""); } __test(0) int test_strnstr_found1(void *ctx) { return bpf_strnstr("", "", 0); } __test(0) int test_strnstr_found2(void *ctx) { return bpf_strnstr(str, "hello", 5); } __test(0) int test_strnstr_found3(void *ctx) { return bpf_strnstr(str, "hello", 6); } @@ -42,5 +45,12 @@ __test(-ENOENT) int test_strnstr_notfound1(void *ctx) { return bpf_strnstr(str, __test(-ENOENT) int test_strnstr_notfound2(void *ctx) { return bpf_strnstr(str, "hello", 4); } __test(-ENOENT) int test_strnstr_notfound3(void *ctx) { return bpf_strnstr("", "a", 0); } __test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); } +__test(0) int test_strncasestr_found1(void *ctx) { return bpf_strncasestr("", "", 0); } +__test(0) int test_strncasestr_found2(void *ctx) { return bpf_strncasestr(str, "heLLO", 5); } +__test(0) int test_strncasestr_found3(void *ctx) { return bpf_strncasestr(str, "heLLO", 6); } +__test(-ENOENT) int test_strncasestr_notfound1(void *ctx) { return bpf_strncasestr(str, "hi", 10); } +__test(-ENOENT) int test_strncasestr_notfound2(void *ctx) { return bpf_strncasestr(str, "hello", 4); } +__test(-ENOENT) int test_strncasestr_notfound3(void *ctx) { return bpf_strncasestr("", "a", 0); } +__test(0) int test_strncasestr_empty(void *ctx) { return bpf_strncasestr(str, "", 1); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index a5c74d31a244..6e1918deaf26 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -330,9 +330,9 @@ static void *calc_location(struct strobe_value_loc *loc, void *tls_base) } bpf_probe_read_user(&tls_ptr, sizeof(void *), dtv); /* if pointer has (void *)-1 value, then TLS wasn't initialized yet */ - return tls_ptr && tls_ptr != (void *)-1 - ? tls_ptr + tls_index.offset - : NULL; + if (!tls_ptr || tls_ptr == (void *)-1) + return NULL; + return tls_ptr + tls_index.offset; } #ifdef SUBPROGS diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c index a58b5194fc89..022291f21dfb 100644 --- a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c +++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c @@ -8,8 +8,6 @@ char _license[] SEC("license") = "GPL"; #define USEC_PER_SEC 1000000UL -#define min(a, b) ((a) < (b) ? 
(a) : (b)) - static unsigned int tcp_left_out(const struct tcp_sock *tp) { return tp->sacked_out + tp->lost_out; diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c index 2ec1de11a3ae..7b6b2b342c1d 100644 --- a/tools/testing/selftests/bpf/progs/test_check_mtu.c +++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c @@ -7,6 +7,7 @@ #include <stddef.h> #include <stdint.h> +#include <errno.h> char _license[] SEC("license") = "GPL"; @@ -288,3 +289,14 @@ int tc_input_len_exceed(struct __sk_buff *ctx) global_bpf_mtu_xdp = mtu_len; return retval; } + +SEC("tc") +int tc_chk_segs_flag(struct __sk_buff *ctx) +{ + __u32 mtu_len = 0; + int err; + + err = bpf_check_mtu(ctx, GLOBAL_USER_IFINDEX, &mtu_len, 0, BPF_MTU_CHK_SEGS); + + return err == -EINVAL ? BPF_OK : BPF_DROP; +} diff --git a/tools/testing/selftests/bpf/progs/test_perf_branches.c b/tools/testing/selftests/bpf/progs/test_perf_branches.c index a1ccc831c882..05ac9410cd68 100644 --- a/tools/testing/selftests/bpf/progs/test_perf_branches.c +++ b/tools/testing/selftests/bpf/progs/test_perf_branches.c @@ -8,6 +8,7 @@ #include <bpf/bpf_tracing.h> int valid = 0; +int run_cnt = 0; int required_size_out = 0; int written_stack_out = 0; int written_global_out = 0; @@ -24,6 +25,8 @@ int perf_branches(void *ctx) __u64 entries[4 * 3] = {0}; int required_size, written_stack, written_global; + ++run_cnt; + /* write to stack */ written_stack = bpf_read_branch_records(ctx, entries, sizeof(entries), 0); /* ignore spurious events */ diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c b/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c new file mode 100644 index 000000000000..ff4aa67ddacc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025. 
Huawei Technologies Co., Ltd */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(map_flags, BPF_F_RB_OVERWRITE); +} ringbuf SEC(".maps"); + +int pid; + +const volatile unsigned long LEN1; +const volatile unsigned long LEN2; +const volatile unsigned long LEN3; +const volatile unsigned long LEN4; +const volatile unsigned long LEN5; + +long reserve1_fail = 0; +long reserve2_fail = 0; +long reserve3_fail = 0; +long reserve4_fail = 0; +long reserve5_fail = 0; + +unsigned long avail_data = 0; +unsigned long ring_size = 0; +unsigned long cons_pos = 0; +unsigned long prod_pos = 0; +unsigned long over_pos = 0; + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int test_overwrite_ringbuf(void *ctx) +{ + char *rec1, *rec2, *rec3, *rec4, *rec5; + int cur_pid = bpf_get_current_pid_tgid() >> 32; + + if (cur_pid != pid) + return 0; + + rec1 = bpf_ringbuf_reserve(&ringbuf, LEN1, 0); + if (!rec1) { + reserve1_fail = 1; + return 0; + } + + rec2 = bpf_ringbuf_reserve(&ringbuf, LEN2, 0); + if (!rec2) { + bpf_ringbuf_discard(rec1, 0); + reserve2_fail = 1; + return 0; + } + + rec3 = bpf_ringbuf_reserve(&ringbuf, LEN3, 0); + /* expect failure */ + if (!rec3) { + reserve3_fail = 1; + } else { + bpf_ringbuf_discard(rec1, 0); + bpf_ringbuf_discard(rec2, 0); + bpf_ringbuf_discard(rec3, 0); + return 0; + } + + rec4 = bpf_ringbuf_reserve(&ringbuf, LEN4, 0); + if (!rec4) { + reserve4_fail = 1; + bpf_ringbuf_discard(rec1, 0); + bpf_ringbuf_discard(rec2, 0); + return 0; + } + + bpf_ringbuf_submit(rec1, 0); + bpf_ringbuf_submit(rec2, 0); + bpf_ringbuf_submit(rec4, 0); + + rec5 = bpf_ringbuf_reserve(&ringbuf, LEN5, 0); + if (!rec5) { + reserve5_fail = 1; + return 0; + } + + for (int i = 0; i < LEN3; i++) + rec5[i] = 0xdd; + + bpf_ringbuf_submit(rec5, 0); + + ring_size = bpf_ringbuf_query(&ringbuf, BPF_RB_RING_SIZE); + avail_data = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA); + cons_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_CONS_POS); + prod_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_PROD_POS); + over_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_OVERWRITE_POS); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c index 950a70b61e74..4f6f03122d61 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_edt.c +++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c @@ -14,7 +14,6 @@ #define TIME_HORIZON_NS (2000 * 1000 * 1000) #define NS_PER_SEC 1000000000 #define ECN_HORIZON_NS 5000000 -#define THROTTLE_RATE_BPS (5 * 1000 * 1000) /* flow_key => last_tstamp timestamp used */ struct { @@ -24,12 +23,13 @@ struct { __uint(max_entries, 1); } flow_map SEC(".maps"); +__uint64_t target_rate; + static inline int throttle_flow(struct __sk_buff *skb) { int key = 0; uint64_t *last_tstamp = bpf_map_lookup_elem(&flow_map, &key); - uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC / - THROTTLE_RATE_BPS; + uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC / target_rate; uint64_t now = bpf_ktime_get_ns(); uint64_t tstamp, next_tstamp = 0; @@ -70,7 +70,7 @@ static inline int handle_tcp(struct __sk_buff *skb, struct tcphdr *tcp) if ((void *)(tcp + 1) > data_end) return TC_ACT_SHOT; - if (tcp->dest == bpf_htons(9000)) + if (tcp->source == bpf_htons(9000)) return throttle_flow(skb); return TC_ACT_OK; @@ -99,7 +99,8 @@ static inline int handle_ipv4(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("cls_test") int tc_prog(struct __sk_buff *skb) 
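
For reference, the earliest-departure-time arithmetic this program applies, condensed into one hypothetical helper (throttle_flow() above additionally persists the timestamp in flow_map and handles the drop/ECN horizons); target_rate is taken to be in bytes per second, as the formula implies:

static __u64 edt_next_tstamp(__u64 last_tstamp, __u32 len, __u64 rate)
{
	/* wire time consumed by this packet at the target rate */
	__u64 delay_ns = (__u64)len * NS_PER_SEC / rate;
	__u64 now = bpf_ktime_get_ns();
	__u64 next = last_tstamp + delay_ns;

	/* never schedule a departure in the past */
	return next > now ? next : now;
}

At 5 MB/s, for example, a 1500-byte frame accounts for 1500 * 1e9 / 5e6 = 300000 ns, so back-to-back full-size frames end up with skb->tstamp values 300 us apart.
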
+SEC("tc") +int tc_prog(struct __sk_buff *skb) { if (skb->protocol == bpf_htons(ETH_P_IP)) return handle_ipv4(skb); diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c index 404124a93892..7330c61b5730 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -2,23 +2,11 @@ /* In-place tunneling */ -#include <stdbool.h> -#include <string.h> - -#include <linux/stddef.h> -#include <linux/bpf.h> -#include <linux/if_ether.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <linux/mpls.h> -#include <linux/tcp.h> -#include <linux/udp.h> -#include <linux/pkt_cls.h> -#include <linux/types.h> +#include <vmlinux.h> -#include <bpf/bpf_endian.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include "bpf_tracing_net.h" #include "bpf_compiler.h" #pragma GCC diagnostic ignored "-Waddress-of-packed-member" @@ -27,6 +15,14 @@ static const int cfg_port = 8000; static const int cfg_udp_src = 20000; +#define ETH_P_MPLS_UC 0x8847 +#define ETH_P_TEB 0x6558 + +#define MPLS_LS_S_MASK 0x00000100 +#define BPF_F_ADJ_ROOM_ENCAP_L2(len) \ + (((__u64)len & BPF_ADJ_ROOM_ENCAP_L2_MASK) \ + << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) + #define L2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN) #define UDP_PORT 5555 @@ -36,10 +32,9 @@ static const int cfg_udp_src = 20000; #define EXTPROTO_VXLAN 0x1 -#define VXLAN_N_VID (1u << 24) -#define VXLAN_VNI_MASK bpf_htonl((VXLAN_N_VID - 1) << 8) -#define VXLAN_FLAGS 0x8 -#define VXLAN_VNI 1 +#define VXLAN_FLAGS bpf_htonl(1<<27) +#define VNI_ID 1 +#define VXLAN_VNI bpf_htonl(VNI_ID << 8) #ifndef NEXTHDR_DEST #define NEXTHDR_DEST 60 @@ -48,12 +43,6 @@ static const int cfg_udp_src = 20000; /* MPLS label 1000 with S bit (last label) set and ttl of 255. 
*/ static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 | MPLS_LS_S_MASK | 0xff); - -struct vxlanhdr { - __be32 vx_flags; - __be32 vx_vni; -} __attribute__((packed)); - struct gre_hdr { __be16 flags; __be16 protocol; @@ -94,8 +83,8 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph) static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, __u16 l2_proto, __u16 ext_proto) { + struct iphdr iph_inner = {0}; __u16 udp_dst = UDP_PORT; - struct iphdr iph_inner; struct v4hdr h_outer; struct tcphdr tcph; int olen, l2_len; @@ -122,7 +111,6 @@ static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, return TC_ACT_OK; /* Derive the IPv4 header fields from the IPv6 header */ - memset(&iph_inner, 0, sizeof(iph_inner)); iph_inner.version = 4; iph_inner.ihl = 5; iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) + @@ -210,7 +198,7 @@ static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto, struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr; vxlan_hdr->vx_flags = VXLAN_FLAGS; - vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8); + vxlan_hdr->vx_vni = VXLAN_VNI; l2_hdr += sizeof(struct vxlanhdr); } @@ -340,7 +328,7 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr; vxlan_hdr->vx_flags = VXLAN_FLAGS; - vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8); + vxlan_hdr->vx_vni = VXLAN_VNI; l2_hdr += sizeof(struct vxlanhdr); } @@ -372,8 +360,8 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, static int encap_ipv6_ipip6(struct __sk_buff *skb) { + struct v6hdr h_outer = {0}; struct iphdr iph_inner; - struct v6hdr h_outer; struct tcphdr tcph; struct ethhdr eth; __u64 flags; @@ -400,13 +388,12 @@ static int encap_ipv6_ipip6(struct __sk_buff *skb) return TC_ACT_SHOT; /* prepare new outer network header */ - memset(&h_outer.ip, 0, sizeof(h_outer.ip)); h_outer.ip.version = 6; h_outer.ip.hop_limit = iph_inner.ttl; - h_outer.ip.saddr.s6_addr[1] = 0xfd; - h_outer.ip.saddr.s6_addr[15] = 1; - h_outer.ip.daddr.s6_addr[1] = 0xfd; - h_outer.ip.daddr.s6_addr[15] = 2; + h_outer.ip.saddr.in6_u.u6_addr8[1] = 0xfd; + h_outer.ip.saddr.in6_u.u6_addr8[15] = 1; + h_outer.ip.daddr.in6_u.u6_addr8[1] = 0xfd; + h_outer.ip.daddr.in6_u.u6_addr8[15] = 2; h_outer.ip.payload_len = iph_inner.tot_len; h_outer.ip.nexthdr = IPPROTO_IPIP; @@ -431,7 +418,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto, return __encap_ipv6(skb, encap_proto, l2_proto, 0); } -SEC("encap_ipip_none") +SEC("tc") int __encap_ipip_none(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) @@ -440,7 +427,7 @@ int __encap_ipip_none(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_gre_none") +SEC("tc") int __encap_gre_none(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) @@ -449,7 +436,7 @@ int __encap_gre_none(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_gre_mpls") +SEC("tc") int __encap_gre_mpls(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) @@ -458,7 +445,7 @@ int __encap_gre_mpls(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_gre_eth") +SEC("tc") int __encap_gre_eth(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) @@ -467,7 +454,7 @@ int __encap_gre_eth(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_udp_none") +SEC("tc") int __encap_udp_none(struct 
__sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) @@ -476,7 +463,7 @@ int __encap_udp_none(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_udp_mpls") +SEC("tc") int __encap_udp_mpls(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) @@ -485,7 +472,7 @@ int __encap_udp_mpls(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_udp_eth") +SEC("tc") int __encap_udp_eth(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) @@ -494,7 +481,7 @@ int __encap_udp_eth(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_vxlan_eth") +SEC("tc") int __encap_vxlan_eth(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) @@ -505,7 +492,7 @@ int __encap_vxlan_eth(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_sit_none") +SEC("tc") int __encap_sit_none(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) @@ -514,7 +501,7 @@ int __encap_sit_none(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_ip6tnl_none") +SEC("tc") int __encap_ip6tnl_none(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) @@ -523,7 +510,7 @@ int __encap_ip6tnl_none(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_ipip6_none") +SEC("tc") int __encap_ipip6_none(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IP)) @@ -532,7 +519,7 @@ int __encap_ipip6_none(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_ip6gre_none") +SEC("tc") int __encap_ip6gre_none(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) @@ -541,7 +528,7 @@ int __encap_ip6gre_none(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_ip6gre_mpls") +SEC("tc") int __encap_ip6gre_mpls(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) @@ -550,7 +537,7 @@ int __encap_ip6gre_mpls(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_ip6gre_eth") +SEC("tc") int __encap_ip6gre_eth(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) @@ -559,7 +546,7 @@ int __encap_ip6gre_eth(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_ip6udp_none") +SEC("tc") int __encap_ip6udp_none(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) @@ -568,7 +555,7 @@ int __encap_ip6udp_none(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_ip6udp_mpls") +SEC("tc") int __encap_ip6udp_mpls(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) @@ -577,7 +564,7 @@ int __encap_ip6udp_mpls(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_ip6udp_eth") +SEC("tc") int __encap_ip6udp_eth(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) @@ -586,7 +573,7 @@ int __encap_ip6udp_eth(struct __sk_buff *skb) return TC_ACT_OK; } -SEC("encap_ip6vxlan_eth") +SEC("tc") int __encap_ip6vxlan_eth(struct __sk_buff *skb) { if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6)) @@ -693,7 +680,7 @@ static int decap_ipv6(struct __sk_buff *skb) iph_outer.nexthdr); } -SEC("decap") +SEC("tc") int decap_f(struct __sk_buff *skb) { switch (skb->protocol) { diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index d79cb74b571e..0a0f371a2dec 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -4,6 +4,7 @@ #include <linux/if_ether.h> #include <linux/pkt_cls.h> +#include <bpf/bpf_endian.h> #include <bpf/bpf_helpers.h> 
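
As an aside, the XDP-side pattern these tests exercise, condensed into a hypothetical minimal program (META_SIZE as used throughout this file; the redirect/drop policy of the real programs is trimmed):

SEC("xdp")
int xdp_meta_sketch(struct xdp_md *ctx)
{
	__u8 *data, *data_end, *data_meta;

	/* grow the metadata area in front of the packet data */
	if (bpf_xdp_adjust_meta(ctx, -META_SIZE))
		return XDP_DROP;

	/* all pointers must be re-derived after the adjust */
	data_meta = (void *)(unsigned long)ctx->data_meta;
	data = (void *)(unsigned long)ctx->data;
	data_end = (void *)(unsigned long)ctx->data_end;

	/* bounds checks the verifier insists on */
	if (data_meta + META_SIZE > data ||
	    data + ETH_HLEN + META_SIZE > data_end)
		return XDP_DROP;

	/* stash the start of the payload for the TC hook to compare */
	__builtin_memcpy(data_meta, data + ETH_HLEN, META_SIZE);
	return XDP_PASS;
}

The TC programs below then read the same bytes back, via ctx->data_meta or bpf_dynptr_from_skb_meta(), and compare them against meta_want.
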
#include "bpf_kfuncs.h" @@ -11,37 +12,72 @@ #define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem -/* Demonstrates how metadata can be passed from an XDP program to a TC program - * using bpf_xdp_adjust_meta. - * For the sake of testing the metadata support in drivers, the XDP program uses - * a fixed-size payload after the Ethernet header as metadata. The TC program - * copies the metadata it receives into a map so it can be checked from - * userspace. +/* Demonstrate passing metadata from XDP to TC using bpf_xdp_adjust_meta. + * + * The XDP program extracts a fixed-size payload following the Ethernet header + * and stores it as packet metadata to test the driver's metadata support. The + * TC program then verifies if the passed metadata is correct. */ -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); - __type(key, __u32); - __uint(value_size, META_SIZE); -} test_result SEC(".maps"); - bool test_pass; +static const __u8 smac_want[ETH_ALEN] = { + 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF, +}; + +static const __u8 meta_want[META_SIZE] = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, + 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, + 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, + 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, +}; + +static bool check_smac(const struct ethhdr *eth) +{ + return !__builtin_memcmp(eth->h_source, smac_want, ETH_ALEN); +} + +static bool check_metadata(const char *file, int line, __u8 *meta_have) +{ + if (!__builtin_memcmp(meta_have, meta_want, META_SIZE)) + return true; + + bpf_stream_printk(BPF_STREAM_STDERR, + "FAIL:%s:%d: metadata mismatch\n" + " have:\n %pI6\n %pI6\n" + " want:\n %pI6\n %pI6\n", + file, line, + &meta_have[0x00], &meta_have[0x10], + &meta_want[0x00], &meta_want[0x10]); + return false; +} + +#define check_metadata(meta_have) check_metadata(__FILE__, __LINE__, meta_have) + +static bool check_skb_metadata(const char *file, int line, struct __sk_buff *skb) +{ + __u8 *data_meta = ctx_ptr(skb, data_meta); + __u8 *data = ctx_ptr(skb, data); + + return data_meta + META_SIZE <= data && (check_metadata)(file, line, data_meta); +} + +#define check_skb_metadata(skb) check_skb_metadata(__FILE__, __LINE__, skb) + SEC("tc") int ing_cls(struct __sk_buff *ctx) { - __u8 *data, *data_meta; - __u32 key = 0; - - data_meta = ctx_ptr(ctx, data_meta); - data = ctx_ptr(ctx, data); + __u8 *meta_have = ctx_ptr(ctx, data_meta); + __u8 *data = ctx_ptr(ctx, data); - if (data_meta + META_SIZE > data) - return TC_ACT_SHOT; + if (meta_have + META_SIZE > data) + goto out; - bpf_map_update_elem(&test_result, &key, data_meta, BPF_ANY); + if (!check_metadata(meta_have)) + goto out; + test_pass = true; +out: return TC_ACT_SHOT; } @@ -49,17 +85,17 @@ int ing_cls(struct __sk_buff *ctx) SEC("tc") int ing_cls_dynptr_read(struct __sk_buff *ctx) { + __u8 meta_have[META_SIZE]; struct bpf_dynptr meta; - const __u32 zero = 0; - __u8 *dst; - - dst = bpf_map_lookup_elem(&test_result, &zero); - if (!dst) - return TC_ACT_SHOT; bpf_dynptr_from_skb_meta(ctx, 0, &meta); - bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0); + bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (!check_metadata(meta_have)) + goto out; + + test_pass = true; +out: return TC_ACT_SHOT; } @@ -86,20 +122,18 @@ SEC("tc") int ing_cls_dynptr_slice(struct __sk_buff *ctx) { struct bpf_dynptr meta; - const __u32 zero = 0; - __u8 *dst, *src; - - dst = bpf_map_lookup_elem(&test_result, &zero); - if (!dst) - return TC_ACT_SHOT; + __u8 *meta_have; bpf_dynptr_from_skb_meta(ctx, 0, &meta); - src = 
bpf_dynptr_slice(&meta, 0, NULL, META_SIZE); - if (!src) - return TC_ACT_SHOT; + meta_have = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE); + if (!meta_have) + goto out; - __builtin_memcpy(dst, src, META_SIZE); + if (!check_metadata(meta_have)) + goto out; + test_pass = true; +out: return TC_ACT_SHOT; } @@ -129,14 +163,12 @@ int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx) SEC("tc") int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx) { - struct bpf_dynptr meta; const __u32 chunk_len = META_SIZE / 4; - const __u32 zero = 0; + __u8 meta_have[META_SIZE]; + struct bpf_dynptr meta; __u8 *dst, *src; - dst = bpf_map_lookup_elem(&test_result, &zero); - if (!dst) - return TC_ACT_SHOT; + dst = meta_have; /* 1. Regular read */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); @@ -155,9 +187,14 @@ int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx) /* 4. Read from a slice starting at an offset */ src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len); if (!src) - return TC_ACT_SHOT; + goto out; __builtin_memcpy(dst, src, chunk_len); + if (!check_metadata(meta_have)) + goto out; + + test_pass = true; +out: return TC_ACT_SHOT; } @@ -254,7 +291,7 @@ int ing_xdp_zalloc_meta(struct xdp_md *ctx) /* Drop any non-test packets */ if (eth + 1 > ctx_ptr(ctx, data_end)) return XDP_DROP; - if (eth->h_proto != 0) + if (!check_smac(eth)) return XDP_DROP; ret = bpf_xdp_adjust_meta(ctx, -META_SIZE); @@ -294,9 +331,9 @@ int ing_xdp(struct xdp_md *ctx) /* The Linux networking stack may send other packets on the test * interface that interfere with the test. Just drop them. - * The test packets can be recognized by their ethertype of zero. + * The test packets can be recognized by their source MAC address. */ - if (eth->h_proto != 0) + if (!check_smac(eth)) return XDP_DROP; __builtin_memcpy(data_meta, payload, META_SIZE); @@ -304,22 +341,25 @@ int ing_xdp(struct xdp_md *ctx) } /* - * Check that skb->data_meta..skb->data is empty if prog writes to packet - * _payload_ using packet pointers. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, skb->data_meta..skb->data is + * kept intact if prog writes to packet _payload_ using packet pointers. */ SEC("tc") -int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx) +int clone_data_meta_survives_data_write(struct __sk_buff *ctx) { + __u8 *meta_have = ctx_ptr(ctx, data_meta); struct ethhdr *eth = ctx_ptr(ctx, data); if (eth + 1 > ctx_ptr(ctx, data_end)) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) + goto out; + + if (meta_have + META_SIZE > eth) goto out; - /* Expect no metadata */ - if (ctx->data_meta != ctx->data) + if (!check_metadata(meta_have)) goto out; /* Packet write to trigger unclone in prologue */ @@ -331,40 +371,44 @@ out: } /* - * Check that skb->data_meta..skb->data is empty if prog writes to packet - * _metadata_ using packet pointers. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, skb->data_meta..skb->data is + * kept intact if prog writes to packet _metadata_ using packet pointers. 
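+ * (The previous version of this test, clone_data_meta_empty_on_meta_write,
+ * expected the opposite: no metadata left after the prologue unclone
+ * triggered by the write.)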
*/ SEC("tc") -int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx) +int clone_data_meta_survives_meta_write(struct __sk_buff *ctx) { + __u8 *meta_have = ctx_ptr(ctx, data_meta); struct ethhdr *eth = ctx_ptr(ctx, data); - __u8 *md = ctx_ptr(ctx, data_meta); if (eth + 1 > ctx_ptr(ctx, data_end)) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; - if (md + 1 > ctx_ptr(ctx, data)) { - /* Expect no metadata */ - test_pass = true; - } else { - /* Metadata write to trigger unclone in prologue */ - *md = 42; - } + if (meta_have + META_SIZE > eth) + goto out; + + if (!check_metadata(meta_have)) + goto out; + + /* Metadata write to trigger unclone in prologue */ + *meta_have = 42; + + test_pass = true; out: return TC_ACT_SHOT; } /* - * Check that skb_meta dynptr is writable but empty if prog writes to packet - * _payload_ using a dynptr slice. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, metadata remains intact if + * prog creates a r/w slice to packet _payload_. */ SEC("tc") -int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx) +int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; + __u8 meta_have[META_SIZE]; struct ethhdr *eth; bpf_dynptr_from_skb(ctx, 0, &data); @@ -372,51 +416,45 @@ int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx) if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; - /* Expect no metadata */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (!check_metadata(meta_have)) goto out; - /* Packet write to trigger unclone in prologue */ - eth->h_proto = 42; - test_pass = true; out: return TC_ACT_SHOT; } /* - * Check that skb_meta dynptr is writable but empty if prog writes to packet - * _metadata_ using a dynptr slice. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, metadata remains intact if + * prog creates an r/w slice to packet _metadata_. */ SEC("tc") -int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx) +int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; const struct ethhdr *eth; - __u8 *md; + __u8 *meta_have; bpf_dynptr_from_skb(ctx, 0, &data); eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; - /* Expect no metadata */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + meta_have = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE); + if (!meta_have) goto out; - /* Metadata write to trigger unclone in prologue */ - bpf_dynptr_from_skb_meta(ctx, 0, &meta); - md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); - if (md) - *md = 42; + if (!check_metadata(meta_have)) + goto out; test_pass = true; out: @@ -424,34 +462,40 @@ out: } /* - * Check that skb_meta dynptr is read-only before prog writes to packet payload - * using dynptr_write helper. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, skb_meta dynptr is read-write + * before prog writes to packet _payload_ using dynptr_write helper and metadata + * remains intact before and after the write. 
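+ * (Previously the dynptr was expected to be read-only on a clone, and the
+ * metadata to be gone once the helper write to the payload forced the
+ * unclone.)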
*/ SEC("tc") -int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx) +int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; + __u8 meta_have[META_SIZE]; const struct ethhdr *eth; + int err; bpf_dynptr_from_skb(ctx, 0, &data); eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; - /* Expect read-only metadata before unclone */ + /* Expect read-write metadata before unclone */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + if (bpf_dynptr_is_rdonly(&meta)) + goto out; + + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) goto out; /* Helper write to payload will unclone the packet */ bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0); - /* Expect no metadata after unclone */ - bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0) + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) goto out; test_pass = true; @@ -460,31 +504,165 @@ out: } /* - * Check that skb_meta dynptr is read-only if prog writes to packet - * metadata using dynptr_write helper. Applies only to cloned skbs. + * Check that, when operating on a cloned packet, skb_meta dynptr is read-write + * before prog writes to packet _metadata_ using dynptr_write helper and + * metadata remains intact before and after the write. */ SEC("tc") -int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx) +int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx) { struct bpf_dynptr data, meta; + __u8 meta_have[META_SIZE]; const struct ethhdr *eth; + int err; bpf_dynptr_from_skb(ctx, 0, &data); eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); if (!eth) goto out; /* Ignore non-test packets */ - if (eth->h_proto != 0) + if (!check_smac(eth)) goto out; - /* Expect read-only metadata */ + /* Expect read-write metadata before unclone */ bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + if (bpf_dynptr_is_rdonly(&meta)) goto out; - /* Metadata write. Expect failure. */ - bpf_dynptr_from_skb_meta(ctx, 0, &meta); - if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL) + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) + goto out; + + /* Helper write to metadata will unclone the packet */ + bpf_dynptr_write(&meta, 0, &meta_have[0], 1, 0); + + err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0); + if (err || !check_metadata(meta_have)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +SEC("tc") +int helper_skb_vlan_push_pop(struct __sk_buff *ctx) +{ + int err; + + /* bpf_skb_vlan_push assumes HW offload for primary VLAN tag. Only + * secondary tag push triggers an actual MAC header modification. 
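+	 * Pushing two tags and popping them both therefore exercises both the
+	 * offloaded and the in-packet header path, with the metadata area
+	 * checked after each phase.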
+ */ + err = bpf_skb_vlan_push(ctx, 0, 42); + if (err) + goto out; + err = bpf_skb_vlan_push(ctx, 0, 207); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + err = bpf_skb_vlan_pop(ctx); + if (err) + goto out; + err = bpf_skb_vlan_pop(ctx); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +SEC("tc") +int helper_skb_adjust_room(struct __sk_buff *ctx) +{ + int err; + + /* Grow a 1 byte hole after the MAC header */ + err = bpf_skb_adjust_room(ctx, 1, BPF_ADJ_ROOM_MAC, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Shrink a 1 byte hole after the MAC header */ + err = bpf_skb_adjust_room(ctx, -1, BPF_ADJ_ROOM_MAC, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Grow a 256 byte hole to trigger head reallocation */ + err = bpf_skb_adjust_room(ctx, 256, BPF_ADJ_ROOM_MAC, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +SEC("tc") +int helper_skb_change_head_tail(struct __sk_buff *ctx) +{ + int err; + + /* Reserve 1 extra byte in the front for packet data */ + err = bpf_skb_change_head(ctx, 1, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Reserve 256 extra bytes in the front to trigger head reallocation */ + err = bpf_skb_change_head(ctx, 256, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + /* Reserve 4k extra bytes in the back to trigger head reallocation */ + err = bpf_skb_change_tail(ctx, ctx->len + 4096, 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +SEC("tc") +int helper_skb_change_proto(struct __sk_buff *ctx) +{ + int err; + + err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IPV6), 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) + goto out; + + err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IP), 0); + if (err) + goto out; + + if (!check_skb_metadata(ctx)) goto out; test_pass = true; diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 3d5f30c29ae3..2898b3749d07 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -42,12 +42,14 @@ int bench_trigger_uprobe_multi(void *ctx) const volatile int batch_iters = 0; SEC("?raw_tp") -int trigger_count(void *ctx) +int trigger_kernel_count(void *ctx) { int i; - for (i = 0; i < batch_iters; i++) + for (i = 0; i < batch_iters; i++) { inc_counter(); + bpf_get_numa_node_id(); + } return 0; } diff --git a/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c new file mode 100644 index 000000000000..7efa9521105e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. 
 */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +char _license[] SEC("license") = "GPL"; + +/* Timer tests */ + +struct timer_elem { + struct bpf_timer t; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct timer_elem); +} timer_map SEC(".maps"); + +static int timer_cb(void *map, int *key, struct bpf_timer *timer) +{ + u32 data; + /* Timer callbacks are never sleepable, even from sleepable programs */ + bpf_copy_from_user(&data, sizeof(data), NULL); + return 0; +} + +SEC("fentry/bpf_fentry_test1") +__failure __msg("helper call might sleep in a non-sleepable prog") +int timer_non_sleepable_prog(void *ctx) +{ + struct timer_elem *val; + int key = 0; + + val = bpf_map_lookup_elem(&timer_map, &key); + if (!val) + return 0; + + bpf_timer_init(&val->t, &timer_map, 0); + bpf_timer_set_callback(&val->t, timer_cb); + return 0; +} + +SEC("lsm.s/file_open") +__failure __msg("helper call might sleep in a non-sleepable prog") +int timer_sleepable_prog(void *ctx) +{ + struct timer_elem *val; + int key = 0; + + val = bpf_map_lookup_elem(&timer_map, &key); + if (!val) + return 0; + + bpf_timer_init(&val->t, &timer_map, 0); + bpf_timer_set_callback(&val->t, timer_cb); + return 0; +} + +/* Workqueue tests */ + +struct wq_elem { + struct bpf_wq w; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct wq_elem); +} wq_map SEC(".maps"); + +static int wq_cb(void *map, int *key, void *value) +{ + u32 data; + /* Workqueue callbacks are always sleepable, even from non-sleepable programs */ + bpf_copy_from_user(&data, sizeof(data), NULL); + return 0; +} + +SEC("fentry/bpf_fentry_test1") +__success +int wq_non_sleepable_prog(void *ctx) +{ + struct wq_elem *val; + int key = 0; + + val = bpf_map_lookup_elem(&wq_map, &key); + if (!val) + return 0; + + if (bpf_wq_init(&val->w, &wq_map, 0) != 0) + return 0; + if (bpf_wq_set_callback_impl(&val->w, wq_cb, 0, NULL) != 0) + return 0; + return 0; +} + +SEC("lsm.s/file_open") +__success +int wq_sleepable_prog(void *ctx) +{ + struct wq_elem *val; + int key = 0; + + val = bpf_map_lookup_elem(&wq_map, &key); + if (!val) + return 0; + + if (bpf_wq_init(&val->w, &wq_map, 0) != 0) + return 0; + if (bpf_wq_set_callback_impl(&val->w, wq_cb, 0, NULL) != 0) + return 0; + return 0; +} + +/* Task work tests */ + +struct task_work_elem { + struct bpf_task_work tw; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct task_work_elem); +} task_work_map SEC(".maps"); + +static int task_work_cb(struct bpf_map *map, void *key, void *value) +{ + u32 data; + /* Task work callbacks are always sleepable, even from non-sleepable programs */ + bpf_copy_from_user(&data, sizeof(data), NULL); + return 0; +} + +SEC("fentry/bpf_fentry_test1") +__success +int task_work_non_sleepable_prog(void *ctx) +{ + struct task_work_elem *val; + struct task_struct *task; + int key = 0; + + val = bpf_map_lookup_elem(&task_work_map, &key); + if (!val) + return 0; + + task = bpf_get_current_task_btf(); + if (!task) + return 0; + + bpf_task_work_schedule_resume_impl(task, &val->tw, &task_work_map, task_work_cb, NULL); + return 0; +} + +SEC("lsm.s/file_open") +__success +int task_work_sleepable_prog(void *ctx) +{ + struct task_work_elem *val; + struct task_struct *task; + int key = 0; + + val = 
bpf_map_lookup_elem(&task_work_map, &key); + if (!val) + return 0; + + task = bpf_get_current_task_btf(); + if (!task) + return 0; + + bpf_task_work_schedule_resume_impl(task, &val->tw, &task_work_map, task_work_cb, NULL); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 0a72e0228ea9..411a18437d7e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -1709,4 +1709,158 @@ __naked void jeq_disagreeing_tnums(void *ctx) : __clobber_all); } +SEC("socket") +__description("conditional jump on same register, branch taken") +__not_msg("20: (b7) r0 = 1 {{.*}} R0=1") +__success __log_level(2) +__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS) +__naked void condition_jump_on_same_register(void *ctx) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + w8 = 0x80000000; \ + r0 &= r8; \ + if r0 == r0 goto +1; \ + goto l1_%=; \ + if r0 >= r0 goto +1; \ + goto l1_%=; \ + if r0 s>= r0 goto +1; \ + goto l1_%=; \ + if r0 <= r0 goto +1; \ + goto l1_%=; \ + if r0 s<= r0 goto +1; \ + goto l1_%=; \ + if r0 != r0 goto l1_%=; \ + if r0 > r0 goto l1_%=; \ + if r0 s> r0 goto l1_%=; \ + if r0 < r0 goto l1_%=; \ + if r0 s< r0 goto l1_%=; \ +l0_%=: r0 = 0; \ + exit; \ +l1_%=: r0 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("jset on same register, constant value branch taken") +__not_msg("7: (b7) r0 = 1 {{.*}} R0=1") +__success __log_level(2) +__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS) +__naked void jset_on_same_register_1(void *ctx) +{ + asm volatile(" \ + r0 = 0; \ + if r0 & r0 goto l1_%=; \ + r0 = 1; \ + if r0 & r0 goto +1; \ + goto l1_%=; \ +l0_%=: r0 = 0; \ + exit; \ +l1_%=: r0 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("jset on same register, scalar value branch taken") +__not_msg("12: (b7) r0 = 1 {{.*}} R0=1") +__success __log_level(2) +__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS) +__naked void jset_on_same_register_2(void *ctx) +{ + asm volatile(" \ + /* range [1;2] */ \ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x1; \ + r0 += 1; \ + if r0 & r0 goto +1; \ + goto l1_%=; \ + /* range [-2;-1] */ \ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x1; \ + r0 -= 2; \ + if r0 & r0 goto +1; \ + goto l1_%=; \ +l0_%=: r0 = 0; \ + exit; \ +l1_%=: r0 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("jset on same register, scalar value unknown branch 1") +__msg("3: (b7) r0 = 0 {{.*}} R0=0") +__msg("5: (b7) r0 = 1 {{.*}} R0=1") +__success __log_level(2) +__flag(BPF_F_TEST_REG_INVARIANTS) +__naked void jset_on_same_register_3(void *ctx) +{ + asm volatile(" \ + /* range [0;1] */ \ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x1; \ + if r0 & r0 goto l1_%=; \ +l0_%=: r0 = 0; \ + exit; \ +l1_%=: r0 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("jset on same register, scalar value unknown branch 2") +__msg("4: (b7) r0 = 0 {{.*}} R0=0") +__msg("6: (b7) r0 = 1 {{.*}} R0=1") +__success __log_level(2) +__flag(BPF_F_TEST_REG_INVARIANTS) +__naked void jset_on_same_register_4(void *ctx) +{ + asm volatile(" \ + /* range [-1;0] */ \ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x1; \ + r0 -= 1; \ + if r0 & r0 goto l1_%=; \ +l0_%=: r0 = 0; \ + exit; \ +l1_%=: r0 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + 
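The jset cases above and below all rely on the identity x & x == x: a BPF_JSET conditional with identical source and destination registers reduces to a "reg != 0" test, so the branch is provably taken for ranges that exclude zero ([1;2] and [-2;-1]) and must remain unknown for ranges that contain it ([0;1], [-1;0], [-1;1]). A minimal userspace sketch of that enumeration, illustrative only and not part of the patch:

/* Hypothetical standalone sketch, not from the selftests: classify the
 * "if r0 & r0" branch for each small range used by the tests around it.
 */
#include <stdio.h>

int main(void)
{
	static const long long ranges[][2] = {
		{ 1, 2 }, { -2, -1 },		/* zero excluded */
		{ 0, 1 }, { -1, 0 }, { -1, 1 },	/* zero included */
	};
	int i;

	for (i = 0; i < (int)(sizeof(ranges) / sizeof(ranges[0])); i++) {
		int taken = 0, fallthrough = 0;
		long long v;

		for (v = ranges[i][0]; v <= ranges[i][1]; v++) {
			/* x & x == x, so the jset branch fires iff v != 0 */
			if (v & v)
				taken++;
			else
				fallthrough++;
		}
		printf("[%lld;%lld]: %s\n", ranges[i][0], ranges[i][1],
		       !fallthrough ? "branch always taken" :
		       !taken ? "branch never taken" : "branch unknown");
	}
	return 0;
}

Expected output is "branch always taken" for the first two ranges and "branch unknown" for the rest, which matches the __not_msg/__msg expectations these tests place on the verifier log.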
+SEC("socket") +__description("jset on same register, scalar value unknown branch 3") +__msg("4: (b7) r0 = 0 {{.*}} R0=0") +__msg("6: (b7) r0 = 1 {{.*}} R0=1") +__success __log_level(2) +__flag(BPF_F_TEST_REG_INVARIANTS) +__naked void jset_on_same_register_5(void *ctx) +{ + asm volatile(" \ + /* range [-1;1] */ \ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x2; \ + r0 -= 1; \ + if r0 & r0 goto l1_%=; \ +l0_%=: r0 = 0; \ + exit; \ +l1_%=: r0 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c index 28b602ac9cbe..911caa8fd1b7 100644 --- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Converted from tools/testing/selftests/bpf/verifier/direct_packet_access.c */ +#include <linux/if_ether.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" @@ -800,4 +801,62 @@ l0_%=: /* exit(0) */ \ : __clobber_all); } +#define access_test_non_linear(name, type, desc, retval, linear_sz, off) \ + SEC(type) \ + __description("direct packet access: " #name " (non-linear, " type ", " desc ")") \ + __success __retval(retval) \ + __linear_size(linear_sz) \ + __naked void access_non_linear_##name(void) \ + { \ + asm volatile (" \ + r2 = *(u32*)(r1 + %[skb_data]); \ + r3 = *(u32*)(r1 + %[skb_data_end]); \ + r0 = r2; \ + r0 += %[offset]; \ + if r0 > r3 goto l0_%=; \ + r0 = *(u8*)(r0 - 1); \ + r0 = 0; \ + exit; \ + l0_%=: r0 = 1; \ + exit; \ + " : \ + : __imm_const(skb_data, offsetof(struct __sk_buff, data)), \ + __imm_const(skb_data_end, offsetof(struct __sk_buff, data_end)), \ + __imm_const(offset, off) \ + : __clobber_all); \ + } + +access_test_non_linear(test31, "tc", "too short eth", 1, ETH_HLEN, 22); +access_test_non_linear(test32, "tc", "too short 1", 1, 1, 22); +access_test_non_linear(test33, "tc", "long enough", 0, 22, 22); +access_test_non_linear(test34, "cgroup_skb/ingress", "too short eth", 1, ETH_HLEN, 8); +access_test_non_linear(test35, "cgroup_skb/ingress", "too short 1", 1, 1, 8); +access_test_non_linear(test36, "cgroup_skb/ingress", "long enough", 0, 22, 8); + +SEC("tc") +__description("direct packet access: test37 (non-linear, linearized)") +__success __retval(0) +__linear_size(ETH_HLEN) +__naked void access_non_linear_linearized(void) +{ + asm volatile (" \ + r6 = r1; \ + r2 = 22; \ + call %[bpf_skb_pull_data]; \ + r2 = *(u32*)(r6 + %[skb_data]); \ + r3 = *(u32*)(r6 + %[skb_data_end]); \ + r0 = r2; \ + r0 += 22; \ + if r0 > r3 goto l0_%=; \ + r0 = *(u8*)(r0 - 1); \ + exit; \ +l0_%=: r0 = 1; \ + exit; \ +" : + : __imm(bpf_skb_pull_data), + __imm_const(skb_data, offsetof(struct __sk_buff, data)), + __imm_const(skb_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_gotox.c b/tools/testing/selftests/bpf/progs/verifier_gotox.c new file mode 100644 index 000000000000..607dad058ca1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_gotox.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Isovalent */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "../../../include/linux/filter.h" + +#if defined(__TARGET_ARCH_x86) || 
defined(__TARGET_ARCH_arm64) + +#define DEFINE_SIMPLE_JUMP_TABLE_PROG(NAME, SRC_REG, OFF, IMM, OUTCOME) \ + \ + SEC("socket") \ + OUTCOME \ + __naked void jump_table_ ## NAME(void) \ + { \ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ + jt0_%=: \ + .quad ret0_%= - socket; \ + .quad ret1_%= - socket; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 += 8; \ + r0 = *(u64 *)(r0 + 0); \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ + " : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, (SRC_REG), (OFF) , (IMM))) \ + : __clobber_all); \ + } + +/* + * The first program which doesn't use reserved fields + * loads and works properly. The rest fail to load. + */ +DEFINE_SIMPLE_JUMP_TABLE_PROG(ok, BPF_REG_0, 0, 0, __success __retval(1)) +DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_src_reg, BPF_REG_1, 0, 0, __failure __msg("BPF_JA|BPF_X uses reserved fields")) +DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_non_zero_off, BPF_REG_0, 1, 0, __failure __msg("BPF_JA|BPF_X uses reserved fields")) +DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_non_zero_imm, BPF_REG_0, 0, 1, __failure __msg("BPF_JA|BPF_X uses reserved fields")) + +/* + * Gotox is forbidden when there is no jump table loaded + * which points to the sub-function where the gotox is used + */ +SEC("socket") +__failure __msg("no jump tables found for subprog starting at 0") +__naked void jump_table_no_jump_table(void) +{ + asm volatile (" \ + .8byte %[gotox_r0]; \ + r0 = 1; \ + exit; \ +" : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) + : __clobber_all); +} + +/* + * Incorrect type of the target register, only PTR_TO_INSN allowed + */ +SEC("socket") +__failure __msg("R1 has type scalar, expected PTR_TO_INSN") +__naked void jump_table_incorrect_dst_reg_type(void) +{ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ +jt0_%=: \ + .quad ret0_%= - socket; \ + .quad ret1_%= - socket; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 += 8; \ + r0 = *(u64 *)(r0 + 0); \ + r1 = 42; \ + .8byte %[gotox_r1]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ +" : \ + : __imm_insn(gotox_r1, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_1, 0, 0 , 0)) + : __clobber_all); +} + +#define DEFINE_INVALID_SIZE_PROG(READ_SIZE, OUTCOME) \ + \ + SEC("socket") \ + OUTCOME \ + __naked void jump_table_invalid_read_size_ ## READ_SIZE(void) \ + { \ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ + jt0_%=: \ + .quad ret0_%= - socket; \ + .quad ret1_%= - socket; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 += 8; \ + r0 = *(" #READ_SIZE " *)(r0 + 0); \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ + " : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) \ + : __clobber_all); \ + } + +DEFINE_INVALID_SIZE_PROG(u32, __failure __msg("Invalid read of 4 bytes from insn_array")) +DEFINE_INVALID_SIZE_PROG(u16, __failure __msg("Invalid read of 2 bytes from insn_array")) +DEFINE_INVALID_SIZE_PROG(u8, __failure __msg("Invalid read of 1 bytes from insn_array")) + +SEC("socket") +__failure __msg("misaligned value access off 0+1+0 size 8") +__naked void jump_table_misaligned_access(void) +{ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ +jt0_%=: \ + .quad ret0_%= - socket; \ + .quad 
ret1_%= - socket; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 += 1; \ + r0 = *(u64 *)(r0 + 0); \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ +" : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) + : __clobber_all); +} + +SEC("socket") +__failure __msg("invalid access to map value, value_size=16 off=24 size=8") +__naked void jump_table_invalid_mem_access_pos(void) +{ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ +jt0_%=: \ + .quad ret0_%= - socket; \ + .quad ret1_%= - socket; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 += 24; \ + r0 = *(u64 *)(r0 + 0); \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ +" : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) + : __clobber_all); +} + +SEC("socket") +__failure __msg("invalid access to map value, value_size=16 off=-24 size=8") +__naked void jump_table_invalid_mem_access_neg(void) +{ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ +jt0_%=: \ + .quad ret0_%= - socket; \ + .quad ret1_%= - socket; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 -= 24; \ + r0 = *(u64 *)(r0 + 0); \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ +" : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) + : __clobber_all); +} + +SEC("socket") +__success __retval(1) +__naked void jump_table_add_sub_ok(void) +{ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ +jt0_%=: \ + .quad ret0_%= - socket; \ + .quad ret1_%= - socket; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 -= 24; \ + r0 += 32; \ + r0 = *(u64 *)(r0 + 0); \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ +" : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) + : __clobber_all); +} + +SEC("socket") +__failure __msg("write into map forbidden, value_size=16 off=8 size=8") +__naked void jump_table_no_writes(void) +{ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ +jt0_%=: \ + .quad ret0_%= - socket; \ + .quad ret1_%= - socket; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 += 8; \ + r1 = 0xbeef; \ + *(u64 *)(r0 + 0) = r1; \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ +" : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) + : __clobber_all); +} + +#define DEFINE_JUMP_TABLE_USE_REG(REG) \ + SEC("socket") \ + __success __retval(1) \ + __naked void jump_table_use_reg_r ## REG(void) \ + { \ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ + jt0_%=: \ + .quad ret0_%= - socket; \ + .quad ret1_%= - socket; \ + .size jt0_%=, 16; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 += 8; \ + r" #REG " = *(u64 *)(r0 + 0); \ + .8byte %[gotox_rX]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ + " : \ + : __imm_insn(gotox_rX, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_ ## REG, 0, 0 , 0)) \ + : __clobber_all); \ + } + +DEFINE_JUMP_TABLE_USE_REG(0) +DEFINE_JUMP_TABLE_USE_REG(1) +DEFINE_JUMP_TABLE_USE_REG(2) +DEFINE_JUMP_TABLE_USE_REG(3) +DEFINE_JUMP_TABLE_USE_REG(4) 
+DEFINE_JUMP_TABLE_USE_REG(5) +DEFINE_JUMP_TABLE_USE_REG(6) +DEFINE_JUMP_TABLE_USE_REG(7) +DEFINE_JUMP_TABLE_USE_REG(8) +DEFINE_JUMP_TABLE_USE_REG(9) + +__used static int test_subprog(void) +{ + return 0; +} + +SEC("socket") +__failure __msg("jump table for insn 4 points outside of the subprog [0,10]") +__naked void jump_table_outside_subprog(void) +{ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ +jt0_%=: \ + .quad ret0_%= - socket; \ + .quad ret1_%= - socket; \ + .quad ret_out_%= - socket; \ + .size jt0_%=, 24; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 += 8; \ + r0 = *(u64 *)(r0 + 0); \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + call test_subprog; \ + exit; \ + ret_out_%=: \ +" : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) + : __clobber_all); +} + +SEC("socket") +__success __retval(1) +__naked void jump_table_contains_non_unique_values(void) +{ + asm volatile (" \ + .pushsection .jumptables,\"\",@progbits; \ +jt0_%=: \ + .quad ret0_%= - socket; \ + .quad ret1_%= - socket; \ + .quad ret0_%= - socket; \ + .quad ret1_%= - socket; \ + .quad ret0_%= - socket; \ + .quad ret1_%= - socket; \ + .quad ret0_%= - socket; \ + .quad ret1_%= - socket; \ + .quad ret0_%= - socket; \ + .quad ret1_%= - socket; \ + .size jt0_%=, 80; \ + .global jt0_%=; \ + .popsection; \ + \ + r0 = jt0_%= ll; \ + r0 += 8; \ + r0 = *(u64 *)(r0 + 0); \ + .8byte %[gotox_r0]; \ + ret0_%=: \ + r0 = 0; \ + exit; \ + ret1_%=: \ + r0 = 1; \ + exit; \ +" : \ + : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) + : __clobber_all); +} + +#endif /* __TARGET_ARCH_x86 || __TARGET_ARCH_arm64 */ + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_live_stack.c b/tools/testing/selftests/bpf/progs/verifier_live_stack.c index c0e808509268..2de105057bbc 100644 --- a/tools/testing/selftests/bpf/progs/verifier_live_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_live_stack.c @@ -292,3 +292,53 @@ __naked void syzbot_postorder_bug1(void) "exit;" ::: __clobber_all); } + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} map_array SEC(".maps"); + +SEC("socket") +__failure __msg("invalid read from stack R2 off=-1024 size=8") +__flag(BPF_F_TEST_STATE_FREQ) +__naked unsigned long caller_stack_write_tail_call(void) +{ + asm volatile ( + "r6 = r1;" + "*(u64 *)(r10 - 8) = -8;" + "call %[bpf_get_prandom_u32];" + "if r0 != 42 goto 1f;" + "goto 2f;" + "1:" + "*(u64 *)(r10 - 8) = -1024;" + "2:" + "r1 = r6;" + "r2 = r10;" + "r2 += -8;" + "call write_tail_call;" + "r1 = *(u64 *)(r10 - 8);" + "r2 = r10;" + "r2 += r1;" + "r0 = *(u64 *)(r2 + 0);" + "exit;" + :: __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +static __used __naked unsigned long write_tail_call(void) +{ + asm volatile ( + "r6 = r2;" + "r2 = %[map_array] ll;" + "r3 = 0;" + "call %[bpf_tail_call];" + "*(u64 *)(r6 + 0) = -16;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_tail_call), + __imm_addr(map_array) + : __clobber_all); +} diff --git a/tools/testing/selftests/bpf/progs/verifier_lsm.c b/tools/testing/selftests/bpf/progs/verifier_lsm.c index 32e5e779cb96..6af9100a37ff 100644 --- a/tools/testing/selftests/bpf/progs/verifier_lsm.c +++ b/tools/testing/selftests/bpf/progs/verifier_lsm.c @@ -4,7 +4,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -SEC("lsm/file_alloc_security") +SEC("lsm/file_permission") 
__description("lsm bpf prog with -4095~0 retval. test 1") __success __naked int errno_zero_retval_test1(void *ctx) @@ -15,7 +15,7 @@ __naked int errno_zero_retval_test1(void *ctx) ::: __clobber_all); } -SEC("lsm/file_alloc_security") +SEC("lsm/file_permission") __description("lsm bpf prog with -4095~0 retval. test 2") __success __naked int errno_zero_retval_test2(void *ctx) diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c index ab9f9f2620ed..e2cbc5bda65e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c +++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c @@ -79,11 +79,6 @@ int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx) return NF_ACCEPT; } -extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, - struct bpf_dynptr *ptr__uninit) __ksym; -extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, - void *buffer, uint32_t buffer__sz) __ksym; - SEC("netfilter") __description("netfilter test prog with skb and state read access") __success __failure_unpriv diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c index 2b4610b53382..a2132c72d3b8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sock.c +++ b/tools/testing/selftests/bpf/progs/verifier_sock.c @@ -1117,10 +1117,17 @@ int tail_call(struct __sk_buff *sk) return 0; } -/* Tail calls invalidate packet pointers. */ +static __noinline +int static_tail_call(struct __sk_buff *sk) +{ + bpf_tail_call_static(sk, &jmp_table, 0); + return 0; +} + +/* Tail calls in sub-programs invalidate packet pointers. */ SEC("tc") __failure __msg("invalid mem access") -int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk) +int invalidate_pkt_pointers_by_global_tail_call(struct __sk_buff *sk) { int *p = (void *)(long)sk->data; @@ -1131,4 +1138,32 @@ int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk) return TCX_PASS; } +/* Tail calls in static sub-programs invalidate packet pointers. */ +SEC("tc") +__failure __msg("invalid mem access") +int invalidate_pkt_pointers_by_static_tail_call(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + static_tail_call(sk); + *p = 42; /* this is unsafe */ + return TCX_PASS; +} + +/* Direct tail calls do not invalidate packet pointers. 
*/ +SEC("tc") +__success +int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk) +{ + int *p = (void *)(long)sk->data; + + if ((void *)(p + 1) > (void *)(long)sk->data_end) + return TCX_DROP; + bpf_tail_call_static(sk, &jmp_table, 0); + *p = 42; /* this is NOT unsafe: tail calls don't return */ + return TCX_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index ac3e418c2a96..61886ed554de 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -793,4 +793,57 @@ __naked int stack_slot_aliases_precision(void) ); } +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} map_array SEC(".maps"); + +__naked __noinline __used +static unsigned long identity_tail_call(void) +{ + /* the simplest identity function involving a tail call */ + asm volatile ( + "r6 = r2;" + "r2 = %[map_array] ll;" + "r3 = 0;" + "call %[bpf_tail_call];" + "r0 = r6;" + "exit;" + : + : __imm(bpf_tail_call), + __imm_addr(map_array) + : __clobber_all); +} + +SEC("?raw_tp") +__failure __log_level(2) +__msg("13: (85) call bpf_tail_call#12") +__msg("mark_precise: frame1: last_idx 13 first_idx 0 subseq_idx -1 ") +__msg("returning from callee:") +__msg("frame1: R0=scalar() R6=3 R10=fp0") +__msg("to caller at 4:") +__msg("R0=scalar() R6=map_value(map=.data.vals,ks=4,vs=16) R10=fp0") +__msg("6: (0f) r1 += r0") +__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4") +__msg("mark_precise: frame0: parent state regs=r0 stack=: R0=Pscalar() R6=map_value(map=.data.vals,ks=4,vs=16) R10=fp0") +__msg("math between map_value pointer and register with unbounded min value is not allowed") +__naked int subprog_result_tail_call(void) +{ + asm volatile ( + "r2 = 3;" + "call identity_tail_call;" + "r0 *= 4;" + "r1 = %[vals];" + "r1 += r0;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common + ); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/wq.c b/tools/testing/selftests/bpf/progs/wq.c index 2f1ba08c293e..25be2cd9d42c 100644 --- a/tools/testing/selftests/bpf/progs/wq.c +++ b/tools/testing/selftests/bpf/progs/wq.c @@ -187,3 +187,20 @@ long test_call_lru_sleepable(void *ctx) return test_elem_callback(&lru, &key, wq_callback); } + +SEC("tc") +long test_map_no_btf(void *ctx) +{ + struct elem *val; + struct bpf_wq *wq; + int key = 42; + + val = bpf_map_lookup_elem(&array, &key); + if (!val) + return -2; + + wq = &val->w; + if (bpf_wq_init(wq, &array, 0) != 0) + return -3; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c index 4240211a1900..d06f6d40594a 100644 --- a/tools/testing/selftests/bpf/progs/wq_failures.c +++ b/tools/testing/selftests/bpf/progs/wq_failures.c @@ -142,3 +142,26 @@ long test_wrong_wq_pointer_offset(void *ctx) return -22; } + +SEC("tc") +__log_level(2) +__failure +__msg(": (85) call bpf_wq_init#") +__msg("R1 doesn't have constant offset. 
bpf_wq has to be at the constant offset") +long test_bad_wq_off(void *ctx) +{ + struct elem *val; + struct bpf_wq *wq; + int key = 42; + u64 unknown; + + val = bpf_map_lookup_elem(&array, &key); + if (!val) + return -2; + + unknown = bpf_get_prandom_u32(); + wq = &val->w + unknown; + if (bpf_wq_init(wq, &array, 0) != 0) + return -3; + return 0; +} diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh index 1453a53ed547..b03a87571592 100755 --- a/tools/testing/selftests/bpf/test_bpftool_build.sh +++ b/tools/testing/selftests/bpf/test_bpftool_build.sh @@ -90,10 +90,6 @@ echo -e "... through kbuild\n" if [ -f ".config" ] ; then make_and_clean tools/bpf - ## "make tools/bpf" sets $(OUTPUT) to ...tools/bpf/runqslower for - ## runqslower, but the default (used for the "clean" target) is .output. - ## Let's make sure we clean runqslower's directory properly. - make -C tools/bpf/runqslower OUTPUT=${KDIR_ROOT_DIR}/tools/bpf/runqslower/ clean ## $OUTPUT is overwritten in kbuild Makefile, and thus cannot be passed ## down from toplevel Makefile to bpftool's Makefile. diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c index 769206fc70e4..7b4ae5e81d32 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c @@ -5,6 +5,7 @@ #include <linux/delay.h> #include <linux/module.h> #include <linux/prandom.h> +#include <linux/ktime.h> #include <asm/rqspinlock.h> #include <linux/perf_event.h> #include <linux/kthread.h> @@ -22,48 +23,146 @@ static struct perf_event_attr hw_attr = { static rqspinlock_t lock_a; static rqspinlock_t lock_b; +static rqspinlock_t lock_c; + +#define RQSL_SLOW_THRESHOLD_MS 10 +static const unsigned int rqsl_hist_ms[] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 12, 14, 16, 18, 20, 25, 30, 40, 50, 75, + 100, 150, 200, 250, 1000, +}; +#define RQSL_NR_HIST_BUCKETS ARRAY_SIZE(rqsl_hist_ms) + +enum rqsl_context { + RQSL_CTX_NORMAL = 0, + RQSL_CTX_NMI, + RQSL_CTX_MAX, +}; + +struct rqsl_cpu_hist { + atomic64_t hist[RQSL_CTX_MAX][RQSL_NR_HIST_BUCKETS]; + atomic64_t success[RQSL_CTX_MAX]; + atomic64_t failure[RQSL_CTX_MAX]; +}; + +static DEFINE_PER_CPU(struct rqsl_cpu_hist, rqsl_cpu_hists); + +enum rqsl_mode { + RQSL_MODE_AA = 0, + RQSL_MODE_ABBA, + RQSL_MODE_ABBCCA, +}; + +static int test_mode = RQSL_MODE_AA; +module_param(test_mode, int, 0644); +MODULE_PARM_DESC(test_mode, + "rqspinlock test mode: 0 = AA, 1 = ABBA, 2 = ABBCCA"); + +static int normal_delay = 20; +module_param(normal_delay, int, 0644); +MODULE_PARM_DESC(normal_delay, + "rqspinlock critical section length for normal context (20ms default)"); + +static int nmi_delay = 10; +module_param(nmi_delay, int, 0644); +MODULE_PARM_DESC(nmi_delay, + "rqspinlock critical section length for NMI context (10ms default)"); static struct perf_event **rqsl_evts; static int rqsl_nevts; -static bool test_ab = false; -module_param(test_ab, bool, 0644); -MODULE_PARM_DESC(test_ab, "Test ABBA situations instead of AA situations"); - static struct task_struct **rqsl_threads; static int rqsl_nthreads; static atomic_t rqsl_ready_cpus = ATOMIC_INIT(0); static int pause = 0; -static bool nmi_locks_a(int cpu) +static const char *rqsl_mode_names[] = { + [RQSL_MODE_AA] = "AA", + [RQSL_MODE_ABBA] = "ABBA", + [RQSL_MODE_ABBCCA] = "ABBCCA", +}; + +struct rqsl_lock_pair { + rqspinlock_t *worker_lock; + rqspinlock_t *nmi_lock; +}; + +static struct 
rqsl_lock_pair rqsl_get_lock_pair(int cpu) { - return (cpu & 1) && test_ab; + int mode = READ_ONCE(test_mode); + + switch (mode) { + default: + case RQSL_MODE_AA: + return (struct rqsl_lock_pair){ &lock_a, &lock_a }; + case RQSL_MODE_ABBA: + if (cpu & 1) + return (struct rqsl_lock_pair){ &lock_b, &lock_a }; + return (struct rqsl_lock_pair){ &lock_a, &lock_b }; + case RQSL_MODE_ABBCCA: + switch (cpu % 3) { + case 0: + return (struct rqsl_lock_pair){ &lock_a, &lock_b }; + case 1: + return (struct rqsl_lock_pair){ &lock_b, &lock_c }; + default: + return (struct rqsl_lock_pair){ &lock_c, &lock_a }; + } + } +} + +static u32 rqsl_hist_bucket_idx(u32 delta_ms) +{ + int i; + + for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) { + if (delta_ms <= rqsl_hist_ms[i]) + return i; + } + + return RQSL_NR_HIST_BUCKETS - 1; +} + +static void rqsl_record_lock_result(u64 delta_ns, enum rqsl_context ctx, int ret) +{ + struct rqsl_cpu_hist *hist = this_cpu_ptr(&rqsl_cpu_hists); + u32 delta_ms = DIV_ROUND_UP_ULL(delta_ns, NSEC_PER_MSEC); + u32 bucket = rqsl_hist_bucket_idx(delta_ms); + atomic64_t *buckets = hist->hist[ctx]; + + atomic64_inc(&buckets[bucket]); + if (!ret) + atomic64_inc(&hist->success[ctx]); + else + atomic64_inc(&hist->failure[ctx]); } static int rqspinlock_worker_fn(void *arg) { int cpu = smp_processor_id(); unsigned long flags; + u64 start_ns; int ret; if (cpu) { atomic_inc(&rqsl_ready_cpus); while (!kthread_should_stop()) { + struct rqsl_lock_pair locks = rqsl_get_lock_pair(cpu); + rqspinlock_t *worker_lock = locks.worker_lock; + if (READ_ONCE(pause)) { msleep(1000); continue; } - if (nmi_locks_a(cpu)) - ret = raw_res_spin_lock_irqsave(&lock_b, flags); - else - ret = raw_res_spin_lock_irqsave(&lock_a, flags); - mdelay(20); - if (nmi_locks_a(cpu) && !ret) - raw_res_spin_unlock_irqrestore(&lock_b, flags); - else if (!ret) - raw_res_spin_unlock_irqrestore(&lock_a, flags); + start_ns = ktime_get_mono_fast_ns(); + ret = raw_res_spin_lock_irqsave(worker_lock, flags); + rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns, + RQSL_CTX_NORMAL, ret); + mdelay(normal_delay); + if (!ret) + raw_res_spin_unlock_irqrestore(worker_lock, flags); cpu_relax(); } return 0; @@ -91,24 +190,25 @@ static int rqspinlock_worker_fn(void *arg) static void nmi_cb(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs) { + struct rqsl_lock_pair locks; int cpu = smp_processor_id(); unsigned long flags; + u64 start_ns; int ret; if (!cpu || READ_ONCE(pause)) return; - if (nmi_locks_a(cpu)) - ret = raw_res_spin_lock_irqsave(&lock_a, flags); - else - ret = raw_res_spin_lock_irqsave(test_ab ? &lock_b : &lock_a, flags); + locks = rqsl_get_lock_pair(cpu); + start_ns = ktime_get_mono_fast_ns(); + ret = raw_res_spin_lock_irqsave(locks.nmi_lock, flags); + rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns, + RQSL_CTX_NMI, ret); - mdelay(10); + mdelay(nmi_delay); - if (nmi_locks_a(cpu) && !ret) - raw_res_spin_unlock_irqrestore(&lock_a, flags); - else if (!ret) - raw_res_spin_unlock_irqrestore(test_ab ? &lock_b : &lock_a, flags); + if (!ret) + raw_res_spin_unlock_irqrestore(locks.nmi_lock, flags); } static void free_rqsl_threads(void) @@ -142,13 +242,19 @@ static int bpf_test_rqspinlock_init(void) int i, ret; int ncpus = num_online_cpus(); - pr_err("Mode = %s\n", test_ab ? 
"ABBA" : "AA"); + if (test_mode < RQSL_MODE_AA || test_mode > RQSL_MODE_ABBCCA) { + pr_err("Invalid mode %d\n", test_mode); + return -EINVAL; + } + + pr_err("Mode = %s\n", rqsl_mode_names[test_mode]); - if (ncpus < 3) + if (ncpus < test_mode + 2) return -ENOTSUPP; raw_res_spin_lock_init(&lock_a); raw_res_spin_lock_init(&lock_b); + raw_res_spin_lock_init(&lock_c); rqsl_evts = kcalloc(ncpus - 1, sizeof(*rqsl_evts), GFP_KERNEL); if (!rqsl_evts) @@ -196,10 +302,88 @@ err_perf_events: module_init(bpf_test_rqspinlock_init); +static void rqsl_print_histograms(void) +{ + int cpu, i; + + pr_err("rqspinlock acquisition latency histogram (ms):\n"); + + for_each_online_cpu(cpu) { + struct rqsl_cpu_hist *hist = per_cpu_ptr(&rqsl_cpu_hists, cpu); + u64 norm_counts[RQSL_NR_HIST_BUCKETS]; + u64 nmi_counts[RQSL_NR_HIST_BUCKETS]; + u64 total_counts[RQSL_NR_HIST_BUCKETS]; + u64 norm_success, nmi_success, success_total; + u64 norm_failure, nmi_failure, failure_total; + u64 norm_total = 0, nmi_total = 0, total = 0; + bool has_slow = false; + + for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) { + norm_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NORMAL][i]); + nmi_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NMI][i]); + total_counts[i] = norm_counts[i] + nmi_counts[i]; + norm_total += norm_counts[i]; + nmi_total += nmi_counts[i]; + total += total_counts[i]; + if (rqsl_hist_ms[i] > RQSL_SLOW_THRESHOLD_MS && + total_counts[i]) + has_slow = true; + } + + norm_success = atomic64_read(&hist->success[RQSL_CTX_NORMAL]); + nmi_success = atomic64_read(&hist->success[RQSL_CTX_NMI]); + norm_failure = atomic64_read(&hist->failure[RQSL_CTX_NORMAL]); + nmi_failure = atomic64_read(&hist->failure[RQSL_CTX_NMI]); + success_total = norm_success + nmi_success; + failure_total = norm_failure + nmi_failure; + + if (!total) + continue; + + if (!has_slow) { + pr_err(" cpu%d: total %llu (normal %llu, nmi %llu) | " + "success %llu (normal %llu, nmi %llu) | " + "failure %llu (normal %llu, nmi %llu), all within 0-%ums\n", + cpu, total, norm_total, nmi_total, + success_total, norm_success, nmi_success, + failure_total, norm_failure, nmi_failure, + RQSL_SLOW_THRESHOLD_MS); + continue; + } + + pr_err(" cpu%d: total %llu (normal %llu, nmi %llu) | " + "success %llu (normal %llu, nmi %llu) | " + "failure %llu (normal %llu, nmi %llu)\n", + cpu, total, norm_total, nmi_total, + success_total, norm_success, nmi_success, + failure_total, norm_failure, nmi_failure); + for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) { + unsigned int start_ms; + + if (!total_counts[i]) + continue; + + start_ms = i == 0 ? 
0 : rqsl_hist_ms[i - 1] + 1; + if (i == RQSL_NR_HIST_BUCKETS - 1) { + pr_err(" >= %ums: total %llu (normal %llu, nmi %llu)\n", + start_ms, total_counts[i], + norm_counts[i], nmi_counts[i]); + } else { + pr_err(" %u-%ums: total %llu (normal %llu, nmi %llu)\n", + start_ms, rqsl_hist_ms[i], + total_counts[i], + norm_counts[i], nmi_counts[i]); + } + } + } +} + static void bpf_test_rqspinlock_exit(void) { + WRITE_ONCE(pause, 1); free_rqsl_threads(); free_rqsl_evts(); + rqsl_print_histograms(); } module_exit(bpf_test_rqspinlock_exit); diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index ed0a4721d8fd..1669a7eeda26 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -926,7 +926,7 @@ __bpf_kfunc int bpf_kfunc_call_kernel_connect(struct addr_args *args) goto out; } - err = kernel_connect(sock, (struct sockaddr *)&args->addr, + err = kernel_connect(sock, (struct sockaddr_unsized *)&args->addr, args->addrlen, 0); out: mutex_unlock(&sock_lock); @@ -949,7 +949,7 @@ __bpf_kfunc int bpf_kfunc_call_kernel_bind(struct addr_args *args) goto out; } - err = kernel_bind(sock, (struct sockaddr *)&args->addr, args->addrlen); + err = kernel_bind(sock, (struct sockaddr_unsized *)&args->addr, args->addrlen); out: mutex_unlock(&sock_lock); diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 74ecc281bb8c..338c035c3688 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -43,6 +43,7 @@ #define TEST_TAG_EXPECT_STDERR_PFX_UNPRIV "comment:test_expect_stderr_unpriv=" #define TEST_TAG_EXPECT_STDOUT_PFX "comment:test_expect_stdout=" #define TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV "comment:test_expect_stdout_unpriv=" +#define TEST_TAG_LINEAR_SIZE "comment:test_linear_size=" /* Warning: duplicated in bpf_misc.h */ #define POINTER_VALUE 0xbadcafe @@ -89,6 +90,7 @@ struct test_spec { int mode_mask; int arch_mask; int load_mask; + int linear_sz; bool auxiliary; bool valid; }; @@ -633,6 +635,21 @@ static int parse_test_spec(struct test_loader *tester, &spec->unpriv.stdout); if (err) goto cleanup; + } else if (str_has_pfx(s, TEST_TAG_LINEAR_SIZE)) { + switch (bpf_program__type(prog)) { + case BPF_PROG_TYPE_SCHED_ACT: + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_CGROUP_SKB: + val = s + sizeof(TEST_TAG_LINEAR_SIZE) - 1; + err = parse_int(val, &spec->linear_sz, "test linear size"); + if (err) + goto cleanup; + break; + default: + PRINT_FAIL("__linear_size for unsupported program type"); + err = -EINVAL; + goto cleanup; + } } } @@ -1007,10 +1024,11 @@ static bool is_unpriv_capable_map(struct bpf_map *map) } } -static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts) +static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts, int linear_sz) { __u8 tmp_out[TEST_DATA_LEN << 2] = {}; __u8 tmp_in[TEST_DATA_LEN] = {}; + struct __sk_buff ctx = {}; int err, saved_errno; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = tmp_in, @@ -1020,6 +1038,12 @@ static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts) .repeat = 1, ); + if (linear_sz) { + ctx.data_end = linear_sz; + topts.ctx_in = &ctx; + topts.ctx_size_in = sizeof(ctx); + } + if (empty_opts) { memset(&topts, 0, sizeof(struct bpf_test_run_opts)); topts.sz = sizeof(struct bpf_test_run_opts); @@ -1269,7 +1293,8 @@ void run_subtest(struct test_loader *tester, } err = 
do_prog_test_run(bpf_program__fd(tprog), &retval, - bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false); + bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false, + spec->linear_sz); if (!err && retval != subspec->retval && subspec->retval != POINTER_VALUE) { PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval); goto tobj_cleanup; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 3fae9ce46ca9..ccc5acd55ff9 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1399,7 +1399,8 @@ static void test_map_stress(void) static bool can_retry(int err) { return (err == EAGAIN || err == EBUSY || - (err == ENOMEM && map_opts.map_flags == BPF_F_NO_PREALLOC)); + ((err == ENOMEM || err == E2BIG) && + map_opts.map_flags == BPF_F_NO_PREALLOC)); } int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c index 5546b05a0486..f1300047c1e0 100644 --- a/tools/testing/selftests/bpf/test_tag.c +++ b/tools/testing/selftests/bpf/test_tag.c @@ -116,7 +116,7 @@ static void tag_from_alg(int insns, uint8_t *tag, uint32_t len) static const struct sockaddr_alg alg = { .salg_family = AF_ALG, .salg_type = "hash", - .salg_name = "sha1", + .salg_name = "sha256", }; int fd_base, fd_alg, ret; ssize_t size; diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh deleted file mode 100755 index 76f0bd17061f..000000000000 --- a/tools/testing/selftests/bpf/test_tc_edt.sh +++ /dev/null @@ -1,100 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# This test installs a TC bpf program that throttles a TCP flow -# with dst port = 9000 down to 5MBps. Then it measures actual -# throughput of the flow. - -BPF_FILE="test_tc_edt.bpf.o" -if [[ $EUID -ne 0 ]]; then - echo "This script must be run as root" - echo "FAIL" - exit 1 -fi - -# check that nc, dd, and timeout are present -command -v nc >/dev/null 2>&1 || \ - { echo >&2 "nc is not available"; exit 1; } -command -v dd >/dev/null 2>&1 || \ - { echo >&2 "nc is not available"; exit 1; } -command -v timeout >/dev/null 2>&1 || \ - { echo >&2 "timeout is not available"; exit 1; } - -readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)" -readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)" - -readonly IP_SRC="172.16.1.100" -readonly IP_DST="172.16.2.100" - -cleanup() -{ - ip netns del ${NS_SRC} - ip netns del ${NS_DST} -} - -trap cleanup EXIT - -set -e # exit on error - -ip netns add "${NS_SRC}" -ip netns add "${NS_DST}" -ip link add veth_src type veth peer name veth_dst -ip link set veth_src netns ${NS_SRC} -ip link set veth_dst netns ${NS_DST} - -ip -netns ${NS_SRC} addr add ${IP_SRC}/24 dev veth_src -ip -netns ${NS_DST} addr add ${IP_DST}/24 dev veth_dst - -ip -netns ${NS_SRC} link set dev veth_src up -ip -netns ${NS_DST} link set dev veth_dst up - -ip -netns ${NS_SRC} route add ${IP_DST}/32 dev veth_src -ip -netns ${NS_DST} route add ${IP_SRC}/32 dev veth_dst - -# set up TC on TX -ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq -ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact -ip netns exec ${NS_SRC} tc filter add dev veth_src egress \ - bpf da obj ${BPF_FILE} sec cls_test - - -# start the listener -ip netns exec ${NS_DST} bash -c \ - "nc -4 -l -p 9000 >/dev/null &" -declare -i NC_PID=$! 
-sleep 1 - -declare -ir TIMEOUT=20 -declare -ir EXPECTED_BPS=5000000 - -# run the load, capture RX bytes on DST -declare -ir RX_BYTES_START=$( ip netns exec ${NS_DST} \ - cat /sys/class/net/veth_dst/statistics/rx_bytes ) - -set +e -ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero \ - bs=1000 count=1000000 > /dev/tcp/${IP_DST}/9000 2>/dev/null" -set -e - -declare -ir RX_BYTES_END=$( ip netns exec ${NS_DST} \ - cat /sys/class/net/veth_dst/statistics/rx_bytes ) - -declare -ir ACTUAL_BPS=$(( ($RX_BYTES_END - $RX_BYTES_START) / $TIMEOUT )) - -echo $TIMEOUT $ACTUAL_BPS $EXPECTED_BPS | \ - awk '{printf "elapsed: %d sec; bps difference: %.2f%%\n", - $1, ($2-$3)*100.0/$3}' - -# Pass the test if the actual bps is within 1% of the expected bps. -# The difference is usually about 0.1% on a 20-sec test, and ==> zero -# the longer the test runs. -declare -ir RES=$( echo $ACTUAL_BPS $EXPECTED_BPS | \ - awk 'function abs(x){return ((x < 0.0) ? -x : x)} - {if (abs(($1-$2)*100.0/$2) > 1.0) { print "1" } - else { print "0"} }' ) -if [ "${RES}" == "0" ] ; then - echo "PASS" -else - echo "FAIL" - exit 1 -fi diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh deleted file mode 100755 index cb55a908bb0d..000000000000 --- a/tools/testing/selftests/bpf/test_tc_tunnel.sh +++ /dev/null @@ -1,320 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# In-place tunneling - -BPF_FILE="test_tc_tunnel.bpf.o" -# must match the port that the bpf program filters on -readonly port=8000 - -readonly ns_prefix="ns-$$-" -readonly ns1="${ns_prefix}1" -readonly ns2="${ns_prefix}2" - -readonly ns1_v4=192.168.1.1 -readonly ns2_v4=192.168.1.2 -readonly ns1_v6=fd::1 -readonly ns2_v6=fd::2 - -# Must match port used by bpf program -readonly udpport=5555 -# MPLSoverUDP -readonly mplsudpport=6635 -readonly mplsproto=137 - -readonly infile="$(mktemp)" -readonly outfile="$(mktemp)" - -setup() { - ip netns add "${ns1}" - ip netns add "${ns2}" - - ip link add dev veth1 mtu 1500 netns "${ns1}" type veth \ - peer name veth2 mtu 1500 netns "${ns2}" - - ip netns exec "${ns1}" ethtool -K veth1 tso off - - ip -netns "${ns1}" link set veth1 up - ip -netns "${ns2}" link set veth2 up - - ip -netns "${ns1}" -4 addr add "${ns1_v4}/24" dev veth1 - ip -netns "${ns2}" -4 addr add "${ns2_v4}/24" dev veth2 - ip -netns "${ns1}" -6 addr add "${ns1_v6}/64" dev veth1 nodad - ip -netns "${ns2}" -6 addr add "${ns2_v6}/64" dev veth2 nodad - - # clamp route to reserve room for tunnel headers - ip -netns "${ns1}" -4 route flush table main - ip -netns "${ns1}" -6 route flush table main - ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1450 dev veth1 - ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1430 dev veth1 - - sleep 1 - - dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none -} - -cleanup() { - ip netns del "${ns2}" - ip netns del "${ns1}" - - if [[ -f "${outfile}" ]]; then - rm "${outfile}" - fi - if [[ -f "${infile}" ]]; then - rm "${infile}" - fi - - if [[ -n $server_pid ]]; then - kill $server_pid 2> /dev/null - fi -} - -server_listen() { - ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" & - server_pid=$! -} - -client_connect() { - ip netns exec "${ns1}" timeout 2 nc "${netcat_opt}" -w 1 "${addr2}" "${port}" < "${infile}" - echo $? 
-} - -verify_data() { - wait "${server_pid}" - server_pid= - # sha1sum returns two fields [sha1] [filepath] - # convert to bash array and access first elem - insum=($(sha1sum ${infile})) - outsum=($(sha1sum ${outfile})) - if [[ "${insum[0]}" != "${outsum[0]}" ]]; then - echo "data mismatch" - exit 1 - fi -} - -wait_for_port() { - for i in $(seq 20); do - if ip netns exec "${ns2}" ss ${2:--4}OHntl | grep -q "$1"; then - return 0 - fi - sleep 0.1 - done - return 1 -} - -set -e - -# no arguments: automated test, run all -if [[ "$#" -eq "0" ]]; then - echo "ipip" - $0 ipv4 ipip none 100 - - echo "ipip6" - $0 ipv4 ipip6 none 100 - - echo "ip6ip6" - $0 ipv6 ip6tnl none 100 - - echo "sit" - $0 ipv6 sit none 100 - - echo "ip4 vxlan" - $0 ipv4 vxlan eth 2000 - - echo "ip6 vxlan" - $0 ipv6 ip6vxlan eth 2000 - - for mac in none mpls eth ; do - echo "ip gre $mac" - $0 ipv4 gre $mac 100 - - echo "ip6 gre $mac" - $0 ipv6 ip6gre $mac 100 - - echo "ip gre $mac gso" - $0 ipv4 gre $mac 2000 - - echo "ip6 gre $mac gso" - $0 ipv6 ip6gre $mac 2000 - - echo "ip udp $mac" - $0 ipv4 udp $mac 100 - - echo "ip6 udp $mac" - $0 ipv6 ip6udp $mac 100 - - echo "ip udp $mac gso" - $0 ipv4 udp $mac 2000 - - echo "ip6 udp $mac gso" - $0 ipv6 ip6udp $mac 2000 - done - - echo "OK. All tests passed" - exit 0 -fi - -if [[ "$#" -ne "4" ]]; then - echo "Usage: $0" - echo " or: $0 <ipv4|ipv6> <tuntype> <none|mpls|eth> <data_len>" - exit 1 -fi - -case "$1" in -"ipv4") - readonly addr1="${ns1_v4}" - readonly addr2="${ns2_v4}" - readonly ipproto=4 - readonly netcat_opt=-${ipproto} - readonly foumod=fou - readonly foutype=ipip - readonly fouproto=4 - readonly fouproto_mpls=${mplsproto} - readonly gretaptype=gretap - ;; -"ipv6") - readonly addr1="${ns1_v6}" - readonly addr2="${ns2_v6}" - readonly ipproto=6 - readonly netcat_opt=-${ipproto} - readonly foumod=fou6 - readonly foutype=ip6tnl - readonly fouproto="41 -6" - readonly fouproto_mpls="${mplsproto} -6" - readonly gretaptype=ip6gretap - ;; -*) - echo "unknown arg: $1" - exit 1 - ;; -esac - -readonly tuntype=$2 -readonly mac=$3 -readonly datalen=$4 - -echo "encap ${addr1} to ${addr2}, type ${tuntype}, mac ${mac} len ${datalen}" - -trap cleanup EXIT - -setup - -# basic communication works -echo "test basic connectivity" -server_listen -wait_for_port ${port} ${netcat_opt} -client_connect -verify_data - -# clientside, insert bpf program to encap all TCP to port ${port} -# client can no longer connect -ip netns exec "${ns1}" tc qdisc add dev veth1 clsact -ip netns exec "${ns1}" tc filter add dev veth1 egress \ - bpf direct-action object-file ${BPF_FILE} \ - section "encap_${tuntype}_${mac}" -echo "test bpf encap without decap (expect failure)" -server_listen -wait_for_port ${port} ${netcat_opt} -! client_connect - -if [[ "$tuntype" =~ "udp" ]]; then - # Set up fou tunnel. - ttype="${foutype}" - targs="encap fou encap-sport auto encap-dport $udpport" - # fou may be a module; allow this to fail. 
- modprobe "${foumod}" ||true - if [[ "$mac" == "mpls" ]]; then - dport=${mplsudpport} - dproto=${fouproto_mpls} - tmode="mode any ttl 255" - else - dport=${udpport} - dproto=${fouproto} - fi - ip netns exec "${ns2}" ip fou add port $dport ipproto ${dproto} - targs="encap fou encap-sport auto encap-dport $dport" -elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then - ttype=$gretaptype -elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then - ttype="vxlan" - targs="id 1 dstport 8472 udp6zerocsumrx" -elif [[ "$tuntype" == "ipip6" ]]; then - ttype="ip6tnl" - targs="" -else - ttype=$tuntype - targs="" -fi - -# tunnel address family differs from inner for SIT -if [[ "${tuntype}" == "sit" ]]; then - link_addr1="${ns1_v4}" - link_addr2="${ns2_v4}" -elif [[ "${tuntype}" == "ipip6" ]]; then - link_addr1="${ns1_v6}" - link_addr2="${ns2_v6}" -else - link_addr1="${addr1}" - link_addr2="${addr2}" -fi - -# serverside, insert decap module -# server is still running -# client can connect again -ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \ - ${tmode} remote "${link_addr1}" local "${link_addr2}" $targs - -expect_tun_fail=0 - -if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then - # No support for MPLS IPv6 fou tunnel; expect failure. - expect_tun_fail=1 -elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then - # No support for TEB fou tunnel; expect failure. - expect_tun_fail=1 -elif [[ "$tuntype" =~ (gre|vxlan) && "$mac" == "eth" ]]; then - # Share ethernet address between tunnel/veth2 so L2 decap works. - ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \ - awk '/ether/ { print $2 }') - ip netns exec "${ns2}" ip link set testtun0 address $ethaddr -elif [[ "$mac" == "mpls" ]]; then - modprobe mpls_iptunnel ||true - modprobe mpls_gso ||true - ip netns exec "${ns2}" sysctl -qw net.mpls.platform_labels=65536 - ip netns exec "${ns2}" ip -f mpls route add 1000 dev lo - ip netns exec "${ns2}" ip link set lo up - ip netns exec "${ns2}" sysctl -qw net.mpls.conf.testtun0.input=1 - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.lo.rp_filter=0 -fi - -# Because packets are decapped by the tunnel they arrive on testtun0 from -# the IP stack perspective. Ensure reverse path filtering is disabled -# otherwise we drop the TCP SYN as arriving on testtun0 instead of the -# expected veth2 (veth2 is where 192.168.1.2 is configured). -ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 -# rp needs to be disabled for both all and testtun0 as the rp value is -# selected as the max of the "all" and device-specific values. -ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0 -ip netns exec "${ns2}" ip link set dev testtun0 up -if [[ "$expect_tun_fail" == 1 ]]; then - # This tunnel mode is not supported, so we expect failure. - echo "test bpf encap with tunnel device decap (expect failure)" - ! 
client_connect -else - echo "test bpf encap with tunnel device decap" - client_connect - verify_data - server_listen - wait_for_port ${port} ${netcat_opt} -fi - -# serverside, use BPF for decap -ip netns exec "${ns2}" ip link del dev testtun0 -ip netns exec "${ns2}" tc qdisc add dev veth2 clsact -ip netns exec "${ns2}" tc filter add dev veth2 ingress \ - bpf direct-action object-file ${BPF_FILE} section decap -echo "test bpf encap with bpf decap" -client_connect -verify_data - -echo OK diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 352adc8df2d1..9234a58b0a97 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -74,31 +74,23 @@ #define _GNU_SOURCE #include <assert.h> #include <fcntl.h> -#include <errno.h> #include <getopt.h> #include <linux/if_link.h> #include <linux/if_ether.h> #include <linux/mman.h> #include <linux/netdev.h> -#include <linux/bitmap.h> #include <linux/ethtool.h> #include <arpa/inet.h> #include <net/if.h> #include <locale.h> -#include <poll.h> -#include <pthread.h> -#include <signal.h> #include <stdio.h> #include <stdlib.h> #include <libgen.h> -#include <string.h> #include <stddef.h> #include <sys/mman.h> -#include <sys/socket.h> -#include <sys/time.h> #include <sys/types.h> -#include <unistd.h> +#include "prog_tests/test_xsk.h" #include "xsk_xdp_progs.skel.h" #include "xsk.h" #include "xskxceiver.h" @@ -109,9 +101,6 @@ #include <network_helpers.h> -#define MAX_TX_BUDGET_DEFAULT 32 - -static bool opt_verbose; static bool opt_print_tests; static enum test_mode opt_mode = TEST_MODE_ALL; static u32 opt_run_test = RUN_ALL_TESTS; @@ -120,169 +109,12 @@ void test__fail(void) { /* for network_helpers.c */ } static void __exit_with_error(int error, const char *file, const char *func, int line) { - ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, - strerror(error)); + ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, + error, strerror(error)); ksft_exit_xfail(); } #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) -#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : "" -static char *mode_string(struct test_spec *test) -{ - switch (test->mode) { - case TEST_MODE_SKB: - return "SKB"; - case TEST_MODE_DRV: - return "DRV"; - case TEST_MODE_ZC: - return "ZC"; - default: - return "BOGUS"; - } -} - -static void report_failure(struct test_spec *test) -{ - if (test->fail) - return; - - ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test), - test->name); - test->fail = true; -} - -/* The payload is a word consisting of a packet sequence number in the upper - * 16-bits and a intra packet data sequence number in the lower 16 bits. So the 3rd packet's - * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0. 
- */ -static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size) -{ - u32 *ptr = (u32 *)dest, i; - - start /= sizeof(*ptr); - size /= sizeof(*ptr); - for (i = 0; i < size; i++) - ptr[i] = htonl(pkt_nb << 16 | (i + start)); -} - -static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr) -{ - memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN); - memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN); - eth_hdr->h_proto = htons(ETH_P_LOOPBACK); -} - -static bool is_umem_valid(struct ifobject *ifobj) -{ - return !!ifobj->umem->umem; -} - -static u32 mode_to_xdp_flags(enum test_mode mode) -{ - return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; -} - -static u64 umem_size(struct xsk_umem_info *umem) -{ - return umem->num_frames * umem->frame_size; -} - -static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer, - u64 size) -{ - struct xsk_umem_config cfg = { - .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, - .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, - .frame_size = umem->frame_size, - .frame_headroom = umem->frame_headroom, - .flags = XSK_UMEM__DEFAULT_FLAGS - }; - int ret; - - if (umem->fill_size) - cfg.fill_size = umem->fill_size; - - if (umem->comp_size) - cfg.comp_size = umem->comp_size; - - if (umem->unaligned_mode) - cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG; - - ret = xsk_umem__create(&umem->umem, buffer, size, - &umem->fq, &umem->cq, &cfg); - if (ret) - return ret; - - umem->buffer = buffer; - if (ifobj->shared_umem && ifobj->rx_on) { - umem->base_addr = umem_size(umem); - umem->next_buffer = umem_size(umem); - } - - return 0; -} - -static u64 umem_alloc_buffer(struct xsk_umem_info *umem) -{ - u64 addr; - - addr = umem->next_buffer; - umem->next_buffer += umem->frame_size; - if (umem->next_buffer >= umem->base_addr + umem_size(umem)) - umem->next_buffer = umem->base_addr; - - return addr; -} - -static void umem_reset_alloc(struct xsk_umem_info *umem) -{ - umem->next_buffer = 0; -} - -static void enable_busy_poll(struct xsk_socket_info *xsk) -{ - int sock_opt; - - sock_opt = 1; - if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL, - (void *)&sock_opt, sizeof(sock_opt)) < 0) - exit_with_error(errno); - - sock_opt = 20; - if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL, - (void *)&sock_opt, sizeof(sock_opt)) < 0) - exit_with_error(errno); - - sock_opt = xsk->batch_size; - if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET, - (void *)&sock_opt, sizeof(sock_opt)) < 0) - exit_with_error(errno); -} - -static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, - struct ifobject *ifobject, bool shared) -{ - struct xsk_socket_config cfg = {}; - struct xsk_ring_cons *rxr; - struct xsk_ring_prod *txr; - - xsk->umem = umem; - cfg.rx_size = xsk->rxqsize; - cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; - cfg.bind_flags = ifobject->bind_flags; - if (shared) - cfg.bind_flags |= XDP_SHARED_UMEM; - if (ifobject->mtu > MAX_ETH_PKT_SIZE) - cfg.bind_flags |= XDP_USE_SG; - if (umem->comp_size) - cfg.tx_size = umem->comp_size; - if (umem->fill_size) - cfg.rx_size = umem->fill_size; - - txr = ifobject->tx_on ? &xsk->tx : NULL; - rxr = ifobject->rx_on ? 
&xsk->rx : NULL; - return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg); -} static bool ifobj_zc_avail(struct ifobject *ifobject) { @@ -314,7 +146,7 @@ static bool ifobj_zc_avail(struct ifobject *ifobject) ifobject->bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY; ifobject->rx_on = true; xsk->rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; - ret = __xsk_configure_socket(xsk, umem, ifobject, false); + ret = xsk_configure_socket(xsk, umem, ifobject, false); if (!ret) zc_avail = true; @@ -327,25 +159,6 @@ out: return zc_avail; } -#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags" -static unsigned int get_max_skb_frags(void) -{ - unsigned int max_skb_frags = 0; - FILE *file; - - file = fopen(MAX_SKB_FRAGS_PATH, "r"); - if (!file) { - ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH); - return 0; - } - - if (fscanf(file, "%u", &max_skb_frags) != 1) - ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH); - - fclose(file); - return max_skb_frags; -} - static struct option long_options[] = { {"interface", required_argument, 0, 'i'}, {"busy-poll", no_argument, 0, 'b'}, @@ -446,2256 +259,36 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj } } -static int set_ring_size(struct ifobject *ifobj) -{ - int ret; - u32 ctr = 0; - - while (ctr++ < SOCK_RECONF_CTR) { - ret = set_hw_ring_size(ifobj->ifname, &ifobj->ring); - if (!ret) - break; - - /* Retry if it fails */ - if (ctr >= SOCK_RECONF_CTR || errno != EBUSY) - return -errno; - - usleep(USLEEP_MAX); - } - - return ret; -} - -static int hw_ring_size_reset(struct ifobject *ifobj) -{ - ifobj->ring.tx_pending = ifobj->set_ring.default_tx; - ifobj->ring.rx_pending = ifobj->set_ring.default_rx; - return set_ring_size(ifobj); -} - -static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, - struct ifobject *ifobj_rx) -{ - u32 i, j; - - for (i = 0; i < MAX_INTERFACES; i++) { - struct ifobject *ifobj = i ? 
ifobj_rx : ifobj_tx; - - ifobj->xsk = &ifobj->xsk_arr[0]; - ifobj->use_poll = false; - ifobj->use_fill_ring = true; - ifobj->release_rx = true; - ifobj->validation_func = NULL; - ifobj->use_metadata = false; - - if (i == 0) { - ifobj->rx_on = false; - ifobj->tx_on = true; - } else { - ifobj->rx_on = true; - ifobj->tx_on = false; - } - - memset(ifobj->umem, 0, sizeof(*ifobj->umem)); - ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS; - ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; - - for (j = 0; j < MAX_SOCKETS; j++) { - memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); - ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; - ifobj->xsk_arr[j].batch_size = DEFAULT_BATCH_SIZE; - if (i == 0) - ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default; - else - ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default; - - memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN); - memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN); - ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0); - ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1); - } - } - - if (ifobj_tx->hw_ring_size_supp) - hw_ring_size_reset(ifobj_tx); - - test->ifobj_tx = ifobj_tx; - test->ifobj_rx = ifobj_rx; - test->current_step = 0; - test->total_steps = 1; - test->nb_sockets = 1; - test->fail = false; - test->set_ring = false; - test->adjust_tail = false; - test->adjust_tail_support = false; - test->mtu = MAX_ETH_PKT_SIZE; - test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog; - test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk; - test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog; - test->xskmap_tx = ifobj_tx->xdp_progs->maps.xsk; -} - -static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, - struct ifobject *ifobj_rx, enum test_mode mode, - const struct test_spec *test_to_run) -{ - struct pkt_stream *tx_pkt_stream; - struct pkt_stream *rx_pkt_stream; - u32 i; - - tx_pkt_stream = test->tx_pkt_stream_default; - rx_pkt_stream = test->rx_pkt_stream_default; - memset(test, 0, sizeof(*test)); - test->tx_pkt_stream_default = tx_pkt_stream; - test->rx_pkt_stream_default = rx_pkt_stream; - - for (i = 0; i < MAX_INTERFACES; i++) { - struct ifobject *ifobj = i ? 
ifobj_rx : ifobj_tx; - - ifobj->bind_flags = XDP_USE_NEED_WAKEUP; - if (mode == TEST_MODE_ZC) - ifobj->bind_flags |= XDP_ZEROCOPY; - else - ifobj->bind_flags |= XDP_COPY; - } - - strncpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE); - test->test_func = test_to_run->test_func; - test->mode = mode; - __test_spec_init(test, ifobj_tx, ifobj_rx); -} - -static void test_spec_reset(struct test_spec *test) -{ - __test_spec_init(test, test->ifobj_tx, test->ifobj_rx); -} - -static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx, - struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx, - struct bpf_map *xskmap_tx) -{ - test->xdp_prog_rx = xdp_prog_rx; - test->xdp_prog_tx = xdp_prog_tx; - test->xskmap_rx = xskmap_rx; - test->xskmap_tx = xskmap_tx; -} - -static int test_spec_set_mtu(struct test_spec *test, int mtu) -{ - int err; - - if (test->ifobj_rx->mtu != mtu) { - err = xsk_set_mtu(test->ifobj_rx->ifindex, mtu); - if (err) - return err; - test->ifobj_rx->mtu = mtu; - } - if (test->ifobj_tx->mtu != mtu) { - err = xsk_set_mtu(test->ifobj_tx->ifindex, mtu); - if (err) - return err; - test->ifobj_tx->mtu = mtu; - } - - return 0; -} - -static void pkt_stream_reset(struct pkt_stream *pkt_stream) -{ - if (pkt_stream) { - pkt_stream->current_pkt_nb = 0; - pkt_stream->nb_rx_pkts = 0; - } -} - -static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream) -{ - if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) - return NULL; - - return &pkt_stream->pkts[pkt_stream->current_pkt_nb++]; -} - -static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent) -{ - while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { - (*pkts_sent)++; - if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid) - return &pkt_stream->pkts[pkt_stream->current_pkt_nb++]; - pkt_stream->current_pkt_nb++; - } - return NULL; -} - -static void pkt_stream_delete(struct pkt_stream *pkt_stream) -{ - free(pkt_stream->pkts); - free(pkt_stream); -} - -static void pkt_stream_restore_default(struct test_spec *test) -{ - struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream; - struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream; - - if (tx_pkt_stream != test->tx_pkt_stream_default) { - pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream); - test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default; - } - - if (rx_pkt_stream != test->rx_pkt_stream_default) { - pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream); - test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default; - } -} - -static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts) -{ - struct pkt_stream *pkt_stream; - - pkt_stream = calloc(1, sizeof(*pkt_stream)); - if (!pkt_stream) - return NULL; - - pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts)); - if (!pkt_stream->pkts) { - free(pkt_stream); - return NULL; - } - - pkt_stream->nb_pkts = nb_pkts; - return pkt_stream; -} - -static bool pkt_continues(u32 options) -{ - return options & XDP_PKT_CONTD; -} - -static u32 ceil_u32(u32 a, u32 b) -{ - return (a + b - 1) / b; -} - -static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt) -{ - u32 nb_frags = 1, next_frag; - - if (!pkt) - return 1; - - if (!pkt_stream->verbatim) { - if (!pkt->valid || !pkt->len) - return 1; - return ceil_u32(pkt->len, frame_size); - } - - /* Search for the end of the packet in verbatim mode */ - if (!pkt_continues(pkt->options)) - return nb_frags; - - next_frag = pkt_stream->current_pkt_nb; - 
pkt++; - while (next_frag++ < pkt_stream->nb_pkts) { - nb_frags++; - if (!pkt_continues(pkt->options) || !pkt->valid) - break; - pkt++; - } - return nb_frags; -} - -static bool set_pkt_valid(int offset, u32 len) -{ - return len <= MAX_ETH_JUMBO_SIZE; -} - -static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len) -{ - pkt->offset = offset; - pkt->len = len; - pkt->valid = set_pkt_valid(offset, len); -} - -static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len) -{ - bool prev_pkt_valid = pkt->valid; - - pkt_set(pkt_stream, pkt, offset, len); - pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid; -} - -static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) -{ - return ceil_u32(len, umem->frame_size) * umem->frame_size; -} - -static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off) -{ - struct pkt_stream *pkt_stream; - u32 i; - - pkt_stream = __pkt_stream_alloc(nb_pkts); - if (!pkt_stream) - exit_with_error(ENOMEM); - - pkt_stream->nb_pkts = nb_pkts; - pkt_stream->max_pkt_len = pkt_len; - for (i = 0; i < nb_pkts; i++) { - struct pkt *pkt = &pkt_stream->pkts[i]; - - pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len); - pkt->pkt_nb = nb_start + i * nb_off; - } - - return pkt_stream; -} - -static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len) -{ - return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1); -} - -static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream) -{ - return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len); -} - -static void pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len) -{ - ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len); -} - -static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) -{ - pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len); - pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len); -} - -static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len, - int offset) -{ - struct pkt_stream *pkt_stream; - u32 i; - - pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream); - for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2) - pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len); - - ifobj->xsk->pkt_stream = pkt_stream; -} - -static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset) -{ - __pkt_stream_replace_half(test->ifobj_tx, pkt_len, offset); - __pkt_stream_replace_half(test->ifobj_rx, pkt_len, offset); -} - -static void pkt_stream_receive_half(struct test_spec *test) -{ - struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream; - u32 i; - - test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts, - pkt_stream->pkts[0].len); - pkt_stream = test->ifobj_rx->xsk->pkt_stream; - for (i = 1; i < pkt_stream->nb_pkts; i += 2) - pkt_stream->pkts[i].valid = false; - - pkt_stream->nb_valid_entries /= 2; -} - -static void pkt_stream_even_odd_sequence(struct test_spec *test) -{ - struct pkt_stream *pkt_stream; - u32 i; - - for (i = 0; i < test->nb_sockets; i++) { - pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream; - pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2, - pkt_stream->pkts[0].len, i, 2); - test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream; - - pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream; - pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2, - pkt_stream->pkts[0].len, i, 2); - 
test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream; - } -} - -static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem) -{ - if (!pkt->valid) - return pkt->offset; - return pkt->offset + umem_alloc_buffer(umem); -} - -static void pkt_stream_cancel(struct pkt_stream *pkt_stream) -{ - pkt_stream->current_pkt_nb--; -} - -static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len, - u32 pkt_nb, u32 bytes_written) -{ - void *data = xsk_umem__get_data(umem->buffer, addr); - - if (len < MIN_PKT_SIZE) - return; - - if (!bytes_written) { - gen_eth_hdr(xsk, data); - - len -= PKT_HDR_SIZE; - data += PKT_HDR_SIZE; - } else { - bytes_written -= PKT_HDR_SIZE; - } - - write_payload(data, pkt_nb, bytes_written, len); -} - -static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames, - u32 nb_frames, bool verbatim) -{ - u32 i, len = 0, pkt_nb = 0, payload = 0; - struct pkt_stream *pkt_stream; - - pkt_stream = __pkt_stream_alloc(nb_frames); - if (!pkt_stream) - exit_with_error(ENOMEM); - - for (i = 0; i < nb_frames; i++) { - struct pkt *pkt = &pkt_stream->pkts[pkt_nb]; - struct pkt *frame = &frames[i]; - - pkt->offset = frame->offset; - if (verbatim) { - *pkt = *frame; - pkt->pkt_nb = payload; - if (!frame->valid || !pkt_continues(frame->options)) - payload++; - } else { - if (frame->valid) - len += frame->len; - if (frame->valid && pkt_continues(frame->options)) - continue; - - pkt->pkt_nb = pkt_nb; - pkt->len = len; - pkt->valid = frame->valid; - pkt->options = 0; - - len = 0; - } - - print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n", - pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb); - - if (pkt->valid && pkt->len > pkt_stream->max_pkt_len) - pkt_stream->max_pkt_len = pkt->len; - - if (pkt->valid) - pkt_stream->nb_valid_entries++; - - pkt_nb++; - } - - pkt_stream->nb_pkts = pkt_nb; - pkt_stream->verbatim = verbatim; - return pkt_stream; -} - -static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts) -{ - struct pkt_stream *pkt_stream; - - pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true); - test->ifobj_tx->xsk->pkt_stream = pkt_stream; - - pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false); - test->ifobj_rx->xsk->pkt_stream = pkt_stream; -} - -static void pkt_print_data(u32 *data, u32 cnt) -{ - u32 i; - - for (i = 0; i < cnt; i++) { - u32 seqnum, pkt_nb; - - seqnum = ntohl(*data) & 0xffff; - pkt_nb = ntohl(*data) >> 16; - ksft_print_msg("%u:%u ", pkt_nb, seqnum); - data++; - } -} - -static void pkt_dump(void *pkt, u32 len, bool eth_header) -{ - struct ethhdr *ethhdr = pkt; - u32 i, *data; - - if (eth_header) { - /*extract L2 frame */ - ksft_print_msg("DEBUG>> L2: dst mac: "); - for (i = 0; i < ETH_ALEN; i++) - ksft_print_msg("%02X", ethhdr->h_dest[i]); - - ksft_print_msg("\nDEBUG>> L2: src mac: "); - for (i = 0; i < ETH_ALEN; i++) - ksft_print_msg("%02X", ethhdr->h_source[i]); - - data = pkt + PKT_HDR_SIZE; - } else { - data = pkt; - } - - /*extract L5 frame */ - ksft_print_msg("\nDEBUG>> L5: seqnum: "); - pkt_print_data(data, PKT_DUMP_NB_TO_PRINT); - ksft_print_msg("...."); - if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) { - ksft_print_msg("\n.... 
"); - pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT, - PKT_DUMP_NB_TO_PRINT); - } - ksft_print_msg("\n---------------------------------------\n"); -} - -static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr) -{ - u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom; - u32 offset = addr % umem->frame_size, expected_offset; - int pkt_offset = pkt->valid ? pkt->offset : 0; - - if (!umem->unaligned_mode) - pkt_offset = 0; - - expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; - - if (offset == expected_offset) - return true; - - ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset); - return false; -} - -static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) -{ - void *data = xsk_umem__get_data(buffer, addr); - struct xdp_info *meta = data - sizeof(struct xdp_info); - - if (meta->count != pkt->pkt_nb) { - ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n", - __func__, pkt->pkt_nb, - (unsigned long long)meta->count); - return false; - } - - return true; -} - -static bool is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx) -{ - struct bpf_map *data_map; - int adjust_value = 0; - int key = 0; - int ret; - - data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss"); - if (!data_map || !bpf_map__is_internal(data_map)) { - ksft_print_msg("Error: could not find bss section of XDP program\n"); - exit_with_error(errno); - } - - ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &key, &adjust_value); - if (ret) { - ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", ret); - exit_with_error(errno); - } - - /* Set the 'adjust_value' variable to -EOPNOTSUPP in the XDP program if the adjust_tail - * helper is not supported. Skip the adjust_tail test case in this scenario. 
- */ - return adjust_value != -EOPNOTSUPP; -} - -static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb, - u32 bytes_processed) -{ - u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum; - void *data = xsk_umem__get_data(umem->buffer, addr); - - addr -= umem->base_addr; - - if (addr >= umem->num_frames * umem->frame_size || - addr + len > umem->num_frames * umem->frame_size) { - ksft_print_msg("Frag invalid addr: %llx len: %u\n", - (unsigned long long)addr, len); - return false; - } - if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) { - ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", - (unsigned long long)addr, len); - return false; - } - - pkt_data = data; - if (!bytes_processed) { - pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data); - len -= PKT_HDR_SIZE; - } else { - bytes_processed -= PKT_HDR_SIZE; - } - - expected_seqnum = bytes_processed / sizeof(*pkt_data); - seqnum = ntohl(*pkt_data) & 0xffff; - pkt_nb = ntohl(*pkt_data) >> 16; - - if (expected_pkt_nb != pkt_nb) { - ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n", - __func__, expected_pkt_nb, pkt_nb); - goto error; - } - if (expected_seqnum != seqnum) { - ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n", - __func__, expected_seqnum, seqnum); - goto error; - } - - words_to_end = len / sizeof(*pkt_data) - 1; - pkt_data += words_to_end; - seqnum = ntohl(*pkt_data) & 0xffff; - expected_seqnum += words_to_end; - if (expected_seqnum != seqnum) { - ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n", - __func__, expected_seqnum, seqnum); - goto error; - } - - return true; - -error: - pkt_dump(data, len, !bytes_processed); - return false; -} - -static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) -{ - if (pkt->len != len) { - ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n", - __func__, pkt->len, len); - pkt_dump(xsk_umem__get_data(buffer, addr), len, true); - return false; - } - - return true; -} - -static u32 load_value(u32 *counter) -{ - return __atomic_load_n(counter, __ATOMIC_ACQUIRE); -} - -static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret) -{ - u32 max_budget = MAX_TX_BUDGET_DEFAULT; - u32 cons, ready_to_send; - int delta; - - cons = load_value(xsk->tx.consumer); - ready_to_send = load_value(xsk->tx.producer) - cons; - *ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); - - delta = load_value(xsk->tx.consumer) - cons; - /* By default, xsk should consume exact @max_budget descs at one - * send in this case where hitting the max budget limit in while - * loop is triggered in __xsk_generic_xmit(). Please make sure that - * the number of descs to be sent is larger than @max_budget, or - * else the tx.consumer will be updated in xskq_cons_peek_desc() - * in time which hides the issue we try to verify. 
- */ - if (ready_to_send > max_budget && delta != max_budget) - return false; - - return true; -} - -static int kick_tx(struct xsk_socket_info *xsk) -{ - int ret; - - if (xsk->check_consumer) { - if (!kick_tx_with_check(xsk, &ret)) - return TEST_FAILURE; - } else { - ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); - } - if (ret >= 0) - return TEST_PASS; - if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) { - usleep(100); - return TEST_PASS; - } - return TEST_FAILURE; -} - -static int kick_rx(struct xsk_socket_info *xsk) -{ - int ret; - - ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); - if (ret < 0) - return TEST_FAILURE; - - return TEST_PASS; -} - -static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) -{ - unsigned int rcvd; - u32 idx; - int ret; - - if (xsk_ring_prod__needs_wakeup(&xsk->tx)) { - ret = kick_tx(xsk); - if (ret) - return TEST_FAILURE; - } - - rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); - if (rcvd) { - if (rcvd > xsk->outstanding_tx) { - u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1); - - ksft_print_msg("[%s] Too many packets completed\n", __func__); - ksft_print_msg("Last completion address: %llx\n", - (unsigned long long)addr); - return TEST_FAILURE; - } - - xsk_ring_cons__release(&xsk->umem->cq, rcvd); - xsk->outstanding_tx -= rcvd; - } - - return TEST_PASS; -} - -static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk) -{ - u32 frags_processed = 0, nb_frags = 0, pkt_len = 0; - u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0; - struct pkt_stream *pkt_stream = xsk->pkt_stream; - struct ifobject *ifobj = test->ifobj_rx; - struct xsk_umem_info *umem = xsk->umem; - struct pollfd fds = { }; - struct pkt *pkt; - u64 first_addr = 0; - int ret; - - fds.fd = xsk_socket__fd(xsk->xsk); - fds.events = POLLIN; - - ret = kick_rx(xsk); - if (ret) - return TEST_FAILURE; - - if (ifobj->use_poll) { - ret = poll(&fds, 1, POLL_TMOUT); - if (ret < 0) - return TEST_FAILURE; - - if (!ret) { - if (!is_umem_valid(test->ifobj_tx)) - return TEST_PASS; - - ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__); - return TEST_CONTINUE; - } - - if (!(fds.revents & POLLIN)) - return TEST_CONTINUE; - } - - rcvd = xsk_ring_cons__peek(&xsk->rx, xsk->batch_size, &idx_rx); - if (!rcvd) - return TEST_CONTINUE; - - if (ifobj->use_fill_ring) { - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); - while (ret != rcvd) { - if (xsk_ring_prod__needs_wakeup(&umem->fq)) { - ret = poll(&fds, 1, POLL_TMOUT); - if (ret < 0) - return TEST_FAILURE; - } - ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); - } - } - - while (frags_processed < rcvd) { - const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); - u64 addr = desc->addr, orig; - - orig = xsk_umem__extract_addr(addr); - addr = xsk_umem__add_offset_to_addr(addr); - - if (!nb_frags) { - pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); - if (!pkt) { - ksft_print_msg("[%s] received too many packets addr: %lx len %u\n", - __func__, addr, desc->len); - return TEST_FAILURE; - } - } - - print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n", - addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid); - - if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) || - !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata && - !is_metadata_correct(pkt, umem->buffer, addr))) - return TEST_FAILURE; - - if (!nb_frags++) - first_addr = addr; - 
frags_processed++; - pkt_len += desc->len; - if (ifobj->use_fill_ring) - *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; - - if (pkt_continues(desc->options)) - continue; - - /* The complete packet has been received */ - if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) || - !is_offset_correct(umem, pkt, addr)) - return TEST_FAILURE; - - pkt_stream->nb_rx_pkts++; - nb_frags = 0; - pkt_len = 0; - } - - if (nb_frags) { - /* In the middle of a packet. Start over from beginning of packet. */ - idx_rx -= nb_frags; - xsk_ring_cons__cancel(&xsk->rx, nb_frags); - if (ifobj->use_fill_ring) { - idx_fq -= nb_frags; - xsk_ring_prod__cancel(&umem->fq, nb_frags); - } - frags_processed -= nb_frags; - } - - if (ifobj->use_fill_ring) - xsk_ring_prod__submit(&umem->fq, frags_processed); - if (ifobj->release_rx) - xsk_ring_cons__release(&xsk->rx, frags_processed); - - pthread_mutex_lock(&pacing_mutex); - pkts_in_flight -= pkts_sent; - pthread_mutex_unlock(&pacing_mutex); - pkts_sent = 0; - -return TEST_CONTINUE; -} - -bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num, - unsigned long *bitmap) -{ - struct pkt_stream *pkt_stream = xsk->pkt_stream; - - if (!pkt_stream) { - __set_bit(sock_num, bitmap); - return false; - } - - if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) { - __set_bit(sock_num, bitmap); - if (bitmap_full(bitmap, test->nb_sockets)) - return true; - } - - return false; -} - -static int receive_pkts(struct test_spec *test) -{ - struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; - DECLARE_BITMAP(bitmap, test->nb_sockets); - struct xsk_socket_info *xsk; - u32 sock_num = 0; - int res, ret; - - ret = gettimeofday(&tv_now, NULL); - if (ret) - exit_with_error(errno); - - timeradd(&tv_now, &tv_timeout, &tv_end); - - while (1) { - xsk = &test->ifobj_rx->xsk_arr[sock_num]; - - if ((all_packets_received(test, xsk, sock_num, bitmap))) - break; - - res = __receive_pkts(test, xsk); - if (!(res == TEST_PASS || res == TEST_CONTINUE)) - return res; - - ret = gettimeofday(&tv_now, NULL); - if (ret) - exit_with_error(errno); - - if (timercmp(&tv_now, &tv_end, >)) { - ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__); - return TEST_FAILURE; - } - sock_num = (sock_num + 1) % test->nb_sockets; - } - - return TEST_PASS; -} - -static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout) -{ - u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len; - struct pkt_stream *pkt_stream = xsk->pkt_stream; - struct xsk_umem_info *umem = ifobject->umem; - bool use_poll = ifobject->use_poll; - struct pollfd fds = { }; - int ret; - - buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len); - /* pkts_in_flight might be negative if many invalid packets are sent */ - if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) / - buffer_len)) { - ret = kick_tx(xsk); - if (ret) - return TEST_FAILURE; - return TEST_CONTINUE; - } - - fds.fd = xsk_socket__fd(xsk->xsk); - fds.events = POLLOUT; - - while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) { - if (use_poll) { - ret = poll(&fds, 1, POLL_TMOUT); - if (timeout) { - if (ret < 0) { - ksft_print_msg("ERROR: [%s] Poll error %d\n", - __func__, errno); - return TEST_FAILURE; - } - if (ret == 0) - return TEST_PASS; - break; - } - if (ret <= 0) { - ksft_print_msg("ERROR: [%s] Poll error %d\n", - __func__, errno); - return TEST_FAILURE; - } - } - - complete_pkts(xsk, xsk->batch_size); - } - - for (i = 0; i < 
xsk->batch_size; i++) { - struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream); - u32 nb_frags_left, nb_frags, bytes_written = 0; - - if (!pkt) - break; - - nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt); - if (nb_frags > xsk->batch_size - i) { - pkt_stream_cancel(pkt_stream); - xsk_ring_prod__cancel(&xsk->tx, xsk->batch_size - i); - break; - } - nb_frags_left = nb_frags; - - while (nb_frags_left--) { - struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); - - tx_desc->addr = pkt_get_addr(pkt, ifobject->umem); - if (pkt_stream->verbatim) { - tx_desc->len = pkt->len; - tx_desc->options = pkt->options; - } else if (nb_frags_left) { - tx_desc->len = umem->frame_size; - tx_desc->options = XDP_PKT_CONTD; - } else { - tx_desc->len = pkt->len - bytes_written; - tx_desc->options = 0; - } - if (pkt->valid) - pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb, - bytes_written); - bytes_written += tx_desc->len; - - print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n", - tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb); - - if (nb_frags_left) { - i++; - if (pkt_stream->verbatim) - pkt = pkt_stream_get_next_tx_pkt(pkt_stream); - } - } - - if (pkt && pkt->valid) { - valid_pkts++; - valid_frags += nb_frags; - } - } - - pthread_mutex_lock(&pacing_mutex); - pkts_in_flight += valid_pkts; - pthread_mutex_unlock(&pacing_mutex); - - xsk_ring_prod__submit(&xsk->tx, i); - xsk->outstanding_tx += valid_frags; - - if (use_poll) { - ret = poll(&fds, 1, POLL_TMOUT); - if (ret <= 0) { - if (ret == 0 && timeout) - return TEST_PASS; - - ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret); - return TEST_FAILURE; - } - } - - if (!timeout) { - if (complete_pkts(xsk, i)) - return TEST_FAILURE; - - usleep(10); - return TEST_PASS; - } - - return TEST_CONTINUE; -} - -static int wait_for_tx_completion(struct xsk_socket_info *xsk) -{ - struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; - int ret; - - ret = gettimeofday(&tv_now, NULL); - if (ret) - exit_with_error(errno); - timeradd(&tv_now, &tv_timeout, &tv_end); - - while (xsk->outstanding_tx) { - ret = gettimeofday(&tv_now, NULL); - if (ret) - exit_with_error(errno); - if (timercmp(&tv_now, &tv_end, >)) { - ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__); - return TEST_FAILURE; - } - - complete_pkts(xsk, xsk->batch_size); - } - - return TEST_PASS; -} - -bool all_packets_sent(struct test_spec *test, unsigned long *bitmap) -{ - return bitmap_full(bitmap, test->nb_sockets); -} - -static int send_pkts(struct test_spec *test, struct ifobject *ifobject) -{ - bool timeout = !is_umem_valid(test->ifobj_rx); - DECLARE_BITMAP(bitmap, test->nb_sockets); - u32 i, ret; - - while (!(all_packets_sent(test, bitmap))) { - for (i = 0; i < test->nb_sockets; i++) { - struct pkt_stream *pkt_stream; - - pkt_stream = ifobject->xsk_arr[i].pkt_stream; - if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) { - __set_bit(i, bitmap); - continue; - } - ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout); - if (ret == TEST_CONTINUE && !test->fail) - continue; - - if ((ret || test->fail) && !timeout) - return TEST_FAILURE; - - if (ret == TEST_PASS && timeout) - return ret; - - ret = wait_for_tx_completion(&ifobject->xsk_arr[i]); - if (ret) - return TEST_FAILURE; - } - } - - return TEST_PASS; -} - -static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats) -{ - int fd = xsk_socket__fd(xsk), err; - socklen_t optlen, expected_len; - - optlen = sizeof(*stats); 
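	/* [Editor's note, not part of the patch: XDP_STATISTICS is read via
	 * the usual getsockopt() value-result protocol. optlen goes in as
	 * sizeof(*stats) and comes back as the number of bytes the kernel
	 * actually copied; the expected_len comparison below rejects a short
	 * or oversized reply instead of silently accepting a truncated
	 * struct xdp_statistics.]
	 */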
- err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen); - if (err) { - ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n", - __func__, -err, strerror(-err)); - return TEST_FAILURE; - } - - expected_len = sizeof(struct xdp_statistics); - if (optlen != expected_len) { - ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n", - __func__, expected_len, optlen); - return TEST_FAILURE; - } - - return TEST_PASS; -} - -static int validate_rx_dropped(struct ifobject *ifobject) -{ - struct xsk_socket *xsk = ifobject->xsk->xsk; - struct xdp_statistics stats; - int err; - - err = kick_rx(ifobject->xsk); - if (err) - return TEST_FAILURE; - - err = get_xsk_stats(xsk, &stats); - if (err) - return TEST_FAILURE; - - /* The receiver calls getsockopt after receiving the last (valid) - * packet which is not the final packet sent in this test (valid and - * invalid packets are sent in alternating fashion with the final - * packet being invalid). Since the last packet may or may not have - * been dropped already, both outcomes must be allowed. - */ - if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 || - stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1) - return TEST_PASS; - - return TEST_FAILURE; -} - -static int validate_rx_full(struct ifobject *ifobject) -{ - struct xsk_socket *xsk = ifobject->xsk->xsk; - struct xdp_statistics stats; - int err; - - usleep(1000); - err = kick_rx(ifobject->xsk); - if (err) - return TEST_FAILURE; - - err = get_xsk_stats(xsk, &stats); - if (err) - return TEST_FAILURE; - - if (stats.rx_ring_full) - return TEST_PASS; - - return TEST_FAILURE; -} - -static int validate_fill_empty(struct ifobject *ifobject) -{ - struct xsk_socket *xsk = ifobject->xsk->xsk; - struct xdp_statistics stats; - int err; - - usleep(1000); - err = kick_rx(ifobject->xsk); - if (err) - return TEST_FAILURE; - - err = get_xsk_stats(xsk, &stats); - if (err) - return TEST_FAILURE; - - if (stats.rx_fill_ring_empty_descs) - return TEST_PASS; - - return TEST_FAILURE; -} - -static int validate_tx_invalid_descs(struct ifobject *ifobject) -{ - struct xsk_socket *xsk = ifobject->xsk->xsk; - int fd = xsk_socket__fd(xsk); - struct xdp_statistics stats; - socklen_t optlen; - int err; - - optlen = sizeof(stats); - err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); - if (err) { - ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n", - __func__, -err, strerror(-err)); - return TEST_FAILURE; - } - - if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) { - ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n", - __func__, - (unsigned long long)stats.tx_invalid_descs, - ifobject->xsk->pkt_stream->nb_pkts); - return TEST_FAILURE; - } - - return TEST_PASS; -} - -static void xsk_configure_socket(struct test_spec *test, struct ifobject *ifobject, - struct xsk_umem_info *umem, bool tx) -{ - int i, ret; - - for (i = 0; i < test->nb_sockets; i++) { - bool shared = (ifobject->shared_umem && tx) ? 
true : !!i; - u32 ctr = 0; - - while (ctr++ < SOCK_RECONF_CTR) { - ret = __xsk_configure_socket(&ifobject->xsk_arr[i], umem, - ifobject, shared); - if (!ret) - break; - - /* Retry if it fails as xsk_socket__create() is asynchronous */ - if (ctr >= SOCK_RECONF_CTR) - exit_with_error(-ret); - usleep(USLEEP_MAX); - } - if (ifobject->busy_poll) - enable_busy_poll(&ifobject->xsk_arr[i]); - } -} - -static void thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobject) -{ - xsk_configure_socket(test, ifobject, test->ifobj_rx->umem, true); - ifobject->xsk = &ifobject->xsk_arr[0]; - ifobject->xskmap = test->ifobj_rx->xskmap; - memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info)); - ifobject->umem->base_addr = 0; -} - -static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, - bool fill_up) -{ - u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM; - u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts; - int ret; - - if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) - buffers_to_fill = umem->num_frames; - else - buffers_to_fill = umem->fill_size; - - ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx); - if (ret != buffers_to_fill) - exit_with_error(ENOSPC); - - while (filled < buffers_to_fill) { - struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts); - u64 addr; - u32 i; - - for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) { - if (!pkt) { - if (!fill_up) - break; - addr = filled * umem->frame_size + umem->base_addr; - } else if (pkt->offset >= 0) { - addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem); - } else { - addr = pkt->offset + umem_alloc_buffer(umem); - } - - *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; - if (++filled >= buffers_to_fill) - break; - } - } - xsk_ring_prod__submit(&umem->fq, filled); - xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled); - - pkt_stream_reset(pkt_stream); - umem_reset_alloc(umem); -} - -static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) -{ - u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; - int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; - LIBBPF_OPTS(bpf_xdp_query_opts, opts); - void *bufs; - int ret; - u32 i; - - if (ifobject->umem->unaligned_mode) - mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB; - - if (ifobject->shared_umem) - umem_sz *= 2; - - bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); - if (bufs == MAP_FAILED) - exit_with_error(errno); - - ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz); - if (ret) - exit_with_error(-ret); - - xsk_configure_socket(test, ifobject, ifobject->umem, false); - - ifobject->xsk = &ifobject->xsk_arr[0]; - - if (!ifobject->rx_on) - return; - - xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream, ifobject->use_fill_ring); - - for (i = 0; i < test->nb_sockets; i++) { - ifobject->xsk = &ifobject->xsk_arr[i]; - ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i); - if (ret) - exit_with_error(errno); - } -} - -static void *worker_testapp_validate_tx(void *arg) -{ - struct test_spec *test = (struct test_spec *)arg; - struct ifobject *ifobject = test->ifobj_tx; - int err; - - if (test->current_step == 1) { - if (!ifobject->shared_umem) - thread_common_ops(test, ifobject); - else - thread_common_ops_tx(test, ifobject); - } - - err = send_pkts(test, ifobject); - - if (!err && ifobject->validation_func) - err = ifobject->validation_func(ifobject); - if (err) 
- report_failure(test); - - pthread_exit(NULL); -} - -static void *worker_testapp_validate_rx(void *arg) -{ - struct test_spec *test = (struct test_spec *)arg; - struct ifobject *ifobject = test->ifobj_rx; - int err; - - if (test->current_step == 1) { - thread_common_ops(test, ifobject); - } else { - xsk_clear_xskmap(ifobject->xskmap); - err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0); - if (err) { - ksft_print_msg("Error: Failed to update xskmap, error %s\n", - strerror(-err)); - exit_with_error(-err); - } - } - - pthread_barrier_wait(&barr); - - err = receive_pkts(test); - - if (!err && ifobject->validation_func) - err = ifobject->validation_func(ifobject); - - if (err) { - if (test->adjust_tail && !is_adjust_tail_supported(ifobject->xdp_progs)) - test->adjust_tail_support = false; - else - report_failure(test); - } - - pthread_exit(NULL); -} - -static u64 ceil_u64(u64 a, u64 b) -{ - return (a + b - 1) / b; -} - -static void testapp_clean_xsk_umem(struct ifobject *ifobj) -{ - u64 umem_sz = ifobj->umem->num_frames * ifobj->umem->frame_size; - - if (ifobj->shared_umem) - umem_sz *= 2; - - umem_sz = ceil_u64(umem_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE; - xsk_umem__delete(ifobj->umem->umem); - munmap(ifobj->umem->buffer, umem_sz); -} - -static void handler(int signum) -{ - pthread_exit(NULL); -} - -static bool xdp_prog_changed_rx(struct test_spec *test) -{ - struct ifobject *ifobj = test->ifobj_rx; - - return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode; -} - -static bool xdp_prog_changed_tx(struct test_spec *test) -{ - struct ifobject *ifobj = test->ifobj_tx; - - return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode; -} - -static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog, - struct bpf_map *xskmap, enum test_mode mode) -{ - int err; - - xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode)); - err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode)); - if (err) { - ksft_print_msg("Error attaching XDP program\n"); - exit_with_error(-err); - } - - if (ifobj->mode != mode && (mode == TEST_MODE_DRV || mode == TEST_MODE_ZC)) - if (!xsk_is_in_mode(ifobj->ifindex, XDP_FLAGS_DRV_MODE)) { - ksft_print_msg("ERROR: XDP prog not in DRV mode\n"); - exit_with_error(EINVAL); - } - - ifobj->xdp_prog = xdp_prog; - ifobj->xskmap = xskmap; - ifobj->mode = mode; -} - -static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx, - struct ifobject *ifobj_tx) -{ - if (xdp_prog_changed_rx(test)) - xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode); - - if (!ifobj_tx || ifobj_tx->shared_umem) - return; - - if (xdp_prog_changed_tx(test)) - xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode); -} - -static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1, - struct ifobject *ifobj2) -{ - pthread_t t0, t1; - int err; - - if (test->mtu > MAX_ETH_PKT_SIZE) { - if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp || - (ifobj2 && !ifobj2->multi_buff_zc_supp))) { - ksft_test_result_skip("Multi buffer for zero-copy not supported.\n"); - return TEST_SKIP; - } - if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp || - (ifobj2 && !ifobj2->multi_buff_supp))) { - ksft_test_result_skip("Multi buffer not supported.\n"); - return TEST_SKIP; - } - } - err = test_spec_set_mtu(test, test->mtu); - if (err) { - ksft_print_msg("Error, could not set mtu.\n"); - exit_with_error(err); - } - - if (ifobj2) 
{ - if (pthread_barrier_init(&barr, NULL, 2)) - exit_with_error(errno); - pkt_stream_reset(ifobj2->xsk->pkt_stream); - } - - test->current_step++; - pkt_stream_reset(ifobj1->xsk->pkt_stream); - pkts_in_flight = 0; - - signal(SIGUSR1, handler); - /*Spawn RX thread */ - pthread_create(&t0, NULL, ifobj1->func_ptr, test); - - if (ifobj2) { - pthread_barrier_wait(&barr); - if (pthread_barrier_destroy(&barr)) - exit_with_error(errno); - - /*Spawn TX thread */ - pthread_create(&t1, NULL, ifobj2->func_ptr, test); - - pthread_join(t1, NULL); - } - - if (!ifobj2) - pthread_kill(t0, SIGUSR1); - else - pthread_join(t0, NULL); - - if (test->total_steps == test->current_step || test->fail) { - u32 i; - - if (ifobj2) - for (i = 0; i < test->nb_sockets; i++) - xsk_socket__delete(ifobj2->xsk_arr[i].xsk); - - for (i = 0; i < test->nb_sockets; i++) - xsk_socket__delete(ifobj1->xsk_arr[i].xsk); - - testapp_clean_xsk_umem(ifobj1); - if (ifobj2 && !ifobj2->shared_umem) - testapp_clean_xsk_umem(ifobj2); - } - - return !!test->fail; -} - -static int testapp_validate_traffic(struct test_spec *test) -{ - struct ifobject *ifobj_rx = test->ifobj_rx; - struct ifobject *ifobj_tx = test->ifobj_tx; - - if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) || - (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) { - ksft_test_result_skip("No huge pages present.\n"); - return TEST_SKIP; - } - - if (test->set_ring) { - if (ifobj_tx->hw_ring_size_supp) { - if (set_ring_size(ifobj_tx)) { - ksft_test_result_skip("Failed to change HW ring size.\n"); - return TEST_FAILURE; - } - } else { - ksft_test_result_skip("Changing HW ring size not supported.\n"); - return TEST_SKIP; - } - } - - xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx); - return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx); -} - -static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj) -{ - return __testapp_validate_traffic(test, ifobj, NULL); -} - -static int testapp_teardown(struct test_spec *test) -{ - int i; - - for (i = 0; i < MAX_TEARDOWN_ITER; i++) { - if (testapp_validate_traffic(test)) - return TEST_FAILURE; - test_spec_reset(test); - } - - return TEST_PASS; -} - -static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) -{ - thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr; - struct ifobject *tmp_ifobj = (*ifobj1); - - (*ifobj1)->func_ptr = (*ifobj2)->func_ptr; - (*ifobj2)->func_ptr = tmp_func_ptr; - - *ifobj1 = *ifobj2; - *ifobj2 = tmp_ifobj; -} - -static int testapp_bidirectional(struct test_spec *test) -{ - int res; - - test->ifobj_tx->rx_on = true; - test->ifobj_rx->tx_on = true; - test->total_steps = 2; - if (testapp_validate_traffic(test)) - return TEST_FAILURE; - - print_verbose("Switching Tx/Rx direction\n"); - swap_directions(&test->ifobj_rx, &test->ifobj_tx); - res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx); - - swap_directions(&test->ifobj_rx, &test->ifobj_tx); - return res; -} - -static int swap_xsk_resources(struct test_spec *test) -{ - int ret; - - test->ifobj_tx->xsk_arr[0].pkt_stream = NULL; - test->ifobj_rx->xsk_arr[0].pkt_stream = NULL; - test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default; - test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default; - test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1]; - test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1]; - - ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0); - if (ret) - return TEST_FAILURE; - - return TEST_PASS; -} - 
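[Editor's note: the data-path validation throughout this file relies on the payload-word convention documented in the removed write_payload() comment above: the packet number in the upper 16 bits and the per-word sequence number in the lower 16 bits, carried in network byte order. The sketch below is illustrative only and not part of the patch; payload_word() and the example values are made up here, but the encode and decode steps mirror write_payload() and pkt_print_data() as quoted in this diff.]

#include <arpa/inet.h>
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Build one payload word the way the removed write_payload() does:
 * upper 16 bits = packet number, lower 16 bits = word index.
 */
static uint32_t payload_word(uint32_t pkt_nb, uint32_t word_idx)
{
	return htonl(pkt_nb << 16 | word_idx);
}

int main(void)
{
	/* The removed comment's example: the 3rd packet (pkt_nb == 2),
	 * 5th data word (index 4) encodes as (2 << 16) | 4.
	 */
	uint32_t wire = payload_word(2, 4);
	uint32_t host = ntohl(wire);
	uint32_t pkt_nb = host >> 16;	/* decoded as pkt_print_data() does */
	uint32_t seqnum = host & 0xffff;

	assert(host == ((2u << 16) | 4));
	printf("pkt_nb=%u seqnum=%u\n", pkt_nb, seqnum);
	return 0;
}

Because the words travel in network byte order, a receiver can split them back into packet number and sequence number regardless of host endianness, which is what is_frag_valid() depends on when it checks the first and last word of every frag.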
-static int testapp_xdp_prog_cleanup(struct test_spec *test) -{ - test->total_steps = 2; - test->nb_sockets = 2; - if (testapp_validate_traffic(test)) - return TEST_FAILURE; - - if (swap_xsk_resources(test)) - return TEST_FAILURE; - return testapp_validate_traffic(test); -} - -static int testapp_headroom(struct test_spec *test) -{ - test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE; - return testapp_validate_traffic(test); -} - -static int testapp_stats_rx_dropped(struct test_spec *test) -{ - if (test->mode == TEST_MODE_ZC) { - ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n"); - return TEST_SKIP; - } - - pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0); - test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3; - pkt_stream_receive_half(test); - test->ifobj_rx->validation_func = validate_rx_dropped; - return testapp_validate_traffic(test); -} - -static int testapp_stats_tx_invalid_descs(struct test_spec *test) -{ - pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0); - test->ifobj_tx->validation_func = validate_tx_invalid_descs; - return testapp_validate_traffic(test); -} - -static int testapp_stats_rx_full(struct test_spec *test) -{ - pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); - test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); - - test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; - test->ifobj_rx->release_rx = false; - test->ifobj_rx->validation_func = validate_rx_full; - return testapp_validate_traffic(test); -} - -static int testapp_stats_fill_empty(struct test_spec *test) -{ - pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); - test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); - - test->ifobj_rx->use_fill_ring = false; - test->ifobj_rx->validation_func = validate_fill_empty; - return testapp_validate_traffic(test); -} - -static int testapp_send_receive_unaligned(struct test_spec *test) -{ - test->ifobj_tx->umem->unaligned_mode = true; - test->ifobj_rx->umem->unaligned_mode = true; - /* Let half of the packets straddle a 4K buffer boundary */ - pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2); - - return testapp_validate_traffic(test); -} - -static int testapp_send_receive_unaligned_mb(struct test_spec *test) -{ - test->mtu = MAX_ETH_JUMBO_SIZE; - test->ifobj_tx->umem->unaligned_mode = true; - test->ifobj_rx->umem->unaligned_mode = true; - pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE); - return testapp_validate_traffic(test); -} - -static int testapp_single_pkt(struct test_spec *test) -{ - struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}}; - - pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); - return testapp_validate_traffic(test); -} - -static int testapp_send_receive_mb(struct test_spec *test) -{ - test->mtu = MAX_ETH_JUMBO_SIZE; - pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE); - - return testapp_validate_traffic(test); -} - -static int testapp_invalid_desc_mb(struct test_spec *test) -{ - struct xsk_umem_info *umem = test->ifobj_tx->umem; - u64 umem_size = umem->num_frames * umem->frame_size; - struct pkt pkts[] = { - /* Valid packet for synch to start with */ - {0, MIN_PKT_SIZE, 0, true, 0}, - /* Zero frame len is not legal */ - {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, - {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, - {0, 0, 0, 
false, 0},
-		/* Invalid address in the second frame */
-		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
-		{umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
-		/* Invalid len in the middle */
-		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
-		{0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
-		/* Invalid options in the middle */
-		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
-		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION},
-		/* Transmit 2 frags, receive 3 */
-		{0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD},
-		{0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0},
-		/* Middle frame crosses chunk boundary with small length */
-		{0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
-		{-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0},
-		/* Valid packet for synch so that something is received */
-		{0, MIN_PKT_SIZE, 0, true, 0}};
-
-	if (umem->unaligned_mode) {
-		/* Crossing a chunk boundary allowed */
-		pkts[12].valid = true;
-		pkts[13].valid = true;
-	}
-
-	test->mtu = MAX_ETH_JUMBO_SIZE;
-	pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_invalid_desc(struct test_spec *test)
-{
-	struct xsk_umem_info *umem = test->ifobj_tx->umem;
-	u64 umem_size = umem->num_frames * umem->frame_size;
-	struct pkt pkts[] = {
-		/* Zero packet address allowed */
-		{0, MIN_PKT_SIZE, 0, true},
-		/* Allowed packet */
-		{0, MIN_PKT_SIZE, 0, true},
-		/* Straddling the start of umem */
-		{-2, MIN_PKT_SIZE, 0, false},
-		/* Packet too large */
-		{0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
-		/* Up to end of umem allowed */
-		{umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true},
-		/* After umem ends */
-		{umem_size, MIN_PKT_SIZE, 0, false},
-		/* Straddle the end of umem */
-		{umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
-		/* Straddle a 4K boundary */
-		{0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
-		/* Straddle a 2K boundary */
-		{0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true},
-		/* Valid packet for synch so that something is received */
-		{0, MIN_PKT_SIZE, 0, true}};
-
-	if (umem->unaligned_mode) {
-		/* Crossing a page boundary allowed */
-		pkts[7].valid = true;
-	}
-	if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
-		/* Crossing a 2K frame size boundary not allowed */
-		pkts[8].valid = false;
-	}
-
-	if (test->ifobj_tx->shared_umem) {
-		pkts[4].offset += umem_size;
-		pkts[5].offset += umem_size;
-		pkts[6].offset += umem_size;
-	}
-
-	pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_drop(struct test_spec *test)
-{
-	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
-	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
-	test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop,
-			       skel_rx->maps.xsk, skel_tx->maps.xsk);
-
-	pkt_stream_receive_half(test);
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_metadata_copy(struct test_spec *test)
-{
-	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
-	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
-	test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
-			       skel_tx->progs.xsk_xdp_populate_metadata,
-			       skel_rx->maps.xsk, skel_tx->maps.xsk);
-	test->ifobj_rx->use_metadata = true;
-
-	skel_rx->bss->count = 0;
-
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_shared_umem(struct test_spec *test)
-{
-	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
-	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
-	test->total_steps = 1;
-	test->nb_sockets = 2;
-
-	test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem,
-			       skel_tx->progs.xsk_xdp_shared_umem,
-			       skel_rx->maps.xsk, skel_tx->maps.xsk);
-
-	pkt_stream_even_odd_sequence(test);
-
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_poll_txq_tmout(struct test_spec *test)
-{
-	test->ifobj_tx->use_poll = true;
-	/* create invalid frame by set umem frame_size and pkt length equal to 2048 */
-	test->ifobj_tx->umem->frame_size = 2048;
-	pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048);
-	return testapp_validate_traffic_single_thread(test, test->ifobj_tx);
-}
-
-static int testapp_poll_rxq_tmout(struct test_spec *test)
-{
-	test->ifobj_rx->use_poll = true;
-	return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
-}
-
-static int testapp_too_many_frags(struct test_spec *test)
-{
-	struct pkt *pkts;
-	u32 max_frags, i;
-	int ret;
-
-	if (test->mode == TEST_MODE_ZC) {
-		max_frags = test->ifobj_tx->xdp_zc_max_segs;
-	} else {
-		max_frags = get_max_skb_frags();
-		if (!max_frags) {
-			ksft_print_msg("Couldn't retrieve MAX_SKB_FRAGS from system, using default (17) value\n");
-			max_frags = 17;
-		}
-		max_frags += 1;
-	}
-
-	pkts = calloc(2 * max_frags + 2, sizeof(struct pkt));
-	if (!pkts)
-		return TEST_FAILURE;
-
-	test->mtu = MAX_ETH_JUMBO_SIZE;
-
-	/* Valid packet for synch */
-	pkts[0].len = MIN_PKT_SIZE;
-	pkts[0].valid = true;
-
-	/* One valid packet with the max amount of frags */
-	for (i = 1; i < max_frags + 1; i++) {
-		pkts[i].len = MIN_PKT_SIZE;
-		pkts[i].options = XDP_PKT_CONTD;
-		pkts[i].valid = true;
-	}
-	pkts[max_frags].options = 0;
-
-	/* An invalid packet with the max amount of frags but signals packet
-	 * continues on the last frag
-	 */
-	for (i = max_frags + 1; i < 2 * max_frags + 1; i++) {
-		pkts[i].len = MIN_PKT_SIZE;
-		pkts[i].options = XDP_PKT_CONTD;
-		pkts[i].valid = false;
-	}
-
-	/* Valid packet for synch */
-	pkts[2 * max_frags + 1].len = MIN_PKT_SIZE;
-	pkts[2 * max_frags + 1].valid = true;
-
-	pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2);
-	ret = testapp_validate_traffic(test);
-
-	free(pkts);
-	return ret;
-}
-
-static int xsk_load_xdp_programs(struct ifobject *ifobj)
-{
-	ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
-	if (libbpf_get_error(ifobj->xdp_progs))
-		return libbpf_get_error(ifobj->xdp_progs);
-
-	return 0;
-}
-
 static void xsk_unload_xdp_programs(struct ifobject *ifobj)
 {
 	xsk_xdp_progs__destroy(ifobj->xdp_progs);
 }
 
-/* Simple test */
-static bool hugepages_present(void)
-{
-	size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
-	void *bufs;
-
-	bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
-		    MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB);
-	if (bufs == MAP_FAILED)
-		return false;
-
-	mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
-	munmap(bufs, mmap_sz);
-	return true;
-}
-
-static void init_iface(struct ifobject *ifobj, thread_func_t func_ptr)
-{
-	LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
-	int err;
-
-	ifobj->func_ptr = func_ptr;
-
-	err = xsk_load_xdp_programs(ifobj);
-	if (err) {
-		ksft_print_msg("Error loading XDP program\n");
-		exit_with_error(err);
-	}
-
-	if (hugepages_present())
-		ifobj->unaligned_supp = true;
-
-	err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts);
-	if (err) {
-		ksft_print_msg("Error querying XDP capabilities\n");
-		exit_with_error(-err);
-	}
-	if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG)
-		ifobj->multi_buff_supp = true;
-	if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
-		if (query_opts.xdp_zc_max_segs > 1) {
-			ifobj->multi_buff_zc_supp = true;
-			ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs;
-		} else {
-			ifobj->xdp_zc_max_segs = 0;
-		}
-	}
-}
-
-static int testapp_send_receive(struct test_spec *test)
-{
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_2k_frame(struct test_spec *test)
-{
-	test->ifobj_tx->umem->frame_size = 2048;
-	test->ifobj_rx->umem->frame_size = 2048;
-	pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_poll_rx(struct test_spec *test)
-{
-	test->ifobj_rx->use_poll = true;
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_poll_tx(struct test_spec *test)
-{
-	test->ifobj_tx->use_poll = true;
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_aligned_inv_desc(struct test_spec *test)
-{
-	return testapp_invalid_desc(test);
-}
-
-static int testapp_aligned_inv_desc_2k_frame(struct test_spec *test)
-{
-	test->ifobj_tx->umem->frame_size = 2048;
-	test->ifobj_rx->umem->frame_size = 2048;
-	return testapp_invalid_desc(test);
-}
-
-static int testapp_unaligned_inv_desc(struct test_spec *test)
-{
-	test->ifobj_tx->umem->unaligned_mode = true;
-	test->ifobj_rx->umem->unaligned_mode = true;
-	return testapp_invalid_desc(test);
-}
-
-static int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test)
-{
-	u64 page_size, umem_size;
-
-	/* Odd frame size so the UMEM doesn't end near a page boundary. */
-	test->ifobj_tx->umem->frame_size = 4001;
-	test->ifobj_rx->umem->frame_size = 4001;
-	test->ifobj_tx->umem->unaligned_mode = true;
-	test->ifobj_rx->umem->unaligned_mode = true;
-	/* This test exists to test descriptors that staddle the end of
-	 * the UMEM but not a page.
-	 */
-	page_size = sysconf(_SC_PAGESIZE);
-	umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
-	assert(umem_size % page_size > MIN_PKT_SIZE);
-	assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
-
-	return testapp_invalid_desc(test);
-}
-
-static int testapp_aligned_inv_desc_mb(struct test_spec *test)
-{
-	return testapp_invalid_desc_mb(test);
-}
-
-static int testapp_unaligned_inv_desc_mb(struct test_spec *test)
-{
-	test->ifobj_tx->umem->unaligned_mode = true;
-	test->ifobj_rx->umem->unaligned_mode = true;
-	return testapp_invalid_desc_mb(test);
-}
-
-static int testapp_xdp_metadata(struct test_spec *test)
-{
-	return testapp_xdp_metadata_copy(test);
-}
-
-static int testapp_xdp_metadata_mb(struct test_spec *test)
-{
-	test->mtu = MAX_ETH_JUMBO_SIZE;
-	return testapp_xdp_metadata_copy(test);
-}
-
-static int testapp_hw_sw_min_ring_size(struct test_spec *test)
-{
-	int ret;
-
-	test->set_ring = true;
-	test->total_steps = 2;
-	test->ifobj_tx->ring.tx_pending = DEFAULT_BATCH_SIZE;
-	test->ifobj_tx->ring.rx_pending = DEFAULT_BATCH_SIZE * 2;
-	test->ifobj_tx->xsk->batch_size = 1;
-	test->ifobj_rx->xsk->batch_size = 1;
-	ret = testapp_validate_traffic(test);
-	if (ret)
-		return ret;
-
-	/* Set batch size to hw_ring_size - 1 */
-	test->ifobj_tx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
-	test->ifobj_rx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_hw_sw_max_ring_size(struct test_spec *test)
-{
-	u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4;
-	int ret;
-
-	test->set_ring = true;
-	test->total_steps = 2;
-	test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending;
-	test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending;
-	test->ifobj_rx->umem->num_frames = max_descs;
-	test->ifobj_rx->umem->fill_size = max_descs;
-	test->ifobj_rx->umem->comp_size = max_descs;
-	test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-	test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-
-	ret = testapp_validate_traffic(test);
-	if (ret)
-		return ret;
-
-	/* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when
-	 * updating the Rx HW tail register.
-	 */
-	test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
-	test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
-	pkt_stream_replace(test, max_descs, MIN_PKT_SIZE);
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value)
-{
-	struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
-	struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
-	test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail,
-			       skel_tx->progs.xsk_xdp_adjust_tail,
-			       skel_rx->maps.xsk, skel_tx->maps.xsk);
-
-	skel_rx->bss->adjust_value = adjust_value;
-
-	return testapp_validate_traffic(test);
-}
-
-static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len)
-{
-	int ret;
-
-	test->adjust_tail_support = true;
-	test->adjust_tail = true;
-	test->total_steps = 1;
-
-	pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len);
-	pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value);
-
-	ret = testapp_xdp_adjust_tail(test, value);
-	if (ret)
-		return ret;
-
-	if (!test->adjust_tail_support) {
-		ksft_test_result_skip("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n",
-				      mode_string(test), busy_poll_string(test));
-		return TEST_SKIP;
-	}
-
-	return 0;
-}
-
-static int testapp_adjust_tail_shrink(struct test_spec *test)
-{
-	/* Shrink by 4 bytes for testing purpose */
-	return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2);
-}
-
-static int testapp_adjust_tail_shrink_mb(struct test_spec *test)
-{
-	test->mtu = MAX_ETH_JUMBO_SIZE;
-	/* Shrink by the frag size */
-	return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2);
-}
-
-static int testapp_adjust_tail_grow(struct test_spec *test)
-{
-	/* Grow by 4 bytes for testing purpose */
-	return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2);
-}
-
-static int testapp_adjust_tail_grow_mb(struct test_spec *test)
-{
-	test->mtu = MAX_ETH_JUMBO_SIZE;
-	/* Grow by (frag_size - last_frag_Size) - 1 to stay inside the last fragment */
-	return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1,
-				   XSK_UMEM__LARGE_FRAME_SIZE * 2);
-}
-
-static int testapp_tx_queue_consumer(struct test_spec *test)
-{
-	int nr_packets;
-
-	if (test->mode == TEST_MODE_ZC) {
-		ksft_test_result_skip("Can not run TX_QUEUE_CONSUMER test for ZC mode\n");
-		return TEST_SKIP;
-	}
-
-	nr_packets = MAX_TX_BUDGET_DEFAULT + 1;
-	pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE);
-	test->ifobj_tx->xsk->batch_size = nr_packets;
-	test->ifobj_tx->xsk->check_consumer = true;
-
-	return testapp_validate_traffic(test);
-}
-
 static void run_pkt_test(struct test_spec *test)
 {
 	int ret;
 
 	ret = test->test_func(test);
 
-	if (ret == TEST_PASS)
+	switch (ret) {
+	case TEST_PASS:
 		ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
 				      test->name);
-	pkt_stream_restore_default(test);
-}
-
-static struct ifobject *ifobject_create(void)
-{
-	struct ifobject *ifobj;
-
-	ifobj = calloc(1, sizeof(struct ifobject));
-	if (!ifobj)
-		return NULL;
-
-	ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr));
-	if (!ifobj->xsk_arr)
-		goto out_xsk_arr;
-
-	ifobj->umem = calloc(1, sizeof(*ifobj->umem));
-	if (!ifobj->umem)
-		goto out_umem;
-
-	return ifobj;
-
-out_umem:
-	free(ifobj->xsk_arr);
-out_xsk_arr:
-	free(ifobj);
-	return NULL;
-}
+		break;
+	case TEST_SKIP:
+		ksft_test_result_skip("SKIP: %s %s%s\n", mode_string(test), busy_poll_string(test),
+				      test->name);
+		break;
+	case TEST_FAILURE:
+		ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test),
+				      test->name);
+		break;
+	default:
+		ksft_test_result_fail("FAIL: %s %s%s -- Unexpected returned value (%d)\n",
+				      mode_string(test), busy_poll_string(test), test->name, ret);
+	}
 
-static void ifobject_delete(struct ifobject *ifobj)
-{
-	free(ifobj->umem);
-	free(ifobj->xsk_arr);
-	free(ifobj);
+	pkt_stream_restore_default(test);
 }
 
 static bool is_xdp_supported(int ifindex)
@@ -2726,47 +319,6 @@ static bool is_xdp_supported(int ifindex)
 	return true;
 }
 
-static const struct test_spec tests[] = {
-	{.name = "SEND_RECEIVE", .test_func = testapp_send_receive},
-	{.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame},
-	{.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt},
-	{.name = "POLL_RX", .test_func = testapp_poll_rx},
-	{.name = "POLL_TX", .test_func = testapp_poll_tx},
-	{.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout},
-	{.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout},
-	{.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned},
-	{.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc},
-	{.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame},
-	{.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc},
-	{.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE",
-	 .test_func = testapp_unaligned_inv_desc_4001_frame},
-	{.name = "UMEM_HEADROOM", .test_func = testapp_headroom},
-	{.name = "TEARDOWN", .test_func = testapp_teardown},
-	{.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional},
-	{.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped},
-	{.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs},
-	{.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full},
-	{.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty},
-	{.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup},
-	{.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop},
-	{.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem},
-	{.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata},
-	{.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb},
-	{.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb},
-	{.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS",
-	 .test_func = testapp_send_receive_unaligned_mb},
-	{.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb},
-	{.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb},
-	{.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
-	{.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
-	{.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
-	{.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink},
-	{.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb},
-	{.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow},
-	{.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb},
-	{.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer},
-	};
-
 static void print_tests(void)
 {
 	u32 i;
@@ -2774,10 +326,13 @@ static void print_tests(void)
 	printf("Tests:\n");
 	for (i = 0; i < ARRAY_SIZE(tests); i++)
 		printf("%u: %s\n", i, tests[i].name);
+	for (i = ARRAY_SIZE(tests); i < ARRAY_SIZE(tests) + ARRAY_SIZE(ci_skip_tests); i++)
+		printf("%u: %s\n", i, ci_skip_tests[i - ARRAY_SIZE(tests)].name);
 }
 
 int main(int argc, char **argv)
 {
+	const size_t total_tests = ARRAY_SIZE(tests) + ARRAY_SIZE(ci_skip_tests);
 	struct pkt_stream *rx_pkt_stream_default;
 	struct pkt_stream *tx_pkt_stream_default;
 	struct ifobject *ifobj_tx, *ifobj_rx;
@@ -2805,7 +360,7 @@ int main(int argc, char **argv)
 		print_tests();
 		ksft_exit_xpass();
 	}
-	if (opt_run_test != RUN_ALL_TESTS && opt_run_test >= ARRAY_SIZE(tests)) {
+	if (opt_run_test != RUN_ALL_TESTS && opt_run_test >= total_tests) {
 		ksft_print_msg("Error: test %u does not exist.\n", opt_run_test);
 		ksft_exit_xfail();
 	}
@@ -2830,10 +385,13 @@ int main(int argc, char **argv)
 		ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending;
 	}
 
-	init_iface(ifobj_rx, worker_testapp_validate_rx);
-	init_iface(ifobj_tx, worker_testapp_validate_tx);
+	if (init_iface(ifobj_rx, worker_testapp_validate_rx) ||
+	    init_iface(ifobj_tx, worker_testapp_validate_tx)) {
+		ksft_print_msg("Error : can't initialize interfaces\n");
+		ksft_exit_xfail();
+	}
 
-	test_spec_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
+	test_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
 	tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
 	rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
 	if (!tx_pkt_stream_default || !rx_pkt_stream_default)
@@ -2842,7 +400,7 @@ int main(int argc, char **argv)
 	test.rx_pkt_stream_default = rx_pkt_stream_default;
 
 	if (opt_run_test == RUN_ALL_TESTS)
-		nb_tests = ARRAY_SIZE(tests);
+		nb_tests = total_tests;
 	else
 		nb_tests = 1;
 	if (opt_mode == TEST_MODE_ALL) {
@@ -2864,11 +422,15 @@ int main(int argc, char **argv)
 		if (opt_mode != TEST_MODE_ALL && i != opt_mode)
 			continue;
 
-		for (j = 0; j < ARRAY_SIZE(tests); j++) {
+		for (j = 0; j < total_tests; j++) {
 			if (opt_run_test != RUN_ALL_TESTS && j != opt_run_test)
 				continue;
 
-			test_spec_init(&test, ifobj_tx, ifobj_rx, i, &tests[j]);
+			if (j < ARRAY_SIZE(tests))
+				test_init(&test, ifobj_tx, ifobj_rx, i, &tests[j]);
+			else
+				test_init(&test, ifobj_tx, ifobj_rx, i,
+					  &ci_skip_tests[j - ARRAY_SIZE(tests)]);
 
 			run_pkt_test(&test);
 			usleep(USLEEP_MAX);
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 4df3a5d329ac..3ca518df23ad 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -22,169 +22,13 @@
 #define PF_XDP AF_XDP
 #endif
 
-#ifndef SO_BUSY_POLL_BUDGET
-#define SO_BUSY_POLL_BUDGET 70
-#endif
-
-#ifndef SO_PREFER_BUSY_POLL
-#define SO_PREFER_BUSY_POLL 69
-#endif
-
-#define TEST_PASS 0
-#define TEST_FAILURE -1
-#define TEST_CONTINUE 1
-#define TEST_SKIP 2
-#define MAX_INTERFACES 2
-#define MAX_INTERFACE_NAME_CHARS 16
-#define MAX_TEST_NAME_SIZE 48
 #define MAX_TEARDOWN_ITER 10
-#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
-#define MIN_PKT_SIZE 64
-#define MAX_ETH_PKT_SIZE 1518
 #define MAX_ETH_JUMBO_SIZE 9000
-#define USLEEP_MAX 10000
 #define SOCK_RECONF_CTR 10
-#define DEFAULT_BATCH_SIZE 64
-#define POLL_TMOUT 1000
-#define THREAD_TMOUT 3
-#define DEFAULT_PKT_CNT (4 * 1024)
-#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
 #define RX_FULL_RXQSIZE 32
 #define UMEM_HEADROOM_TEST_SIZE 128
 #define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1)
-#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
-#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
-#define XSK_DESC__INVALID_OPTION (0xffff)
-#define HUGEPAGE_SIZE (2 * 1024 * 1024)
-#define PKT_DUMP_NB_TO_PRINT 16
 #define RUN_ALL_TESTS UINT_MAX
 #define NUM_MAC_ADDRESSES 4
 
-#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
-
-enum test_mode {
-	TEST_MODE_SKB,
-	TEST_MODE_DRV,
-	TEST_MODE_ZC,
-	TEST_MODE_ALL
-};
-
-struct xsk_umem_info {
-	struct xsk_ring_prod fq;
-	struct xsk_ring_cons cq;
-	struct xsk_umem *umem;
-	u64 next_buffer;
-	u32 num_frames;
-	u32 frame_headroom;
-	void *buffer;
-	u32 frame_size;
-	u32 base_addr;
-	u32 fill_size;
-	u32 comp_size;
-	bool unaligned_mode;
-};
-
-struct xsk_socket_info {
-	struct xsk_ring_cons rx;
-	struct xsk_ring_prod tx;
-	struct xsk_umem_info *umem;
-	struct xsk_socket *xsk;
-	struct pkt_stream *pkt_stream;
-	u32 outstanding_tx;
-	u32 rxqsize;
-	u32 batch_size;
-	u8 dst_mac[ETH_ALEN];
-	u8 src_mac[ETH_ALEN];
-	bool check_consumer;
-};
-
-struct pkt {
-	int offset;
-	u32 len;
-	u32 pkt_nb;
-	bool valid;
-	u16 options;
-};
-
-struct pkt_stream {
-	u32 nb_pkts;
-	u32 current_pkt_nb;
-	struct pkt *pkts;
-	u32 max_pkt_len;
-	u32 nb_rx_pkts;
-	u32 nb_valid_entries;
-	bool verbatim;
-};
-
-struct set_hw_ring {
-	u32 default_tx;
-	u32 default_rx;
-};
-
-struct ifobject;
-struct test_spec;
-typedef int (*validation_func_t)(struct ifobject *ifobj);
-typedef void *(*thread_func_t)(void *arg);
-typedef int (*test_func_t)(struct test_spec *test);
-
-struct ifobject {
-	char ifname[MAX_INTERFACE_NAME_CHARS];
-	struct xsk_socket_info *xsk;
-	struct xsk_socket_info *xsk_arr;
-	struct xsk_umem_info *umem;
-	thread_func_t func_ptr;
-	validation_func_t validation_func;
-	struct xsk_xdp_progs *xdp_progs;
-	struct bpf_map *xskmap;
-	struct bpf_program *xdp_prog;
-	struct ethtool_ringparam ring;
-	struct set_hw_ring set_ring;
-	enum test_mode mode;
-	int ifindex;
-	int mtu;
-	u32 bind_flags;
-	u32 xdp_zc_max_segs;
-	bool tx_on;
-	bool rx_on;
-	bool use_poll;
-	bool busy_poll;
-	bool use_fill_ring;
-	bool release_rx;
-	bool shared_umem;
-	bool use_metadata;
-	bool unaligned_supp;
-	bool multi_buff_supp;
-	bool multi_buff_zc_supp;
-	bool hw_ring_size_supp;
-};
-
-struct test_spec {
-	struct ifobject *ifobj_tx;
-	struct ifobject *ifobj_rx;
-	struct pkt_stream *tx_pkt_stream_default;
-	struct pkt_stream *rx_pkt_stream_default;
-	struct bpf_program *xdp_prog_rx;
-	struct bpf_program *xdp_prog_tx;
-	struct bpf_map *xskmap_rx;
-	struct bpf_map *xskmap_tx;
-	test_func_t test_func;
-	int mtu;
-	u16 total_steps;
-	u16 current_step;
-	u16 nb_sockets;
-	bool fail;
-	bool set_ring;
-	bool adjust_tail;
-	bool adjust_tail_support;
-	enum test_mode mode;
-	char name[MAX_TEST_NAME_SIZE];
-};
-
-pthread_barrier_t barr;
-pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-int pkts_in_flight;
-
-static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00};
-
 #endif /* XSKXCEIVER_H_ */
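The surviving main() loop above now drives two test tables through a single flat index: a j below ARRAY_SIZE(tests) selects from tests[], and anything past that is offset into ci_skip_tests[] (print_tests() numbers both ranges the same way, and total_tests bounds the requested test id). A minimal standalone sketch of that two-table dispatch; the demo_* names are hypothetical stand-ins, not the selftest's actual tables:

#include <stdio.h>

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

struct demo_test { const char *name; };

/* Hypothetical stand-ins for the patch's 'tests' and 'ci_skip_tests' tables */
static const struct demo_test demo_tests[] = { {"SEND_RECEIVE"}, {"POLL_RX"} };
static const struct demo_test demo_ci_skip_tests[] = { {"HW_SW_MAX_RING_SIZE"} };

int main(void)
{
	const size_t total_tests = ARRAY_SIZE(demo_tests) + ARRAY_SIZE(demo_ci_skip_tests);
	size_t j;

	/* Low indices hit the first table; the remainder map into the second */
	for (j = 0; j < total_tests; j++) {
		const struct demo_test *t = j < ARRAY_SIZE(demo_tests) ?
			&demo_tests[j] :
			&demo_ci_skip_tests[j - ARRAY_SIZE(demo_tests)];

		printf("%zu: %s\n", j, t->name);
	}

	return 0;
}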
