diff options
Diffstat (limited to 'tools/testing/selftests')
95 files changed, 8339 insertions, 634 deletions
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index b89eb87034e4..a02a085e7f32 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -4,6 +4,8 @@ bloom_filter_map # failed to find kernel BTF type ID of bpf_cookie # failed to open_and_load program: -524 (trampoline) bpf_loop # attaches to __x64_sys_nanosleep cgrp_local_storage # prog_attach unexpected error: -524 (trampoline) +dynptr/test_dynptr_skb_data +dynptr/test_skb_readonly fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index b677dcd0b77a..16f404aa1b23 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -338,7 +338,8 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ define get_sys_includes $(shell $(1) $(2) -v -E - </dev/null 2>&1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') \ +$(shell $(1) $(2) -dM -E - </dev/null | grep '__loongarch_grlen ' | awk '{printf("-D__BITS_PER_LONG=%d", $$3)}') endef # Determine target endianness. @@ -356,7 +357,7 @@ BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(abspath $(OUTPUT)/../usr/include) CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ - -Wno-compare-distinct-pointer-types + -Wno-compare-distinct-pointer-types -Wuninitialized $(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline @@ -558,7 +559,7 @@ TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ btf_helpers.c flow_dissector_load.h \ - cap_helpers.c test_loader.c xsk.c + cap_helpers.c test_loader.c xsk.c disasm.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h new file mode 100644 index 000000000000..8c993ec8ceea --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -0,0 +1,38 @@ +#ifndef __BPF_KFUNCS__ +#define __BPF_KFUNCS__ + +/* Description + * Initializes an skb-type dynptr + * Returns + * Error code + */ +extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, + struct bpf_dynptr *ptr__uninit) __ksym; + +/* Description + * Initializes an xdp-type dynptr + * Returns + * Error code + */ +extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags, + struct bpf_dynptr *ptr__uninit) __ksym; + +/* Description + * Obtain a read-only pointer to the dynptr's data + * Returns + * Either a direct pointer to the dynptr data or a pointer to the user-provided + * buffer if unable to obtain a direct pointer + */ +extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, + void *buffer, __u32 buffer__szk) __ksym; + +/* Description + * Obtain a read-write pointer to the dynptr's data + * Returns + * Either a direct pointer to the dynptr data or a pointer to the user-provided + * buffer if unable to obtain a direct pointer + */ +extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset, + void *buffer, __u32 buffer__szk) __ksym; + +#endif diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index 1f0437644186..253821494884 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -176,6 +176,8 @@ CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_NET=y CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y +CONFIG_VSOCKETS_LOOPBACK=y CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index d49f6170e7bd..2ba92167be35 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -140,5 +140,8 @@ CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_BLK=y CONFIG_VIRTIO_NET=y CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y +CONFIG_VSOCKETS=y +CONFIG_VSOCKETS_LOOPBACK=y CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index dd97d61d325c..b650b2e617b8 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -234,7 +234,10 @@ CONFIG_VIRTIO_BLK=y CONFIG_VIRTIO_CONSOLE=y CONFIG_VIRTIO_NET=y CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y +CONFIG_VSOCKETS=y +CONFIG_VSOCKETS_LOOPBACK=y CONFIG_X86_ACPI_CPUFREQ=y CONFIG_X86_CPUID=y CONFIG_X86_MSR=y diff --git a/tools/testing/selftests/bpf/disasm.c b/tools/testing/selftests/bpf/disasm.c new file mode 120000 index 000000000000..b1571927bd54 --- /dev/null +++ b/tools/testing/selftests/bpf/disasm.c @@ -0,0 +1 @@ +../../../../kernel/bpf/disasm.c
\ No newline at end of file diff --git a/tools/testing/selftests/bpf/disasm.h b/tools/testing/selftests/bpf/disasm.h new file mode 120000 index 000000000000..8054fd497340 --- /dev/null +++ b/tools/testing/selftests/bpf/disasm.h @@ -0,0 +1 @@ +../../../../kernel/bpf/disasm.h
\ No newline at end of file diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 4666f88f2bb4..c94fa8d6c4f6 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -660,16 +660,22 @@ static int do_test_single(struct bpf_align_test *test) * func#0 @0 * 0: R1=ctx(off=0,imm=0) R10=fp0 * 0: (b7) r3 = 2 ; R3_w=2 + * + * Sometimes it's actually two lines below, e.g. when + * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))": + * from 4 to 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 + * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 + * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) */ - if (!strstr(line_ptr, m.match)) { + while (!strstr(line_ptr, m.match)) { cur_line = -1; line_ptr = strtok(NULL, "\n"); - sscanf(line_ptr, "%u: ", &cur_line); + sscanf(line_ptr ?: "", "%u: ", &cur_line); + if (!line_ptr || cur_line != m.line) + break; } - if (cur_line != m.line || !line_ptr || - !strstr(line_ptr, m.match)) { - printf("Failed to find match %u: %s\n", - m.line, m.match); + if (cur_line != m.line || !line_ptr || !strstr(line_ptr, m.match)) { + printf("Failed to find match %u: %s\n", m.line, m.match); ret = 1; printf("%s", bpf_vlog); break; diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 56374c8b5436..7175af39134f 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include "test_attach_kprobe_sleepable.skel.h" +#include "test_attach_probe_manual.skel.h" #include "test_attach_probe.skel.h" /* this is how USDT semaphore is actually defined, except volatile modifier */ @@ -23,81 +25,54 @@ static noinline void trigger_func3(void) asm volatile (""); } +/* attach point for ref_ctr */ +static noinline void trigger_func4(void) +{ + asm volatile (""); +} + static char test_data[] = "test_data"; -void test_attach_probe(void) +/* manual attach kprobe/kretprobe/uprobe/uretprobe testings */ +static void test_attach_probe_manual(enum probe_attach_mode attach_mode) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); struct bpf_link *kprobe_link, *kretprobe_link; struct bpf_link *uprobe_link, *uretprobe_link; - struct test_attach_probe* skel; - ssize_t uprobe_offset, ref_ctr_offset; - struct bpf_link *uprobe_err_link; - FILE *devnull; - bool legacy; - - /* Check if new-style kprobe/uprobe API is supported. - * Kernels that support new FD-based kprobe and uprobe BPF attachment - * through perf_event_open() syscall expose - * /sys/bus/event_source/devices/kprobe/type and - * /sys/bus/event_source/devices/uprobe/type files, respectively. They - * contain magic numbers that are passed as "type" field of - * perf_event_attr. Lack of such file in the system indicates legacy - * kernel with old-style kprobe/uprobe attach interface through - * creating per-probe event through tracefs. For such cases - * ref_ctr_offset feature is not supported, so we don't test it. - */ - legacy = access("/sys/bus/event_source/devices/kprobe/type", F_OK) != 0; + struct test_attach_probe_manual *skel; + ssize_t uprobe_offset; - uprobe_offset = get_uprobe_offset(&trigger_func); - if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset")) + skel = test_attach_probe_manual__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load")) return; - ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr); - if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) - return; - - skel = test_attach_probe__open(); - if (!ASSERT_OK_PTR(skel, "skel_open")) - return; - - /* sleepable kprobe test case needs flags set before loading */ - if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable, - BPF_F_SLEEPABLE), "kprobe_sleepable_flags")) - goto cleanup; - - if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load")) - goto cleanup; - if (!ASSERT_OK_PTR(skel->bss, "check_bss")) + uprobe_offset = get_uprobe_offset(&trigger_func); + if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset")) goto cleanup; /* manual-attach kprobe/kretprobe */ - kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe, - false /* retprobe */, - SYS_NANOSLEEP_KPROBE_NAME); + kprobe_opts.attach_mode = attach_mode; + kprobe_opts.retprobe = false; + kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe, + SYS_NANOSLEEP_KPROBE_NAME, + &kprobe_opts); if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe")) goto cleanup; skel->links.handle_kprobe = kprobe_link; - kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe, - true /* retprobe */, - SYS_NANOSLEEP_KPROBE_NAME); + kprobe_opts.retprobe = true; + kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe, + SYS_NANOSLEEP_KPROBE_NAME, + &kprobe_opts); if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe")) goto cleanup; skel->links.handle_kretprobe = kretprobe_link; - /* auto-attachable kprobe and kretprobe */ - skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto); - ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto"); - - skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto); - ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto"); - - if (!legacy) - ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before"); - + /* manual-attach uprobe/uretprobe */ + uprobe_opts.attach_mode = attach_mode; + uprobe_opts.ref_ctr_offset = 0; uprobe_opts.retprobe = false; - uprobe_opts.ref_ctr_offset = legacy ? 0 : ref_ctr_offset; uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, 0 /* self pid */, "/proc/self/exe", @@ -107,12 +82,7 @@ void test_attach_probe(void) goto cleanup; skel->links.handle_uprobe = uprobe_link; - if (!legacy) - ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after"); - - /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */ uprobe_opts.retprobe = true; - uprobe_opts.ref_ctr_offset = legacy ? 0 : ref_ctr_offset; uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, -1 /* any pid */, "/proc/self/exe", @@ -121,12 +91,7 @@ void test_attach_probe(void) goto cleanup; skel->links.handle_uretprobe = uretprobe_link; - /* verify auto-attach fails for old-style uprobe definition */ - uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname); - if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP, - "auto-attach should fail for old-style name")) - goto cleanup; - + /* attach uprobe by function name manually */ uprobe_opts.func_name = "trigger_func2"; uprobe_opts.retprobe = false; uprobe_opts.ref_ctr_offset = 0; @@ -138,11 +103,63 @@ void test_attach_probe(void) if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname, "attach_uprobe_byname")) goto cleanup; + /* trigger & validate kprobe && kretprobe */ + usleep(1); + + /* trigger & validate uprobe & uretprobe */ + trigger_func(); + + /* trigger & validate uprobe attached by name */ + trigger_func2(); + + ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res"); + ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res"); + ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res"); + ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res"); + ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res"); + +cleanup: + test_attach_probe_manual__destroy(skel); +} + +static void test_attach_probe_auto(struct test_attach_probe *skel) +{ + struct bpf_link *uprobe_err_link; + + /* auto-attachable kprobe and kretprobe */ + skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto); + ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto"); + + skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto); + ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto"); + + /* verify auto-attach fails for old-style uprobe definition */ + uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname); + if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP, + "auto-attach should fail for old-style name")) + return; + /* verify auto-attach works */ skel->links.handle_uretprobe_byname = bpf_program__attach(skel->progs.handle_uretprobe_byname); if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname, "attach_uretprobe_byname")) - goto cleanup; + return; + + /* trigger & validate kprobe && kretprobe */ + usleep(1); + + /* trigger & validate uprobe attached by name */ + trigger_func2(); + + ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res"); + ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res"); + ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res"); +} + +static void test_uprobe_lib(struct test_attach_probe *skel) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + FILE *devnull; /* test attach by name for a library function, using the library * as the binary argument. libc.so.6 will be resolved via dlopen()/dlinfo(). @@ -155,7 +172,7 @@ void test_attach_probe(void) "libc.so.6", 0, &uprobe_opts); if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname2, "attach_uprobe_byname2")) - goto cleanup; + return; uprobe_opts.func_name = "fclose"; uprobe_opts.retprobe = true; @@ -165,62 +182,144 @@ void test_attach_probe(void) "libc.so.6", 0, &uprobe_opts); if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2")) + return; + + /* trigger & validate shared library u[ret]probes attached by name */ + devnull = fopen("/dev/null", "r"); + fclose(devnull); + + ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res"); + ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res"); +} + +static void test_uprobe_ref_ctr(struct test_attach_probe *skel) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct bpf_link *uprobe_link, *uretprobe_link; + ssize_t uprobe_offset, ref_ctr_offset; + + uprobe_offset = get_uprobe_offset(&trigger_func4); + if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset_ref_ctr")) + return; + + ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr); + if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) + return; + + ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before"); + + uprobe_opts.retprobe = false; + uprobe_opts.ref_ctr_offset = ref_ctr_offset; + uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_ref_ctr, + 0 /* self pid */, + "/proc/self/exe", + uprobe_offset, + &uprobe_opts); + if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_ref_ctr")) + return; + skel->links.handle_uprobe_ref_ctr = uprobe_link; + + ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after"); + + /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */ + uprobe_opts.retprobe = true; + uprobe_opts.ref_ctr_offset = ref_ctr_offset; + uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_ref_ctr, + -1 /* any pid */, + "/proc/self/exe", + uprobe_offset, &uprobe_opts); + if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_ref_ctr")) + return; + skel->links.handle_uretprobe_ref_ctr = uretprobe_link; +} + +static void test_kprobe_sleepable(void) +{ + struct test_attach_kprobe_sleepable *skel; + + skel = test_attach_kprobe_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_kprobe_sleepable_open")) + return; + + /* sleepable kprobe test case needs flags set before loading */ + if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable, + BPF_F_SLEEPABLE), "kprobe_sleepable_flags")) + goto cleanup; + + if (!ASSERT_OK(test_attach_kprobe_sleepable__load(skel), + "skel_kprobe_sleepable_load")) goto cleanup; /* sleepable kprobes should not attach successfully */ skel->links.handle_kprobe_sleepable = bpf_program__attach(skel->progs.handle_kprobe_sleepable); - if (!ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable")) - goto cleanup; + ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable"); +cleanup: + test_attach_kprobe_sleepable__destroy(skel); +} + +static void test_uprobe_sleepable(struct test_attach_probe *skel) +{ /* test sleepable uprobe and uretprobe variants */ skel->links.handle_uprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uprobe_byname3_sleepable); if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3_sleepable, "attach_uprobe_byname3_sleepable")) - goto cleanup; + return; skel->links.handle_uprobe_byname3 = bpf_program__attach(skel->progs.handle_uprobe_byname3); if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3, "attach_uprobe_byname3")) - goto cleanup; + return; skel->links.handle_uretprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uretprobe_byname3_sleepable); if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3_sleepable, "attach_uretprobe_byname3_sleepable")) - goto cleanup; + return; skel->links.handle_uretprobe_byname3 = bpf_program__attach(skel->progs.handle_uretprobe_byname3); if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3, "attach_uretprobe_byname3")) - goto cleanup; + return; skel->bss->user_ptr = test_data; - /* trigger & validate kprobe && kretprobe */ - usleep(1); - - /* trigger & validate shared library u[ret]probes attached by name */ - devnull = fopen("/dev/null", "r"); - fclose(devnull); - - /* trigger & validate uprobe & uretprobe */ - trigger_func(); - - /* trigger & validate uprobe attached by name */ - trigger_func2(); - /* trigger & validate sleepable uprobe attached by name */ trigger_func3(); - ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res"); - ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res"); - ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res"); - ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res"); - ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res"); - ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res"); - ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res"); - ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res"); - ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res"); - ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res"); ASSERT_EQ(skel->bss->uprobe_byname3_sleepable_res, 9, "check_uprobe_byname3_sleepable_res"); ASSERT_EQ(skel->bss->uprobe_byname3_res, 10, "check_uprobe_byname3_res"); ASSERT_EQ(skel->bss->uretprobe_byname3_sleepable_res, 11, "check_uretprobe_byname3_sleepable_res"); ASSERT_EQ(skel->bss->uretprobe_byname3_res, 12, "check_uretprobe_byname3_res"); +} + +void test_attach_probe(void) +{ + struct test_attach_probe *skel; + + skel = test_attach_probe__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load")) + goto cleanup; + if (!ASSERT_OK_PTR(skel->bss, "check_bss")) + goto cleanup; + + if (test__start_subtest("manual-default")) + test_attach_probe_manual(PROBE_ATTACH_MODE_DEFAULT); + if (test__start_subtest("manual-legacy")) + test_attach_probe_manual(PROBE_ATTACH_MODE_LEGACY); + if (test__start_subtest("manual-perf")) + test_attach_probe_manual(PROBE_ATTACH_MODE_PERF); + if (test__start_subtest("manual-link")) + test_attach_probe_manual(PROBE_ATTACH_MODE_LINK); + + if (test__start_subtest("auto")) + test_attach_probe_auto(skel); + if (test__start_subtest("kprobe-sleepable")) + test_kprobe_sleepable(); + if (test__start_subtest("uprobe-lib")) + test_uprobe_lib(skel); + if (test__start_subtest("uprobe-sleepable")) + test_uprobe_sleepable(skel); + if (test__start_subtest("uprobe-ref_ctr")) + test_uprobe_ref_ctr(skel); cleanup: test_attach_probe__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c index b3f7985c8504..adda85f97058 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c @@ -84,6 +84,7 @@ static const char * const success_tests[] = { "test_cgrp_xchg_release", "test_cgrp_get_release", "test_cgrp_get_ancestors", + "test_cgrp_from_id", }; void test_cgrp_kfunc(void) diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 2cc759956e3b..63e776f4176e 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -193,7 +193,7 @@ out: cgrp_ls_sleepable__destroy(skel); } -static void test_no_rcu_lock(__u64 cgroup_id) +static void test_yes_rcu_lock(__u64 cgroup_id) { struct cgrp_ls_sleepable *skel; int err; @@ -204,7 +204,7 @@ static void test_no_rcu_lock(__u64 cgroup_id) skel->bss->target_pid = syscall(SYS_gettid); - bpf_program__set_autoload(skel->progs.no_rcu_lock, true); + bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); err = cgrp_ls_sleepable__load(skel); if (!ASSERT_OK(err, "skel_load")) goto out; @@ -220,7 +220,7 @@ out: cgrp_ls_sleepable__destroy(skel); } -static void test_rcu_lock(void) +static void test_no_rcu_lock(void) { struct cgrp_ls_sleepable *skel; int err; @@ -229,7 +229,7 @@ static void test_rcu_lock(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); + bpf_program__set_autoload(skel->progs.no_rcu_lock, true); err = cgrp_ls_sleepable__load(skel); ASSERT_ERR(err, "skel_load"); @@ -256,10 +256,10 @@ void test_cgrp_local_storage(void) test_negative(); if (test__start_subtest("cgroup_iter_sleepable")) test_cgroup_iter_sleepable(cgroup_fd, cgroup_id); + if (test__start_subtest("yes_rcu_lock")) + test_yes_rcu_lock(cgroup_id); if (test__start_subtest("no_rcu_lock")) - test_no_rcu_lock(cgroup_id); - if (test__start_subtest("rcu_lock")) - test_rcu_lock(); + test_no_rcu_lock(); close(cgroup_fd); } diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index 224f016b0a53..2a55f717fc07 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -13,6 +13,7 @@ #include "progs/test_cls_redirect.h" #include "test_cls_redirect.skel.h" +#include "test_cls_redirect_dynptr.skel.h" #include "test_cls_redirect_subprogs.skel.h" #define ENCAP_IP INADDR_LOOPBACK @@ -446,6 +447,28 @@ cleanup: close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0])); } +static void test_cls_redirect_dynptr(void) +{ + struct test_cls_redirect_dynptr *skel; + int err; + + skel = test_cls_redirect_dynptr__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP); + skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT); + + err = test_cls_redirect_dynptr__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + test_cls_redirect_common(skel->progs.cls_redirect); + +cleanup: + test_cls_redirect_dynptr__destroy(skel); +} + static void test_cls_redirect_inlined(void) { struct test_cls_redirect *skel; @@ -496,4 +519,6 @@ void test_cls_redirect(void) test_cls_redirect_inlined(); if (test__start_subtest("cls_redirect_subprogs")) test_cls_redirect_subprogs(); + if (test__start_subtest("cls_redirect_dynptr")) + test_cls_redirect_dynptr(); } diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c new file mode 100644 index 000000000000..d5fe3d4b936c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c @@ -0,0 +1,917 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <limits.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <regex.h> +#include <test_progs.h> + +#include "bpf/btf.h" +#include "bpf_util.h" +#include "linux/filter.h" +#include "disasm.h" + +#define MAX_PROG_TEXT_SZ (32 * 1024) + +/* The code in this file serves the sole purpose of executing test cases + * specified in the test_cases array. Each test case specifies a program + * type, context field offset, and disassembly patterns that correspond + * to read and write instructions generated by + * verifier.c:convert_ctx_access() for accessing that field. + * + * For each test case, up to three programs are created: + * - One that uses BPF_LDX_MEM to read the context field. + * - One that uses BPF_STX_MEM to write to the context field. + * - One that uses BPF_ST_MEM to write to the context field. + * + * The disassembly of each program is then compared with the pattern + * specified in the test case. + */ +struct test_case { + char *name; + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + int field_offset; + int field_sz; + /* Program generated for BPF_ST_MEM uses value 42 by default, + * this field allows to specify custom value. + */ + struct { + bool use; + int value; + } st_value; + /* Pattern for BPF_LDX_MEM(field_sz, dst, ctx, field_offset) */ + char *read; + /* Pattern for BPF_STX_MEM(field_sz, ctx, src, field_offset) and + * BPF_ST_MEM (field_sz, ctx, src, field_offset) + */ + char *write; + /* Pattern for BPF_ST_MEM(field_sz, ctx, src, field_offset), + * takes priority over `write`. + */ + char *write_st; + /* Pattern for BPF_STX_MEM (field_sz, ctx, src, field_offset), + * takes priority over `write`. + */ + char *write_stx; +}; + +#define N(_prog_type, type, field, name_extra...) \ + .name = #_prog_type "." #field name_extra, \ + .prog_type = BPF_PROG_TYPE_##_prog_type, \ + .field_offset = offsetof(type, field), \ + .field_sz = sizeof(typeof(((type *)NULL)->field)) + +static struct test_case test_cases[] = { +/* Sign extension on s390 changes the pattern */ +#if defined(__x86_64__) || defined(__aarch64__) + { + N(SCHED_CLS, struct __sk_buff, tstamp), + .read = "r11 = *(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset);" + "w11 &= 160;" + "if w11 != 0xa0 goto pc+2;" + "$dst = 0;" + "goto pc+1;" + "$dst = *(u64 *)($ctx + sk_buff::tstamp);", + .write = "r11 = *(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset);" + "if w11 & 0x80 goto pc+1;" + "goto pc+2;" + "w11 &= -33;" + "*(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset) = r11;" + "*(u64 *)($ctx + sk_buff::tstamp) = $src;", + }, +#endif + { + N(SCHED_CLS, struct __sk_buff, priority), + .read = "$dst = *(u32 *)($ctx + sk_buff::priority);", + .write = "*(u32 *)($ctx + sk_buff::priority) = $src;", + }, + { + N(SCHED_CLS, struct __sk_buff, mark), + .read = "$dst = *(u32 *)($ctx + sk_buff::mark);", + .write = "*(u32 *)($ctx + sk_buff::mark) = $src;", + }, + { + N(SCHED_CLS, struct __sk_buff, cb[0]), + .read = "$dst = *(u32 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::data));", + .write = "*(u32 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::data)) = $src;", + }, + { + N(SCHED_CLS, struct __sk_buff, tc_classid), + .read = "$dst = *(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid));", + .write = "*(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid)) = $src;", + }, + { + N(SCHED_CLS, struct __sk_buff, tc_index), + .read = "$dst = *(u16 *)($ctx + sk_buff::tc_index);", + .write = "*(u16 *)($ctx + sk_buff::tc_index) = $src;", + }, + { + N(SCHED_CLS, struct __sk_buff, queue_mapping), + .read = "$dst = *(u16 *)($ctx + sk_buff::queue_mapping);", + .write_stx = "if $src >= 0xffff goto pc+1;" + "*(u16 *)($ctx + sk_buff::queue_mapping) = $src;", + .write_st = "*(u16 *)($ctx + sk_buff::queue_mapping) = $src;", + }, + { + /* This is a corner case in filter.c:bpf_convert_ctx_access() */ + N(SCHED_CLS, struct __sk_buff, queue_mapping, ".ushrt_max"), + .st_value = { true, USHRT_MAX }, + .write_st = "goto pc+0;", + }, + { + N(CGROUP_SOCK, struct bpf_sock, bound_dev_if), + .read = "$dst = *(u32 *)($ctx + sock_common::skc_bound_dev_if);", + .write = "*(u32 *)($ctx + sock_common::skc_bound_dev_if) = $src;", + }, + { + N(CGROUP_SOCK, struct bpf_sock, mark), + .read = "$dst = *(u32 *)($ctx + sock::sk_mark);", + .write = "*(u32 *)($ctx + sock::sk_mark) = $src;", + }, + { + N(CGROUP_SOCK, struct bpf_sock, priority), + .read = "$dst = *(u32 *)($ctx + sock::sk_priority);", + .write = "*(u32 *)($ctx + sock::sk_priority) = $src;", + }, + { + N(SOCK_OPS, struct bpf_sock_ops, replylong[0]), + .read = "$dst = *(u32 *)($ctx + bpf_sock_ops_kern::replylong);", + .write = "*(u32 *)($ctx + bpf_sock_ops_kern::replylong) = $src;", + }, + { + N(CGROUP_SYSCTL, struct bpf_sysctl, file_pos), +#if __BYTE_ORDER == __LITTLE_ENDIAN + .read = "$dst = *(u64 *)($ctx + bpf_sysctl_kern::ppos);" + "$dst = *(u32 *)($dst +0);", + .write = "*(u64 *)($ctx + bpf_sysctl_kern::tmp_reg) = r9;" + "r9 = *(u64 *)($ctx + bpf_sysctl_kern::ppos);" + "*(u32 *)(r9 +0) = $src;" + "r9 = *(u64 *)($ctx + bpf_sysctl_kern::tmp_reg);", +#else + .read = "$dst = *(u64 *)($ctx + bpf_sysctl_kern::ppos);" + "$dst = *(u32 *)($dst +4);", + .write = "*(u64 *)($ctx + bpf_sysctl_kern::tmp_reg) = r9;" + "r9 = *(u64 *)($ctx + bpf_sysctl_kern::ppos);" + "*(u32 *)(r9 +4) = $src;" + "r9 = *(u64 *)($ctx + bpf_sysctl_kern::tmp_reg);", +#endif + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, sk), + .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::sk);", + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, level), + .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::level);", + .write = "*(u32 *)($ctx + bpf_sockopt_kern::level) = $src;", + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, optname), + .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::optname);", + .write = "*(u32 *)($ctx + bpf_sockopt_kern::optname) = $src;", + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, optlen), + .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::optlen);", + .write = "*(u32 *)($ctx + bpf_sockopt_kern::optlen) = $src;", + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, retval), + .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::current_task);" + "$dst = *(u64 *)($dst + task_struct::bpf_ctx);" + "$dst = *(u32 *)($dst + bpf_cg_run_ctx::retval);", + .write = "*(u64 *)($ctx + bpf_sockopt_kern::tmp_reg) = r9;" + "r9 = *(u64 *)($ctx + bpf_sockopt_kern::current_task);" + "r9 = *(u64 *)(r9 + task_struct::bpf_ctx);" + "*(u32 *)(r9 + bpf_cg_run_ctx::retval) = $src;" + "r9 = *(u64 *)($ctx + bpf_sockopt_kern::tmp_reg);", + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, optval), + .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::optval);", + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, optval_end), + .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::optval_end);", + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + }, +}; + +#undef N + +static regex_t *ident_regex; +static regex_t *field_regex; + +static char *skip_space(char *str) +{ + while (*str && isspace(*str)) + ++str; + return str; +} + +static char *skip_space_and_semi(char *str) +{ + while (*str && (isspace(*str) || *str == ';')) + ++str; + return str; +} + +static char *match_str(char *str, char *prefix) +{ + while (*str && *prefix && *str == *prefix) { + ++str; + ++prefix; + } + if (*prefix) + return NULL; + return str; +} + +static char *match_number(char *str, int num) +{ + char *next; + int snum = strtol(str, &next, 10); + + if (next - str == 0 || num != snum) + return NULL; + + return next; +} + +static int find_field_offset_aux(struct btf *btf, int btf_id, char *field_name, int off) +{ + const struct btf_type *type = btf__type_by_id(btf, btf_id); + const struct btf_member *m; + __u16 mnum; + int i; + + if (!type) { + PRINT_FAIL("Can't find btf_type for id %d\n", btf_id); + return -1; + } + + if (!btf_is_struct(type) && !btf_is_union(type)) { + PRINT_FAIL("BTF id %d is not struct or union\n", btf_id); + return -1; + } + + m = btf_members(type); + mnum = btf_vlen(type); + + for (i = 0; i < mnum; ++i, ++m) { + const char *mname = btf__name_by_offset(btf, m->name_off); + + if (strcmp(mname, "") == 0) { + int msize = find_field_offset_aux(btf, m->type, field_name, + off + m->offset); + if (msize >= 0) + return msize; + } + + if (strcmp(mname, field_name)) + continue; + + return (off + m->offset) / 8; + } + + return -1; +} + +static int find_field_offset(struct btf *btf, char *pattern, regmatch_t *matches) +{ + int type_sz = matches[1].rm_eo - matches[1].rm_so; + int field_sz = matches[2].rm_eo - matches[2].rm_so; + char *type = pattern + matches[1].rm_so; + char *field = pattern + matches[2].rm_so; + char field_str[128] = {}; + char type_str[128] = {}; + int btf_id, field_offset; + + if (type_sz >= sizeof(type_str)) { + PRINT_FAIL("Malformed pattern: type ident is too long: %d\n", type_sz); + return -1; + } + + if (field_sz >= sizeof(field_str)) { + PRINT_FAIL("Malformed pattern: field ident is too long: %d\n", field_sz); + return -1; + } + + strncpy(type_str, type, type_sz); + strncpy(field_str, field, field_sz); + btf_id = btf__find_by_name(btf, type_str); + if (btf_id < 0) { + PRINT_FAIL("No BTF info for type %s\n", type_str); + return -1; + } + + field_offset = find_field_offset_aux(btf, btf_id, field_str, 0); + if (field_offset < 0) { + PRINT_FAIL("No BTF info for field %s::%s\n", type_str, field_str); + return -1; + } + + return field_offset; +} + +static regex_t *compile_regex(char *pat) +{ + regex_t *re; + int err; + + re = malloc(sizeof(regex_t)); + if (!re) { + PRINT_FAIL("Can't alloc regex\n"); + return NULL; + } + + err = regcomp(re, pat, REG_EXTENDED); + if (err) { + char errbuf[512]; + + regerror(err, re, errbuf, sizeof(errbuf)); + PRINT_FAIL("Can't compile regex: %s\n", errbuf); + free(re); + return NULL; + } + + return re; +} + +static void free_regex(regex_t *re) +{ + if (!re) + return; + + regfree(re); + free(re); +} + +static u32 max_line_len(char *str) +{ + u32 max_line = 0; + char *next = str; + + while (next) { + next = strchr(str, '\n'); + if (next) { + max_line = max_t(u32, max_line, (next - str)); + str = next + 1; + } else { + max_line = max_t(u32, max_line, strlen(str)); + } + } + + return min(max_line, 60u); +} + +/* Print strings `pattern_origin` and `text_origin` side by side, + * assume `pattern_pos` and `text_pos` designate location within + * corresponding origin string where match diverges. + * The output should look like: + * + * Can't match disassembly(left) with pattern(right): + * r2 = *(u64 *)(r1 +0) ; $dst = *(u64 *)($ctx + bpf_sockopt_kern::sk1) + * ^ ^ + * r0 = 0 ; + * exit ; + */ +static void print_match_error(FILE *out, + char *pattern_origin, char *text_origin, + char *pattern_pos, char *text_pos) +{ + char *pattern = pattern_origin; + char *text = text_origin; + int middle = max_line_len(text) + 2; + + fprintf(out, "Can't match disassembly(left) with pattern(right):\n"); + while (*pattern || *text) { + int column = 0; + int mark1 = -1; + int mark2 = -1; + + /* Print one line from text */ + while (*text && *text != '\n') { + if (text == text_pos) + mark1 = column; + fputc(*text, out); + ++text; + ++column; + } + if (text == text_pos) + mark1 = column; + + /* Pad to the middle */ + while (column < middle) { + fputc(' ', out); + ++column; + } + fputs("; ", out); + column += 3; + + /* Print one line from pattern, pattern lines are terminated by ';' */ + while (*pattern && *pattern != ';') { + if (pattern == pattern_pos) + mark2 = column; + fputc(*pattern, out); + ++pattern; + ++column; + } + if (pattern == pattern_pos) + mark2 = column; + + fputc('\n', out); + if (*pattern) + ++pattern; + if (*text) + ++text; + + /* If pattern and text diverge at this line, print an + * additional line with '^' marks, highlighting + * positions where match fails. + */ + if (mark1 > 0 || mark2 > 0) { + for (column = 0; column <= max(mark1, mark2); ++column) { + if (column == mark1 || column == mark2) + fputc('^', out); + else + fputc(' ', out); + } + fputc('\n', out); + } + } +} + +/* Test if `text` matches `pattern`. Pattern consists of the following elements: + * + * - Field offset references: + * + * <type>::<field> + * + * When such reference is encountered BTF is used to compute numerical + * value for the offset of <field> in <type>. The `text` is expected to + * contain matching numerical value. + * + * - Field groups: + * + * $(<type>::<field> [+ <type>::<field>]*) + * + * Allows to specify an offset that is a sum of multiple field offsets. + * The `text` is expected to contain matching numerical value. + * + * - Variable references, e.g. `$src`, `$dst`, `$ctx`. + * These are substitutions specified in `reg_map` array. + * If a substring of pattern is equal to `reg_map[i][0]` the `text` is + * expected to contain `reg_map[i][1]` in the matching position. + * + * - Whitespace is ignored, ';' counts as whitespace for `pattern`. + * + * - Any other characters, `pattern` and `text` should match one-to-one. + * + * Example of a pattern: + * + * __________ fields group ________________ + * ' ' + * *(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid)) = $src; + * ^^^^ '______________________' + * variable reference field offset reference + */ +static bool match_pattern(struct btf *btf, char *pattern, char *text, char *reg_map[][2]) +{ + char *pattern_origin = pattern; + char *text_origin = text; + regmatch_t matches[3]; + +_continue: + while (*pattern) { + if (!*text) + goto err; + + /* Skip whitespace */ + if (isspace(*pattern) || *pattern == ';') { + if (!isspace(*text) && text != text_origin && isalnum(text[-1])) + goto err; + pattern = skip_space_and_semi(pattern); + text = skip_space(text); + continue; + } + + /* Check for variable references */ + for (int i = 0; reg_map[i][0]; ++i) { + char *pattern_next, *text_next; + + pattern_next = match_str(pattern, reg_map[i][0]); + if (!pattern_next) + continue; + + text_next = match_str(text, reg_map[i][1]); + if (!text_next) + goto err; + + pattern = pattern_next; + text = text_next; + goto _continue; + } + + /* Match field group: + * $(sk_buff::cb + qdisc_skb_cb::tc_classid) + */ + if (strncmp(pattern, "$(", 2) == 0) { + char *group_start = pattern, *text_next; + int acc_offset = 0; + + pattern += 2; + + for (;;) { + int field_offset; + + pattern = skip_space(pattern); + if (!*pattern) { + PRINT_FAIL("Unexpected end of pattern\n"); + goto err; + } + + if (*pattern == ')') { + ++pattern; + break; + } + + if (*pattern == '+') { + ++pattern; + continue; + } + + printf("pattern: %s\n", pattern); + if (regexec(field_regex, pattern, 3, matches, 0) != 0) { + PRINT_FAIL("Field reference expected\n"); + goto err; + } + + field_offset = find_field_offset(btf, pattern, matches); + if (field_offset < 0) + goto err; + + pattern += matches[0].rm_eo; + acc_offset += field_offset; + } + + text_next = match_number(text, acc_offset); + if (!text_next) { + PRINT_FAIL("No match for group offset %.*s (%d)\n", + (int)(pattern - group_start), + group_start, + acc_offset); + goto err; + } + text = text_next; + } + + /* Match field reference: + * sk_buff::cb + */ + if (regexec(field_regex, pattern, 3, matches, 0) == 0) { + int field_offset; + char *text_next; + + field_offset = find_field_offset(btf, pattern, matches); + if (field_offset < 0) + goto err; + + text_next = match_number(text, field_offset); + if (!text_next) { + PRINT_FAIL("No match for field offset %.*s (%d)\n", + (int)matches[0].rm_eo, pattern, field_offset); + goto err; + } + + pattern += matches[0].rm_eo; + text = text_next; + continue; + } + + /* If pattern points to identifier not followed by '::' + * skip the identifier to avoid n^2 application of the + * field reference rule. + */ + if (regexec(ident_regex, pattern, 1, matches, 0) == 0) { + if (strncmp(pattern, text, matches[0].rm_eo) != 0) + goto err; + + pattern += matches[0].rm_eo; + text += matches[0].rm_eo; + continue; + } + + /* Match literally */ + if (*pattern != *text) + goto err; + + ++pattern; + ++text; + } + + return true; + +err: + test__fail(); + print_match_error(stdout, pattern_origin, text_origin, pattern, text); + return false; +} + +/* Request BPF program instructions after all rewrites are applied, + * e.g. verifier.c:convert_ctx_access() is done. + */ +static int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + __u32 xlated_prog_len; + __u32 buf_element_size = sizeof(struct bpf_insn); + + if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("bpf_prog_get_info_by_fd failed"); + return -1; + } + + xlated_prog_len = info.xlated_prog_len; + if (xlated_prog_len % buf_element_size) { + printf("Program length %d is not multiple of %d\n", + xlated_prog_len, buf_element_size); + return -1; + } + + *cnt = xlated_prog_len / buf_element_size; + *buf = calloc(*cnt, buf_element_size); + if (!buf) { + perror("can't allocate xlated program buffer"); + return -ENOMEM; + } + + bzero(&info, sizeof(info)); + info.xlated_prog_len = xlated_prog_len; + info.xlated_prog_insns = (__u64)(unsigned long)*buf; + if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("second bpf_prog_get_info_by_fd failed"); + goto out_free_buf; + } + + return 0; + +out_free_buf: + free(*buf); + return -1; +} + +static void print_insn(void *private_data, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vfprintf((FILE *)private_data, fmt, args); + va_end(args); +} + +/* Disassemble instructions to a stream */ +static void print_xlated(FILE *out, struct bpf_insn *insn, __u32 len) +{ + const struct bpf_insn_cbs cbs = { + .cb_print = print_insn, + .cb_call = NULL, + .cb_imm = NULL, + .private_data = out, + }; + bool double_insn = false; + int i; + + for (i = 0; i < len; i++) { + if (double_insn) { + double_insn = false; + continue; + } + + double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); + print_bpf_insn(&cbs, insn + i, true); + } +} + +/* We share code with kernel BPF disassembler, it adds '(FF) ' prefix + * for each instruction (FF stands for instruction `code` byte). + * This function removes the prefix inplace for each line in `str`. + */ +static void remove_insn_prefix(char *str, int size) +{ + const int prefix_size = 5; + + int write_pos = 0, read_pos = prefix_size; + int len = strlen(str); + char c; + + size = min(size, len); + + while (read_pos < size) { + c = str[read_pos++]; + if (c == 0) + break; + str[write_pos++] = c; + if (c == '\n') + read_pos += prefix_size; + } + str[write_pos] = 0; +} + +struct prog_info { + char *prog_kind; + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + struct bpf_insn *prog; + u32 prog_len; +}; + +static void match_program(struct btf *btf, + struct prog_info *pinfo, + char *pattern, + char *reg_map[][2], + bool skip_first_insn) +{ + struct bpf_insn *buf = NULL; + int err = 0, prog_fd = 0; + FILE *prog_out = NULL; + char *text = NULL; + __u32 cnt = 0; + + text = calloc(MAX_PROG_TEXT_SZ, 1); + if (!text) { + PRINT_FAIL("Can't allocate %d bytes\n", MAX_PROG_TEXT_SZ); + goto out; + } + + // TODO: log level + LIBBPF_OPTS(bpf_prog_load_opts, opts); + opts.log_buf = text; + opts.log_size = MAX_PROG_TEXT_SZ; + opts.log_level = 1 | 2 | 4; + opts.expected_attach_type = pinfo->expected_attach_type; + + prog_fd = bpf_prog_load(pinfo->prog_type, NULL, "GPL", + pinfo->prog, pinfo->prog_len, &opts); + if (prog_fd < 0) { + PRINT_FAIL("Can't load program, errno %d (%s), verifier log:\n%s\n", + errno, strerror(errno), text); + goto out; + } + + memset(text, 0, MAX_PROG_TEXT_SZ); + + err = get_xlated_program(prog_fd, &buf, &cnt); + if (err) { + PRINT_FAIL("Can't load back BPF program\n"); + goto out; + } + + prog_out = fmemopen(text, MAX_PROG_TEXT_SZ - 1, "w"); + if (!prog_out) { + PRINT_FAIL("Can't open memory stream\n"); + goto out; + } + if (skip_first_insn) + print_xlated(prog_out, buf + 1, cnt - 1); + else + print_xlated(prog_out, buf, cnt); + fclose(prog_out); + remove_insn_prefix(text, MAX_PROG_TEXT_SZ); + + ASSERT_TRUE(match_pattern(btf, pattern, text, reg_map), + pinfo->prog_kind); + +out: + if (prog_fd) + close(prog_fd); + free(buf); + free(text); +} + +static void run_one_testcase(struct btf *btf, struct test_case *test) +{ + struct prog_info pinfo = {}; + int bpf_sz; + + if (!test__start_subtest(test->name)) + return; + + switch (test->field_sz) { + case 8: + bpf_sz = BPF_DW; + break; + case 4: + bpf_sz = BPF_W; + break; + case 2: + bpf_sz = BPF_H; + break; + case 1: + bpf_sz = BPF_B; + break; + default: + PRINT_FAIL("Unexpected field size: %d, want 8,4,2 or 1\n", test->field_sz); + return; + } + + pinfo.prog_type = test->prog_type; + pinfo.expected_attach_type = test->expected_attach_type; + + if (test->read) { + struct bpf_insn ldx_prog[] = { + BPF_LDX_MEM(bpf_sz, BPF_REG_2, BPF_REG_1, test->field_offset), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + char *reg_map[][2] = { + { "$ctx", "r1" }, + { "$dst", "r2" }, + {} + }; + + pinfo.prog_kind = "LDX"; + pinfo.prog = ldx_prog; + pinfo.prog_len = ARRAY_SIZE(ldx_prog); + match_program(btf, &pinfo, test->read, reg_map, false); + } + + if (test->write || test->write_st || test->write_stx) { + struct bpf_insn stx_prog[] = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_STX_MEM(bpf_sz, BPF_REG_1, BPF_REG_2, test->field_offset), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + char *stx_reg_map[][2] = { + { "$ctx", "r1" }, + { "$src", "r2" }, + {} + }; + struct bpf_insn st_prog[] = { + BPF_ST_MEM(bpf_sz, BPF_REG_1, test->field_offset, + test->st_value.use ? test->st_value.value : 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + char *st_reg_map[][2] = { + { "$ctx", "r1" }, + { "$src", "42" }, + {} + }; + + if (test->write || test->write_stx) { + char *pattern = test->write_stx ? test->write_stx : test->write; + + pinfo.prog_kind = "STX"; + pinfo.prog = stx_prog; + pinfo.prog_len = ARRAY_SIZE(stx_prog); + match_program(btf, &pinfo, pattern, stx_reg_map, true); + } + + if (test->write || test->write_st) { + char *pattern = test->write_st ? test->write_st : test->write; + + pinfo.prog_kind = "ST"; + pinfo.prog = st_prog; + pinfo.prog_len = ARRAY_SIZE(st_prog); + match_program(btf, &pinfo, pattern, st_reg_map, false); + } + } + + test__end_subtest(); +} + +void test_ctx_rewrite(void) +{ + struct btf *btf; + int i; + + field_regex = compile_regex("^([[:alpha:]_][[:alnum:]_]+)::([[:alpha:]_][[:alnum:]_]+)"); + ident_regex = compile_regex("^[[:alpha:]_][[:alnum:]_]+"); + if (!field_regex || !ident_regex) + return; + + btf = btf__load_vmlinux_btf(); + if (!btf) { + PRINT_FAIL("Can't load vmlinux BTF, errno %d (%s)\n", errno, strerror(errno)); + goto out; + } + + for (i = 0; i < ARRAY_SIZE(test_cases); ++i) + run_one_testcase(btf, &test_cases[i]); + +out: + btf__free(btf); + free_regex(field_regex); + free_regex(ident_regex); +} diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c index 2853883b7cbb..5c0ebe6ba866 100644 --- a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c +++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c @@ -10,14 +10,6 @@ #include "network_helpers.h" #include "decap_sanity.skel.h" -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto fail; \ - }) - #define NS_TEST "decap_sanity_ns" #define IPV6_IFACE_ADDR "face::1" #define UDP_TEST_PORT 7777 @@ -37,9 +29,9 @@ void test_decap_sanity(void) if (!ASSERT_OK_PTR(skel, "skel open_and_load")) return; - SYS("ip netns add %s", NS_TEST); - SYS("ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR); - SYS("ip -net %s link set dev lo up", NS_TEST); + SYS(fail, "ip netns add %s", NS_TEST); + SYS(fail, "ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR); + SYS(fail, "ip -net %s link set dev lo up", NS_TEST); nstoken = open_netns(NS_TEST); if (!ASSERT_OK_PTR(nstoken, "open_netns")) @@ -80,6 +72,6 @@ fail: bpf_tc_hook_destroy(&qdisc_hook); close_netns(nstoken); } - system("ip netns del " NS_TEST " &> /dev/null"); + SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); decap_sanity__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index b99264ec0d9c..d176c34a7d2e 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -2,20 +2,32 @@ /* Copyright (c) 2022 Facebook */ #include <test_progs.h> +#include <network_helpers.h> #include "dynptr_fail.skel.h" #include "dynptr_success.skel.h" -static const char * const success_tests[] = { - "test_read_write", - "test_data_slice", - "test_ringbuf", +enum test_setup_type { + SETUP_SYSCALL_SLEEP, + SETUP_SKB_PROG, }; -static void verify_success(const char *prog_name) +static struct { + const char *prog_name; + enum test_setup_type type; +} success_tests[] = { + {"test_read_write", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_data", SETUP_SYSCALL_SLEEP}, + {"test_ringbuf", SETUP_SYSCALL_SLEEP}, + {"test_skb_readonly", SETUP_SKB_PROG}, + {"test_dynptr_skb_data", SETUP_SKB_PROG}, +}; + +static void verify_success(const char *prog_name, enum test_setup_type setup_type) { struct dynptr_success *skel; struct bpf_program *prog; struct bpf_link *link; + int err; skel = dynptr_success__open(); if (!ASSERT_OK_PTR(skel, "dynptr_success__open")) @@ -23,23 +35,53 @@ static void verify_success(const char *prog_name) skel->bss->pid = getpid(); - dynptr_success__load(skel); - if (!ASSERT_OK_PTR(skel, "dynptr_success__load")) - goto cleanup; - prog = bpf_object__find_program_by_name(skel->obj, prog_name); if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) goto cleanup; - link = bpf_program__attach(prog); - if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + bpf_program__set_autoload(prog, true); + + err = dynptr_success__load(skel); + if (!ASSERT_OK(err, "dynptr_success__load")) goto cleanup; - usleep(1); + switch (setup_type) { + case SETUP_SYSCALL_SLEEP: + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + goto cleanup; - ASSERT_EQ(skel->bss->err, 0, "err"); + usleep(1); + + bpf_link__destroy(link); + break; + case SETUP_SKB_PROG: + { + int prog_fd; + char buf[64]; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); - bpf_link__destroy(link); + prog_fd = bpf_program__fd(prog); + if (!ASSERT_GE(prog_fd, 0, "prog_fd")) + goto cleanup; + + err = bpf_prog_test_run_opts(prog_fd, &topts); + + if (!ASSERT_OK(err, "test_run")) + goto cleanup; + + break; + } + } + + ASSERT_EQ(skel->bss->err, 0, "err"); cleanup: dynptr_success__destroy(skel); @@ -50,10 +92,10 @@ void test_dynptr(void) int i; for (i = 0; i < ARRAY_SIZE(success_tests); i++) { - if (!test__start_subtest(success_tests[i])) + if (!test__start_subtest(success_tests[i].prog_name)) continue; - verify_success(success_tests[i]); + verify_success(success_tests[i].prog_name, success_tests[i].type); } RUN_TESTS(dynptr_fail); diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c index 32dd731e9070..3b77d8a422db 100644 --- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c @@ -4,11 +4,6 @@ #include <net/if.h> #include "empty_skb.skel.h" -#define SYS(cmd) ({ \ - if (!ASSERT_OK(system(cmd), (cmd))) \ - goto out; \ -}) - void test_empty_skb(void) { LIBBPF_OPTS(bpf_test_run_opts, tattr); @@ -93,18 +88,18 @@ void test_empty_skb(void) }, }; - SYS("ip netns add empty_skb"); + SYS(out, "ip netns add empty_skb"); tok = open_netns("empty_skb"); - SYS("ip link add veth0 type veth peer veth1"); - SYS("ip link set dev veth0 up"); - SYS("ip link set dev veth1 up"); - SYS("ip addr add 10.0.0.1/8 dev veth0"); - SYS("ip addr add 10.0.0.2/8 dev veth1"); + SYS(out, "ip link add veth0 type veth peer veth1"); + SYS(out, "ip link set dev veth0 up"); + SYS(out, "ip link set dev veth1 up"); + SYS(out, "ip addr add 10.0.0.1/8 dev veth0"); + SYS(out, "ip addr add 10.0.0.2/8 dev veth1"); veth_ifindex = if_nametoindex("veth0"); - SYS("ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2"); - SYS("ip link set ipip0 up"); - SYS("ip addr add 192.168.1.1/16 dev ipip0"); + SYS(out, "ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2"); + SYS(out, "ip link set ipip0 up"); + SYS(out, "ip addr add 192.168.1.1/16 dev ipip0"); ipip_ifindex = if_nametoindex("ipip0"); bpf_obj = empty_skb__open_and_load(); @@ -142,5 +137,5 @@ out: empty_skb__destroy(bpf_obj); if (tok) close_netns(tok); - system("ip netns del empty_skb"); + SYS_NOFAIL("ip netns del empty_skb"); } diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c index 61ccddccf485..429393caf612 100644 --- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c @@ -8,14 +8,6 @@ #include "network_helpers.h" #include "fib_lookup.skel.h" -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto fail; \ - }) - #define NS_TEST "fib_lookup_ns" #define IPV6_IFACE_ADDR "face::face" #define IPV6_NUD_FAILED_ADDR "face::1" @@ -59,16 +51,16 @@ static int setup_netns(void) { int err; - SYS("ip link add veth1 type veth peer name veth2"); - SYS("ip link set dev veth1 up"); + SYS(fail, "ip link add veth1 type veth peer name veth2"); + SYS(fail, "ip link set dev veth1 up"); - SYS("ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR); - SYS("ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR); - SYS("ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC); + SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR); + SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR); + SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC); - SYS("ip addr add %s/24 dev veth1 nodad", IPV4_IFACE_ADDR); - SYS("ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); - SYS("ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); + SYS(fail, "ip addr add %s/24 dev veth1 nodad", IPV4_IFACE_ADDR); + SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); + SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); err = write_sysctl("/proc/sys/net/ipv4/conf/veth1/forwarding", "1"); if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth1.forwarding)")) @@ -140,7 +132,7 @@ void test_fib_lookup(void) return; prog_fd = bpf_program__fd(skel->progs.fib_lookup); - SYS("ip netns add %s", NS_TEST); + SYS(fail, "ip netns add %s", NS_TEST); nstoken = open_netns(NS_TEST); if (!ASSERT_OK_PTR(nstoken, "open_netns")) @@ -182,6 +174,6 @@ void test_fib_lookup(void) fail: if (nstoken) close_netns(nstoken); - system("ip netns del " NS_TEST " &> /dev/null"); + SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); fib_lookup__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 7acca37a3d2b..c4773173a4e4 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -346,6 +346,30 @@ struct test tests[] = { .retval = BPF_OK, }, { + .name = "ipv6-empty-flow-label", + .pkt.ipv6 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_TCP, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .iph.flow_lbl = { 0x00, 0x00, 0x00 }, + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct ipv6hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), + .sport = 80, + .dport = 8080, + }, + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, + .retval = BPF_OK, + }, + { .name = "ipip-encap", .pkt.ipip = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c index 9c1a18573ffd..1eab286b14fe 100644 --- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c +++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c @@ -93,4 +93,6 @@ void test_l4lb_all(void) test_l4lb("test_l4lb.bpf.o"); if (test__start_subtest("l4lb_noinline")) test_l4lb("test_l4lb_noinline.bpf.o"); + if (test__start_subtest("l4lb_noinline_dynptr")) + test_l4lb("test_l4lb_noinline_dynptr.bpf.o"); } diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c index f4ffdcabf4e4..239e1c5753b0 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -141,7 +141,7 @@ void test_log_fixup(void) if (test__start_subtest("bad_core_relo_trunc_partial")) bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */); if (test__start_subtest("bad_core_relo_trunc_full")) - bad_core_relo(250, TRUNC_FULL /* truncate also libbpf's message patch */); + bad_core_relo(210, TRUNC_FULL /* truncate also libbpf's message patch */); if (test__start_subtest("bad_core_relo_subprog")) bad_core_relo_subprog(); if (test__start_subtest("missing_map")) diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c index 3533a4ecad01..8743df599567 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -4,70 +4,160 @@ #include "map_kptr.skel.h" #include "map_kptr_fail.skel.h" +#include "rcu_tasks_trace_gp.skel.h" static void test_map_kptr_success(bool test_run) { + LIBBPF_OPTS(bpf_test_run_opts, lopts); LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), .repeat = 1, ); + int key = 0, ret, cpu; struct map_kptr *skel; - int key = 0, ret; - char buf[16]; + char buf[16], *pbuf; skel = map_kptr__open_and_load(); if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load")) return; - ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref), &opts); - ASSERT_OK(ret, "test_map_kptr_ref refcount"); - ASSERT_OK(opts.retval, "test_map_kptr_ref retval"); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref1), &opts); + ASSERT_OK(ret, "test_map_kptr_ref1 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref1 retval"); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref2), &opts); ASSERT_OK(ret, "test_map_kptr_ref2 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval"); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref1), &lopts); + ASSERT_OK(ret, "test_ls_map_kptr_ref1 refcount"); + ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref1 retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref2), &lopts); + ASSERT_OK(ret, "test_ls_map_kptr_ref2 refcount"); + ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref2 retval"); + if (test_run) goto exit; + cpu = libbpf_num_possible_cpus(); + if (!ASSERT_GT(cpu, 0, "libbpf_num_possible_cpus")) + goto exit; + + pbuf = calloc(cpu, sizeof(buf)); + if (!ASSERT_OK_PTR(pbuf, "calloc(pbuf)")) + goto exit; + ret = bpf_map__update_elem(skel->maps.array_map, &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "array_map update"); - ret = bpf_map__update_elem(skel->maps.array_map, - &key, sizeof(key), buf, sizeof(buf), 0); - ASSERT_OK(ret, "array_map update2"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_map__update_elem(skel->maps.pcpu_array_map, + &key, sizeof(key), pbuf, cpu * sizeof(buf), 0); + ASSERT_OK(ret, "pcpu_array_map update"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); - ret = bpf_map__update_elem(skel->maps.hash_map, - &key, sizeof(key), buf, sizeof(buf), 0); - ASSERT_OK(ret, "hash_map update"); ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_map__delete_elem(skel->maps.pcpu_hash_map, &key, sizeof(key), 0); + ASSERT_OK(ret, "pcpu_hash_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); - ret = bpf_map__update_elem(skel->maps.hash_malloc_map, - &key, sizeof(key), buf, sizeof(buf), 0); - ASSERT_OK(ret, "hash_malloc_map update"); ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_malloc_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_map__delete_elem(skel->maps.pcpu_hash_malloc_map, &key, sizeof(key), 0); + ASSERT_OK(ret, "pcpu_hash_malloc_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); - ret = bpf_map__update_elem(skel->maps.lru_hash_map, - &key, sizeof(key), buf, sizeof(buf), 0); - ASSERT_OK(ret, "lru_hash_map update"); ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "lru_hash_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_map__delete_elem(skel->maps.lru_pcpu_hash_map, &key, sizeof(key), 0); + ASSERT_OK(ret, "lru_pcpu_hash_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref_del), &lopts); + ASSERT_OK(ret, "test_ls_map_kptr_ref_del delete"); + skel->data->ref--; + ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref_del retval"); + + free(pbuf); exit: map_kptr__destroy(skel); } -void test_map_kptr(void) +static int kern_sync_rcu_tasks_trace(struct rcu_tasks_trace_gp *rcu) { - if (test__start_subtest("success")) { + long gp_seq = READ_ONCE(rcu->bss->gp_seq); + LIBBPF_OPTS(bpf_test_run_opts, opts); + + if (!ASSERT_OK(bpf_prog_test_run_opts(bpf_program__fd(rcu->progs.do_call_rcu_tasks_trace), + &opts), "do_call_rcu_tasks_trace")) + return -EFAULT; + if (!ASSERT_OK(opts.retval, "opts.retval == 0")) + return -EFAULT; + while (gp_seq == READ_ONCE(rcu->bss->gp_seq)) + sched_yield(); + return 0; +} + +void serial_test_map_kptr(void) +{ + struct rcu_tasks_trace_gp *skel; + + RUN_TESTS(map_kptr_fail); + + skel = rcu_tasks_trace_gp__open_and_load(); + if (!ASSERT_OK_PTR(skel, "rcu_tasks_trace_gp__open_and_load")) + return; + if (!ASSERT_OK(rcu_tasks_trace_gp__attach(skel), "rcu_tasks_trace_gp__attach")) + goto end; + + if (test__start_subtest("success-map")) { + test_map_kptr_success(true); + + ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace"); + ASSERT_OK(kern_sync_rcu(), "sync rcu"); + /* Observe refcount dropping to 1 on bpf_map_free_deferred */ test_map_kptr_success(false); - /* Do test_run twice, so that we see refcount going back to 1 - * after we leave it in map from first iteration. - */ + + ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace"); + ASSERT_OK(kern_sync_rcu(), "sync rcu"); + /* Observe refcount dropping to 1 on synchronous delete elem */ test_map_kptr_success(true); } - RUN_TESTS(map_kptr_fail); +end: + rcu_tasks_trace_gp__destroy(skel); + return; } diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 59f08d6d1d53..cd0c42fff7c0 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -7,6 +7,8 @@ #include "network_helpers.h" #include "mptcp_sock.skel.h" +#define NS_TEST "mptcp_ns" + #ifndef TCP_CA_NAME_MAX #define TCP_CA_NAME_MAX 16 #endif @@ -138,12 +140,20 @@ out: static void test_base(void) { + struct nstoken *nstoken = NULL; int server_fd, cgroup_fd; cgroup_fd = test__join_cgroup("/mptcp"); if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return; + SYS(fail, "ip netns add %s", NS_TEST); + SYS(fail, "ip -net %s link set dev lo up", NS_TEST); + + nstoken = open_netns(NS_TEST); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto fail; + /* without MPTCP */ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (!ASSERT_GE(server_fd, 0, "start_server")) @@ -157,13 +167,18 @@ with_mptcp: /* with MPTCP */ server_fd = start_mptcp_server(AF_INET, NULL, 0, 0); if (!ASSERT_GE(server_fd, 0, "start_mptcp_server")) - goto close_cgroup_fd; + goto fail; ASSERT_OK(run_test(cgroup_fd, server_fd, true), "run_test mptcp"); close(server_fd); -close_cgroup_fd: +fail: + if (nstoken) + close_netns(nstoken); + + SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); + close(cgroup_fd); } diff --git a/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c new file mode 100644 index 000000000000..daa952711d8f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include <network_helpers.h> +#include "test_parse_tcp_hdr_opt.skel.h" +#include "test_parse_tcp_hdr_opt_dynptr.skel.h" +#include "test_tcp_hdr_options.h" + +struct test_pkt { + struct ipv6_packet pk6_v6; + u8 options[16]; +} __packed; + +struct test_pkt pkt = { + .pk6_v6.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .pk6_v6.iph.nexthdr = IPPROTO_TCP, + .pk6_v6.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .pk6_v6.tcp.urg_ptr = 123, + .pk6_v6.tcp.doff = 9, /* 16 bytes of options */ + + .options = { + TCPOPT_MSS, 4, 0x05, 0xB4, TCPOPT_NOP, TCPOPT_NOP, + 0, 6, 0xBB, 0xBB, 0xBB, 0xBB, TCPOPT_EOL + }, +}; + +static void test_parse_opt(void) +{ + struct test_parse_tcp_hdr_opt *skel; + struct bpf_program *prog; + char buf[128]; + int err; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt, + .data_size_in = sizeof(pkt), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 3, + ); + + skel = test_parse_tcp_hdr_opt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr; + prog = skel->progs.xdp_ingress_v6; + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts); + ASSERT_OK(err, "ipv6 test_run"); + ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval"); + ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id"); + + test_parse_tcp_hdr_opt__destroy(skel); +} + +static void test_parse_opt_dynptr(void) +{ + struct test_parse_tcp_hdr_opt_dynptr *skel; + struct bpf_program *prog; + char buf[128]; + int err; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt, + .data_size_in = sizeof(pkt), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 3, + ); + + skel = test_parse_tcp_hdr_opt_dynptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr; + prog = skel->progs.xdp_ingress_v6; + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts); + ASSERT_OK(err, "ipv6 test_run"); + ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval"); + ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id"); + + test_parse_tcp_hdr_opt_dynptr__destroy(skel); +} + +void test_parse_tcp_hdr_opt(void) +{ + if (test__start_subtest("parse_tcp_hdr_opt")) + test_parse_opt(); + if (test__start_subtest("parse_tcp_hdr_opt_dynptr")) + test_parse_opt_dynptr(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c index 447d8560ecb6..3f1f58d3a729 100644 --- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -25,10 +25,10 @@ static void test_success(void) bpf_program__set_autoload(skel->progs.get_cgroup_id, true); bpf_program__set_autoload(skel->progs.task_succ, true); - bpf_program__set_autoload(skel->progs.no_lock, true); bpf_program__set_autoload(skel->progs.two_regions, true); bpf_program__set_autoload(skel->progs.non_sleepable_1, true); bpf_program__set_autoload(skel->progs.non_sleepable_2, true); + bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true); err = rcu_read_lock__load(skel); if (!ASSERT_OK(err, "skel_load")) goto out; @@ -69,6 +69,7 @@ out: static const char * const inproper_region_tests[] = { "miss_lock", + "no_lock", "miss_unlock", "non_sleepable_rcu_mismatch", "inproper_sleepable_helper", @@ -99,7 +100,6 @@ out: } static const char * const rcuptr_misuse_tests[] = { - "task_untrusted_non_rcuptr", "task_untrusted_rcuptr", "cross_rcu_region", }; @@ -128,17 +128,8 @@ out: void test_rcu_read_lock(void) { - struct btf *vmlinux_btf; int cgroup_fd; - vmlinux_btf = btf__load_vmlinux_btf(); - if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF")) - return; - if (btf__find_by_name_kind(vmlinux_btf, "rcu", BTF_KIND_TYPE_TAG) < 0) { - test__skip(); - goto out; - } - cgroup_fd = test__join_cgroup("/rcu_read_lock"); if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /rcu_read_lock")) goto out; @@ -153,6 +144,5 @@ void test_rcu_read_lock(void) if (test__start_subtest("negative_tests_rcuptr_misuse")) test_rcuptr_misuse(); close(cgroup_fd); -out: - btf__free(vmlinux_btf); +out:; } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 567e07c19ecc..8f09e1ea3ba7 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -18,6 +18,7 @@ #include <string.h> #include <sys/select.h> #include <unistd.h> +#include <linux/vm_sockets.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> @@ -251,6 +252,16 @@ static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len) *len = sizeof(*addr6); } +static void init_addr_loopback_vsock(struct sockaddr_storage *ss, socklen_t *len) +{ + struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss)); + + addr->svm_family = AF_VSOCK; + addr->svm_port = VMADDR_PORT_ANY; + addr->svm_cid = VMADDR_CID_LOCAL; + *len = sizeof(*addr); +} + static void init_addr_loopback(int family, struct sockaddr_storage *ss, socklen_t *len) { @@ -261,6 +272,9 @@ static void init_addr_loopback(int family, struct sockaddr_storage *ss, case AF_INET6: init_addr_loopback6(ss, len); return; + case AF_VSOCK: + init_addr_loopback_vsock(ss, len); + return; default: FAIL("unsupported address family %d", family); } @@ -1478,6 +1492,8 @@ static const char *family_str(sa_family_t family) return "IPv6"; case AF_UNIX: return "Unix"; + case AF_VSOCK: + return "VSOCK"; default: return "unknown"; } @@ -1689,6 +1705,151 @@ static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *ma unix_skb_redir_to_connected(skel, map, sotype); } +/* Returns two connected loopback vsock sockets */ +static int vsock_socketpair_connectible(int sotype, int *v0, int *v1) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int s, p, c; + + s = socket_loopback(AF_VSOCK, sotype); + if (s < 0) + return -1; + + c = xsocket(AF_VSOCK, sotype | SOCK_NONBLOCK, 0); + if (c == -1) + goto close_srv; + + if (getsockname(s, sockaddr(&addr), &len) < 0) + goto close_cli; + + if (connect(c, sockaddr(&addr), len) < 0 && errno != EINPROGRESS) { + FAIL_ERRNO("connect"); + goto close_cli; + } + + len = sizeof(addr); + p = accept_timeout(s, sockaddr(&addr), &len, IO_TIMEOUT_SEC); + if (p < 0) + goto close_cli; + + *v0 = p; + *v1 = c; + + return 0; + +close_cli: + close(c); +close_srv: + close(s); + + return -1; +} + +static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd, + enum redir_mode mode, int sotype) +{ + const char *log_prefix = redir_mode_str(mode); + char a = 'a', b = 'b'; + int u0, u1, v0, v1; + int sfd[2]; + unsigned int pass; + int err, n; + u32 key; + + zero_verdict_count(verd_mapfd); + + if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd)) + return; + + u0 = sfd[0]; + u1 = sfd[1]; + + err = vsock_socketpair_connectible(sotype, &v0, &v1); + if (err) { + FAIL("vsock_socketpair_connectible() failed"); + goto close_uds; + } + + err = add_to_sockmap(sock_mapfd, u0, v0); + if (err) { + FAIL("add_to_sockmap failed"); + goto close_vsock; + } + + n = write(v1, &a, sizeof(a)); + if (n < 0) + FAIL_ERRNO("%s: write", log_prefix); + if (n == 0) + FAIL("%s: incomplete write", log_prefix); + if (n < 1) + goto out; + + n = recv(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), MSG_DONTWAIT); + if (n < 0) + FAIL("%s: recv() err, errno=%d", log_prefix, errno); + if (n == 0) + FAIL("%s: incomplete recv", log_prefix); + if (b != a) + FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b); + + key = SK_PASS; + err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); + if (err) + goto out; + if (pass != 1) + FAIL("%s: want pass count 1, have %d", log_prefix, pass); +out: + key = 0; + bpf_map_delete_elem(sock_mapfd, &key); + key = 1; + bpf_map_delete_elem(sock_mapfd, &key); + +close_vsock: + close(v0); + close(v1); + +close_uds: + close(u0); + close(u1); +} + +static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, + int sotype) +{ + int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); + if (err) + return; + + skel->bss->test_ingress = false; + vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype); + skel->bss->test_ingress = true; + vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); +} + +static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map) +{ + const char *family_name, *map_name; + char s[MAX_TEST_NAME]; + + family_name = family_str(AF_VSOCK); + map_name = map_type_str(map); + snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); + if (!test__start_subtest(s)) + return; + + vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM); + vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET); +} + static void test_reuseport(struct test_sockmap_listen *skel, struct bpf_map *map, int family, int sotype) { @@ -2060,12 +2221,14 @@ void serial_test_sockmap_listen(void) run_tests(skel, skel->maps.sock_map, AF_INET6); test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM); test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM); + test_vsock_redir(skel, skel->maps.sock_map); skel->bss->test_sockmap = false; run_tests(skel, skel->maps.sock_hash, AF_INET); run_tests(skel, skel->maps.sock_hash, AF_INET6); test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM); test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM); + test_vsock_redir(skel, skel->maps.sock_hash); test_sockmap_listen__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index bca5e6839ac4..6ee22c3b251a 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -137,24 +137,16 @@ static int get_ifaddr(const char *name, char *ifaddr) return 0; } -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto fail; \ - }) - static int netns_setup_links_and_routes(struct netns_setup_result *result) { struct nstoken *nstoken = NULL; char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {}; - SYS("ip link add veth_src type veth peer name veth_src_fwd"); - SYS("ip link add veth_dst type veth peer name veth_dst_fwd"); + SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd"); + SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd"); - SYS("ip link set veth_dst_fwd address " MAC_DST_FWD); - SYS("ip link set veth_dst address " MAC_DST); + SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD); + SYS(fail, "ip link set veth_dst address " MAC_DST); if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) goto fail; @@ -175,27 +167,27 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd")) goto fail; - SYS("ip link set veth_src netns " NS_SRC); - SYS("ip link set veth_src_fwd netns " NS_FWD); - SYS("ip link set veth_dst_fwd netns " NS_FWD); - SYS("ip link set veth_dst netns " NS_DST); + SYS(fail, "ip link set veth_src netns " NS_SRC); + SYS(fail, "ip link set veth_src_fwd netns " NS_FWD); + SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD); + SYS(fail, "ip link set veth_dst netns " NS_DST); /** setup in 'src' namespace */ nstoken = open_netns(NS_SRC); if (!ASSERT_OK_PTR(nstoken, "setns src")) goto fail; - SYS("ip addr add " IP4_SRC "/32 dev veth_src"); - SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad"); - SYS("ip link set dev veth_src up"); + SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src"); + SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad"); + SYS(fail, "ip link set dev veth_src up"); - SYS("ip route add " IP4_DST "/32 dev veth_src scope global"); - SYS("ip route add " IP4_NET "/16 dev veth_src scope global"); - SYS("ip route add " IP6_DST "/128 dev veth_src scope global"); + SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global"); + SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global"); + SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global"); - SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s", + SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s", veth_src_fwd_addr); - SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s", + SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s", veth_src_fwd_addr); close_netns(nstoken); @@ -209,15 +201,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) * needs v4 one in order to start ARP probing. IP4_NET route is added * to the endpoints so that the ARP processing will reply. */ - SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd"); - SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); - SYS("ip link set dev veth_src_fwd up"); - SYS("ip link set dev veth_dst_fwd up"); + SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd"); + SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); + SYS(fail, "ip link set dev veth_src_fwd up"); + SYS(fail, "ip link set dev veth_dst_fwd up"); - SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); - SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); - SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); - SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); + SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); + SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); + SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); + SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); close_netns(nstoken); @@ -226,16 +218,16 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) if (!ASSERT_OK_PTR(nstoken, "setns dst")) goto fail; - SYS("ip addr add " IP4_DST "/32 dev veth_dst"); - SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad"); - SYS("ip link set dev veth_dst up"); + SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst"); + SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad"); + SYS(fail, "ip link set dev veth_dst up"); - SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global"); - SYS("ip route add " IP4_NET "/16 dev veth_dst scope global"); - SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global"); + SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global"); + SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope global"); + SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global"); - SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); - SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD); close_netns(nstoken); @@ -375,7 +367,7 @@ done: static int test_ping(int family, const char *addr) { - SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr); + SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr); return 0; fail: return -1; @@ -953,7 +945,7 @@ static int tun_open(char *name) if (!ASSERT_OK(err, "ioctl TUNSETIFF")) goto fail; - SYS("ip link set dev %s up", name); + SYS(fail, "ip link set dev %s up", name); return fd; fail: @@ -1076,23 +1068,23 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); /* Setup route and neigh tables */ - SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); - SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24"); + SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); + SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24"); - SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); - SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); + SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); + SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); - SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global"); - SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD + SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global"); + SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD " dev tun_src scope global"); - SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); - SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); - SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD + SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); + SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); + SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD " dev tun_src scope global"); - SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); + SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); - SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); - SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) goto fail; diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c index b13feceb38f1..810b14981c2e 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_ima.c +++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c @@ -70,7 +70,7 @@ void test_test_ima(void) u64 bin_true_sample; char cmd[256]; - int err, duration = 0; + int err, duration = 0, fresh_digest_idx = 0; struct ima *skel = NULL; skel = ima__open_and_load(); @@ -129,7 +129,15 @@ void test_test_ima(void) /* * Test #3 * - Goal: confirm that bpf_ima_inode_hash() returns a non-fresh digest - * - Expected result: 2 samples (/bin/true: non-fresh, fresh) + * - Expected result: + * 1 sample (/bin/true: fresh) if commit 62622dab0a28 applied + * 2 samples (/bin/true: non-fresh, fresh) if commit 62622dab0a28 is + * not applied + * + * If commit 62622dab0a28 ("ima: return IMA digest value only when + * IMA_COLLECTED flag is set") is applied, bpf_ima_inode_hash() refuses + * to give a non-fresh digest, hence the correct result is 1 instead of + * 2. */ test_init(skel->bss); @@ -144,13 +152,18 @@ void test_test_ima(void) goto close_clean; err = ring_buffer__consume(ringbuf); - ASSERT_EQ(err, 2, "num_samples_or_err"); - ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash"); - ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash"); - ASSERT_EQ(ima_hash_from_bpf[0], bin_true_sample, "sample_equal_or_err"); + ASSERT_GE(err, 1, "num_samples_or_err"); + if (err == 2) { + ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash"); + ASSERT_EQ(ima_hash_from_bpf[0], bin_true_sample, + "sample_equal_or_err"); + fresh_digest_idx = 1; + } + + ASSERT_NEQ(ima_hash_from_bpf[fresh_digest_idx], 0, "ima_hash"); /* IMA refreshed the digest. */ - ASSERT_NEQ(ima_hash_from_bpf[1], bin_true_sample, - "sample_different_or_err"); + ASSERT_NEQ(ima_hash_from_bpf[fresh_digest_idx], bin_true_sample, + "sample_equal_or_err"); /* * Test #4 diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index 07ad457f3370..47f1d482fe39 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -91,30 +91,15 @@ #define PING_ARGS "-i 0.01 -c 3 -w 10 -q" -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto fail; \ - }) - -#define SYS_NOFAIL(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - system(cmd); \ - }) - static int config_device(void) { - SYS("ip netns add at_ns0"); - SYS("ip link add veth0 address " MAC_VETH1 " type veth peer name veth1"); - SYS("ip link set veth0 netns at_ns0"); - SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1"); - SYS("ip link set dev veth1 up mtu 1500"); - SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0"); - SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500"); + SYS(fail, "ip netns add at_ns0"); + SYS(fail, "ip link add veth0 address " MAC_VETH1 " type veth peer name veth1"); + SYS(fail, "ip link set veth0 netns at_ns0"); + SYS(fail, "ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1"); + SYS(fail, "ip link set dev veth1 up mtu 1500"); + SYS(fail, "ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0"); + SYS(fail, "ip netns exec at_ns0 ip link set dev veth0 up mtu 1500"); return 0; fail: @@ -132,23 +117,23 @@ static void cleanup(void) static int add_vxlan_tunnel(void) { /* at_ns0 namespace */ - SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789", + SYS(fail, "ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789", VXLAN_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip link set dev %s address %s up", + SYS(fail, "ip netns exec at_ns0 ip link set dev %s address %s up", VXLAN_TUNL_DEV0, MAC_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip addr add dev %s %s/24", + SYS(fail, "ip netns exec at_ns0 ip addr add dev %s %s/24", VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s", + SYS(fail, "ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s", IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0", + SYS(fail, "ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0", IP4_ADDR2_VETH1, MAC_VETH1); /* root namespace */ - SYS("ip link add dev %s type vxlan external gbp dstport 4789", + SYS(fail, "ip link add dev %s type vxlan external gbp dstport 4789", VXLAN_TUNL_DEV1); - SYS("ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1); - SYS("ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); - SYS("ip neigh add %s lladdr %s dev %s", + SYS(fail, "ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1); + SYS(fail, "ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); + SYS(fail, "ip neigh add %s lladdr %s dev %s", IP4_ADDR_TUNL_DEV0, MAC_TUNL_DEV0, VXLAN_TUNL_DEV1); return 0; @@ -165,26 +150,26 @@ static void delete_vxlan_tunnel(void) static int add_ip6vxlan_tunnel(void) { - SYS("ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0", + SYS(fail, "ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0", IP6_ADDR_VETH0); - SYS("ip netns exec at_ns0 ip link set dev veth0 up"); - SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1); - SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1); - SYS("ip link set dev veth1 up"); + SYS(fail, "ip netns exec at_ns0 ip link set dev veth0 up"); + SYS(fail, "ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1); + SYS(fail, "ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1); + SYS(fail, "ip link set dev veth1 up"); /* at_ns0 namespace */ - SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789", + SYS(fail, "ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789", IP6VXLAN_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip addr add dev %s %s/24", + SYS(fail, "ip netns exec at_ns0 ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip link set dev %s address %s up", + SYS(fail, "ip netns exec at_ns0 ip link set dev %s address %s up", IP6VXLAN_TUNL_DEV0, MAC_TUNL_DEV0); /* root namespace */ - SYS("ip link add dev %s type vxlan external dstport 4789", + SYS(fail, "ip link add dev %s type vxlan external dstport 4789", IP6VXLAN_TUNL_DEV1); - SYS("ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); - SYS("ip link set dev %s address %s up", + SYS(fail, "ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); + SYS(fail, "ip link set dev %s address %s up", IP6VXLAN_TUNL_DEV1, MAC_TUNL_DEV1); return 0; @@ -205,7 +190,7 @@ static void delete_ip6vxlan_tunnel(void) static int test_ping(int family, const char *addr) { - SYS("%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); + SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); return 0; fail: return -1; diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 7eb049214859..290c21dbe65a 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -29,6 +29,9 @@ static int timer(struct timer *timer_skel) /* check that timer_cb2() was executed twice */ ASSERT_EQ(timer_skel->bss->bss_data, 10, "bss_data"); + /* check that timer_cb3() was executed twice */ + ASSERT_EQ(timer_skel->bss->abs_data, 12, "abs_data"); + /* check that there were no errors in timer execution */ ASSERT_EQ(timer_skel->bss->err, 0, "err"); diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c index 3a13e102c149..e51721df14fc 100644 --- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c @@ -590,7 +590,7 @@ static void *kick_kernel_cb(void *arg) /* Kick the kernel, causing it to drain the ring buffer and then wake * up the test thread waiting on epoll. */ - syscall(__NR_getrlimit); + syscall(__NR_prlimit64); return NULL; } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c index d4cd9f873c14..fa3cac5488f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c @@ -4,11 +4,10 @@ #define IFINDEX_LO 1 #define XDP_FLAGS_REPLACE (1U << 4) -void serial_test_xdp_attach(void) +static void test_xdp_attach(const char *file) { __u32 duration = 0, id1, id2, id0 = 0, len; struct bpf_object *obj1, *obj2, *obj3; - const char *file = "./test_xdp.bpf.o"; struct bpf_prog_info info = {}; int err, fd1, fd2, fd3; LIBBPF_OPTS(bpf_xdp_attach_opts, opts); @@ -85,3 +84,11 @@ out_2: out_1: bpf_object__close(obj1); } + +void serial_test_xdp_attach(void) +{ + if (test__start_subtest("xdp_attach")) + test_xdp_attach("./test_xdp.bpf.o"); + if (test__start_subtest("xdp_attach_dynptr")) + test_xdp_attach("./test_xdp_dynptr.bpf.o"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index 5e3a26b15ec6..d19f79048ff6 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -141,41 +141,33 @@ static const char * const xmit_policy_names[] = { static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy, int bond_both_attach) { -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - return -1; \ - }) - - SYS("ip netns add ns_dst"); - SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst"); - SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst"); - - SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s", + SYS(fail, "ip netns add ns_dst"); + SYS(fail, "ip link add veth1_1 type veth peer name veth2_1 netns ns_dst"); + SYS(fail, "ip link add veth1_2 type veth peer name veth2_2 netns ns_dst"); + + SYS(fail, "ip link add bond1 type bond mode %s xmit_hash_policy %s", mode_names[mode], xmit_policy_names[xmit_policy]); - SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none"); - SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s", + SYS(fail, "ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none"); + SYS(fail, "ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s", mode_names[mode], xmit_policy_names[xmit_policy]); - SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none"); + SYS(fail, "ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none"); - SYS("ip link set veth1_1 master bond1"); + SYS(fail, "ip link set veth1_1 master bond1"); if (bond_both_attach == BOND_BOTH_AND_ATTACH) { - SYS("ip link set veth1_2 master bond1"); + SYS(fail, "ip link set veth1_2 master bond1"); } else { - SYS("ip link set veth1_2 up addrgenmode none"); + SYS(fail, "ip link set veth1_2 up addrgenmode none"); if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2")) return -1; } - SYS("ip -netns ns_dst link set veth2_1 master bond2"); + SYS(fail, "ip -netns ns_dst link set veth2_1 master bond2"); if (bond_both_attach == BOND_BOTH_AND_ATTACH) - SYS("ip -netns ns_dst link set veth2_2 master bond2"); + SYS(fail, "ip -netns ns_dst link set veth2_2 master bond2"); else - SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none"); + SYS(fail, "ip -netns ns_dst link set veth2_2 up addrgenmode none"); /* Load a dummy program on sending side as with veth peer needs to have a * XDP program loaded as well. @@ -194,8 +186,8 @@ static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy, } return 0; - -#undef SYS +fail: + return -1; } static void bonding_cleanup(struct skeletons *skeletons) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index 8251a0fc6ee9..fde13010980d 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -12,14 +12,6 @@ #include <uapi/linux/netdev.h> #include "test_xdp_do_redirect.skel.h" -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto out; \ - }) - struct udp_packet { struct ethhdr eth; struct ipv6hdr iph; @@ -127,19 +119,19 @@ void test_xdp_do_redirect(void) * iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP * payload. */ - SYS("ip netns add testns"); + SYS(out, "ip netns add testns"); nstoken = open_netns("testns"); if (!ASSERT_OK_PTR(nstoken, "setns")) goto out; - SYS("ip link add veth_src type veth peer name veth_dst"); - SYS("ip link set dev veth_src address 00:11:22:33:44:55"); - SYS("ip link set dev veth_dst address 66:77:88:99:aa:bb"); - SYS("ip link set dev veth_src up"); - SYS("ip link set dev veth_dst up"); - SYS("ip addr add dev veth_src fc00::1/64"); - SYS("ip addr add dev veth_dst fc00::2/64"); - SYS("ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb"); + SYS(out, "ip link add veth_src type veth peer name veth_dst"); + SYS(out, "ip link set dev veth_src address 00:11:22:33:44:55"); + SYS(out, "ip link set dev veth_dst address 66:77:88:99:aa:bb"); + SYS(out, "ip link set dev veth_src up"); + SYS(out, "ip link set dev veth_dst up"); + SYS(out, "ip addr add dev veth_src fc00::1/64"); + SYS(out, "ip addr add dev veth_dst fc00::2/64"); + SYS(out, "ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb"); /* We enable forwarding in the test namespace because that will cause * the packets that go through the kernel stack (with XDP_PASS) to be @@ -152,7 +144,7 @@ void test_xdp_do_redirect(void) * code didn't have this, so we keep the test behaviour to make sure the * bug doesn't resurface. */ - SYS("sysctl -qw net.ipv6.conf.all.forwarding=1"); + SYS(out, "sysctl -qw net.ipv6.conf.all.forwarding=1"); ifindex_src = if_nametoindex("veth_src"); ifindex_dst = if_nametoindex("veth_dst"); @@ -250,6 +242,6 @@ out_tc: out: if (nstoken) close_netns(nstoken); - system("ip netns del testns"); + SYS_NOFAIL("ip netns del testns"); test_xdp_do_redirect__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 8c5e98da9ae9..626c461fa34d 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -34,11 +34,6 @@ #define PREFIX_LEN "8" #define FAMILY AF_INET -#define SYS(cmd) ({ \ - if (!ASSERT_OK(system(cmd), (cmd))) \ - goto out; \ -}) - struct xsk { void *umem_area; struct xsk_umem *umem; @@ -300,16 +295,16 @@ void test_xdp_metadata(void) /* Setup new networking namespace, with a veth pair. */ - SYS("ip netns add xdp_metadata"); + SYS(out, "ip netns add xdp_metadata"); tok = open_netns("xdp_metadata"); - SYS("ip link add numtxqueues 1 numrxqueues 1 " TX_NAME + SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1"); - SYS("ip link set dev " TX_NAME " address 00:00:00:00:00:01"); - SYS("ip link set dev " RX_NAME " address 00:00:00:00:00:02"); - SYS("ip link set dev " TX_NAME " up"); - SYS("ip link set dev " RX_NAME " up"); - SYS("ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); - SYS("ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); + SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01"); + SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02"); + SYS(out, "ip link set dev " TX_NAME " up"); + SYS(out, "ip link set dev " RX_NAME " up"); + SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); + SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); rx_ifindex = if_nametoindex(RX_NAME); tx_ifindex = if_nametoindex(TX_NAME); @@ -407,5 +402,5 @@ out: xdp_metadata__destroy(bpf_obj); if (tok) close_netns(tok); - system("ip netns del xdp_metadata"); + SYS_NOFAIL("ip netns del xdp_metadata"); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c index c72083885b6d..8b50a992d233 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c @@ -8,11 +8,6 @@ #define CMD_OUT_BUF_SIZE 1023 -#define SYS(cmd) ({ \ - if (!ASSERT_OK(system(cmd), (cmd))) \ - goto out; \ -}) - #define SYS_OUT(cmd, ...) ({ \ char buf[1024]; \ snprintf(buf, sizeof(buf), (cmd), ##__VA_ARGS__); \ @@ -69,37 +64,37 @@ static void test_synproxy(bool xdp) char buf[CMD_OUT_BUF_SIZE]; size_t size; - SYS("ip netns add synproxy"); + SYS(out, "ip netns add synproxy"); - SYS("ip link add tmp0 type veth peer name tmp1"); - SYS("ip link set tmp1 netns synproxy"); - SYS("ip link set tmp0 up"); - SYS("ip addr replace 198.18.0.1/24 dev tmp0"); + SYS(out, "ip link add tmp0 type veth peer name tmp1"); + SYS(out, "ip link set tmp1 netns synproxy"); + SYS(out, "ip link set tmp0 up"); + SYS(out, "ip addr replace 198.18.0.1/24 dev tmp0"); /* When checksum offload is enabled, the XDP program sees wrong * checksums and drops packets. */ - SYS("ethtool -K tmp0 tx off"); + SYS(out, "ethtool -K tmp0 tx off"); if (xdp) /* Workaround required for veth. */ - SYS("ip link set tmp0 xdp object xdp_dummy.bpf.o section xdp 2> /dev/null"); + SYS(out, "ip link set tmp0 xdp object xdp_dummy.bpf.o section xdp 2> /dev/null"); ns = open_netns("synproxy"); if (!ASSERT_OK_PTR(ns, "setns")) goto out; - SYS("ip link set lo up"); - SYS("ip link set tmp1 up"); - SYS("ip addr replace 198.18.0.2/24 dev tmp1"); - SYS("sysctl -w net.ipv4.tcp_syncookies=2"); - SYS("sysctl -w net.ipv4.tcp_timestamps=1"); - SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0"); - SYS("iptables-legacy -t raw -I PREROUTING \ + SYS(out, "ip link set lo up"); + SYS(out, "ip link set tmp1 up"); + SYS(out, "ip addr replace 198.18.0.2/24 dev tmp1"); + SYS(out, "sysctl -w net.ipv4.tcp_syncookies=2"); + SYS(out, "sysctl -w net.ipv4.tcp_timestamps=1"); + SYS(out, "sysctl -w net.netfilter.nf_conntrack_tcp_loose=0"); + SYS(out, "iptables-legacy -t raw -I PREROUTING \ -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack"); - SYS("iptables-legacy -t filter -A INPUT \ + SYS(out, "iptables-legacy -t filter -A INPUT \ -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \ -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460"); - SYS("iptables-legacy -t filter -A INPUT \ + SYS(out, "iptables-legacy -t filter -A INPUT \ -i tmp1 -m state --state INVALID -j DROP"); ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \ @@ -170,8 +165,8 @@ out: if (ns) close_netns(ns); - system("ip link del tmp0"); - system("ip netns del synproxy"); + SYS_NOFAIL("ip link del tmp0"); + SYS_NOFAIL("ip netns del synproxy"); } void test_xdp_synproxy(void) diff --git a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c index 8b03c9bb4862..d37f5394e199 100644 --- a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c +++ b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c @@ -69,21 +69,6 @@ "proto esp aead 'rfc4106(gcm(aes))' " \ "0xe4d8f4b4da1df18a3510b3781496daa82488b713 128 mode tunnel " -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto fail; \ - }) - -#define SYS_NOFAIL(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - system(cmd); \ - }) - static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) { LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, .priority = 1, @@ -126,23 +111,23 @@ static void cleanup(void) static int config_underlay(void) { - SYS("ip netns add " NS0); - SYS("ip netns add " NS1); - SYS("ip netns add " NS2); + SYS(fail, "ip netns add " NS0); + SYS(fail, "ip netns add " NS1); + SYS(fail, "ip netns add " NS2); /* NS0 <-> NS1 [veth01 <-> veth10] */ - SYS("ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1); - SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01"); - SYS("ip -net " NS0 " link set dev veth01 up"); - SYS("ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10"); - SYS("ip -net " NS1 " link set dev veth10 up"); + SYS(fail, "ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1); + SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01"); + SYS(fail, "ip -net " NS0 " link set dev veth01 up"); + SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10"); + SYS(fail, "ip -net " NS1 " link set dev veth10 up"); /* NS0 <-> NS2 [veth02 <-> veth20] */ - SYS("ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2); - SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02"); - SYS("ip -net " NS0 " link set dev veth02 up"); - SYS("ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20"); - SYS("ip -net " NS2 " link set dev veth20 up"); + SYS(fail, "ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2); + SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02"); + SYS(fail, "ip -net " NS0 " link set dev veth02 up"); + SYS(fail, "ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20"); + SYS(fail, "ip -net " NS2 " link set dev veth20 up"); return 0; fail: @@ -153,20 +138,20 @@ static int setup_xfrm_tunnel_ns(const char *ns, const char *ipv4_local, const char *ipv4_remote, int if_id) { /* State: local -> remote */ - SYS("ip -net %s xfrm state add src %s dst %s spi 1 " + SYS(fail, "ip -net %s xfrm state add src %s dst %s spi 1 " ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_local, ipv4_remote, if_id); /* State: local <- remote */ - SYS("ip -net %s xfrm state add src %s dst %s spi 1 " + SYS(fail, "ip -net %s xfrm state add src %s dst %s spi 1 " ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_remote, ipv4_local, if_id); /* Policy: local -> remote */ - SYS("ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 " + SYS(fail, "ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 " "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns, if_id, ipv4_local, ipv4_remote, if_id); /* Policy: local <- remote */ - SYS("ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 " + SYS(fail, "ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 " "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns, if_id, ipv4_remote, ipv4_local, if_id); @@ -274,16 +259,16 @@ static int config_overlay(void) if (!ASSERT_OK(setup_xfrmi_external_dev(NS0), "xfrmi")) goto fail; - SYS("ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0"); - SYS("ip -net " NS0 " link set dev ipsec0 up"); + SYS(fail, "ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0"); + SYS(fail, "ip -net " NS0 " link set dev ipsec0 up"); - SYS("ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1); - SYS("ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0"); - SYS("ip -net " NS1 " link set dev ipsec0 up"); + SYS(fail, "ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1); + SYS(fail, "ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0"); + SYS(fail, "ip -net " NS1 " link set dev ipsec0 up"); - SYS("ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2); - SYS("ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0"); - SYS("ip -net " NS2 " link set dev ipsec0 up"); + SYS(fail, "ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2); + SYS(fail, "ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0"); + SYS(fail, "ip -net " NS2 " link set dev ipsec0 up"); return 0; fail: @@ -294,7 +279,7 @@ static int test_xfrm_ping(struct xfrm_info *skel, u32 if_id) { skel->bss->req_if_id = if_id; - SYS("ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null"); + SYS(fail, "ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null"); if (!ASSERT_EQ(skel->bss->resp_if_id, if_id, "if_id")) goto fail; diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index a20c5ed5e454..b04e092fac94 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -337,7 +337,7 @@ PROG(IPV6)(struct __sk_buff *skb) keys->ip_proto = ip6h->nexthdr; keys->flow_label = ip6_flowlabel(ip6h); - if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) + if (keys->flow_label && keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) return export_flow_keys(keys, BPF_OK); return parse_ipv6_proto(skb, ip6h->nexthdr); diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 14e28f991451..f704885aa534 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -2,10 +2,33 @@ #ifndef __BPF_MISC_H__ #define __BPF_MISC_H__ +/* This set of attributes controls behavior of the + * test_loader.c:test_loader__run_subtests(). + * + * __msg Message expected to be found in the verifier log. + * Multiple __msg attributes could be specified. + * + * __success Expect program load success in privileged mode. + * + * __failure Expect program load failure in privileged mode. + * + * __log_level Log level to use for the program, numeric value expected. + * + * __flag Adds one flag use for the program, the following values are valid: + * - BPF_F_STRICT_ALIGNMENT; + * - BPF_F_TEST_RND_HI32; + * - BPF_F_TEST_STATE_FREQ; + * - BPF_F_SLEEPABLE; + * - BPF_F_XDP_HAS_FRAGS; + * - A numeric value. + * Multiple __flag attributes could be specified, the final flags + * value is derived by applying binary "or" to all specified values. + */ #define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg))) #define __failure __attribute__((btf_decl_tag("comment:test_expect_failure"))) #define __success __attribute__((btf_decl_tag("comment:test_expect_success"))) #define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl))) +#define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag))) /* Convenience macro for use with 'asm volatile' blocks */ #define __naked __attribute__((naked)) diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index 7653df1bc787..ce96b33e38d6 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -4,7 +4,7 @@ #include <bpf/bpf_helpers.h> struct map_value { - struct prog_test_ref_kfunc __kptr_ref *ptr; + struct prog_test_ref_kfunc __kptr *ptr; }; struct { diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h index 7d30855bfe78..d0b7cd0d09d7 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h @@ -10,7 +10,7 @@ #include <bpf/bpf_tracing.h> struct __cgrps_kfunc_map_value { - struct cgroup __kptr_ref * cgrp; + struct cgroup __kptr * cgrp; }; struct hash_map { @@ -24,6 +24,7 @@ struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; struct cgroup *bpf_cgroup_kptr_get(struct cgroup **pp) __ksym; void bpf_cgroup_release(struct cgroup *p) __ksym; struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym; +struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym; static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp) { diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c index 4ad7fe24966d..b42291ed9586 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -205,7 +205,7 @@ int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path) } SEC("tp_btf/cgroup_mkdir") -__failure __msg("arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket") +__failure __msg("expects refcounted") int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path) { struct __cgrps_kfunc_map_value *v; diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c index 0c23ea32df9f..030aff700084 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c @@ -61,7 +61,7 @@ int BPF_PROG(test_cgrp_acquire_leave_in_map, struct cgroup *cgrp, const char *pa SEC("tp_btf/cgroup_mkdir") int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path) { - struct cgroup *kptr; + struct cgroup *kptr, *cg; struct __cgrps_kfunc_map_value *v; long status; @@ -80,6 +80,16 @@ int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path) return 0; } + kptr = v->cgrp; + if (!kptr) { + err = 4; + return 0; + } + + cg = bpf_cgroup_ancestor(kptr, 1); + if (cg) /* verifier only check */ + bpf_cgroup_release(cg); + kptr = bpf_kptr_xchg(&v->cgrp, NULL); if (!kptr) { err = 3; @@ -168,3 +178,45 @@ int BPF_PROG(test_cgrp_get_ancestors, struct cgroup *cgrp, const char *path) return 0; } + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_from_id, struct cgroup *cgrp, const char *path) +{ + struct cgroup *parent, *res; + u64 parent_cgid; + + if (!is_test_kfunc_task()) + return 0; + + /* @cgrp's ID is not visible yet, let's test with the parent */ + parent = bpf_cgroup_ancestor(cgrp, cgrp->level - 1); + if (!parent) { + err = 1; + return 0; + } + + parent_cgid = parent->kn->id; + bpf_cgroup_release(parent); + + res = bpf_cgroup_from_id(parent_cgid); + if (!res) { + err = 2; + return 0; + } + + bpf_cgroup_release(res); + + if (res != parent) { + err = 3; + return 0; + } + + res = bpf_cgroup_from_id((u64)-1); + if (res) { + bpf_cgroup_release(res); + err = 4; + return 0; + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c index 2d11ed528b6f..7615dc23d301 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -49,7 +49,7 @@ int no_rcu_lock(void *ctx) if (task->pid != target_pid) return 0; - /* ptr_to_btf_id semantics. should work. */ + /* task->cgroups is untrusted in sleepable prog outside of RCU CS */ cgrp = task->cgroups->dfl_cgrp; ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); @@ -71,7 +71,7 @@ int yes_rcu_lock(void *ctx) bpf_rcu_read_lock(); cgrp = task->cgroups->dfl_cgrp; - /* cgrp is untrusted and cannot pass to bpf_cgrp_storage_get() helper. */ + /* cgrp is trusted under RCU CS */ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) cgroup_id = cgrp->kn->id; diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index ad34f3b602be..65e5496ca1b2 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -10,7 +10,7 @@ int err; struct __cpumask_map_value { - struct bpf_cpumask __kptr_ref * cpumask; + struct bpf_cpumask __kptr * cpumask; }; struct array_map { diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index 33e8e86dd090..c16f7563b84e 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -44,7 +44,7 @@ int BPF_PROG(test_alloc_double_release, struct task_struct *task, u64 clone_flag } SEC("tp_btf/task_newtask") -__failure __msg("bpf_cpumask_acquire args#0 expected pointer to STRUCT bpf_cpumask") +__failure __msg("must be referenced") int BPF_PROG(test_acquire_wrong_cpumask, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *cpumask; diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index aa5b69354b91..20ce920d891d 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -5,7 +5,9 @@ #include <string.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include <linux/if_ether.h> #include "bpf_misc.h" +#include "bpf_kfuncs.h" char _license[] SEC("license") = "GPL"; @@ -244,6 +246,27 @@ done: return 0; } +/* A data slice can't be accessed out of bounds */ +SEC("?tc") +__failure __msg("value is outside of the allowed memory range") +int data_slice_out_of_bounds_skb(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + /* this should fail */ + *(__u8*)(hdr + 1) = 1; + + return SK_PASS; +} + SEC("?raw_tp") __failure __msg("value is outside of the allowed memory range") int data_slice_out_of_bounds_map_value(void *ctx) @@ -399,7 +422,6 @@ int invalid_helper2(void *ctx) /* this should fail */ bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0, 0); - return 0; } @@ -1044,6 +1066,193 @@ int dynptr_read_into_slot(void *ctx) return 0; } +/* bpf_dynptr_slice()s are read-only and cannot be written to */ +SEC("?tc") +__failure __msg("R0 cannot write into rdonly_mem") +int skb_invalid_slice_write(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + /* this should fail */ + hdr->h_proto = 1; + + return SK_PASS; +} + +/* The read-only data slice is invalidated whenever a helper changes packet data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_invalid_data_slice1(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + val = hdr->h_proto; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + val = hdr->h_proto; + + return SK_PASS; +} + +/* The read-write data slice is invalidated whenever a helper changes packet data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_invalid_data_slice2(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + hdr->h_proto = 123; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + hdr->h_proto = 1; + + return SK_PASS; +} + +/* The read-only data slice is invalidated whenever bpf_dynptr_write() is called */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_invalid_data_slice3(struct __sk_buff *skb) +{ + char write_data[64] = "hello there, world!!"; + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + val = hdr->h_proto; + + bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0); + + /* this should fail */ + val = hdr->h_proto; + + return SK_PASS; +} + +/* The read-write data slice is invalidated whenever bpf_dynptr_write() is called */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_invalid_data_slice4(struct __sk_buff *skb) +{ + char write_data[64] = "hello there, world!!"; + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + hdr->h_proto = 123; + + bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0); + + /* this should fail */ + hdr->h_proto = 1; + + return SK_PASS; +} + +/* The read-only data slice is invalidated whenever a helper changes packet data */ +SEC("?xdp") +__failure __msg("invalid mem access 'scalar'") +int xdp_invalid_data_slice1(struct xdp_md *xdp) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_xdp(xdp, 0, &ptr); + hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + val = hdr->h_proto; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr))) + return XDP_DROP; + + /* this should fail */ + val = hdr->h_proto; + + return XDP_PASS; +} + +/* The read-write data slice is invalidated whenever a helper changes packet data */ +SEC("?xdp") +__failure __msg("invalid mem access 'scalar'") +int xdp_invalid_data_slice2(struct xdp_md *xdp) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_xdp(xdp, 0, &ptr); + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + hdr->h_proto = 9; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr))) + return XDP_DROP; + + /* this should fail */ + hdr->h_proto = 1; + + return XDP_PASS; +} + +/* Only supported prog type can create skb-type dynptrs */ +SEC("?raw_tp") +__failure __msg("calling kernel function bpf_dynptr_from_skb is not allowed") +int skb_invalid_ctx(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_from_skb(ctx, 0, &ptr); + + return 0; +} + /* Reject writes to dynptr slot for uninit arg */ SEC("?raw_tp") __failure __msg("potential write to dynptr at off=-16") @@ -1061,6 +1270,61 @@ int uninit_write_into_slot(void *ctx) return 0; } +/* Only supported prog type can create xdp-type dynptrs */ +SEC("?raw_tp") +__failure __msg("calling kernel function bpf_dynptr_from_xdp is not allowed") +int xdp_invalid_ctx(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_from_xdp(ctx, 0, &ptr); + + return 0; +} + +__u32 hdr_size = sizeof(struct ethhdr); +/* Can't pass in variable-sized len to bpf_dynptr_slice */ +SEC("?tc") +__failure __msg("unbounded memory access") +int dynptr_slice_var_len1(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + /* this should fail */ + hdr = bpf_dynptr_slice(&ptr, 0, buffer, hdr_size); + if (!hdr) + return SK_DROP; + + return SK_PASS; +} + +/* Can't pass in variable-sized len to bpf_dynptr_slice */ +SEC("?tc") +__failure __msg("must be a known constant") +int dynptr_slice_var_len2(struct __sk_buff *skb) +{ + char buffer[sizeof(struct ethhdr)] = {}; + struct bpf_dynptr ptr; + struct ethhdr *hdr; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + if (hdr_size <= sizeof(buffer)) { + /* this should fail */ + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, hdr_size); + if (!hdr) + return SK_DROP; + hdr->h_proto = 12; + } + + return SK_PASS; +} + static int callback(__u32 index, void *data) { *(__u32 *)data = 123; @@ -1092,3 +1356,24 @@ int invalid_data_slices(void *ctx) return 0; } + +/* Program types that don't allow writes to packet data should fail if + * bpf_dynptr_slice_rdwr is called + */ +SEC("cgroup_skb/ingress") +__failure __msg("the prog does not allow writes to packet data") +int invalid_slice_rdwr_rdonly(struct __sk_buff *skb) +{ + char buffer[sizeof(struct ethhdr)] = {}; + struct bpf_dynptr ptr; + struct ethhdr *hdr; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + /* this should fail since cgroup_skb doesn't allow + * changing packet data + */ + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 35db7c6c1fc7..c8358a7c7924 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -5,6 +5,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#include "bpf_kfuncs.h" #include "errno.h" char _license[] SEC("license") = "GPL"; @@ -30,7 +31,7 @@ struct { __type(value, __u32); } array_map SEC(".maps"); -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int test_read_write(void *ctx) { char write_data[64] = "hello there, world!!"; @@ -61,8 +62,8 @@ int test_read_write(void *ctx) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") -int test_data_slice(void *ctx) +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_data(void *ctx) { __u32 key = 0, val = 235, *map_val; struct bpf_dynptr ptr; @@ -131,7 +132,7 @@ static int ringbuf_callback(__u32 index, void *data) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int test_ringbuf(void *ctx) { struct bpf_dynptr ptr; @@ -163,3 +164,49 @@ done: bpf_ringbuf_discard_dynptr(&ptr, 0); return 0; } + +SEC("?cgroup_skb/egress") +int test_skb_readonly(struct __sk_buff *skb) +{ + __u8 write_data[2] = {1, 2}; + struct bpf_dynptr ptr; + __u64 *data; + int ret; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) { + err = 1; + return 1; + } + + /* since cgroup skbs are read only, writes should fail */ + ret = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0); + if (ret != -EINVAL) { + err = 2; + return 1; + } + + return 1; +} + +SEC("?cgroup_skb/egress") +int test_dynptr_skb_data(struct __sk_buff *skb) +{ + __u8 write_data[2] = {1, 2}; + struct bpf_dynptr ptr; + __u64 *data; + int ret; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) { + err = 1; + return 1; + } + + /* This should return NULL. Must use bpf_dynptr_slice API */ + data = bpf_dynptr_data(&ptr, 0, 1); + if (data) { + err = 2; + return 1; + } + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c index 6dab9cffda13..7ba9a428f228 100644 --- a/tools/testing/selftests/bpf/progs/find_vma_fail1.c +++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c @@ -14,7 +14,7 @@ static long write_vma(struct task_struct *task, struct vm_area_struct *vma, struct callback_ctx *data) { /* writing to vma, which is illegal */ - vma->vm_flags |= 0x55; + vma->vm_start = 0xffffffffff600000; return 0; } diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c index 2d2e61470794..13f00ca2ed0a 100644 --- a/tools/testing/selftests/bpf/progs/jit_probe_mem.c +++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c @@ -4,7 +4,7 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> -static struct prog_test_ref_kfunc __kptr_ref *v; +static struct prog_test_ref_kfunc __kptr *v; long total_sum = -1; extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; diff --git a/tools/testing/selftests/bpf/progs/lru_bug.c b/tools/testing/selftests/bpf/progs/lru_bug.c index 687081a724b3..ad73029cb1e3 100644 --- a/tools/testing/selftests/bpf/progs/lru_bug.c +++ b/tools/testing/selftests/bpf/progs/lru_bug.c @@ -4,7 +4,7 @@ #include <bpf/bpf_helpers.h> struct map_value { - struct task_struct __kptr *ptr; + struct task_struct __kptr_untrusted *ptr; }; struct { diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index 228ec45365a8..3903d30217b8 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -4,8 +4,8 @@ #include <bpf/bpf_helpers.h> struct map_value { - struct prog_test_ref_kfunc __kptr *unref_ptr; - struct prog_test_ref_kfunc __kptr_ref *ref_ptr; + struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr; + struct prog_test_ref_kfunc __kptr *ref_ptr; }; struct array_map { @@ -15,6 +15,13 @@ struct array_map { __uint(max_entries, 1); } array_map SEC(".maps"); +struct pcpu_array_map { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} pcpu_array_map SEC(".maps"); + struct hash_map { __uint(type, BPF_MAP_TYPE_HASH); __type(key, int); @@ -22,6 +29,13 @@ struct hash_map { __uint(max_entries, 1); } hash_map SEC(".maps"); +struct pcpu_hash_map { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} pcpu_hash_map SEC(".maps"); + struct hash_malloc_map { __uint(type, BPF_MAP_TYPE_HASH); __type(key, int); @@ -30,6 +44,14 @@ struct hash_malloc_map { __uint(map_flags, BPF_F_NO_PREALLOC); } hash_malloc_map SEC(".maps"); +struct pcpu_hash_malloc_map { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); + __uint(map_flags, BPF_F_NO_PREALLOC); +} pcpu_hash_malloc_map SEC(".maps"); + struct lru_hash_map { __uint(type, BPF_MAP_TYPE_LRU_HASH); __type(key, int); @@ -37,6 +59,41 @@ struct lru_hash_map { __uint(max_entries, 1); } lru_hash_map SEC(".maps"); +struct lru_pcpu_hash_map { + __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} lru_pcpu_hash_map SEC(".maps"); + +struct cgrp_ls_map { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} cgrp_ls_map SEC(".maps"); + +struct task_ls_map { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} task_ls_map SEC(".maps"); + +struct inode_ls_map { + __uint(type, BPF_MAP_TYPE_INODE_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} inode_ls_map SEC(".maps"); + +struct sk_ls_map { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} sk_ls_map SEC(".maps"); + #define DEFINE_MAP_OF_MAP(map_type, inner_map_type, name) \ struct { \ __uint(type, map_type); \ @@ -61,6 +118,7 @@ extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp extern struct prog_test_ref_kfunc * bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym; extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; +void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym; #define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val)) @@ -90,12 +148,23 @@ static void test_kptr_ref(struct map_value *v) WRITE_ONCE(v->unref_ptr, p); if (!p) return; + /* + * p is rcu_ptr_prog_test_ref_kfunc, + * because bpf prog is non-sleepable and runs in RCU CS. + * p can be passed to kfunc that requires KF_RCU. + */ + bpf_kfunc_call_test_ref(p); if (p->a + p->b > 100) return; /* store NULL */ p = bpf_kptr_xchg(&v->ref_ptr, NULL); if (!p) return; + /* + * p is trusted_ptr_prog_test_ref_kfunc. + * p can be passed to kfunc that requires KF_RCU. + */ + bpf_kfunc_call_test_ref(p); if (p->a + p->b > 100) { bpf_kfunc_call_test_release(p); return; @@ -160,6 +229,58 @@ int test_map_kptr(struct __sk_buff *ctx) return 0; } +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_map_kptr, struct cgroup *cgrp, const char *path) +{ + struct map_value *v; + + v = bpf_cgrp_storage_get(&cgrp_ls_map, cgrp, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + +SEC("lsm/inode_unlink") +int BPF_PROG(test_task_map_kptr, struct inode *inode, struct dentry *victim) +{ + struct task_struct *task; + struct map_value *v; + + task = bpf_get_current_task_btf(); + if (!task) + return 0; + v = bpf_task_storage_get(&task_ls_map, task, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + +SEC("lsm/inode_unlink") +int BPF_PROG(test_inode_map_kptr, struct inode *inode, struct dentry *victim) +{ + struct map_value *v; + + v = bpf_inode_storage_get(&inode_ls_map, inode, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + +SEC("tc") +int test_sk_map_kptr(struct __sk_buff *ctx) +{ + struct map_value *v; + struct bpf_sock *sk; + + sk = ctx->sk; + if (!sk) + return 0; + v = bpf_sk_storage_get(&sk_ls_map, sk, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + SEC("tc") int test_map_in_map_kptr(struct __sk_buff *ctx) { @@ -189,106 +310,257 @@ int test_map_in_map_kptr(struct __sk_buff *ctx) return 0; } -SEC("tc") -int test_map_kptr_ref(struct __sk_buff *ctx) +int ref = 1; + +static __always_inline +int test_map_kptr_ref_pre(struct map_value *v) { struct prog_test_ref_kfunc *p, *p_st; unsigned long arg = 0; - struct map_value *v; - int key = 0, ret; + int ret; p = bpf_kfunc_call_test_acquire(&arg); if (!p) return 1; + ref++; p_st = p->next; - if (p_st->cnt.refs.counter != 2) { + if (p_st->cnt.refs.counter != ref) { ret = 2; goto end; } - v = bpf_map_lookup_elem(&array_map, &key); - if (!v) { - ret = 3; - goto end; - } - p = bpf_kptr_xchg(&v->ref_ptr, p); if (p) { - ret = 4; + ret = 3; goto end; } - if (p_st->cnt.refs.counter != 2) - return 5; + if (p_st->cnt.refs.counter != ref) + return 4; p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0); if (!p) - return 6; - if (p_st->cnt.refs.counter != 3) { - ret = 7; + return 5; + ref++; + if (p_st->cnt.refs.counter != ref) { + ret = 6; goto end; } bpf_kfunc_call_test_release(p); - if (p_st->cnt.refs.counter != 2) - return 8; + ref--; + if (p_st->cnt.refs.counter != ref) + return 7; p = bpf_kptr_xchg(&v->ref_ptr, NULL); if (!p) - return 9; + return 8; bpf_kfunc_call_test_release(p); - if (p_st->cnt.refs.counter != 1) - return 10; + ref--; + if (p_st->cnt.refs.counter != ref) + return 9; p = bpf_kfunc_call_test_acquire(&arg); if (!p) - return 11; + return 10; + ref++; p = bpf_kptr_xchg(&v->ref_ptr, p); if (p) { - ret = 12; + ret = 11; goto end; } - if (p_st->cnt.refs.counter != 2) - return 13; + if (p_st->cnt.refs.counter != ref) + return 12; /* Leave in map */ return 0; end: + ref--; bpf_kfunc_call_test_release(p); return ret; } -SEC("tc") -int test_map_kptr_ref2(struct __sk_buff *ctx) +static __always_inline +int test_map_kptr_ref_post(struct map_value *v) { struct prog_test_ref_kfunc *p, *p_st; - struct map_value *v; - int key = 0; - - v = bpf_map_lookup_elem(&array_map, &key); - if (!v) - return 1; p_st = v->ref_ptr; - if (!p_st || p_st->cnt.refs.counter != 2) - return 2; + if (!p_st || p_st->cnt.refs.counter != ref) + return 1; p = bpf_kptr_xchg(&v->ref_ptr, NULL); if (!p) - return 3; - if (p_st->cnt.refs.counter != 2) { + return 2; + if (p_st->cnt.refs.counter != ref) { bpf_kfunc_call_test_release(p); - return 4; + return 3; } p = bpf_kptr_xchg(&v->ref_ptr, p); if (p) { bpf_kfunc_call_test_release(p); - return 5; + return 4; } - if (p_st->cnt.refs.counter != 2) - return 6; + if (p_st->cnt.refs.counter != ref) + return 5; + + return 0; +} + +#define TEST(map) \ + v = bpf_map_lookup_elem(&map, &key); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_pre(v); \ + if (ret) \ + return ret; + +#define TEST_PCPU(map) \ + v = bpf_map_lookup_percpu_elem(&map, &key, 0); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_pre(v); \ + if (ret) \ + return ret; + +SEC("tc") +int test_map_kptr_ref1(struct __sk_buff *ctx) +{ + struct map_value *v, val = {}; + int key = 0, ret; + + bpf_map_update_elem(&hash_map, &key, &val, 0); + bpf_map_update_elem(&hash_malloc_map, &key, &val, 0); + bpf_map_update_elem(&lru_hash_map, &key, &val, 0); + + bpf_map_update_elem(&pcpu_hash_map, &key, &val, 0); + bpf_map_update_elem(&pcpu_hash_malloc_map, &key, &val, 0); + bpf_map_update_elem(&lru_pcpu_hash_map, &key, &val, 0); + + TEST(array_map); + TEST(hash_map); + TEST(hash_malloc_map); + TEST(lru_hash_map); + + TEST_PCPU(pcpu_array_map); + TEST_PCPU(pcpu_hash_map); + TEST_PCPU(pcpu_hash_malloc_map); + TEST_PCPU(lru_pcpu_hash_map); + + return 0; +} + +#undef TEST +#undef TEST_PCPU + +#define TEST(map) \ + v = bpf_map_lookup_elem(&map, &key); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_post(v); \ + if (ret) \ + return ret; + +#define TEST_PCPU(map) \ + v = bpf_map_lookup_percpu_elem(&map, &key, 0); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_post(v); \ + if (ret) \ + return ret; + +SEC("tc") +int test_map_kptr_ref2(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0, ret; + + TEST(array_map); + TEST(hash_map); + TEST(hash_malloc_map); + TEST(lru_hash_map); + + TEST_PCPU(pcpu_array_map); + TEST_PCPU(pcpu_hash_map); + TEST_PCPU(pcpu_hash_malloc_map); + TEST_PCPU(lru_pcpu_hash_map); return 0; } +#undef TEST +#undef TEST_PCPU + +SEC("tc") +int test_map_kptr_ref3(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p; + unsigned long sp = 0; + + p = bpf_kfunc_call_test_acquire(&sp); + if (!p) + return 1; + ref++; + if (p->cnt.refs.counter != ref) { + bpf_kfunc_call_test_release(p); + return 2; + } + bpf_kfunc_call_test_release(p); + ref--; + return 0; +} + +SEC("syscall") +int test_ls_map_kptr_ref1(void *ctx) +{ + struct task_struct *current; + struct map_value *v; + int ret; + + current = bpf_get_current_task_btf(); + if (!current) + return 100; + v = bpf_task_storage_get(&task_ls_map, current, NULL, 0); + if (v) + return 150; + v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!v) + return 200; + return test_map_kptr_ref_pre(v); +} + +SEC("syscall") +int test_ls_map_kptr_ref2(void *ctx) +{ + struct task_struct *current; + struct map_value *v; + int ret; + + current = bpf_get_current_task_btf(); + if (!current) + return 100; + v = bpf_task_storage_get(&task_ls_map, current, NULL, 0); + if (!v) + return 200; + return test_map_kptr_ref_post(v); +} + +SEC("syscall") +int test_ls_map_kptr_ref_del(void *ctx) +{ + struct task_struct *current; + struct map_value *v; + int ret; + + current = bpf_get_current_task_btf(); + if (!current) + return 100; + v = bpf_task_storage_get(&task_ls_map, current, NULL, 0); + if (!v) + return 200; + if (!v->ref_ptr) + return 300; + return bpf_task_storage_delete(&task_ls_map, current); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index 760e41e1a632..08f9ec18c345 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -7,9 +7,9 @@ struct map_value { char buf[8]; - struct prog_test_ref_kfunc __kptr *unref_ptr; - struct prog_test_ref_kfunc __kptr_ref *ref_ptr; - struct prog_test_member __kptr_ref *ref_memb_ptr; + struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr; + struct prog_test_ref_kfunc __kptr *ref_ptr; + struct prog_test_member __kptr *ref_memb_ptr; }; struct array_map { @@ -281,7 +281,7 @@ int reject_kptr_get_bad_type_match(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_") +__failure __msg("R1 type=rcu_ptr_or_null_ expected=percpu_ptr_") int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx) { struct map_value *v; @@ -316,7 +316,7 @@ int reject_untrusted_store_to_ref(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("R2 type=untrusted_ptr_ expected=ptr_") +__failure __msg("R2 must be referenced") int reject_untrusted_xchg(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; diff --git a/tools/testing/selftests/bpf/progs/nested_trust_failure.c b/tools/testing/selftests/bpf/progs/nested_trust_failure.c index 14aff7676436..0d1aa6bbace4 100644 --- a/tools/testing/selftests/bpf/progs/nested_trust_failure.c +++ b/tools/testing/selftests/bpf/progs/nested_trust_failure.c @@ -17,7 +17,7 @@ char _license[] SEC("license") = "GPL"; */ SEC("tp_btf/task_newtask") -__failure __msg("R2 must be referenced or trusted") +__failure __msg("R2 must be") int BPF_PROG(test_invalid_nested_user_cpus, struct task_struct *task, u64 clone_flags) { bpf_cpumask_test_cpu(0, task->user_cpus_ptr); diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c index e5db1a4287e5..4c90aa6abddd 100644 --- a/tools/testing/selftests/bpf/progs/rbtree.c +++ b/tools/testing/selftests/bpf/progs/rbtree.c @@ -75,7 +75,7 @@ SEC("tc") long rbtree_add_and_remove(void *ctx) { struct bpf_rb_node *res = NULL; - struct node_data *n, *m; + struct node_data *n, *m = NULL; n = bpf_obj_new(typeof(*n)); if (!n) diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c index bf3cba115897..1ced900f3fce 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_fail.c +++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c @@ -232,8 +232,11 @@ long rbtree_api_first_release_unlock_escape(void *ctx) bpf_spin_lock(&glock); res = bpf_rbtree_first(&groot); - if (res) - n = container_of(res, struct node_data, node); + if (!res) { + bpf_spin_unlock(&glock); + return 1; + } + n = container_of(res, struct node_data, node); bpf_spin_unlock(&glock); bpf_spin_lock(&glock); diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 5cecbdbbb16e..7250bb76d18a 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -81,7 +81,7 @@ int no_lock(void *ctx) { struct task_struct *task, *real_parent; - /* no bpf_rcu_read_lock(), old code still works */ + /* old style ptr_to_btf_id is not allowed in sleepable */ task = bpf_get_current_task_btf(); real_parent = task->real_parent; (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); @@ -286,13 +286,13 @@ out: } SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") -int task_untrusted_non_rcuptr(void *ctx) +int task_trusted_non_rcuptr(void *ctx) { struct task_struct *task, *group_leader; task = bpf_get_current_task_btf(); bpf_rcu_read_lock(); - /* the pointer group_leader marked as untrusted */ + /* the pointer group_leader is explicitly marked as trusted */ group_leader = task->real_parent->group_leader; (void)bpf_task_storage_get(&map_a, group_leader, 0, 0); bpf_rcu_read_unlock(); diff --git a/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c new file mode 100644 index 000000000000..df4873558634 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +struct task_ls_map { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} task_ls_map SEC(".maps"); + +long gp_seq; + +SEC("syscall") +int do_call_rcu_tasks_trace(void *ctx) +{ + struct task_struct *current; + int *v; + + current = bpf_get_current_task_btf(); + v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!v) + return 1; + /* Invoke call_rcu_tasks_trace */ + return bpf_task_storage_delete(&task_ls_map, current); +} + +SEC("kprobe/rcu_tasks_trace_postgp") +int rcu_tasks_trace_postgp(void *ctx) +{ + __sync_add_and_fetch(&gp_seq, 1); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h index c0ffd171743e..4c2a4b0e3a25 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h @@ -10,7 +10,7 @@ #include <bpf/bpf_tracing.h> struct __tasks_kfunc_map_value { - struct task_struct __kptr_ref * task; + struct task_struct __kptr * task; }; struct hash_map { diff --git a/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c b/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c new file mode 100644 index 000000000000..f548b7446218 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" + +int kprobe_res = 0; + +/** + * This program will be manually made sleepable on the userspace side + * and should thus be unattachable. + */ +SEC("kprobe/" SYS_PREFIX "sys_nanosleep") +int handle_kprobe_sleepable(struct pt_regs *ctx) +{ + kprobe_res = 1; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index 3b5dc34d23e9..68466a6ad18c 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -7,12 +7,8 @@ #include <bpf/bpf_core_read.h> #include "bpf_misc.h" -int kprobe_res = 0; int kprobe2_res = 0; -int kretprobe_res = 0; int kretprobe2_res = 0; -int uprobe_res = 0; -int uretprobe_res = 0; int uprobe_byname_res = 0; int uretprobe_byname_res = 0; int uprobe_byname2_res = 0; @@ -23,13 +19,6 @@ int uretprobe_byname3_sleepable_res = 0; int uretprobe_byname3_res = 0; void *user_ptr = 0; -SEC("kprobe") -int handle_kprobe(struct pt_regs *ctx) -{ - kprobe_res = 1; - return 0; -} - SEC("ksyscall/nanosleep") int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __kernel_timespec *rem) { @@ -37,24 +26,6 @@ int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __ker return 0; } -/** - * This program will be manually made sleepable on the userspace side - * and should thus be unattachable. - */ -SEC("kprobe/" SYS_PREFIX "sys_nanosleep") -int handle_kprobe_sleepable(struct pt_regs *ctx) -{ - kprobe_res = 2; - return 0; -} - -SEC("kretprobe") -int handle_kretprobe(struct pt_regs *ctx) -{ - kretprobe_res = 2; - return 0; -} - SEC("kretsyscall/nanosleep") int BPF_KRETPROBE(handle_kretprobe_auto, int ret) { @@ -63,16 +34,14 @@ int BPF_KRETPROBE(handle_kretprobe_auto, int ret) } SEC("uprobe") -int handle_uprobe(struct pt_regs *ctx) +int handle_uprobe_ref_ctr(struct pt_regs *ctx) { - uprobe_res = 3; return 0; } SEC("uretprobe") -int handle_uretprobe(struct pt_regs *ctx) +int handle_uretprobe_ref_ctr(struct pt_regs *ctx) { - uretprobe_res = 4; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c b/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c new file mode 100644 index 000000000000..7f08bce94596 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" + +int kprobe_res = 0; +int kretprobe_res = 0; +int uprobe_res = 0; +int uretprobe_res = 0; +int uprobe_byname_res = 0; +void *user_ptr = 0; + +SEC("kprobe") +int handle_kprobe(struct pt_regs *ctx) +{ + kprobe_res = 1; + return 0; +} + +SEC("kretprobe") +int handle_kretprobe(struct pt_regs *ctx) +{ + kretprobe_res = 2; + return 0; +} + +SEC("uprobe") +int handle_uprobe(struct pt_regs *ctx) +{ + uprobe_res = 3; + return 0; +} + +SEC("uretprobe") +int handle_uretprobe(struct pt_regs *ctx) +{ + uretprobe_res = 4; + return 0; +} + +SEC("uprobe") +int handle_uprobe_byname(struct pt_regs *ctx) +{ + uprobe_byname_res = 5; + return 0; +} + + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c new file mode 100644 index 000000000000..f45a7095de7a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c @@ -0,0 +1,980 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2019, 2020 Cloudflare + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include <linux/bpf.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/pkt_cls.h> +#include <linux/tcp.h> +#include <linux/udp.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#include "test_cls_redirect.h" +#include "bpf_kfuncs.h" + +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) + +#define IP_OFFSET_MASK (0x1FFF) +#define IP_MF (0x2000) + +char _license[] SEC("license") = "Dual BSD/GPL"; + +/** + * Destination port and IP used for UDP encapsulation. + */ +volatile const __be16 ENCAPSULATION_PORT; +volatile const __be32 ENCAPSULATION_IP; + +typedef struct { + uint64_t processed_packets_total; + uint64_t l3_protocol_packets_total_ipv4; + uint64_t l3_protocol_packets_total_ipv6; + uint64_t l4_protocol_packets_total_tcp; + uint64_t l4_protocol_packets_total_udp; + uint64_t accepted_packets_total_syn; + uint64_t accepted_packets_total_syn_cookies; + uint64_t accepted_packets_total_last_hop; + uint64_t accepted_packets_total_icmp_echo_request; + uint64_t accepted_packets_total_established; + uint64_t forwarded_packets_total_gue; + uint64_t forwarded_packets_total_gre; + + uint64_t errors_total_unknown_l3_proto; + uint64_t errors_total_unknown_l4_proto; + uint64_t errors_total_malformed_ip; + uint64_t errors_total_fragmented_ip; + uint64_t errors_total_malformed_icmp; + uint64_t errors_total_unwanted_icmp; + uint64_t errors_total_malformed_icmp_pkt_too_big; + uint64_t errors_total_malformed_tcp; + uint64_t errors_total_malformed_udp; + uint64_t errors_total_icmp_echo_replies; + uint64_t errors_total_malformed_encapsulation; + uint64_t errors_total_encap_adjust_failed; + uint64_t errors_total_encap_buffer_too_small; + uint64_t errors_total_redirect_loop; + uint64_t errors_total_encap_mtu_violate; +} metrics_t; + +typedef enum { + INVALID = 0, + UNKNOWN, + ECHO_REQUEST, + SYN, + SYN_COOKIE, + ESTABLISHED, +} verdict_t; + +typedef struct { + uint16_t src, dst; +} flow_ports_t; + +_Static_assert( + sizeof(flow_ports_t) != + offsetofend(struct bpf_sock_tuple, ipv4.dport) - + offsetof(struct bpf_sock_tuple, ipv4.sport) - 1, + "flow_ports_t must match sport and dport in struct bpf_sock_tuple"); +_Static_assert( + sizeof(flow_ports_t) != + offsetofend(struct bpf_sock_tuple, ipv6.dport) - + offsetof(struct bpf_sock_tuple, ipv6.sport) - 1, + "flow_ports_t must match sport and dport in struct bpf_sock_tuple"); + +struct iphdr_info { + void *hdr; + __u64 len; +}; + +typedef int ret_t; + +/* This is a bit of a hack. We need a return value which allows us to + * indicate that the regular flow of the program should continue, + * while allowing functions to use XDP_PASS and XDP_DROP, etc. + */ +static const ret_t CONTINUE_PROCESSING = -1; + +/* Convenience macro to call functions which return ret_t. + */ +#define MAYBE_RETURN(x) \ + do { \ + ret_t __ret = x; \ + if (__ret != CONTINUE_PROCESSING) \ + return __ret; \ + } while (0) + +static bool ipv4_is_fragment(const struct iphdr *ip) +{ + uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK); + return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0; +} + +static int pkt_parse_ipv4(struct bpf_dynptr *dynptr, __u64 *offset, struct iphdr *iphdr) +{ + if (bpf_dynptr_read(iphdr, sizeof(*iphdr), dynptr, *offset, 0)) + return -1; + + *offset += sizeof(*iphdr); + + if (iphdr->ihl < 5) + return -1; + + /* skip ipv4 options */ + *offset += (iphdr->ihl - 5) * 4; + + return 0; +} + +/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */ +static bool pkt_parse_icmp_l4_ports(struct bpf_dynptr *dynptr, __u64 *offset, flow_ports_t *ports) +{ + if (bpf_dynptr_read(ports, sizeof(*ports), dynptr, *offset, 0)) + return false; + + *offset += sizeof(*ports); + + /* Ports in the L4 headers are reversed, since we are parsing an ICMP + * payload which is going towards the eyeball. + */ + uint16_t dst = ports->src; + ports->src = ports->dst; + ports->dst = dst; + return true; +} + +static uint16_t pkt_checksum_fold(uint32_t csum) +{ + /* The highest reasonable value for an IPv4 header + * checksum requires two folds, so we just do that always. + */ + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + return (uint16_t)~csum; +} + +static void pkt_ipv4_checksum(struct iphdr *iph) +{ + iph->check = 0; + + /* An IP header without options is 20 bytes. Two of those + * are the checksum, which we always set to zero. Hence, + * the maximum accumulated value is 18 / 2 * 0xffff = 0x8fff7, + * which fits in 32 bit. + */ + _Static_assert(sizeof(struct iphdr) == 20, "iphdr must be 20 bytes"); + uint32_t acc = 0; + uint16_t *ipw = (uint16_t *)iph; + + for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) + acc += ipw[i]; + + iph->check = pkt_checksum_fold(acc); +} + +static bool pkt_skip_ipv6_extension_headers(struct bpf_dynptr *dynptr, __u64 *offset, + const struct ipv6hdr *ipv6, uint8_t *upper_proto, + bool *is_fragment) +{ + /* We understand five extension headers. + * https://tools.ietf.org/html/rfc8200#section-4.1 states that all + * headers should occur once, except Destination Options, which may + * occur twice. Hence we give up after 6 headers. + */ + struct { + uint8_t next; + uint8_t len; + } exthdr = { + .next = ipv6->nexthdr, + }; + *is_fragment = false; + + for (int i = 0; i < 6; i++) { + switch (exthdr.next) { + case IPPROTO_FRAGMENT: + *is_fragment = true; + /* NB: We don't check that hdrlen == 0 as per spec. */ + /* fallthrough; */ + + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + case IPPROTO_MH: + if (bpf_dynptr_read(&exthdr, sizeof(exthdr), dynptr, *offset, 0)) + return false; + + /* hdrlen is in 8-octet units, and excludes the first 8 octets. */ + *offset += (exthdr.len + 1) * 8; + + /* Decode next header */ + break; + + default: + /* The next header is not one of the known extension + * headers, treat it as the upper layer header. + * + * This handles IPPROTO_NONE. + * + * Encapsulating Security Payload (50) and Authentication + * Header (51) also end up here (and will trigger an + * unknown proto error later). They have a custom header + * format and seem too esoteric to care about. + */ + *upper_proto = exthdr.next; + return true; + } + } + + /* We never found an upper layer header. */ + return false; +} + +static int pkt_parse_ipv6(struct bpf_dynptr *dynptr, __u64 *offset, struct ipv6hdr *ipv6, + uint8_t *proto, bool *is_fragment) +{ + if (bpf_dynptr_read(ipv6, sizeof(*ipv6), dynptr, *offset, 0)) + return -1; + + *offset += sizeof(*ipv6); + + if (!pkt_skip_ipv6_extension_headers(dynptr, offset, ipv6, proto, is_fragment)) + return -1; + + return 0; +} + +/* Global metrics, per CPU + */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, unsigned int); + __type(value, metrics_t); +} metrics_map SEC(".maps"); + +static metrics_t *get_global_metrics(void) +{ + uint64_t key = 0; + return bpf_map_lookup_elem(&metrics_map, &key); +} + +static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap) +{ + const int payload_off = + sizeof(*encap) + + sizeof(struct in_addr) * encap->unigue.hop_count; + int32_t encap_overhead = payload_off - sizeof(struct ethhdr); + + /* Changing the ethertype if the encapsulated packet is ipv6 */ + if (encap->gue.proto_ctype == IPPROTO_IPV6) + encap->eth.h_proto = bpf_htons(ETH_P_IPV6); + + if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC, + BPF_F_ADJ_ROOM_FIXED_GSO | + BPF_F_ADJ_ROOM_NO_CSUM_RESET) || + bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC)) + return TC_ACT_SHOT; + + return bpf_redirect(skb->ifindex, BPF_F_INGRESS); +} + +static ret_t forward_with_gre(struct __sk_buff *skb, struct bpf_dynptr *dynptr, + encap_headers_t *encap, struct in_addr *next_hop, + metrics_t *metrics) +{ + const int payload_off = + sizeof(*encap) + + sizeof(struct in_addr) * encap->unigue.hop_count; + int32_t encap_overhead = + payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr); + int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead; + __u8 encap_buffer[sizeof(encap_gre_t)] = {}; + uint16_t proto = ETH_P_IP; + uint32_t mtu_len = 0; + encap_gre_t *encap_gre; + + metrics->forwarded_packets_total_gre++; + + /* Loop protection: the inner packet's TTL is decremented as a safeguard + * against any forwarding loop. As the only interesting field is the TTL + * hop limit for IPv6, it is easier to use bpf_skb_load_bytes/bpf_skb_store_bytes + * as they handle the split packets if needed (no need for the data to be + * in the linear section). + */ + if (encap->gue.proto_ctype == IPPROTO_IPV6) { + proto = ETH_P_IPV6; + uint8_t ttl; + int rc; + + rc = bpf_skb_load_bytes( + skb, payload_off + offsetof(struct ipv6hdr, hop_limit), + &ttl, 1); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (ttl == 0) { + metrics->errors_total_redirect_loop++; + return TC_ACT_SHOT; + } + + ttl--; + rc = bpf_skb_store_bytes( + skb, payload_off + offsetof(struct ipv6hdr, hop_limit), + &ttl, 1, 0); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + } else { + uint8_t ttl; + int rc; + + rc = bpf_skb_load_bytes( + skb, payload_off + offsetof(struct iphdr, ttl), &ttl, + 1); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (ttl == 0) { + metrics->errors_total_redirect_loop++; + return TC_ACT_SHOT; + } + + /* IPv4 also has a checksum to patch. While the TTL is only one byte, + * this function only works for 2 and 4 bytes arguments (the result is + * the same). + */ + rc = bpf_l3_csum_replace( + skb, payload_off + offsetof(struct iphdr, check), ttl, + ttl - 1, 2); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + ttl--; + rc = bpf_skb_store_bytes( + skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 1, + 0); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + } + + if (bpf_check_mtu(skb, skb->ifindex, &mtu_len, delta, 0)) { + metrics->errors_total_encap_mtu_violate++; + return TC_ACT_SHOT; + } + + if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET, + BPF_F_ADJ_ROOM_FIXED_GSO | + BPF_F_ADJ_ROOM_NO_CSUM_RESET) || + bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) { + metrics->errors_total_encap_adjust_failed++; + return TC_ACT_SHOT; + } + + if (bpf_skb_pull_data(skb, sizeof(encap_gre_t))) { + metrics->errors_total_encap_buffer_too_small++; + return TC_ACT_SHOT; + } + + encap_gre = bpf_dynptr_slice_rdwr(dynptr, 0, encap_buffer, sizeof(encap_buffer)); + if (!encap_gre) { + metrics->errors_total_encap_buffer_too_small++; + return TC_ACT_SHOT; + } + + encap_gre->ip.protocol = IPPROTO_GRE; + encap_gre->ip.daddr = next_hop->s_addr; + encap_gre->ip.saddr = ENCAPSULATION_IP; + encap_gre->ip.tot_len = + bpf_htons(bpf_ntohs(encap_gre->ip.tot_len) + delta); + encap_gre->gre.flags = 0; + encap_gre->gre.protocol = bpf_htons(proto); + pkt_ipv4_checksum((void *)&encap_gre->ip); + + if (encap_gre == encap_buffer) + bpf_dynptr_write(dynptr, 0, encap_buffer, sizeof(encap_buffer), 0); + + return bpf_redirect(skb->ifindex, 0); +} + +static ret_t forward_to_next_hop(struct __sk_buff *skb, struct bpf_dynptr *dynptr, + encap_headers_t *encap, struct in_addr *next_hop, + metrics_t *metrics) +{ + /* swap L2 addresses */ + /* This assumes that packets are received from a router. + * So just swapping the MAC addresses here will make the packet go back to + * the router, which will send it to the appropriate machine. + */ + unsigned char temp[ETH_ALEN]; + memcpy(temp, encap->eth.h_dest, sizeof(temp)); + memcpy(encap->eth.h_dest, encap->eth.h_source, + sizeof(encap->eth.h_dest)); + memcpy(encap->eth.h_source, temp, sizeof(encap->eth.h_source)); + + if (encap->unigue.next_hop == encap->unigue.hop_count - 1 && + encap->unigue.last_hop_gre) { + return forward_with_gre(skb, dynptr, encap, next_hop, metrics); + } + + metrics->forwarded_packets_total_gue++; + uint32_t old_saddr = encap->ip.saddr; + encap->ip.saddr = encap->ip.daddr; + encap->ip.daddr = next_hop->s_addr; + if (encap->unigue.next_hop < encap->unigue.hop_count) { + encap->unigue.next_hop++; + } + + /* Remove ip->saddr, add next_hop->s_addr */ + const uint64_t off = offsetof(typeof(*encap), ip.check); + int ret = bpf_l3_csum_replace(skb, off, old_saddr, next_hop->s_addr, 4); + if (ret < 0) { + return TC_ACT_SHOT; + } + + return bpf_redirect(skb->ifindex, 0); +} + +static ret_t skip_next_hops(__u64 *offset, int n) +{ + __u32 res; + switch (n) { + case 1: + *offset += sizeof(struct in_addr); + case 0: + return CONTINUE_PROCESSING; + + default: + return TC_ACT_SHOT; + } +} + +/* Get the next hop from the GLB header. + * + * Sets next_hop->s_addr to 0 if there are no more hops left. + * pkt is positioned just after the variable length GLB header + * iff the call is successful. + */ +static ret_t get_next_hop(struct bpf_dynptr *dynptr, __u64 *offset, encap_headers_t *encap, + struct in_addr *next_hop) +{ + if (encap->unigue.next_hop > encap->unigue.hop_count) + return TC_ACT_SHOT; + + /* Skip "used" next hops. */ + MAYBE_RETURN(skip_next_hops(offset, encap->unigue.next_hop)); + + if (encap->unigue.next_hop == encap->unigue.hop_count) { + /* No more next hops, we are at the end of the GLB header. */ + next_hop->s_addr = 0; + return CONTINUE_PROCESSING; + } + + if (bpf_dynptr_read(next_hop, sizeof(*next_hop), dynptr, *offset, 0)) + return TC_ACT_SHOT; + + *offset += sizeof(*next_hop); + + /* Skip the remainig next hops (may be zero). */ + return skip_next_hops(offset, encap->unigue.hop_count - encap->unigue.next_hop - 1); +} + +/* Fill a bpf_sock_tuple to be used with the socket lookup functions. + * This is a kludge that let's us work around verifier limitations: + * + * fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321) + * + * clang will substitue a costant for sizeof, which allows the verifier + * to track it's value. Based on this, it can figure out the constant + * return value, and calling code works while still being "generic" to + * IPv4 and IPv6. + */ +static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph, + uint64_t iphlen, uint16_t sport, uint16_t dport) +{ + switch (iphlen) { + case sizeof(struct iphdr): { + struct iphdr *ipv4 = (struct iphdr *)iph; + tuple->ipv4.daddr = ipv4->daddr; + tuple->ipv4.saddr = ipv4->saddr; + tuple->ipv4.sport = sport; + tuple->ipv4.dport = dport; + return sizeof(tuple->ipv4); + } + + case sizeof(struct ipv6hdr): { + struct ipv6hdr *ipv6 = (struct ipv6hdr *)iph; + memcpy(&tuple->ipv6.daddr, &ipv6->daddr, + sizeof(tuple->ipv6.daddr)); + memcpy(&tuple->ipv6.saddr, &ipv6->saddr, + sizeof(tuple->ipv6.saddr)); + tuple->ipv6.sport = sport; + tuple->ipv6.dport = dport; + return sizeof(tuple->ipv6); + } + + default: + return 0; + } +} + +static verdict_t classify_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, + uint64_t tuplen, void *iph, struct tcphdr *tcp) +{ + struct bpf_sock *sk = + bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); + + if (sk == NULL) + return UNKNOWN; + + if (sk->state != BPF_TCP_LISTEN) { + bpf_sk_release(sk); + return ESTABLISHED; + } + + if (iph != NULL && tcp != NULL) { + /* Kludge: we've run out of arguments, but need the length of the ip header. */ + uint64_t iphlen = sizeof(struct iphdr); + + if (tuplen == sizeof(tuple->ipv6)) + iphlen = sizeof(struct ipv6hdr); + + if (bpf_tcp_check_syncookie(sk, iph, iphlen, tcp, + sizeof(*tcp)) == 0) { + bpf_sk_release(sk); + return SYN_COOKIE; + } + } + + bpf_sk_release(sk); + return UNKNOWN; +} + +static verdict_t classify_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, uint64_t tuplen) +{ + struct bpf_sock *sk = + bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); + + if (sk == NULL) + return UNKNOWN; + + if (sk->state == BPF_TCP_ESTABLISHED) { + bpf_sk_release(sk); + return ESTABLISHED; + } + + bpf_sk_release(sk); + return UNKNOWN; +} + +static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, struct bpf_sock_tuple *tuple, + uint64_t tuplen, metrics_t *metrics) +{ + switch (proto) { + case IPPROTO_TCP: + return classify_tcp(skb, tuple, tuplen, NULL, NULL); + + case IPPROTO_UDP: + return classify_udp(skb, tuple, tuplen); + + default: + metrics->errors_total_malformed_icmp++; + return INVALID; + } +} + +static verdict_t process_icmpv4(struct __sk_buff *skb, struct bpf_dynptr *dynptr, __u64 *offset, + metrics_t *metrics) +{ + struct icmphdr icmp; + struct iphdr ipv4; + + if (bpf_dynptr_read(&icmp, sizeof(icmp), dynptr, *offset, 0)) { + metrics->errors_total_malformed_icmp++; + return INVALID; + } + + *offset += sizeof(icmp); + + /* We should never receive encapsulated echo replies. */ + if (icmp.type == ICMP_ECHOREPLY) { + metrics->errors_total_icmp_echo_replies++; + return INVALID; + } + + if (icmp.type == ICMP_ECHO) + return ECHO_REQUEST; + + if (icmp.type != ICMP_DEST_UNREACH || icmp.code != ICMP_FRAG_NEEDED) { + metrics->errors_total_unwanted_icmp++; + return INVALID; + } + + if (pkt_parse_ipv4(dynptr, offset, &ipv4)) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + /* The source address in the outer IP header is from the entity that + * originated the ICMP message. Use the original IP header to restore + * the correct flow tuple. + */ + struct bpf_sock_tuple tuple; + tuple.ipv4.saddr = ipv4.daddr; + tuple.ipv4.daddr = ipv4.saddr; + + if (!pkt_parse_icmp_l4_ports(dynptr, offset, (flow_ports_t *)&tuple.ipv4.sport)) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + return classify_icmp(skb, ipv4.protocol, &tuple, + sizeof(tuple.ipv4), metrics); +} + +static verdict_t process_icmpv6(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb, + metrics_t *metrics) +{ + struct bpf_sock_tuple tuple; + struct ipv6hdr ipv6; + struct icmp6hdr icmp6; + bool is_fragment; + uint8_t l4_proto; + + if (bpf_dynptr_read(&icmp6, sizeof(icmp6), dynptr, *offset, 0)) { + metrics->errors_total_malformed_icmp++; + return INVALID; + } + + /* We should never receive encapsulated echo replies. */ + if (icmp6.icmp6_type == ICMPV6_ECHO_REPLY) { + metrics->errors_total_icmp_echo_replies++; + return INVALID; + } + + if (icmp6.icmp6_type == ICMPV6_ECHO_REQUEST) { + return ECHO_REQUEST; + } + + if (icmp6.icmp6_type != ICMPV6_PKT_TOOBIG) { + metrics->errors_total_unwanted_icmp++; + return INVALID; + } + + if (pkt_parse_ipv6(dynptr, offset, &ipv6, &l4_proto, &is_fragment)) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + if (is_fragment) { + metrics->errors_total_fragmented_ip++; + return INVALID; + } + + /* Swap source and dest addresses. */ + memcpy(&tuple.ipv6.saddr, &ipv6.daddr, sizeof(tuple.ipv6.saddr)); + memcpy(&tuple.ipv6.daddr, &ipv6.saddr, sizeof(tuple.ipv6.daddr)); + + if (!pkt_parse_icmp_l4_ports(dynptr, offset, (flow_ports_t *)&tuple.ipv6.sport)) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + return classify_icmp(skb, l4_proto, &tuple, sizeof(tuple.ipv6), + metrics); +} + +static verdict_t process_tcp(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb, + struct iphdr_info *info, metrics_t *metrics) +{ + struct bpf_sock_tuple tuple; + struct tcphdr tcp; + uint64_t tuplen; + + metrics->l4_protocol_packets_total_tcp++; + + if (bpf_dynptr_read(&tcp, sizeof(tcp), dynptr, *offset, 0)) { + metrics->errors_total_malformed_tcp++; + return INVALID; + } + + *offset += sizeof(tcp); + + if (tcp.syn) + return SYN; + + tuplen = fill_tuple(&tuple, info->hdr, info->len, tcp.source, tcp.dest); + return classify_tcp(skb, &tuple, tuplen, info->hdr, &tcp); +} + +static verdict_t process_udp(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb, + struct iphdr_info *info, metrics_t *metrics) +{ + struct bpf_sock_tuple tuple; + struct udphdr udph; + uint64_t tuplen; + + metrics->l4_protocol_packets_total_udp++; + + if (bpf_dynptr_read(&udph, sizeof(udph), dynptr, *offset, 0)) { + metrics->errors_total_malformed_udp++; + return INVALID; + } + *offset += sizeof(udph); + + tuplen = fill_tuple(&tuple, info->hdr, info->len, udph.source, udph.dest); + return classify_udp(skb, &tuple, tuplen); +} + +static verdict_t process_ipv4(struct __sk_buff *skb, struct bpf_dynptr *dynptr, + __u64 *offset, metrics_t *metrics) +{ + struct iphdr ipv4; + struct iphdr_info info = { + .hdr = &ipv4, + .len = sizeof(ipv4), + }; + + metrics->l3_protocol_packets_total_ipv4++; + + if (pkt_parse_ipv4(dynptr, offset, &ipv4)) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (ipv4.version != 4) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (ipv4_is_fragment(&ipv4)) { + metrics->errors_total_fragmented_ip++; + return INVALID; + } + + switch (ipv4.protocol) { + case IPPROTO_ICMP: + return process_icmpv4(skb, dynptr, offset, metrics); + + case IPPROTO_TCP: + return process_tcp(dynptr, offset, skb, &info, metrics); + + case IPPROTO_UDP: + return process_udp(dynptr, offset, skb, &info, metrics); + + default: + metrics->errors_total_unknown_l4_proto++; + return INVALID; + } +} + +static verdict_t process_ipv6(struct __sk_buff *skb, struct bpf_dynptr *dynptr, + __u64 *offset, metrics_t *metrics) +{ + struct ipv6hdr ipv6; + struct iphdr_info info = { + .hdr = &ipv6, + .len = sizeof(ipv6), + }; + uint8_t l4_proto; + bool is_fragment; + + metrics->l3_protocol_packets_total_ipv6++; + + if (pkt_parse_ipv6(dynptr, offset, &ipv6, &l4_proto, &is_fragment)) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (ipv6.version != 6) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (is_fragment) { + metrics->errors_total_fragmented_ip++; + return INVALID; + } + + switch (l4_proto) { + case IPPROTO_ICMPV6: + return process_icmpv6(dynptr, offset, skb, metrics); + + case IPPROTO_TCP: + return process_tcp(dynptr, offset, skb, &info, metrics); + + case IPPROTO_UDP: + return process_udp(dynptr, offset, skb, &info, metrics); + + default: + metrics->errors_total_unknown_l4_proto++; + return INVALID; + } +} + +SEC("tc") +int cls_redirect(struct __sk_buff *skb) +{ + __u8 encap_buffer[sizeof(encap_headers_t)] = {}; + struct bpf_dynptr dynptr; + struct in_addr next_hop; + /* Tracks offset of the dynptr. This will be unnecessary once + * bpf_dynptr_advance() is available. + */ + __u64 off = 0; + ret_t ret; + + bpf_dynptr_from_skb(skb, 0, &dynptr); + + metrics_t *metrics = get_global_metrics(); + if (metrics == NULL) + return TC_ACT_SHOT; + + metrics->processed_packets_total++; + + /* Pass bogus packets as long as we're not sure they're + * destined for us. + */ + if (skb->protocol != bpf_htons(ETH_P_IP)) + return TC_ACT_OK; + + encap_headers_t *encap; + + /* Make sure that all encapsulation headers are available in + * the linear portion of the skb. This makes it easy to manipulate them. + */ + if (bpf_skb_pull_data(skb, sizeof(*encap))) + return TC_ACT_OK; + + encap = bpf_dynptr_slice_rdwr(&dynptr, 0, encap_buffer, sizeof(encap_buffer)); + if (!encap) + return TC_ACT_OK; + + off += sizeof(*encap); + + if (encap->ip.ihl != 5) + /* We never have any options. */ + return TC_ACT_OK; + + if (encap->ip.daddr != ENCAPSULATION_IP || + encap->ip.protocol != IPPROTO_UDP) + return TC_ACT_OK; + + /* TODO Check UDP length? */ + if (encap->udp.dest != ENCAPSULATION_PORT) + return TC_ACT_OK; + + /* We now know that the packet is destined to us, we can + * drop bogus ones. + */ + if (ipv4_is_fragment((void *)&encap->ip)) { + metrics->errors_total_fragmented_ip++; + return TC_ACT_SHOT; + } + + if (encap->gue.variant != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->gue.control != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->gue.flags != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->gue.hlen != + sizeof(encap->unigue) / 4 + encap->unigue.hop_count) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->unigue.version != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->unigue.reserved != 0) + return TC_ACT_SHOT; + + MAYBE_RETURN(get_next_hop(&dynptr, &off, encap, &next_hop)); + + if (next_hop.s_addr == 0) { + metrics->accepted_packets_total_last_hop++; + return accept_locally(skb, encap); + } + + verdict_t verdict; + switch (encap->gue.proto_ctype) { + case IPPROTO_IPIP: + verdict = process_ipv4(skb, &dynptr, &off, metrics); + break; + + case IPPROTO_IPV6: + verdict = process_ipv6(skb, &dynptr, &off, metrics); + break; + + default: + metrics->errors_total_unknown_l3_proto++; + return TC_ACT_SHOT; + } + + switch (verdict) { + case INVALID: + /* metrics have already been bumped */ + return TC_ACT_SHOT; + + case UNKNOWN: + return forward_to_next_hop(skb, &dynptr, encap, &next_hop, metrics); + + case ECHO_REQUEST: + metrics->accepted_packets_total_icmp_echo_request++; + break; + + case SYN: + if (encap->unigue.forward_syn) { + return forward_to_next_hop(skb, &dynptr, encap, &next_hop, + metrics); + } + + metrics->accepted_packets_total_syn++; + break; + + case SYN_COOKIE: + metrics->accepted_packets_total_syn_cookies++; + break; + + case ESTABLISHED: + metrics->accepted_packets_total_established++; + break; + } + + ret = accept_locally(skb, encap); + + if (encap == encap_buffer) + bpf_dynptr_write(&dynptr, 0, encap_buffer, sizeof(encap_buffer), 0); + + return ret; +} diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c index 2fbef3cc7ad8..2dde8e3fe4c9 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c @@ -48,7 +48,7 @@ SEC("?lsm.s/bpf") __failure __msg("arg#0 expected pointer to stack or dynptr_ptr") int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size) { - unsigned long val; + unsigned long val = 0; return bpf_verify_pkcs7_signature((struct bpf_dynptr *)val, (struct bpf_dynptr *)val, NULL); diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c new file mode 100644 index 000000000000..f997f5080748 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c @@ -0,0 +1,487 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <bpf/bpf_helpers.h> +#include "test_iptunnel_common.h" +#include <bpf/bpf_endian.h> + +#include "bpf_kfuncs.h" + +static __always_inline __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static __noinline u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +#define PCKT_FRAGMENTED 65343 +#define IPV4_HDR_LEN_NO_OPT 20 +#define IPV4_PLUS_ICMP_HDR 28 +#define IPV6_PLUS_ICMP_HDR 48 +#define RING_SIZE 2 +#define MAX_VIPS 12 +#define MAX_REALS 5 +#define CTL_MAP_SIZE 16 +#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE) +#define F_IPV6 (1 << 0) +#define F_HASH_NO_SRC_PORT (1 << 0) +#define F_ICMP (1 << 0) +#define F_SYN_SET (1 << 1) + +struct packet_description { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct vip_stats { + __u64 bytes; + __u64 pkts; +}; + +struct eth_hdr { + unsigned char eth_dest[ETH_ALEN]; + unsigned char eth_source[ETH_ALEN]; + unsigned short eth_proto; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_VIPS); + __type(key, struct vip); + __type(value, struct vip_meta); +} vip_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, CH_RINGS_SIZE); + __type(key, __u32); + __type(value, __u32); +} ch_rings SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, MAX_REALS); + __type(key, __u32); + __type(value, struct real_definition); +} reals SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, MAX_VIPS); + __type(key, __u32); + __type(value, struct vip_stats); +} stats SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, CTL_MAP_SIZE); + __type(key, __u32); + __type(value, struct ctl_value); +} ctl_array SEC(".maps"); + +static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6) +{ + if (ipv6) + return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS), + pckt->ports, CH_RINGS_SIZE); + else + return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE); +} + +static __noinline bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6) +{ + __u32 hash = get_packet_hash(pckt, is_ipv6); + __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE; + __u32 *real_pos; + + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) + return false; + + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return false; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return false; + return true; +} + +static __noinline int parse_icmpv6(struct bpf_dynptr *skb_ptr, __u64 off, + struct packet_description *pckt) +{ + __u8 buffer[sizeof(struct ipv6hdr)] = {}; + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!icmp_hdr) + return TC_ACT_SHOT; + + if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG) + return TC_ACT_OK; + off += sizeof(struct icmp6hdr); + ip6h = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!ip6h) + return TC_ACT_SHOT; + pckt->proto = ip6h->nexthdr; + pckt->flags |= F_ICMP; + memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16); + memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16); + return TC_ACT_UNSPEC; +} + +static __noinline int parse_icmp(struct bpf_dynptr *skb_ptr, __u64 off, + struct packet_description *pckt) +{ + __u8 buffer_icmp[sizeof(struct iphdr)] = {}; + __u8 buffer_ip[sizeof(struct iphdr)] = {}; + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = bpf_dynptr_slice(skb_ptr, off, buffer_icmp, sizeof(buffer_icmp)); + if (!icmp_hdr) + return TC_ACT_SHOT; + if (icmp_hdr->type != ICMP_DEST_UNREACH || + icmp_hdr->code != ICMP_FRAG_NEEDED) + return TC_ACT_OK; + off += sizeof(struct icmphdr); + iph = bpf_dynptr_slice(skb_ptr, off, buffer_ip, sizeof(buffer_ip)); + if (!iph || iph->ihl != 5) + return TC_ACT_SHOT; + pckt->proto = iph->protocol; + pckt->flags |= F_ICMP; + pckt->src = iph->daddr; + pckt->dst = iph->saddr; + return TC_ACT_UNSPEC; +} + +static __noinline bool parse_udp(struct bpf_dynptr *skb_ptr, __u64 off, + struct packet_description *pckt) +{ + __u8 buffer[sizeof(struct udphdr)] = {}; + struct udphdr *udp; + + udp = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!udp) + return false; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = udp->source; + pckt->port16[1] = udp->dest; + } else { + pckt->port16[0] = udp->dest; + pckt->port16[1] = udp->source; + } + return true; +} + +static __noinline bool parse_tcp(struct bpf_dynptr *skb_ptr, __u64 off, + struct packet_description *pckt) +{ + __u8 buffer[sizeof(struct tcphdr)] = {}; + struct tcphdr *tcp; + + tcp = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!tcp) + return false; + + if (tcp->syn) + pckt->flags |= F_SYN_SET; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = tcp->source; + pckt->port16[1] = tcp->dest; + } else { + pckt->port16[0] = tcp->dest; + pckt->port16[1] = tcp->source; + } + return true; +} + +static __noinline int process_packet(struct bpf_dynptr *skb_ptr, + struct eth_hdr *eth, __u64 off, + bool is_ipv6, struct __sk_buff *skb) +{ + struct packet_description pckt = {}; + struct bpf_tunnel_key tkey = {}; + struct vip_stats *data_stats; + struct real_definition *dst; + struct vip_meta *vip_info; + struct ctl_value *cval; + __u32 v4_intf_pos = 1; + __u32 v6_intf_pos = 2; + struct ipv6hdr *ip6h; + struct vip vip = {}; + struct iphdr *iph; + int tun_flag = 0; + __u16 pkt_bytes; + __u64 iph_len; + __u32 ifindex; + __u8 protocol; + __u32 vip_num; + int action; + + tkey.tunnel_ttl = 64; + if (is_ipv6) { + __u8 buffer[sizeof(struct ipv6hdr)] = {}; + + ip6h = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!ip6h) + return TC_ACT_SHOT; + + iph_len = sizeof(struct ipv6hdr); + protocol = ip6h->nexthdr; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(ip6h->payload_len); + off += iph_len; + if (protocol == IPPROTO_FRAGMENT) { + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_ICMPV6) { + action = parse_icmpv6(skb_ptr, off, &pckt); + if (action >= 0) + return action; + off += IPV6_PLUS_ICMP_HDR; + } else { + memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16); + memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16); + } + } else { + __u8 buffer[sizeof(struct iphdr)] = {}; + + iph = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!iph || iph->ihl != 5) + return TC_ACT_SHOT; + + protocol = iph->protocol; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(iph->tot_len); + off += IPV4_HDR_LEN_NO_OPT; + + if (iph->frag_off & PCKT_FRAGMENTED) + return TC_ACT_SHOT; + if (protocol == IPPROTO_ICMP) { + action = parse_icmp(skb_ptr, off, &pckt); + if (action >= 0) + return action; + off += IPV4_PLUS_ICMP_HDR; + } else { + pckt.src = iph->saddr; + pckt.dst = iph->daddr; + } + } + protocol = pckt.proto; + + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(skb_ptr, off, &pckt)) + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(skb_ptr, off, &pckt)) + return TC_ACT_SHOT; + } else { + return TC_ACT_SHOT; + } + + if (is_ipv6) + memcpy(vip.daddr.v6, pckt.dstv6, 16); + else + vip.daddr.v4 = pckt.dst; + + vip.dport = pckt.port16[1]; + vip.protocol = pckt.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.dport = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return TC_ACT_SHOT; + pckt.port16[1] = 0; + } + + if (vip_info->flags & F_HASH_NO_SRC_PORT) + pckt.port16[0] = 0; + + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6)) + return TC_ACT_SHOT; + + if (dst->flags & F_IPV6) { + cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + memcpy(tkey.remote_ipv6, dst->dstv6, 16); + tun_flag = BPF_F_TUNINFO_IPV6; + } else { + cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + tkey.remote_ipv4 = dst->dst; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return TC_ACT_SHOT; + data_stats->pkts++; + data_stats->bytes += pkt_bytes; + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag); + *(u32 *)eth->eth_dest = tkey.remote_ipv4; + return bpf_redirect(ifindex, 0); +} + +SEC("tc") +int balancer_ingress(struct __sk_buff *ctx) +{ + __u8 buffer[sizeof(struct eth_hdr)] = {}; + struct bpf_dynptr ptr; + struct eth_hdr *eth; + __u32 eth_proto; + __u32 nh_off; + int err; + + nh_off = sizeof(struct eth_hdr); + + bpf_dynptr_from_skb(ctx, 0, &ptr); + eth = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!eth) + return TC_ACT_SHOT; + eth_proto = eth->eth_proto; + if (eth_proto == bpf_htons(ETH_P_IP)) + err = process_packet(&ptr, eth, nh_off, false, ctx); + else if (eth_proto == bpf_htons(ETH_P_IPV6)) + err = process_packet(&ptr, eth, nh_off, true, ctx); + else + return TC_ACT_SHOT; + + if (eth == buffer) + bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0); + + return err; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c new file mode 100644 index 000000000000..79bab9b50e9e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* This parsing logic is taken from the open source library katran, a layer 4 + * load balancer. + * + * This code logic using dynptrs can be found in test_parse_tcp_hdr_opt_dynptr.c + * + * https://github.com/facebookincubator/katran/blob/main/katran/lib/bpf/pckt_parsing.h + */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <linux/tcp.h> +#include <stdbool.h> +#include <linux/ipv6.h> +#include <linux/if_ether.h> +#include "test_tcp_hdr_options.h" + +char _license[] SEC("license") = "GPL"; + +/* Kind number used for experiments */ +const __u32 tcp_hdr_opt_kind_tpr = 0xFD; +/* Length of the tcp header option */ +const __u32 tcp_hdr_opt_len_tpr = 6; +/* maximum number of header options to check to lookup server_id */ +const __u32 tcp_hdr_opt_max_opt_checks = 15; + +__u32 server_id; + +struct hdr_opt_state { + __u32 server_id; + __u8 byte_offset; + __u8 hdr_bytes_remaining; +}; + +static int parse_hdr_opt(const struct xdp_md *xdp, struct hdr_opt_state *state) +{ + const void *data = (void *)(long)xdp->data; + const void *data_end = (void *)(long)xdp->data_end; + __u8 *tcp_opt, kind, hdr_len; + + tcp_opt = (__u8 *)(data + state->byte_offset); + if (tcp_opt + 1 > data_end) + return -1; + + kind = tcp_opt[0]; + + if (kind == TCPOPT_EOL) + return -1; + + if (kind == TCPOPT_NOP) { + state->hdr_bytes_remaining--; + state->byte_offset++; + return 0; + } + + if (state->hdr_bytes_remaining < 2 || + tcp_opt + sizeof(__u8) + sizeof(__u8) > data_end) + return -1; + + hdr_len = tcp_opt[1]; + if (hdr_len > state->hdr_bytes_remaining) + return -1; + + if (kind == tcp_hdr_opt_kind_tpr) { + if (hdr_len != tcp_hdr_opt_len_tpr) + return -1; + + if (tcp_opt + tcp_hdr_opt_len_tpr > data_end) + return -1; + + state->server_id = *(__u32 *)&tcp_opt[2]; + return 1; + } + + state->hdr_bytes_remaining -= hdr_len; + state->byte_offset += hdr_len; + return 0; +} + +SEC("xdp") +int xdp_ingress_v6(struct xdp_md *xdp) +{ + const void *data = (void *)(long)xdp->data; + const void *data_end = (void *)(long)xdp->data_end; + struct hdr_opt_state opt_state = {}; + __u8 tcp_hdr_opt_len = 0; + struct tcphdr *tcp_hdr; + __u64 tcp_offset = 0; + __u32 off; + int err; + + tcp_offset = sizeof(struct ethhdr) + sizeof(struct ipv6hdr); + tcp_hdr = (struct tcphdr *)(data + tcp_offset); + if (tcp_hdr + 1 > data_end) + return XDP_DROP; + + tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr); + if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr) + return XDP_DROP; + + opt_state.hdr_bytes_remaining = tcp_hdr_opt_len; + opt_state.byte_offset = sizeof(struct tcphdr) + tcp_offset; + + /* max number of bytes of options in tcp header is 40 bytes */ + for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) { + err = parse_hdr_opt(xdp, &opt_state); + + if (err || !opt_state.hdr_bytes_remaining) + break; + } + + if (!opt_state.server_id) + return XDP_DROP; + + server_id = opt_state.server_id; + + return XDP_PASS; +} diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c new file mode 100644 index 000000000000..d3b319722e30 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* This logic is lifted from a real-world use case of packet parsing, used in + * the open source library katran, a layer 4 load balancer. + * + * This test demonstrates how to parse packet contents using dynptrs. The + * original code (parsing without dynptrs) can be found in test_parse_tcp_hdr_opt.c + */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <linux/tcp.h> +#include <stdbool.h> +#include <linux/ipv6.h> +#include <linux/if_ether.h> +#include "test_tcp_hdr_options.h" +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +/* Kind number used for experiments */ +const __u32 tcp_hdr_opt_kind_tpr = 0xFD; +/* Length of the tcp header option */ +const __u32 tcp_hdr_opt_len_tpr = 6; +/* maximum number of header options to check to lookup server_id */ +const __u32 tcp_hdr_opt_max_opt_checks = 15; + +__u32 server_id; + +static int parse_hdr_opt(struct bpf_dynptr *ptr, __u32 *off, __u8 *hdr_bytes_remaining, + __u32 *server_id) +{ + __u8 *tcp_opt, kind, hdr_len; + __u8 buffer[sizeof(kind) + sizeof(hdr_len) + sizeof(*server_id)]; + __u8 *data; + + __builtin_memset(buffer, 0, sizeof(buffer)); + + data = bpf_dynptr_slice(ptr, *off, buffer, sizeof(buffer)); + if (!data) + return -1; + + kind = data[0]; + + if (kind == TCPOPT_EOL) + return -1; + + if (kind == TCPOPT_NOP) { + *off += 1; + *hdr_bytes_remaining -= 1; + return 0; + } + + if (*hdr_bytes_remaining < 2) + return -1; + + hdr_len = data[1]; + if (hdr_len > *hdr_bytes_remaining) + return -1; + + if (kind == tcp_hdr_opt_kind_tpr) { + if (hdr_len != tcp_hdr_opt_len_tpr) + return -1; + + __builtin_memcpy(server_id, (__u32 *)(data + 2), sizeof(*server_id)); + return 1; + } + + *off += hdr_len; + *hdr_bytes_remaining -= hdr_len; + return 0; +} + +SEC("xdp") +int xdp_ingress_v6(struct xdp_md *xdp) +{ + __u8 buffer[sizeof(struct tcphdr)] = {}; + __u8 hdr_bytes_remaining; + struct tcphdr *tcp_hdr; + __u8 tcp_hdr_opt_len; + int err = 0; + __u32 off; + + struct bpf_dynptr ptr; + + bpf_dynptr_from_xdp(xdp, 0, &ptr); + + off = sizeof(struct ethhdr) + sizeof(struct ipv6hdr); + + tcp_hdr = bpf_dynptr_slice(&ptr, off, buffer, sizeof(buffer)); + if (!tcp_hdr) + return XDP_DROP; + + tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr); + if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr) + return XDP_DROP; + + hdr_bytes_remaining = tcp_hdr_opt_len; + + off += sizeof(struct tcphdr); + + /* max number of bytes of options in tcp header is 40 bytes */ + for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) { + err = parse_hdr_opt(&ptr, &off, &hdr_bytes_remaining, &server_id); + + if (err || !hdr_bytes_remaining) + break; + } + + if (!server_id) + return XDP_DROP; + + return XDP_PASS; +} diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index b502e5c92e33..6ccf6d546074 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -23,8 +23,8 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off, bool *ipv4) { struct bpf_sock_tuple *result; + __u64 ihl_len = 0; __u8 proto = 0; - __u64 ihl_len; if (eth_proto == bpf_htons(ETH_P_IP)) { struct iphdr *iph = (struct iphdr *)(data + nh_off); diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 508da4a23c4f..95b4aa0928ba 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -324,11 +324,11 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb) SEC("tc") int vxlan_set_tunnel_dst(struct __sk_buff *skb) { - int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; __u32 index = 0; __u32 *local_ip = NULL; + int ret = 0; local_ip = bpf_map_lookup_elem(&local_ip_map, &index); if (!local_ip) { @@ -363,11 +363,11 @@ int vxlan_set_tunnel_dst(struct __sk_buff *skb) SEC("tc") int vxlan_set_tunnel_src(struct __sk_buff *skb) { - int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; __u32 index = 0; __u32 *local_ip = NULL; + int ret = 0; local_ip = bpf_map_lookup_elem(&local_ip_map, &index); if (!local_ip) { @@ -494,9 +494,9 @@ SEC("tc") int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb) { struct bpf_tunnel_key key; - int ret; __u32 index = 0; __u32 *local_ip; + int ret = 0; local_ip = bpf_map_lookup_elem(&local_ip_map, &index); if (!local_ip) { @@ -525,9 +525,9 @@ SEC("tc") int ip6vxlan_set_tunnel_src(struct __sk_buff *skb) { struct bpf_tunnel_key key; - int ret; __u32 index = 0; __u32 *local_ip; + int ret = 0; local_ip = bpf_map_lookup_elem(&local_ip_map, &index); if (!local_ip) { @@ -556,9 +556,9 @@ SEC("tc") int ip6vxlan_get_tunnel_src(struct __sk_buff *skb) { struct bpf_tunnel_key key; - int ret; __u32 index = 0; __u32 *local_ip; + int ret = 0; local_ip = bpf_map_lookup_elem(&local_ip_map, &index); if (!local_ip) { diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c new file mode 100644 index 000000000000..7521a805b506 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta */ +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/udp.h> +#include <linux/tcp.h> +#include <linux/pkt_cls.h> +#include <sys/socket.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include "test_iptunnel_common.h" +#include "bpf_kfuncs.h" + +const size_t tcphdr_sz = sizeof(struct tcphdr); +const size_t udphdr_sz = sizeof(struct udphdr); +const size_t ethhdr_sz = sizeof(struct ethhdr); +const size_t iphdr_sz = sizeof(struct iphdr); +const size_t ipv6hdr_sz = sizeof(struct ipv6hdr); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 256); + __type(key, __u32); + __type(value, __u64); +} rxcnt SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_IPTNL_ENTRIES); + __type(key, struct vip); + __type(value, struct iptnl_info); +} vip2tnl SEC(".maps"); + +static __always_inline void count_tx(__u32 protocol) +{ + __u64 *rxcnt_count; + + rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol); + if (rxcnt_count) + *rxcnt_count += 1; +} + +static __always_inline int get_dport(void *trans_data, __u8 protocol) +{ + struct tcphdr *th; + struct udphdr *uh; + + switch (protocol) { + case IPPROTO_TCP: + th = (struct tcphdr *)trans_data; + return th->dest; + case IPPROTO_UDP: + uh = (struct udphdr *)trans_data; + return uh->dest; + default: + return 0; + } +} + +static __always_inline void set_ethhdr(struct ethhdr *new_eth, + const struct ethhdr *old_eth, + const struct iptnl_info *tnl, + __be16 h_proto) +{ + memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source)); + memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest)); + new_eth->h_proto = h_proto; +} + +static __always_inline int handle_ipv4(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr) +{ + __u8 eth_buffer[ethhdr_sz + iphdr_sz + ethhdr_sz]; + __u8 iph_buffer_tcp[iphdr_sz + tcphdr_sz]; + __u8 iph_buffer_udp[iphdr_sz + udphdr_sz]; + struct bpf_dynptr new_xdp_ptr; + struct iptnl_info *tnl; + struct ethhdr *new_eth; + struct ethhdr *old_eth; + __u32 transport_hdr_sz; + struct iphdr *iph; + __u16 *next_iph; + __u16 payload_len; + struct vip vip = {}; + int dport; + __u32 csum = 0; + int i; + + __builtin_memset(eth_buffer, 0, sizeof(eth_buffer)); + __builtin_memset(iph_buffer_tcp, 0, sizeof(iph_buffer_tcp)); + __builtin_memset(iph_buffer_udp, 0, sizeof(iph_buffer_udp)); + + if (ethhdr_sz + iphdr_sz + tcphdr_sz > xdp->data_end - xdp->data) + iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_udp, sizeof(iph_buffer_udp)); + else + iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_tcp, sizeof(iph_buffer_tcp)); + + if (!iph) + return XDP_DROP; + + dport = get_dport(iph + 1, iph->protocol); + if (dport == -1) + return XDP_DROP; + + vip.protocol = iph->protocol; + vip.family = AF_INET; + vip.daddr.v4 = iph->daddr; + vip.dport = dport; + payload_len = bpf_ntohs(iph->tot_len); + + tnl = bpf_map_lookup_elem(&vip2tnl, &vip); + /* It only does v4-in-v4 */ + if (!tnl || tnl->family != AF_INET) + return XDP_PASS; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)iphdr_sz)) + return XDP_DROP; + + bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr); + new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer)); + if (!new_eth) + return XDP_DROP; + + iph = (struct iphdr *)(new_eth + 1); + old_eth = (struct ethhdr *)(iph + 1); + + set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP)); + + if (new_eth == eth_buffer) + bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0); + + iph->version = 4; + iph->ihl = iphdr_sz >> 2; + iph->frag_off = 0; + iph->protocol = IPPROTO_IPIP; + iph->check = 0; + iph->tos = 0; + iph->tot_len = bpf_htons(payload_len + iphdr_sz); + iph->daddr = tnl->daddr.v4; + iph->saddr = tnl->saddr.v4; + iph->ttl = 8; + + next_iph = (__u16 *)iph; + for (i = 0; i < iphdr_sz >> 1; i++) + csum += *next_iph++; + + iph->check = ~((csum & 0xffff) + (csum >> 16)); + + count_tx(vip.protocol); + + return XDP_TX; +} + +static __always_inline int handle_ipv6(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr) +{ + __u8 eth_buffer[ethhdr_sz + ipv6hdr_sz + ethhdr_sz]; + __u8 ip6h_buffer_tcp[ipv6hdr_sz + tcphdr_sz]; + __u8 ip6h_buffer_udp[ipv6hdr_sz + udphdr_sz]; + struct bpf_dynptr new_xdp_ptr; + struct iptnl_info *tnl; + struct ethhdr *new_eth; + struct ethhdr *old_eth; + __u32 transport_hdr_sz; + struct ipv6hdr *ip6h; + __u16 payload_len; + struct vip vip = {}; + int dport; + + __builtin_memset(eth_buffer, 0, sizeof(eth_buffer)); + __builtin_memset(ip6h_buffer_tcp, 0, sizeof(ip6h_buffer_tcp)); + __builtin_memset(ip6h_buffer_udp, 0, sizeof(ip6h_buffer_udp)); + + if (ethhdr_sz + iphdr_sz + tcphdr_sz > xdp->data_end - xdp->data) + ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_udp, sizeof(ip6h_buffer_udp)); + else + ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_tcp, sizeof(ip6h_buffer_tcp)); + + if (!ip6h) + return XDP_DROP; + + dport = get_dport(ip6h + 1, ip6h->nexthdr); + if (dport == -1) + return XDP_DROP; + + vip.protocol = ip6h->nexthdr; + vip.family = AF_INET6; + memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr)); + vip.dport = dport; + payload_len = ip6h->payload_len; + + tnl = bpf_map_lookup_elem(&vip2tnl, &vip); + /* It only does v6-in-v6 */ + if (!tnl || tnl->family != AF_INET6) + return XDP_PASS; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)ipv6hdr_sz)) + return XDP_DROP; + + bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr); + new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer)); + if (!new_eth) + return XDP_DROP; + + ip6h = (struct ipv6hdr *)(new_eth + 1); + old_eth = (struct ethhdr *)(ip6h + 1); + + set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6)); + + if (new_eth == eth_buffer) + bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0); + + ip6h->version = 6; + ip6h->priority = 0; + memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); + ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + ipv6hdr_sz); + ip6h->nexthdr = IPPROTO_IPV6; + ip6h->hop_limit = 8; + memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6)); + memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6)); + + count_tx(vip.protocol); + + return XDP_TX; +} + +SEC("xdp") +int _xdp_tx_iptunnel(struct xdp_md *xdp) +{ + __u8 buffer[ethhdr_sz]; + struct bpf_dynptr ptr; + struct ethhdr *eth; + __u16 h_proto; + + __builtin_memset(buffer, 0, sizeof(buffer)); + + bpf_dynptr_from_xdp(xdp, 0, &ptr); + eth = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); + if (!eth) + return XDP_DROP; + + h_proto = eth->h_proto; + + if (h_proto == bpf_htons(ETH_P_IP)) + return handle_ipv4(xdp, &ptr); + else if (h_proto == bpf_htons(ETH_P_IPV6)) + + return handle_ipv6(xdp, &ptr); + else + return XDP_DROP; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c index acda5c9cea93..9a16d95213e1 100644 --- a/tools/testing/selftests/bpf/progs/timer.c +++ b/tools/testing/selftests/bpf/progs/timer.c @@ -46,7 +46,15 @@ struct { __type(value, struct elem); } lru SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} abs_timer SEC(".maps"); + __u64 bss_data; +__u64 abs_data; __u64 err; __u64 ok; __u64 callback_check = 52; @@ -284,3 +292,40 @@ int BPF_PROG2(test2, int, a, int, b) return bpf_timer_test(); } + +/* callback for absolute timer */ +static int timer_cb3(void *map, int *key, struct bpf_timer *timer) +{ + abs_data += 6; + + if (abs_data < 12) { + bpf_timer_start(timer, bpf_ktime_get_boot_ns() + 1000, + BPF_F_TIMER_ABS); + } else { + /* Re-arm timer ~35 seconds in future */ + bpf_timer_start(timer, bpf_ktime_get_boot_ns() + (1ull << 35), + BPF_F_TIMER_ABS); + } + + return 0; +} + +SEC("fentry/bpf_fentry_test3") +int BPF_PROG2(test3, int, a) +{ + int key = 0; + struct bpf_timer *timer; + + bpf_printk("test3"); + + timer = bpf_map_lookup_elem(&abs_timer, &key); + if (timer) { + if (bpf_timer_init(timer, &abs_timer, CLOCK_BOOTTIME) != 0) + err |= 2048; + bpf_timer_set_callback(timer, timer_cb3); + bpf_timer_start(timer, bpf_ktime_get_boot_ns() + 1000, + BPF_F_TIMER_ABS); + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c index b39093dd5715..0ade1110613b 100644 --- a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c @@ -202,7 +202,7 @@ do_nothing_cb(struct bpf_dynptr *dynptr, void *context) return 0; } -SEC("fentry/" SYS_PREFIX "sys_getrlimit") +SEC("fentry/" SYS_PREFIX "sys_prlimit64") int test_user_ringbuf_epoll(void *ctx) { long num_samples; diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 679efb3aa785..bf41390157bf 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -13,12 +13,15 @@ #define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success" #define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg=" #define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level=" +#define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags=" struct test_spec { const char *name; bool expect_failure; - const char *expect_msg; + const char **expect_msgs; + size_t expect_msg_cnt; int log_level; + int prog_flags; }; static int tester_init(struct test_loader *tester) @@ -67,7 +70,8 @@ static int parse_test_spec(struct test_loader *tester, for (i = 1; i < btf__type_cnt(btf); i++) { const struct btf_type *t; - const char *s; + const char *s, *val; + char *e; t = btf__type_by_id(btf, i); if (!btf_is_decl_tag(t)) @@ -82,14 +86,48 @@ static int parse_test_spec(struct test_loader *tester, } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS) == 0) { spec->expect_failure = false; } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) { - spec->expect_msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1; + void *tmp; + const char **msg; + + tmp = realloc(spec->expect_msgs, + (1 + spec->expect_msg_cnt) * sizeof(void *)); + if (!tmp) { + ASSERT_FAIL("failed to realloc memory for messages\n"); + return -ENOMEM; + } + spec->expect_msgs = tmp; + msg = &spec->expect_msgs[spec->expect_msg_cnt++]; + *msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1; } else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) { + val = s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1; errno = 0; - spec->log_level = strtol(s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1, NULL, 0); - if (errno) { + spec->log_level = strtol(val, &e, 0); + if (errno || e[0] != '\0') { ASSERT_FAIL("failed to parse test log level from '%s'", s); return -EINVAL; } + } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) { + val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1; + if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) { + spec->prog_flags |= BPF_F_STRICT_ALIGNMENT; + } else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) { + spec->prog_flags |= BPF_F_ANY_ALIGNMENT; + } else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) { + spec->prog_flags |= BPF_F_TEST_RND_HI32; + } else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) { + spec->prog_flags |= BPF_F_TEST_STATE_FREQ; + } else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) { + spec->prog_flags |= BPF_F_SLEEPABLE; + } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) { + spec->prog_flags |= BPF_F_XDP_HAS_FRAGS; + } else /* assume numeric value */ { + errno = 0; + spec->prog_flags |= strtol(val, &e, 0); + if (errno || e[0] != '\0') { + ASSERT_FAIL("failed to parse test prog flags from '%s'", s); + return -EINVAL; + } + } } } @@ -101,7 +139,7 @@ static void prepare_case(struct test_loader *tester, struct bpf_object *obj, struct bpf_program *prog) { - int min_log_level = 0; + int min_log_level = 0, prog_flags; if (env.verbosity > VERBOSE_NONE) min_log_level = 1; @@ -119,7 +157,11 @@ static void prepare_case(struct test_loader *tester, else bpf_program__set_log_level(prog, spec->log_level); + prog_flags = bpf_program__flags(prog); + bpf_program__set_flags(prog, prog_flags | spec->prog_flags); + tester->log_buf[0] = '\0'; + tester->next_match_pos = 0; } static void emit_verifier_log(const char *log_buf, bool force) @@ -135,17 +177,26 @@ static void validate_case(struct test_loader *tester, struct bpf_program *prog, int load_err) { - if (spec->expect_msg) { + int i, j; + + for (i = 0; i < spec->expect_msg_cnt; i++) { char *match; + const char *expect_msg; + + expect_msg = spec->expect_msgs[i]; - match = strstr(tester->log_buf, spec->expect_msg); + match = strstr(tester->log_buf + tester->next_match_pos, expect_msg); if (!ASSERT_OK_PTR(match, "expect_msg")) { /* if we are in verbose mode, we've already emitted log */ if (env.verbosity == VERBOSE_NONE) emit_verifier_log(tester->log_buf, true /*force*/); - fprintf(stderr, "EXPECTED MSG: '%s'\n", spec->expect_msg); + for (j = 0; j < i; j++) + fprintf(stderr, "MATCHED MSG: '%s'\n", spec->expect_msgs[j]); + fprintf(stderr, "EXPECTED MSG: '%s'\n", expect_msg); return; } + + tester->next_match_pos = match - tester->log_buf + strlen(expect_msg); } } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index d5d51ec97ec8..3cbf005747ed 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -376,6 +376,21 @@ int test__join_cgroup(const char *path); ___ok; \ }) +#define SYS(goto_label, fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto goto_label; \ + }) + +#define SYS_NOFAIL(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + system(cmd); \ + }) + static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; @@ -412,6 +427,7 @@ int get_bpf_max_tramp_links(void); struct test_loader { char *log_buf; size_t log_buf_sz; + size_t next_match_pos; struct bpf_object *obj; }; diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h index 6118e3ab61fc..56c9f8a3ad3d 100644 --- a/tools/testing/selftests/bpf/test_tcp_hdr_options.h +++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h @@ -50,6 +50,7 @@ struct linum_err { #define TCPOPT_EOL 0 #define TCPOPT_NOP 1 +#define TCPOPT_MSS 2 #define TCPOPT_WINDOW 3 #define TCPOPT_EXP 254 diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 8b9949bb833d..49a70d9beb0b 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -699,13 +699,13 @@ static int create_cgroup_storage(bool percpu) * struct bpf_timer t; * }; * struct btf_ptr { + * struct prog_test_ref_kfunc __kptr_untrusted *ptr; * struct prog_test_ref_kfunc __kptr *ptr; - * struct prog_test_ref_kfunc __kptr_ref *ptr; - * struct prog_test_member __kptr_ref *ptr; + * struct prog_test_member __kptr *ptr; * } */ static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t" - "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_ref" + "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_untrusted" "\0prog_test_member"; static __u32 btf_raw_types[] = { /* int */ @@ -724,20 +724,20 @@ static __u32 btf_raw_types[] = { BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */ /* struct prog_test_ref_kfunc */ /* [6] */ BTF_STRUCT_ENC(51, 0, 0), - BTF_STRUCT_ENC(89, 0, 0), /* [7] */ + BTF_STRUCT_ENC(95, 0, 0), /* [7] */ + /* type tag "kptr_untrusted" */ + BTF_TYPE_TAG_ENC(80, 6), /* [8] */ /* type tag "kptr" */ - BTF_TYPE_TAG_ENC(75, 6), /* [8] */ - /* type tag "kptr_ref" */ - BTF_TYPE_TAG_ENC(80, 6), /* [9] */ - BTF_TYPE_TAG_ENC(80, 7), /* [10] */ + BTF_TYPE_TAG_ENC(75, 6), /* [9] */ + BTF_TYPE_TAG_ENC(75, 7), /* [10] */ BTF_PTR_ENC(8), /* [11] */ BTF_PTR_ENC(9), /* [12] */ BTF_PTR_ENC(10), /* [13] */ /* struct btf_ptr */ /* [14] */ BTF_STRUCT_ENC(43, 3, 24), - BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr *ptr; */ - BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr_ref *ptr; */ - BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */ + BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr_untrusted *ptr; */ + BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr *ptr; */ + BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr *ptr; */ }; static char bpf_vlog[UINT_MAX >> 8]; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 289ed202ec66..5702fc9761ef 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -181,7 +181,7 @@ }, .result_unpriv = REJECT, .result = REJECT, - .errstr = "negative offset ptr_ ptr R1 off=-4 disallowed", + .errstr = "ptr R1 off=-4 disallowed", }, { "calls: invalid kfunc call: PTR_TO_BTF_ID with variable offset", @@ -243,7 +243,7 @@ }, .result_unpriv = REJECT, .result = REJECT, - .errstr = "R1 must be referenced", + .errstr = "R1 must be", }, { "calls: valid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID", diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c index c8eaf0536c24..2fd31612c0b8 100644 --- a/tools/testing/selftests/bpf/verifier/ctx.c +++ b/tools/testing/selftests/bpf/verifier/ctx.c @@ -1,15 +1,4 @@ { - "context stores via ST", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0), - BPF_EXIT_INSN(), - }, - .errstr = "BPF_ST stores into R1 ctx is not allowed", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ "context stores via BPF_ATOMIC", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c index 6914904344c0..d775ccb01989 100644 --- a/tools/testing/selftests/bpf/verifier/map_kptr.c +++ b/tools/testing/selftests/bpf/verifier/map_kptr.c @@ -336,7 +336,7 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .fixup_map_kptr = { 1 }, .result = REJECT, - .errstr = "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_", + .errstr = "R1 type=rcu_ptr_or_null_ expected=percpu_ptr_", }, { "map_kptr: ref: reject off != 0", diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index 878ca26c3f0a..af0c0f336625 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -240,6 +240,29 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + /* Same as above, but use BPF_ST_MEM to save 42 + * instead of BPF_STX_MEM. + */ + "unpriv: spill/fill of different pointers st", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + BPF_ST_MEM(BPF_W, BPF_REG_1, offsetof(struct __sk_buff, mark), 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "same insn cannot be used with different pointers", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ "unpriv: spill/fill of different pointers stx - ctx and sock", .insns = { BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 80fbfe0330f6..1de34ec99290 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -48,6 +48,7 @@ TEST_PROGS += l2_tos_ttl_inherit.sh TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh +TEST_PROGS += big_tcp.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest @@ -81,13 +82,14 @@ TEST_GEN_FILES += csum TEST_GEN_FILES += nat6to4.o TEST_GEN_FILES += ip_local_port_range TEST_GEN_FILES += bind_wildcard +TEST_PROGS += test_vxlan_mdb.sh TEST_FILES := settings include ../lib.mk $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma -$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread +$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto $(OUTPUT)/tcp_inq: LDLIBS += -lpthread $(OUTPUT)/bind_bhash: LDLIBS += -lpthread diff --git a/tools/testing/selftests/net/big_tcp.sh b/tools/testing/selftests/net/big_tcp.sh new file mode 100755 index 000000000000..cde9a91c4797 --- /dev/null +++ b/tools/testing/selftests/net/big_tcp.sh @@ -0,0 +1,180 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For IPv4 and IPv6 BIG TCP. +# TOPO: CLIENT_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) SERVER_NS + +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +CLIENT_IP4="198.51.100.1" +CLIENT_IP6="2001:db8:1::1" + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +SERVER_IP4="203.0.113.1" +SERVER_IP6="2001:db8:2::1" + +ROUTER_NS=$(mktemp -u router-XXXXXXXX) +SERVER_GW4="203.0.113.2" +CLIENT_GW4="198.51.100.2" +SERVER_GW6="2001:db8:2::2" +CLIENT_GW6="2001:db8:1::2" + +MAX_SIZE=128000 +CHK_SIZE=65535 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +setup() { + ip netns add $CLIENT_NS + ip netns add $SERVER_NS + ip netns add $ROUTER_NS + ip -net $ROUTER_NS link add link1 type veth peer name link0 netns $CLIENT_NS + ip -net $ROUTER_NS link add link2 type veth peer name link3 netns $SERVER_NS + + ip -net $CLIENT_NS link set link0 up + ip -net $CLIENT_NS link set link0 mtu 1442 + ip -net $CLIENT_NS addr add $CLIENT_IP4/24 dev link0 + ip -net $CLIENT_NS addr add $CLIENT_IP6/64 dev link0 nodad + ip -net $CLIENT_NS route add $SERVER_IP4 dev link0 via $CLIENT_GW4 + ip -net $CLIENT_NS route add $SERVER_IP6 dev link0 via $CLIENT_GW6 + ip -net $CLIENT_NS link set dev link0 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $CLIENT_NS link set dev link0 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + ip net exec $CLIENT_NS sysctl -wq net.ipv4.tcp_window_scaling=10 + + ip -net $ROUTER_NS link set link1 up + ip -net $ROUTER_NS link set link2 up + ip -net $ROUTER_NS addr add $CLIENT_GW4/24 dev link1 + ip -net $ROUTER_NS addr add $CLIENT_GW6/64 dev link1 nodad + ip -net $ROUTER_NS addr add $SERVER_GW4/24 dev link2 + ip -net $ROUTER_NS addr add $SERVER_GW6/64 dev link2 nodad + ip -net $ROUTER_NS link set dev link1 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $ROUTER_NS link set dev link2 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $ROUTER_NS link set dev link1 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + ip -net $ROUTER_NS link set dev link2 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + # test for nf_ct_skb_network_trim in nf_conntrack_ovs used by TC ct action. + ip net exec $ROUTER_NS tc qdisc add dev link1 ingress + ip net exec $ROUTER_NS tc filter add dev link1 ingress \ + proto ip flower ip_proto tcp action ct + ip net exec $ROUTER_NS tc filter add dev link1 ingress \ + proto ipv6 flower ip_proto tcp action ct + ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1 + ip net exec $ROUTER_NS sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip -net $SERVER_NS link set link3 up + ip -net $SERVER_NS addr add $SERVER_IP4/24 dev link3 + ip -net $SERVER_NS addr add $SERVER_IP6/64 dev link3 nodad + ip -net $SERVER_NS route add $CLIENT_IP4 dev link3 via $SERVER_GW4 + ip -net $SERVER_NS route add $CLIENT_IP6 dev link3 via $SERVER_GW6 + ip -net $SERVER_NS link set dev link3 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $SERVER_NS link set dev link3 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + ip net exec $SERVER_NS sysctl -wq net.ipv4.tcp_window_scaling=10 + ip net exec $SERVER_NS netserver 2>&1 >/dev/null +} + +cleanup() { + ip net exec $SERVER_NS pkill netserver + ip -net $ROUTER_NS link del link1 + ip -net $ROUTER_NS link del link2 + ip netns del "$CLIENT_NS" + ip netns del "$SERVER_NS" + ip netns del "$ROUTER_NS" +} + +start_counter() { + local ipt="iptables" + local iface=$1 + local netns=$2 + + [ "$NF" = "6" ] && ipt="ip6tables" + ip net exec $netns $ipt -t raw -A PREROUTING -i $iface \ + -m length ! --length 0:$CHK_SIZE -j ACCEPT +} + +check_counter() { + local ipt="iptables" + local iface=$1 + local netns=$2 + + [ "$NF" = "6" ] && ipt="ip6tables" + test `ip net exec $netns $ipt -t raw -L -v |grep $iface | awk '{print $1}'` != "0" +} + +stop_counter() { + local ipt="iptables" + local iface=$1 + local netns=$2 + + [ "$NF" = "6" ] && ipt="ip6tables" + ip net exec $netns $ipt -t raw -D PREROUTING -i $iface \ + -m length ! --length 0:$CHK_SIZE -j ACCEPT +} + +do_netperf() { + local serip=$SERVER_IP4 + local netns=$1 + + [ "$NF" = "6" ] && serip=$SERVER_IP6 + ip net exec $netns netperf -$NF -t TCP_STREAM -H $serip 2>&1 >/dev/null +} + +do_test() { + local cli_tso=$1 + local gw_gro=$2 + local gw_tso=$3 + local ser_gro=$4 + local ret="PASS" + + ip net exec $CLIENT_NS ethtool -K link0 tso $cli_tso + ip net exec $ROUTER_NS ethtool -K link1 gro $gw_gro + ip net exec $ROUTER_NS ethtool -K link2 tso $gw_tso + ip net exec $SERVER_NS ethtool -K link3 gro $ser_gro + + start_counter link1 $ROUTER_NS + start_counter link3 $SERVER_NS + do_netperf $CLIENT_NS + + if check_counter link1 $ROUTER_NS; then + check_counter link3 $SERVER_NS || ret="FAIL_on_link3" + else + ret="FAIL_on_link1" + fi + + stop_counter link1 $ROUTER_NS + stop_counter link3 $SERVER_NS + printf "%-9s %-8s %-8s %-8s: [%s]\n" \ + $cli_tso $gw_gro $gw_tso $ser_gro $ret + test $ret = "PASS" +} + +testup() { + echo "CLI GSO | GW GRO | GW GSO | SER GRO" && \ + do_test "on" "on" "on" "on" && \ + do_test "on" "off" "on" "off" && \ + do_test "off" "on" "on" "on" && \ + do_test "on" "on" "off" "on" && \ + do_test "off" "on" "off" "on" +} + +if ! netperf -V &> /dev/null; then + echo "SKIP: Could not run test without netperf tool" + exit $ksft_skip +fi + +if ! ip link help 2>&1 | grep gso_ipv4_max_size &> /dev/null; then + echo "SKIP: Could not run test without gso/gro_ipv4_max_size supported in ip-link" + exit $ksft_skip +fi + +trap cleanup EXIT +setup && echo "Testing for BIG TCP:" && \ +NF=4 testup && echo "***v4 Tests Done***" && \ +NF=6 testup && echo "***v6 Tests Done***" +exit $? diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 2529226ce87c..d1d421ec10a3 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -48,4 +48,5 @@ CONFIG_BAREUDP=m CONFIG_IPV6_IOAM6_LWTUNNEL=y CONFIG_CRYPTO_SM4_GENERIC=y CONFIG_AMT=m +CONFIG_VXLAN=m CONFIG_IP_SCTP=m diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 91201ab3c4fc..236f6b796a52 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -85,6 +85,7 @@ TEST_PROGS = bridge_igmp.sh \ tc_mpls_l2vpn.sh \ tc_police.sh \ tc_shblocks.sh \ + tc_tunnel_key.sh \ tc_vlan_modify.sh \ vxlan_asymmetric_ipv6.sh \ vxlan_asymmetric.sh \ diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh index 9c1f76e108af..432fe8469851 100755 --- a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh +++ b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh @@ -319,6 +319,19 @@ trap cleanup EXIT setup_prepare setup_wait -tests_run +used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info | + jq '.[].info.l3_stats.used') +kind=$(ip -j -d link show dev $rp1 | + jq -r '.[].linkinfo.info_kind') +if [[ $used != true ]]; then + if [[ $kind == veth ]]; then + log_test_skip "l3_stats not offloaded on veth interface" + EXIT_STATUS=$ksft_skip + else + RET=1 log_test "l3_stats not offloaded" + fi +else + tests_run +fi exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh new file mode 100755 index 000000000000..5ac184d51809 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh @@ -0,0 +1,161 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +ALL_TESTS="tunnel_key_nofrag_test" + +NUM_NETIFS=4 +source tc_common.sh +source lib.sh + +tcflags="skip_hw" + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 + forwarding_enable + mtu_set $h1 1500 + tunnel_create h1-et vxlan 192.0.2.1 192.0.2.2 dev $h1 dstport 0 external + tc qdisc add dev h1-et clsact + mtu_set h1-et 1230 + mtu_restore $h1 + mtu_set $h1 1000 +} + +h1_destroy() +{ + tc qdisc del dev h1-et clsact + tunnel_destroy h1-et + forwarding_restore + mtu_restore $h1 + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 +} + +switch_create() +{ + simple_if_init $swp1 192.0.2.2/24 + tc qdisc add dev $swp1 clsact + simple_if_init $swp2 192.0.2.1/24 +} + +switch_destroy() +{ + simple_if_fini $swp2 192.0.2.1/24 + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 192.0.2.2/24 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + swp1origmac=$(mac_get $swp1) + swp2origmac=$(mac_get $swp2) + ip link set $swp1 address $h2mac + ip link set $swp2 address $h1mac + + vrf_prepare + + h1_create + h2_create + switch_create + + if ! tc action add action tunnel_key help 2>&1 | grep -q nofrag; then + log_test "SKIP: iproute doesn't support nofrag" + exit $ksft_skip + fi +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup + + ip link set $swp2 address $swp2origmac + ip link set $swp1 address $swp1origmac +} + +tunnel_key_nofrag_test() +{ + RET=0 + local i + + tc filter add dev $swp1 ingress protocol ip pref 100 handle 100 \ + flower ip_flags nofrag action drop + tc filter add dev $swp1 ingress protocol ip pref 101 handle 101 \ + flower ip_flags firstfrag action drop + tc filter add dev $swp1 ingress protocol ip pref 102 handle 102 \ + flower ip_flags nofirstfrag action drop + + # test 'nofrag' set + tc filter add dev h1-et egress protocol all pref 1 handle 1 matchall $tcflags \ + action tunnel_key set src_ip 192.0.2.1 dst_ip 192.0.2.2 id 42 nofrag index 10 + $MZ h1-et -c 1 -p 930 -a 00:aa:bb:cc:dd:ee -b 00:ee:dd:cc:bb:aa -t ip -q + tc_check_packets "dev $swp1 ingress" 100 1 + check_err $? "packet smaller than MTU was not tunneled" + + $MZ h1-et -c 1 -p 931 -a 00:aa:bb:cc:dd:ee -b 00:ee:dd:cc:bb:aa -t ip -q + tc_check_packets "dev $swp1 ingress" 100 1 + check_err $? "packet bigger than MTU matched nofrag (nofrag was set)" + tc_check_packets "dev $swp1 ingress" 101 0 + check_err $? "packet bigger than MTU matched firstfrag (nofrag was set)" + tc_check_packets "dev $swp1 ingress" 102 0 + check_err $? "packet bigger than MTU matched nofirstfrag (nofrag was set)" + + # test 'nofrag' cleared + tc actions change action tunnel_key set src_ip 192.0.2.1 dst_ip 192.0.2.2 id 42 index 10 + $MZ h1-et -c 1 -p 931 -a 00:aa:bb:cc:dd:ee -b 00:ee:dd:cc:bb:aa -t ip -q + tc_check_packets "dev $swp1 ingress" 100 1 + check_err $? "packet bigger than MTU matched nofrag (nofrag was unset)" + tc_check_packets "dev $swp1 ingress" 101 1 + check_err $? "packet bigger than MTU didn't match firstfrag (nofrag was unset) " + tc_check_packets "dev $swp1 ingress" 102 1 + check_err $? "packet bigger than MTU didn't match nofirstfrag (nofrag was unset) " + + for i in 100 101 102; do + tc filter del dev $swp1 ingress protocol ip pref $i handle $i flower + done + tc filter del dev h1-et egress pref 1 handle 1 matchall + + log_test "tunnel_key nofrag ($tcflags)" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + tests_run +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 42e3bd1a05f5..fafd19ec7e1f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1719,6 +1719,46 @@ chk_subflow_nr() fi } +chk_mptcp_info() +{ + local nr_info=$1 + local info + local cnt1 + local cnt2 + local dump_stats + + if [[ $nr_info = "subflows_"* ]]; then + info="subflows" + nr_info=${nr_info:9} + else + echo "[fail] unsupported argument: $nr_info" + fail_test + return 1 + fi + + printf "%-${nr_blank}s %-30s" " " "mptcp_info $info=$nr_info" + + cnt1=$(ss -N $ns1 -inmHM | grep "$info:" | + sed -n 's/.*\('"$info"':\)\([[:digit:]]*\).*$/\2/p;q') + [ -z "$cnt1" ] && cnt1=0 + cnt2=$(ss -N $ns2 -inmHM | grep "$info:" | + sed -n 's/.*\('"$info"':\)\([[:digit:]]*\).*$/\2/p;q') + [ -z "$cnt2" ] && cnt2=0 + if [ "$cnt1" != "$nr_info" ] || [ "$cnt2" != "$nr_info" ]; then + echo "[fail] got $cnt1:$cnt2 $info expected $nr_info" + fail_test + dump_stats=1 + else + echo "[ ok ]" + fi + + if [ "$dump_stats" = 1 ]; then + ss -N $ns1 -inmHM + ss -N $ns2 -inmHM + dump_stats + fi +} + chk_link_usage() { local ns=$1 @@ -3118,13 +3158,18 @@ endpoint_tests() run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 2>/dev/null & wait_mpj $ns2 + chk_subflow_nr needtitle "before delete" 2 + chk_mptcp_info subflows_1 + pm_nl_del_endpoint $ns2 2 10.0.2.2 sleep 0.5 - chk_subflow_nr needtitle "after delete" 1 + chk_subflow_nr "" "after delete" 1 + chk_mptcp_info subflows_0 pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow wait_mpj $ns2 chk_subflow_nr "" "after re-add" 2 + chk_mptcp_info subflows_1 kill_tests_wait fi } diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 275491be3da2..383ac6fc037d 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -4,6 +4,31 @@ # # set -e +ALL_TESTS=" + kci_test_polrouting + kci_test_route_get + kci_test_addrlft + kci_test_promote_secondaries + kci_test_tc + kci_test_gre + kci_test_gretap + kci_test_ip6gretap + kci_test_erspan + kci_test_ip6erspan + kci_test_bridge + kci_test_addrlabel + kci_test_ifalias + kci_test_vrf + kci_test_encap + kci_test_macsec + kci_test_ipsec + kci_test_ipsec_offload + kci_test_fdb_get + kci_test_neigh_get + kci_test_bridge_parent_id + kci_test_address_proto +" + devdummy="test-dummy0" # Kselftest framework requirement - SKIP code is 4. @@ -1225,62 +1250,130 @@ kci_test_bridge_parent_id() echo "PASS: bridge_parent_id" } -kci_test_rtnl() +address_get_proto() +{ + local addr=$1; shift + + ip -N -j address show dev "$devdummy" | + jq -e -r --arg addr "${addr%/*}" \ + '.[].addr_info[] | select(.local == $addr) | .protocol' +} + +address_count() { + ip -N -j address show dev "$devdummy" "$@" | + jq -e -r '[.[].addr_info[] | .local | select(. != null)] | length' +} + +do_test_address_proto() +{ + local what=$1; shift + local addr=$1; shift + local addr2=${addr%/*}2/${addr#*/} + local addr3=${addr%/*}3/${addr#*/} + local proto + local count local ret=0 - kci_add_dummy - if [ $ret -ne 0 ];then - echo "FAIL: cannot add dummy interface" - return 1 - fi + local err - kci_test_polrouting + ip address add dev "$devdummy" "$addr3" check_err $? - kci_test_route_get + proto=$(address_get_proto "$addr3") + [[ "$proto" == null ]] check_err $? - kci_test_addrlft - check_err $? - kci_test_promote_secondaries - check_err $? - kci_test_tc - check_err $? - kci_test_gre + + ip address add dev "$devdummy" "$addr2" proto 0x99 check_err $? - kci_test_gretap + proto=$(address_get_proto "$addr2") + [[ "$proto" == 0x99 ]] check_err $? - kci_test_ip6gretap + + ip address add dev "$devdummy" "$addr" proto 0xab check_err $? - kci_test_erspan + proto=$(address_get_proto "$addr") + [[ "$proto" == 0xab ]] check_err $? - kci_test_ip6erspan + + ip address replace dev "$devdummy" "$addr" proto 0x11 + proto=$(address_get_proto "$addr") check_err $? - kci_test_bridge + [[ "$proto" == 0x11 ]] check_err $? - kci_test_addrlabel + + count=$(address_count) check_err $? - kci_test_ifalias + (( count >= 3 )) # $addr, $addr2 and $addr3 plus any kernel addresses check_err $? - kci_test_vrf + + count=$(address_count proto 0) check_err $? - kci_test_encap + (( count == 1 )) # just $addr3 check_err $? - kci_test_macsec + + count=$(address_count proto 0x11) check_err $? - kci_test_ipsec + (( count == 2 )) # $addr and $addr3 check_err $? - kci_test_ipsec_offload + + count=$(address_count proto 0xab) check_err $? - kci_test_fdb_get + (( count == 1 )) # just $addr3 check_err $? - kci_test_neigh_get + + ip address del dev "$devdummy" "$addr" + ip address del dev "$devdummy" "$addr2" + ip address del dev "$devdummy" "$addr3" + + if [ $ret -ne 0 ]; then + echo "FAIL: address proto $what" + return 1 + fi + echo "PASS: address proto $what" +} + +kci_test_address_proto() +{ + local ret=0 + + do_test_address_proto IPv4 192.0.2.1/28 check_err $? - kci_test_bridge_parent_id + + do_test_address_proto IPv6 2001:db8:1::1/64 check_err $? + return $ret +} + +kci_test_rtnl() +{ + local current_test + local ret=0 + + kci_add_dummy + if [ $ret -ne 0 ];then + echo "FAIL: cannot add dummy interface" + return 1 + fi + + for current_test in ${TESTS:-$ALL_TESTS}; do + $current_test + check_err $? + done + kci_del_dummy return $ret } +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $(echo $ALL_TESTS)) +EOF +} + #check for needed privileges if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" @@ -1295,6 +1388,14 @@ for x in ip tc;do fi done +while getopts t:h o; do + case $o in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + kci_test_rtnl exit $? diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index 46a02bbd31d0..6e59b1461dcc 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -66,11 +66,16 @@ #include <poll.h> #include <linux/tcp.h> #include <assert.h> +#include <openssl/pem.h> #ifndef MSG_ZEROCOPY #define MSG_ZEROCOPY 0x4000000 #endif +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + #define FILE_SZ (1ULL << 35) static int cfg_family = AF_INET6; static socklen_t cfg_alen = sizeof(struct sockaddr_in6); @@ -81,12 +86,14 @@ static int sndbuf; /* Default: autotuning. Can be set with -w <integer> option static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for receiver */ static int xflg; /* hash received data (simple xor) (-h option) */ static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */ +static int integrity; /* -i option: sender and receiver compute sha256 over the data.*/ static size_t chunk_size = 512*1024; static size_t map_align; unsigned long htotal; +unsigned int digest_len; static inline void prefetch(const void *x) { @@ -148,12 +155,14 @@ static void *mmap_large_buffer(size_t need, size_t *allocated) void *child_thread(void *arg) { + unsigned char digest[SHA256_DIGEST_LENGTH]; unsigned long total_mmap = 0, total = 0; struct tcp_zerocopy_receive zc; + unsigned char *buffer = NULL; unsigned long delta_usec; + EVP_MD_CTX *ctx = NULL; int flags = MAP_SHARED; struct timeval t0, t1; - char *buffer = NULL; void *raddr = NULL; void *addr = NULL; double throughput; @@ -180,6 +189,14 @@ void *child_thread(void *arg) addr = ALIGN_PTR_UP(raddr, map_align); } } + if (integrity) { + ctx = EVP_MD_CTX_new(); + if (!ctx) { + perror("cannot enable SHA computing"); + goto error; + } + EVP_DigestInit_ex(ctx, EVP_sha256(), NULL); + } while (1) { struct pollfd pfd = { .fd = fd, .events = POLLIN, }; int sub; @@ -191,7 +208,7 @@ void *child_thread(void *arg) memset(&zc, 0, sizeof(zc)); zc.address = (__u64)((unsigned long)addr); - zc.length = chunk_size; + zc.length = min(chunk_size, FILE_SZ - total); res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len); @@ -200,6 +217,8 @@ void *child_thread(void *arg) if (zc.length) { assert(zc.length <= chunk_size); + if (integrity) + EVP_DigestUpdate(ctx, addr, zc.length); total_mmap += zc.length; if (xflg) hash_zone(addr, zc.length); @@ -211,22 +230,30 @@ void *child_thread(void *arg) } if (zc.recv_skip_hint) { assert(zc.recv_skip_hint <= chunk_size); - lu = read(fd, buffer, zc.recv_skip_hint); + lu = read(fd, buffer, min(zc.recv_skip_hint, + FILE_SZ - total)); if (lu > 0) { + if (integrity) + EVP_DigestUpdate(ctx, buffer, lu); if (xflg) hash_zone(buffer, lu); total += lu; } + if (lu == 0) + goto end; } continue; } sub = 0; while (sub < chunk_size) { - lu = read(fd, buffer + sub, chunk_size - sub); + lu = read(fd, buffer + sub, min(chunk_size - sub, + FILE_SZ - total)); if (lu == 0) goto end; if (lu < 0) break; + if (integrity) + EVP_DigestUpdate(ctx, buffer + sub, lu); if (xflg) hash_zone(buffer + sub, lu); total += lu; @@ -237,6 +264,20 @@ end: gettimeofday(&t1, NULL); delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec; + if (integrity) { + fcntl(fd, F_SETFL, 0); + EVP_DigestFinal_ex(ctx, digest, &digest_len); + lu = read(fd, buffer, SHA256_DIGEST_LENGTH); + if (lu != SHA256_DIGEST_LENGTH) + perror("Error: Cannot read SHA256\n"); + + if (memcmp(digest, buffer, + SHA256_DIGEST_LENGTH)) + fprintf(stderr, "Error: SHA256 of the data is not right\n"); + else + printf("\nSHA256 is correct\n"); + } + throughput = 0; if (delta_usec) throughput = total * 8.0 / (double)delta_usec / 1000.0; @@ -368,19 +409,38 @@ static unsigned long default_huge_page_size(void) return hps; } +static void randomize(void *target, size_t count) +{ + static int urandom = -1; + ssize_t got; + + urandom = open("/dev/urandom", O_RDONLY); + if (urandom < 0) { + perror("open /dev/urandom"); + exit(1); + } + got = read(urandom, target, count); + if (got != count) { + perror("read /dev/urandom"); + exit(1); + } +} + int main(int argc, char *argv[]) { + unsigned char digest[SHA256_DIGEST_LENGTH]; struct sockaddr_storage listenaddr, addr; unsigned int max_pacing_rate = 0; + EVP_MD_CTX *ctx = NULL; + unsigned char *buffer; uint64_t total = 0; char *host = NULL; int fd, c, on = 1; size_t buffer_sz; - char *buffer; int sflg = 0; int mss = 0; - while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:")) != -1) { + while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:i")) != -1) { switch (c) { case '4': cfg_family = PF_INET; @@ -426,6 +486,9 @@ int main(int argc, char *argv[]) case 'a': map_align = atol(optarg); break; + case 'i': + integrity = 1; + break; default: exit(1); } @@ -468,7 +531,7 @@ int main(int argc, char *argv[]) } buffer = mmap_large_buffer(chunk_size, &buffer_sz); - if (buffer == (char *)-1) { + if (buffer == (unsigned char *)-1) { perror("mmap"); exit(1); } @@ -501,17 +564,34 @@ int main(int argc, char *argv[]) perror("setsockopt SO_ZEROCOPY, (-z option disabled)"); zflg = 0; } + if (integrity) { + randomize(buffer, buffer_sz); + ctx = EVP_MD_CTX_new(); + if (!ctx) { + perror("cannot enable SHA computing"); + exit(1); + } + EVP_DigestInit_ex(ctx, EVP_sha256(), NULL); + } while (total < FILE_SZ) { + size_t offset = total % chunk_size; int64_t wr = FILE_SZ - total; - if (wr > chunk_size) - wr = chunk_size; - /* Note : we just want to fill the pipe with 0 bytes */ - wr = send(fd, buffer, (size_t)wr, zflg ? MSG_ZEROCOPY : 0); + if (wr > chunk_size - offset) + wr = chunk_size - offset; + /* Note : we just want to fill the pipe with random bytes */ + wr = send(fd, buffer + offset, + (size_t)wr, zflg ? MSG_ZEROCOPY : 0); if (wr <= 0) break; + if (integrity) + EVP_DigestUpdate(ctx, buffer + offset, wr); total += wr; } + if (integrity && total == FILE_SZ) { + EVP_DigestFinal_ex(ctx, digest, &digest_len); + send(fd, digest, (size_t)SHA256_DIGEST_LENGTH, 0); + } close(fd); munmap(buffer, buffer_sz); return 0; diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh new file mode 100755 index 000000000000..31e5f0f8859d --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_mdb.sh @@ -0,0 +1,2318 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking VXLAN MDB functionality. The topology consists of +# two sets of namespaces: One for the testing of IPv4 underlay and another for +# IPv6. In both cases, both IPv4 and IPv6 overlay traffic are tested. +# +# Data path functionality is tested by sending traffic from one of the upper +# namespaces and checking using ingress tc filters that the expected traffic +# was received by one of the lower namespaces. +# +# +------------------------------------+ +------------------------------------+ +# | ns1_v4 | | ns1_v6 | +# | | | | +# | br0.10 br0.4000 br0.20 | | br0.10 br0.4000 br0.20 | +# | + + + | | + + + | +# | | | | | | | | | | +# | | | | | | | | | | +# | +---------+---------+ | | +---------+---------+ | +# | | | | | | +# | | | | | | +# | + | | + | +# | br0 | | br0 | +# | + | | + | +# | | | | | | +# | | | | | | +# | + | | + | +# | vx0 | | vx0 | +# | | | | +# | | | | +# | veth0 | | veth0 | +# | + | | + | +# +-----------------|------------------+ +-----------------|------------------+ +# | | +# +-----------------|------------------+ +-----------------|------------------+ +# | + | | + | +# | veth0 | | veth0 | +# | | | | +# | | | | +# | vx0 | | vx0 | +# | + | | + | +# | | | | | | +# | | | | | | +# | + | | + | +# | br0 | | br0 | +# | + | | + | +# | | | | | | +# | | | | | | +# | +---------+---------+ | | +---------+---------+ | +# | | | | | | | | | | +# | | | | | | | | | | +# | + + + | | + + + | +# | br0.10 br0.4000 br0.10 | | br0.10 br0.4000 br0.20 | +# | | | | +# | ns2_v4 | | ns2_v6 | +# +------------------------------------+ +------------------------------------+ + +ret=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +CONTROL_PATH_TESTS=" + basic_star_g_ipv4_ipv4 + basic_star_g_ipv6_ipv4 + basic_star_g_ipv4_ipv6 + basic_star_g_ipv6_ipv6 + basic_sg_ipv4_ipv4 + basic_sg_ipv6_ipv4 + basic_sg_ipv4_ipv6 + basic_sg_ipv6_ipv6 + star_g_ipv4_ipv4 + star_g_ipv6_ipv4 + star_g_ipv4_ipv6 + star_g_ipv6_ipv6 + sg_ipv4_ipv4 + sg_ipv6_ipv4 + sg_ipv4_ipv6 + sg_ipv6_ipv6 + dump_ipv4_ipv4 + dump_ipv6_ipv4 + dump_ipv4_ipv6 + dump_ipv6_ipv6 +" + +DATA_PATH_TESTS=" + encap_params_ipv4_ipv4 + encap_params_ipv6_ipv4 + encap_params_ipv4_ipv6 + encap_params_ipv6_ipv6 + starg_exclude_ir_ipv4_ipv4 + starg_exclude_ir_ipv6_ipv4 + starg_exclude_ir_ipv4_ipv6 + starg_exclude_ir_ipv6_ipv6 + starg_include_ir_ipv4_ipv4 + starg_include_ir_ipv6_ipv4 + starg_include_ir_ipv4_ipv6 + starg_include_ir_ipv6_ipv6 + starg_exclude_p2mp_ipv4_ipv4 + starg_exclude_p2mp_ipv6_ipv4 + starg_exclude_p2mp_ipv4_ipv6 + starg_exclude_p2mp_ipv6_ipv6 + starg_include_p2mp_ipv4_ipv4 + starg_include_p2mp_ipv6_ipv4 + starg_include_p2mp_ipv4_ipv6 + starg_include_p2mp_ipv6_ipv6 + egress_vni_translation_ipv4_ipv4 + egress_vni_translation_ipv6_ipv4 + egress_vni_translation_ipv4_ipv6 + egress_vni_translation_ipv6_ipv6 + all_zeros_mdb_ipv4 + all_zeros_mdb_ipv6 + mdb_fdb_ipv4_ipv4 + mdb_fdb_ipv6_ipv4 + mdb_fdb_ipv4_ipv6 + mdb_fdb_ipv6_ipv6 + mdb_torture_ipv4_ipv4 + mdb_torture_ipv6_ipv4 + mdb_torture_ipv4_ipv6 + mdb_torture_ipv6_ipv6 +" + +# All tests in this script. Can be overridden with -t option. +TESTS=" + $CONTROL_PATH_TESTS + $DATA_PATH_TESTS +" +VERBOSE=0 +PAUSE_ON_FAIL=no +PAUSE=no + +################################################################################ +# Utilities + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "$VERBOSE" = "1" ]; then + echo " rc=$rc, expected $expected" + fi + + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + + [ "$VERBOSE" = "1" ] && echo +} + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf "COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +tc_check_packets() +{ + local ns=$1; shift + local id=$1; shift + local handle=$1; shift + local count=$1; shift + local pkts + + sleep 0.1 + pkts=$(tc -n $ns -j -s filter show $id \ + | jq ".[] | select(.options.handle == $handle) | \ + .options.actions[0].stats.packets") + [[ $pkts == $count ]] +} + +################################################################################ +# Setup + +setup_common_ns() +{ + local ns=$1; shift + local local_addr=$1; shift + + ip netns exec $ns sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -qw net.ipv4.fib_multipath_use_neigh=1 + ip netns exec $ns sysctl -qw net.ipv4.conf.default.ignore_routes_with_linkdown=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.forwarding=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.forwarding=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0 + + ip -n $ns link set dev lo up + ip -n $ns address add $local_addr dev lo + + ip -n $ns link set dev veth0 up + + ip -n $ns link add name br0 up type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + + ip -n $ns link add link br0 name br0.10 up type vlan id 10 + bridge -n $ns vlan add vid 10 dev br0 self + + ip -n $ns link add link br0 name br0.20 up type vlan id 20 + bridge -n $ns vlan add vid 20 dev br0 self + + ip -n $ns link add link br0 name br0.4000 up type vlan id 4000 + bridge -n $ns vlan add vid 4000 dev br0 self + + ip -n $ns link add name vx0 up master br0 type vxlan \ + local $local_addr dstport 4789 external vnifilter + bridge -n $ns link set dev vx0 vlan_tunnel on + + bridge -n $ns vlan add vid 10 dev vx0 + bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010 + bridge -n $ns vni add vni 10010 dev vx0 + + bridge -n $ns vlan add vid 20 dev vx0 + bridge -n $ns vlan add vid 20 dev vx0 tunnel_info id 10020 + bridge -n $ns vni add vni 10020 dev vx0 + + bridge -n $ns vlan add vid 4000 dev vx0 pvid + bridge -n $ns vlan add vid 4000 dev vx0 tunnel_info id 14000 + bridge -n $ns vni add vni 14000 dev vx0 +} + +setup_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local local_addr1=$1; shift + local local_addr2=$1; shift + + ip netns add $ns1 + ip netns add $ns2 + + ip link add name veth0 type veth peer name veth1 + ip link set dev veth0 netns $ns1 name veth0 + ip link set dev veth1 netns $ns2 name veth0 + + setup_common_ns $ns1 $local_addr1 + setup_common_ns $ns2 $local_addr2 +} + +setup_v4() +{ + setup_common ns1_v4 ns2_v4 192.0.2.1 192.0.2.2 + + ip -n ns1_v4 address add 192.0.2.17/28 dev veth0 + ip -n ns2_v4 address add 192.0.2.18/28 dev veth0 + + ip -n ns1_v4 route add default via 192.0.2.18 + ip -n ns2_v4 route add default via 192.0.2.17 +} + +cleanup_v4() +{ + ip netns del ns2_v4 + ip netns del ns1_v4 +} + +setup_v6() +{ + setup_common ns1_v6 ns2_v6 2001:db8:1::1 2001:db8:1::2 + + ip -n ns1_v6 address add 2001:db8:2::1/64 dev veth0 nodad + ip -n ns2_v6 address add 2001:db8:2::2/64 dev veth0 nodad + + ip -n ns1_v6 route add default via 2001:db8:2::2 + ip -n ns2_v6 route add default via 2001:db8:2::1 +} + +cleanup_v6() +{ + ip netns del ns2_v6 + ip netns del ns1_v6 +} + +setup() +{ + set -e + + setup_v4 + setup_v6 + + sleep 5 + + set +e +} + +cleanup() +{ + cleanup_v6 &> /dev/null + cleanup_v4 &> /dev/null +} + +################################################################################ +# Tests - Control path + +basic_common() +{ + local ns1=$1; shift + local grp_key=$1; shift + local vtep_ip=$1; shift + + # Test basic control path operations common to all MDB entry types. + + # Basic add, replace and delete behavior. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 0 "MDB entry addition" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\"" + log_test $? 0 "MDB entry presence after addition" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 0 "MDB entry replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\"" + log_test $? 0 "MDB entry presence after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + log_test $? 0 "MDB entry deletion" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\"" + log_test $? 1 "MDB entry presence after deletion" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + log_test $? 255 "Non-existent MDB entry deletion" + + # Default protocol and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"proto static\"" + log_test $? 0 "MDB entry default protocol" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent proto 123 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"proto 123\"" + log_test $? 0 "MDB entry protocol replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Default destination port and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" dst_port \"" + log_test $? 1 "MDB entry default destination port" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip dst_port 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"dst_port 1234\"" + log_test $? 0 "MDB entry destination port replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Default destination VNI and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" vni \"" + log_test $? 1 "MDB entry default destination VNI" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"vni 1234\"" + log_test $? 0 "MDB entry destination VNI replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Default outgoing interface and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" via \"" + log_test $? 1 "MDB entry default outgoing interface" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010 via veth0" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"via veth0\"" + log_test $? 0 "MDB entry outgoing interface replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Common error cases. + run_cmd "bridge -n $ns1 mdb add dev vx0 port veth0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with mismatch between device and port" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key temp dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with temp state" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent vid 10 dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with VLAN" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp 01:02:03:04:05:06 permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry MAC address" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent" + log_test $? 255 "MDB entry without extended parameters" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent proto 3 dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with an invalid protocol" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni $((2 ** 24)) src_vni 10010" + log_test $? 255 "MDB entry with an invalid destination VNI" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni $((2 ** 24))" + log_test $? 255 "MDB entry with an invalid source VNI" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent src_vni 10010" + log_test $? 255 "MDB entry without a remote destination IP" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "Duplicate MDB entries" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" +} + +basic_star_g_ipv4_ipv4() +{ + local ns1=ns1_v4 + local grp_key="grp 239.1.1.1" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (*, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_star_g_ipv6_ipv4() +{ + local ns1=ns1_v4 + local grp_key="grp ff0e::1" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (*, G) operations - IPv6 overlay / IPv4 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_star_g_ipv4_ipv6() +{ + local ns1=ns1_v6 + local grp_key="grp 239.1.1.1" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (*, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_star_g_ipv6_ipv6() +{ + local ns1=ns1_v6 + local grp_key="grp ff0e::1" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (*, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv4_ipv4() +{ + local ns1=ns1_v4 + local grp_key="grp 239.1.1.1 src 192.0.2.129" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (S, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv6_ipv4() +{ + local ns1=ns1_v4 + local grp_key="grp ff0e::1 src 2001:db8:100::1" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (S, G) operations - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv4_ipv6() +{ + local ns1=ns1_v6 + local grp_key="grp 239.1.1.1 src 192.0.2.129" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (S, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv6_ipv6() +{ + local ns1=ns1_v6 + local grp_key="grp ff0e::1 src 2001:db8:100::1" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (S, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +star_g_common() +{ + local ns1=$1; shift + local grp=$1; shift + local src1=$1; shift + local src2=$1; shift + local src3=$1; shift + local vtep_ip=$1; shift + local all_zeros_grp=$1; shift + + # Test control path operations specific to (*, G) entries. + + # Basic add, replace and delete behavior. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 0 "(*, G) MDB entry addition with source list" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \"" + log_test $? 0 "(*, G) MDB entry presence after addition" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry presence after addition" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 0 "(*, G) MDB entry replacement with source list" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \"" + log_test $? 0 "(*, G) MDB entry presence after replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry presence after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + log_test $? 0 "(*, G) MDB entry deletion" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \"" + log_test $? 1 "(*, G) MDB entry presence after deletion" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 1 "(S, G) MDB entry presence after deletion" + + # Default filter mode and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep exclude" + log_test $? 0 "(*, G) MDB entry default filter mode" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep include" + log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"include\"" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"include\"" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\" | grep blocked" + log_test $? 1 "\"blocked\" flag after replacing filter mode to \"include\"" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep exclude" + log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"exclude\"" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"exclude\"" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\" | grep blocked" + log_test $? 0 "\"blocked\" flag after replacing filter mode to \"exclude\"" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default source list and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep source_list" + log_test $? 1 "(*, G) MDB entry default source list" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src2,$src3 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry of 1st source after replacing source list" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src2\"" + log_test $? 0 "(S, G) MDB entry of 2nd source after replacing source list" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src3\"" + log_test $? 0 "(S, G) MDB entry of 3rd source after replacing source list" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src3 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry of 1st source after removing source" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src2\"" + log_test $? 1 "(S, G) MDB entry of 2nd source after removing source" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src3\"" + log_test $? 0 "(S, G) MDB entry of 3rd source after removing source" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default protocol and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \"proto static\"" + log_test $? 0 "(*, G) MDB entry default protocol" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \"proto static\"" + log_test $? 0 "(S, G) MDB entry default protocol" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 proto bgp dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \"proto bgp\"" + log_test $? 0 "(*, G) MDB entry protocol after replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \"proto bgp\"" + log_test $? 0 "(S, G) MDB entry protocol after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default destination port and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" dst_port \"" + log_test $? 1 "(*, G) MDB entry default destination port" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" dst_port \"" + log_test $? 1 "(S, G) MDB entry default destination port" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip dst_port 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" dst_port 1234 \"" + log_test $? 0 "(*, G) MDB entry destination port after replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" dst_port 1234 \"" + log_test $? 0 "(S, G) MDB entry destination port after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default destination VNI and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" vni \"" + log_test $? 1 "(*, G) MDB entry default destination VNI" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" vni \"" + log_test $? 1 "(S, G) MDB entry default destination VNI" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip vni 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" vni 1234 \"" + log_test $? 0 "(*, G) MDB entry destination VNI after replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" vni 1234 \"" + log_test $? 0 "(S, G) MDB entry destination VNI after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default outgoing interface and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" via \"" + log_test $? 1 "(*, G) MDB entry default outgoing interface" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" via \"" + log_test $? 1 "(S, G) MDB entry default outgoing interface" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010 via veth0" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" via veth0 \"" + log_test $? 0 "(*, G) MDB entry outgoing interface after replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" via veth0 \"" + log_test $? 0 "(S, G) MDB entry outgoing interface after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Error cases. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp permanent filter_mode exclude dst $vtep_ip src_vni 10010" + log_test $? 255 "All-zeros group with filter mode" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp permanent source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 255 "All-zeros group with source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode include dst $vtep_ip src_vni 10010" + log_test $? 255 "(*, G) INCLUDE with an empty source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $grp dst $vtep_ip src_vni 10010" + log_test $? 255 "Invalid source in source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 255 "Source list without filter mode" +} + +star_g_ipv4_ipv4() +{ + local ns1=ns1_v4 + local grp=239.1.1.1 + local src1=192.0.2.129 + local src2=192.0.2.130 + local src3=192.0.2.131 + local vtep_ip=198.51.100.100 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (*, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +star_g_ipv6_ipv4() +{ + local ns1=ns1_v4 + local grp=ff0e::1 + local src1=2001:db8:100::1 + local src2=2001:db8:100::2 + local src3=2001:db8:100::3 + local vtep_ip=198.51.100.100 + local all_zeros_grp=:: + + echo + echo "Control path: (*, G) operations - IPv6 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +star_g_ipv4_ipv6() +{ + local ns1=ns1_v6 + local grp=239.1.1.1 + local src1=192.0.2.129 + local src2=192.0.2.130 + local src3=192.0.2.131 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (*, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +star_g_ipv6_ipv6() +{ + local ns1=ns1_v6 + local grp=ff0e::1 + local src1=2001:db8:100::1 + local src2=2001:db8:100::2 + local src3=2001:db8:100::3 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=:: + + echo + echo "Control path: (*, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +sg_common() +{ + local ns1=$1; shift + local grp=$1; shift + local src=$1; shift + local vtep_ip=$1; shift + local all_zeros_grp=$1; shift + + # Test control path operations specific to (S, G) entries. + + # Default filter mode. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep include" + log_test $? 0 "(S, G) MDB entry default filter mode" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010" + + # Error cases. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent filter_mode include dst $vtep_ip src_vni 10010" + log_test $? 255 "(S, G) with filter mode" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent source_list $src dst $vtep_ip src_vni 10010" + log_test $? 255 "(S, G) with source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $grp permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "(S, G) with an invalid source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp src $src permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "All-zeros group with source" +} + +sg_ipv4_ipv4() +{ + local ns1=ns1_v4 + local grp=239.1.1.1 + local src=192.0.2.129 + local vtep_ip=198.51.100.100 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (S, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +sg_ipv6_ipv4() +{ + local ns1=ns1_v4 + local grp=ff0e::1 + local src=2001:db8:100::1 + local vtep_ip=198.51.100.100 + local all_zeros_grp=:: + + echo + echo "Control path: (S, G) operations - IPv6 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +sg_ipv4_ipv6() +{ + local ns1=ns1_v6 + local grp=239.1.1.1 + local src=192.0.2.129 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (S, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +sg_ipv6_ipv6() +{ + local ns1=ns1_v6 + local grp=ff0e::1 + local src=2001:db8:100::1 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=:: + + echo + echo "Control path: (S, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +ipv4_grps_get() +{ + local max_grps=$1; shift + local i + + for i in $(seq 0 $((max_grps - 1))); do + echo "239.1.1.$i" + done +} + +ipv6_grps_get() +{ + local max_grps=$1; shift + local i + + for i in $(seq 0 $((max_grps - 1))); do + echo "ff0e::$(printf %x $i)" + done +} + +dump_common() +{ + local ns1=$1; shift + local local_addr=$1; shift + local remote_prefix=$1; shift + local fn=$1; shift + local max_vxlan_devs=2 + local max_remotes=64 + local max_grps=256 + local num_entries + local batch_file + local grp + local i j + + # The kernel maintains various markers for the MDB dump. Add a test for + # large scale MDB dump to make sure that all the configured entries are + # dumped and that the markers are used correctly. + + # Create net devices. + for i in $(seq 1 $max_vxlan_devs); do + ip -n $ns1 link add name vx-test${i} up type vxlan \ + local $local_addr dstport 4789 external vnifilter + done + + # Create batch file with MDB entries. + batch_file=$(mktemp) + for i in $(seq 1 $max_vxlan_devs); do + for j in $(seq 1 $max_remotes); do + for grp in $($fn $max_grps); do + echo "mdb add dev vx-test${i} port vx-test${i} grp $grp permanent dst ${remote_prefix}${j}" >> $batch_file + done + done + done + + # Program the batch file and check for expected number of entries. + bridge -n $ns1 -b $batch_file + for i in $(seq 1 $max_vxlan_devs); do + num_entries=$(bridge -n $ns1 mdb show dev vx-test${i} | grep "permanent" | wc -l) + [[ $num_entries -eq $((max_grps * max_remotes)) ]] + log_test $? 0 "Large scale dump - VXLAN device #$i" + done + + rm -rf $batch_file +} + +dump_ipv4_ipv4() +{ + local ns1=ns1_v4 + local local_addr=192.0.2.1 + local remote_prefix=198.51.100. + local fn=ipv4_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv4 overlay / IPv4 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +dump_ipv6_ipv4() +{ + local ns1=ns1_v4 + local local_addr=192.0.2.1 + local remote_prefix=198.51.100. + local fn=ipv6_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv6 overlay / IPv4 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +dump_ipv4_ipv6() +{ + local ns1=ns1_v6 + local local_addr=2001:db8:1::1 + local remote_prefix=2001:db8:1000:: + local fn=ipv4_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv4 overlay / IPv6 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +dump_ipv6_ipv6() +{ + local ns1=ns1_v6 + local local_addr=2001:db8:1::1 + local remote_prefix=2001:db8:1000:: + local fn=ipv6_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv6 overlay / IPv6 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +################################################################################ +# Tests - Data path + +encap_params_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local enc_ethtype=$1; shift + local grp=$1; shift + local src=$1; shift + local mz=$1; shift + + # Test that packets forwarded by the VXLAN MDB are encapsulated with + # the correct parameters. Transmit packets from the first namespace and + # check that they hit the corresponding filters on the ingress of the + # second namespace. + + run_cmd "tc -n $ns2 qdisc replace dev veth0 clsact" + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + # Check destination IP. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Destination IP - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Destination IP - no match" + + run_cmd "tc -n $ns2 filter del dev vx0 ingress pref 1 handle 101 flower" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10020" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" + + # Check destination port. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Default destination port - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Default destination port - no match" + + run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass" + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Non-default destination port - match" + + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Non-default destination port - no match" + + run_cmd "tc -n $ns2 filter del dev veth0 ingress pref 1 handle 101 flower" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10020" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" + + # Check default VNI. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Default destination VNI - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Default destination VNI - no match" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10020 src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Non-default destination VNI - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Non-default destination VNI - no match" + + run_cmd "tc -n $ns2 filter del dev vx0 ingress pref 1 handle 101 flower" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10020" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" +} + +encap_params_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local enc_ethtype="ip" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Encapsulation parameters - IPv4 overlay / IPv4 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn" +} + +encap_params_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local enc_ethtype="ip" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Encapsulation parameters - IPv6 overlay / IPv4 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn -6" +} + +encap_params_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local enc_ethtype="ipv6" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Encapsulation parameters - IPv4 overlay / IPv6 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn" +} + +encap_params_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local enc_ethtype="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Encapsulation parameters - IPv6 overlay / IPv6 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn -6" +} + +starg_exclude_ir_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) EXCLUDE MDB entry with one source and two remote + # VTEPs. Make sure that the source in the source list is not forwarded + # and that a source not in the list is forwarded. Remove one of the + # VTEPs from the entry and make sure that packets are only forwarded to + # the remaining VTEP. + + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Block excluded source - second VTEP" + + # Check that valid source is forwarded to both VTEPs. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Forward valid source - second VTEP" + + # Remove second VTEP. + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Block excluded source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Block excluded source after removal - second VTEP" + + # Check that valid source is forwarded to the remaining VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Forward valid source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Forward valid source after removal - second VTEP" +} + +starg_exclude_ir_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv4 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_ir_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv6 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_exclude_ir_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv4 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_ir_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv6 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_ir_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) INCLUDE MDB entry with one source and two remote + # VTEPs. Make sure that the source in the source list is forwarded and + # that a source not in the list is not forwarded. Remove one of the + # VTEPs from the entry and make sure that packets are only forwarded to + # the remaining VTEP. + + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Block excluded source - second VTEP" + + # Check that valid source is forwarded to both VTEPs. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Forward valid source - second VTEP" + + # Remove second VTEP. + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Block excluded source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Block excluded source after removal - second VTEP" + + # Check that valid source is forwarded to the remaining VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Forward valid source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Forward valid source after removal - second VTEP" +} + +starg_include_ir_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv4 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_ir_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv6 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_ir_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv4 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_ir_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv6 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_exclude_p2mp_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local mcast_grp=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) EXCLUDE MDB entry with one source and one multicast + # group to which packets are sent. Make sure that the source in the + # source list is not forwarded and that a source not in the list is + # forwarded. + + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $mcast_grp action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0" + + # Check that invalid source is not forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source" + + # Check that valid source is forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source" + + # Remove the VTEP from the multicast group. + run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" + + # Check that valid source is not received anymore. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Receive of valid source after removal from group" +} + +starg_exclude_p2mp_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_p2mp_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_exclude_p2mp_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_p2mp_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_p2mp_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local mcast_grp=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) INCLUDE MDB entry with one source and one multicast + # group to which packets are sent. Make sure that the source in the + # source list is forwarded and that a source not in the list is not + # forwarded. + + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $mcast_grp action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0" + + # Check that invalid source is not forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source" + + # Check that valid source is forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source" + + # Remove the VTEP from the multicast group. + run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" + + # Check that valid source is not received anymore. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Receive of valid source after removal from group" +} + +starg_include_p2mp_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_p2mp_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_p2mp_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_p2mp_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +egress_vni_translation_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local mcast_grp=$1; shift + local plen=$1; shift + local proto=$1; shift + local grp=$1; shift + local src=$1; shift + local mz=$1; shift + + # When P2MP tunnels are used with optimized inter-subnet multicast + # (OISM) [1], the ingress VTEP does not perform VNI translation and + # uses the VNI of the source broadcast domain (BD). If the egress VTEP + # is a member in the source BD, then no VNI translation is needed. + # Otherwise, the egress VTEP needs to translate the VNI to the + # supplementary broadcast domain (SBD) VNI, which is usually the L3VNI. + # + # In this test, remove the VTEP in the second namespace from VLAN 10 + # (VNI 10010) and make sure that a packet sent from this VLAN on the + # first VTEP is received by the SVI corresponding to the L3VNI (14000 / + # VLAN 4000) on the second VTEP. + # + # The second VTEP will be able to decapsulate the packet with VNI 10010 + # because this VNI is configured on its shared VXLAN device. Later, + # when ingressing the bridge, the VNI to VLAN lookup will fail because + # the VTEP is not a member in VLAN 10, which will cause the packet to + # be tagged with VLAN 4000 since it is configured as PVID. + # + # [1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast + + run_cmd "tc -n $ns2 qdisc replace dev br0.4000 clsact" + run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin" + run_cmd "tc -n $ns2 filter replace dev br0.4000 ingress pref 1 handle 101 proto $proto flower src_ip $src dst_ip $grp action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp src $src permanent dst $mcast_grp src_vni 10010 via veth0" + + # Remove the second VTEP from VLAN 10. + run_cmd "bridge -n $ns2 vlan del vid 10 dev vx0" + + # Make sure that packets sent from the first VTEP over VLAN 10 are + # received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on + # the second VTEP, since it is configured as PVID. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 + log_test $? 0 "Egress VNI translation - PVID configured" + + # Remove PVID flag from VLAN 4000 on the second VTEP and make sure + # packets are no longer received by the SVI interface. + run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 + log_test $? 0 "Egress VNI translation - no PVID configured" + + # Reconfigure the PVID and make sure packets are received again. + run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2 + log_test $? 0 "Egress VNI translation - PVID reconfigured" +} + +egress_vni_translation_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Egress VNI translation - IPv4 overlay / IPv4 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn" +} + +egress_vni_translation_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Egress VNI translation - IPv6 overlay / IPv4 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn -6" +} + +egress_vni_translation_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Egress VNI translation - IPv4 overlay / IPv6 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn" +} + +egress_vni_translation_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Egress VNI translation - IPv6 overlay / IPv6 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn -6" +} + +all_zeros_mdb_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local vtep3_ip=$1; shift + local vtep4_ip=$1; shift + local plen=$1; shift + local ipv4_grp=239.1.1.1 + local ipv4_unreg_grp=239.2.2.2 + local ipv4_ll_grp=224.0.0.100 + local ipv4_src=192.0.2.129 + local ipv6_grp=ff0e::1 + local ipv6_unreg_grp=ff0e::2 + local ipv6_ll_grp=ff02::1 + local ipv6_src=2001:db8:100::1 + + # Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic + # and make sure they only forward unregistered IP multicast traffic + # which is not link-local. Also make sure that each entry only forwards + # traffic from the matching address family. + + # Associate two different VTEPs with one all-zeros MDB entry: Two with + # the IPv4 entry (0.0.0.0) and another two with the IPv6 one (::). + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp 0.0.0.0 permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp 0.0.0.0 permanent dst $vtep2_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp :: permanent dst $vtep3_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp :: permanent dst $vtep4_ip src_vni 10010" + + # Associate one VTEP from each set with a regular MDB entry: One with + # an IPv4 entry and another with an IPv6 one. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $ipv4_grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $ipv6_grp permanent dst $vtep3_ip src_vni 10010" + + # Add filters to match on decapsulated traffic in the second namespace. + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 103 proto all flower enc_dst_ip $vtep3_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 104 proto all flower enc_dst_ip $vtep4_ip action pass" + + # Configure the VTEP addresses in the second namespace to enable + # decapsulation. + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep3_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep4_ip/$plen dev lo" + + # Send registered IPv4 multicast and make sure it only arrives to the + # first VTEP. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Registered IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Registered IPv4 multicast - second VTEP" + + # Send unregistered IPv4 multicast that is not link-local and make sure + # it arrives to the first and second VTEPs. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Unregistered IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Unregistered IPv4 multicast - second VTEP" + + # Send IPv4 link-local multicast traffic and make sure it does not + # arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Link-local IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Link-local IPv4 multicast - second VTEP" + + # Send registered IPv4 multicast using a unicast MAC address and make + # sure it does not arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b 00:11:22:33:44:55 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Registered IPv4 multicast with a unicast MAC - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Registered IPv4 multicast with a unicast MAC - second VTEP" + + # Send registered IPv4 multicast using a broadcast MAC address and make + # sure it does not arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b bcast -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Registered IPv4 multicast with a broadcast MAC - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Registered IPv4 multicast with a broadcast MAC - second VTEP" + + # Make sure IPv4 traffic did not reach the VTEPs associated with + # IPv6 entries. + tc_check_packets "$ns2" "dev vx0 ingress" 103 0 + log_test $? 0 "IPv4 traffic - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 0 + log_test $? 0 "IPv4 traffic - fourth VTEP" + + # Reset IPv4 filters before testing IPv6 traffic. + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + + # Send registered IPv6 multicast and make sure it only arrives to the + # third VTEP. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 1 + log_test $? 0 "Registered IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 0 + log_test $? 0 "Registered IPv6 multicast - fourth VTEP" + + # Send unregistered IPv6 multicast that is not link-local and make sure + # it arrives to the third and fourth VTEPs. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Unregistered IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Unregistered IPv6 multicast - fourth VTEP" + + # Send IPv6 link-local multicast traffic and make sure it does not + # arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Link-local IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Link-local IPv6 multicast - fourth VTEP" + + # Send registered IPv6 multicast using a unicast MAC address and make + # sure it does not arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b 00:11:22:33:44:55 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Registered IPv6 multicast with a unicast MAC - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Registered IPv6 multicast with a unicast MAC - fourth VTEP" + + # Send registered IPv6 multicast using a broadcast MAC address and make + # sure it does not arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b bcast -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Registered IPv6 multicast with a broadcast MAC - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Registered IPv6 multicast with a broadcast MAC - fourth VTEP" + + # Make sure IPv6 traffic did not reach the VTEPs associated with + # IPv4 entries. + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "IPv6 traffic - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "IPv6 traffic - second VTEP" +} + +all_zeros_mdb_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.101 + local vtep2_ip=198.51.100.102 + local vtep3_ip=198.51.100.103 + local vtep4_ip=198.51.100.104 + local plen=32 + + echo + echo "Data path: All-zeros MDB entry - IPv4 underlay" + echo "----------------------------------------------" + + all_zeros_mdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $vtep3_ip \ + $vtep4_ip $plen +} + +all_zeros_mdb_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local vtep3_ip=2001:db8:3000::1 + local vtep4_ip=2001:db8:4000::1 + local plen=128 + + echo + echo "Data path: All-zeros MDB entry - IPv6 underlay" + echo "----------------------------------------------" + + all_zeros_mdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $vtep3_ip \ + $vtep4_ip $plen +} + +mdb_fdb_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local proto=$1; shift + local grp=$1; shift + local src=$1; shift + local mz=$1; shift + + # Install an MDB entry and an FDB entry and make sure that the FDB + # entry only forwards traffic that was not forwarded by the MDB. + + # Associate the MDB entry with one VTEP and the FDB entry with another + # VTEP. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 fdb add 00:00:00:00:00:00 dev vx0 self static dst $vtep2_ip src_vni 10010" + + # Add filters to match on decapsulated traffic in the second namespace. + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto $proto flower ip_proto udp dst_port 54321 enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto $proto flower ip_proto udp dst_port 54321 enc_dst_ip $vtep2_ip action pass" + + # Configure the VTEP addresses in the second namespace to enable + # decapsulation. + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + # Send IP multicast traffic and make sure it is forwarded by the MDB + # and only arrives to the first VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "IP multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "IP multicast - second VTEP" + + # Send broadcast traffic and make sure it is forwarded by the FDB and + # only arrives to the second VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b bcast -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Broadcast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Broadcast - second VTEP" + + # Remove the MDB entry and make sure that IP multicast is now forwarded + # by the FDB to the second VTEP. + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "IP multicast after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 2 + log_test $? 0 "IP multicast after removal - second VTEP" +} + +mdb_fdb_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn" +} + +mdb_fdb_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn -6" +} + +mdb_fdb_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn" +} + +mdb_fdb_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn -6" +} + +mdb_grp1_loop() +{ + local ns1=$1; shift + local vtep1_ip=$1; shift + local grp1=$1; shift + + while true; do + bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp1 dst $vtep1_ip src_vni 10010 + bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp1 permanent dst $vtep1_ip src_vni 10010 + done >/dev/null 2>&1 +} + +mdb_grp2_loop() +{ + local ns1=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local grp2=$1; shift + + while true; do + bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp2 dst $vtep1_ip src_vni 10010 + bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp2 permanent dst $vtep1_ip src_vni 10010 + bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep2_ip src_vni 10010 + done >/dev/null 2>&1 +} + +mdb_torture_common() +{ + local ns1=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local grp1=$1; shift + local grp2=$1; shift + local src=$1; shift + local mz=$1; shift + local pid1 + local pid2 + local pid3 + local pid4 + + # Continuously send two streams that are forwarded by two different MDB + # entries. The first entry will be added and deleted in a loop. This + # allows us to test that the data path does not use freed MDB entry + # memory. The second entry will have two remotes, one that is added and + # deleted in a loop and another that is replaced in a loop. This allows + # us to test that the data path does not use freed remote entry memory. + # The test is considered successful if nothing crashed. + + # Create the MDB entries that will be continuously deleted / replaced. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp1 permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep2_ip src_vni 10010" + + mdb_grp1_loop $ns1 $vtep1_ip $grp1 & + pid1=$! + mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 & + pid2=$! + ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + pid3=$! + ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + pid4=$! + + sleep 30 + kill -9 $pid1 $pid2 $pid3 $pid4 + wait $pid1 $pid2 $pid3 $pid4 2>/dev/null + + log_test 0 0 "Torture test" +} + +mdb_torture_ipv4_ipv4() +{ + local ns1=ns1_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local grp1=239.1.1.1 + local grp2=239.2.2.2 + local src=192.0.2.129 + + echo + echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn" +} + +mdb_torture_ipv6_ipv4() +{ + local ns1=ns1_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local grp1=ff0e::1 + local grp2=ff0e::2 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn -6" +} + +mdb_torture_ipv4_ipv6() +{ + local ns1=ns1_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local grp1=239.1.1.1 + local grp2=239.2.2.2 + local src=192.0.2.129 + + echo + echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn" +} + +mdb_torture_ipv6_ipv6() +{ + local ns1=ns1_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local grp1=ff0e::1 + local grp2=ff0e::2 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn -6" +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -c Control path tests only + -d Data path tests only + -p Pause on fail + -P Pause after each test before cleanup + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +trap cleanup EXIT + +while getopts ":t:cdpPvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + c) TESTS=${CONTROL_PATH_TESTS};; + d) TESTS=${DATA_PATH_TESTS};; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# Make sure we don't pause twice. +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v bridge)" ]; then + echo "SKIP: Could not run test without bridge tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test without jq tool" + exit $ksft_skip +fi + +bridge mdb help 2>&1 | grep -q "src_vni" +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 bridge too old, missing VXLAN MDB support" + exit $ksft_skip +fi + +# Start clean. +cleanup + +for t in $TESTS +do + setup; $t; cleanup; +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + +exit $ret diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 2cbb12736596..e699548d4247 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1820,4 +1820,49 @@ TEST(tls_v6ops) { close(sfd); } +TEST(prequeue) { + struct tls_crypto_info_keys tls12; + char buf[20000], buf2[20000]; + struct sockaddr_in addr; + int sfd, cfd, ret, fd; + socklen_t len; + + len = sizeof(addr); + memrnd(buf, sizeof(buf)); + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12); + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + + fd = socket(AF_INET, SOCK_STREAM, 0); + sfd = socket(AF_INET, SOCK_STREAM, 0); + + ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0); + ASSERT_EQ(listen(sfd, 10), 0); + ASSERT_EQ(getsockname(sfd, &addr, &len), 0); + ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0); + ASSERT_GE(cfd = accept(sfd, &addr, &len), 0); + close(sfd); + + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + if (ret) { + ASSERT_EQ(errno, ENOENT); + SKIP(return, "no TLS support"); + } + + ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + EXPECT_EQ(send(fd, buf, sizeof(buf), MSG_DONTWAIT), sizeof(buf)); + + ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0); + ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_WAITALL), sizeof(buf2)); + + EXPECT_EQ(memcmp(buf, buf2, sizeof(buf)), 0); + + close(fd); + close(cfd); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt index a28571aff0e1..ff956d8c99c5 100644 --- a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt +++ b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt @@ -38,6 +38,8 @@ skip: A completely optional key, if the corresponding value is "yes" this test case will still appear in the results output but marked as skipped. This key can be placed anywhere inside the test case at the top level. +dependsOn: Same as 'skip', but the value is executed as a command. The test + is skipped when the command returns non-zero. category: A list of single-word descriptions covering what the command under test is testing. Example: filter, actions, u32, gact, etc. setup: The list of commands required to ensure the command under test diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json index b40ee602918a..b5b47fbf6c00 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json @@ -983,5 +983,30 @@ "teardown": [ "$TC actions flush action tunnel_key" ] + }, + { + "id": "6bda", + "name": "Add tunnel_key action with nofrag option", + "category": [ + "actions", + "tunnel_key" + ], + "dependsOn": "$TC actions add action tunnel_key help 2>&1 | grep -q nofrag", + "setup": [ + [ + "$TC action flush action tunnel_key", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 1111 nofrag index 222", + "expExitCode": "0", + "verifyCmd": "$TC actions get action tunnel_key index 222", + "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 1111.*csum.*nofrag pipe.*index 222", + "matchCount": "1", + "teardown": [ + "$TC actions flush action tunnel_key" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json new file mode 100644 index 000000000000..16f3a83605e4 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json @@ -0,0 +1,416 @@ +[ + { + "id": "abdc", + "name": "Reference pedit action object in filter", + "category": [ + "infra", + "pedit" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC action add action pedit munge offset 0 u8 clear index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action pedit index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action pedit" + ] + }, + { + "id": "7a70", + "name": "Reference mpls action object in filter", + "category": [ + "infra", + "mpls" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC action add action mpls pop protocol ipv4 index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action mpls index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action mpls" + ] + }, + { + "id": "d241", + "name": "Reference bpf action object in filter", + "category": [ + "infra", + "bpf" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action bpf index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action bpf" + ] + }, + { + "id": "383a", + "name": "Reference connmark action object in filter", + "category": [ + "infra", + "connmark" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action connmark" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action connmark index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action connmark" + ] + }, + { + "id": "c619", + "name": "Reference csum action object in filter", + "category": [ + "infra", + "csum" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action csum ip4h index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action csum index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action csum" + ] + }, + { + "id": "a93d", + "name": "Reference ct action object in filter", + "category": [ + "infra", + "ct" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action ct index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ct index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action ct" + ] + }, + { + "id": "8bb5", + "name": "Reference ctinfo action object in filter", + "category": [ + "infra", + "ctinfo" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC action add action ctinfo index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ctinfo index 10", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action ctinfo" + ] + }, + { + "id": "2241", + "name": "Reference gact action object in filter", + "category": [ + "infra", + "gact" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action pass index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action gact index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action gact" + ] + }, + { + "id": "35e9", + "name": "Reference gate action object in filter", + "category": [ + "infra", + "gate" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC action add action gate priority 1 sched-entry close 100000000ns index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action gate index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action gate" + ] + }, + { + "id": "b22e", + "name": "Reference ife action object in filter", + "category": [ + "infra", + "ife" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action ife encode allow mark pass index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ife index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action ife" + ] + }, + { + "id": "ef74", + "name": "Reference mirred action object in filter", + "category": [ + "infra", + "mirred" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action mirred egress mirror index 1 dev lo" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action mirred index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action mirred" + ] + }, + { + "id": "2c81", + "name": "Reference nat action object in filter", + "category": [ + "infra", + "nat" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action nat ingress 192.168.1.1 200.200.200.1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action nat index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action nat" + ] + }, + { + "id": "ac9d", + "name": "Reference police action object in filter", + "category": [ + "infra", + "police" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action police rate 1kbit burst 10k index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action police index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action police" + ] + }, + { + "id": "68be", + "name": "Reference sample action object in filter", + "category": [ + "infra", + "sample" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action sample rate 10 group 1 index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action sample index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action sample" + ] + }, + { + "id": "cf01", + "name": "Reference skbedit action object in filter", + "category": [ + "infra", + "skbedit" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action skbedit mark 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action skbedit index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action skbedit" + ] + }, + { + "id": "c109", + "name": "Reference skbmod action object in filter", + "category": [ + "infra", + "skbmod" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action skbmod set dmac 11:22:33:44:55:66 index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action skbmod index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action skbmod" + ] + }, + { + "id": "4abc", + "name": "Reference tunnel_key action object in filter", + "category": [ + "infra", + "tunnel_key" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1 index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action tunnel_key index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action tunnel_key" + ] + }, + { + "id": "dadd", + "name": "Reference vlan action object in filter", + "category": [ + "infra", + "tunnel_key" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action vlan pop pipe index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action vlan index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action vlan" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index 7bd94f8e490a..b98256f38447 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -369,6 +369,19 @@ def run_one_test(pm, args, index, tidx): pm.call_post_execute() return res + if 'dependsOn' in tidx: + if (args.verbose > 0): + print('probe command for test skip') + (p, procout) = exec_cmd(args, pm, 'execute', tidx['dependsOn']) + if p: + if (p.returncode != 0): + res = TestResult(tidx['id'], tidx['name']) + res.set_result(ResultState.skip) + res.set_errormsg('probe command: test skipped.') + pm.call_pre_case(tidx, test_skip=True) + pm.call_post_execute() + return res + # populate NAMES with TESTID for this test NAMES['TESTID'] = tidx['id'] |