diff options
Diffstat (limited to 'tools/testing')
90 files changed, 8124 insertions, 637 deletions
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index b89eb87034e4..a02a085e7f32 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -4,6 +4,8 @@ bloom_filter_map # failed to find kernel BTF type ID of bpf_cookie # failed to open_and_load program: -524 (trampoline) bpf_loop # attaches to __x64_sys_nanosleep cgrp_local_storage # prog_attach unexpected error: -524 (trampoline) +dynptr/test_dynptr_skb_data +dynptr/test_skb_readonly fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index b677dcd0b77a..16f404aa1b23 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -338,7 +338,8 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ define get_sys_includes $(shell $(1) $(2) -v -E - </dev/null 2>&1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') \ +$(shell $(1) $(2) -dM -E - </dev/null | grep '__loongarch_grlen ' | awk '{printf("-D__BITS_PER_LONG=%d", $$3)}') endef # Determine target endianness. @@ -356,7 +357,7 @@ BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(abspath $(OUTPUT)/../usr/include) CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ - -Wno-compare-distinct-pointer-types + -Wno-compare-distinct-pointer-types -Wuninitialized $(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline @@ -558,7 +559,7 @@ TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ btf_helpers.c flow_dissector_load.h \ - cap_helpers.c test_loader.c xsk.c + cap_helpers.c test_loader.c xsk.c disasm.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h new file mode 100644 index 000000000000..8c993ec8ceea --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -0,0 +1,38 @@ +#ifndef __BPF_KFUNCS__ +#define __BPF_KFUNCS__ + +/* Description + * Initializes an skb-type dynptr + * Returns + * Error code + */ +extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, + struct bpf_dynptr *ptr__uninit) __ksym; + +/* Description + * Initializes an xdp-type dynptr + * Returns + * Error code + */ +extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags, + struct bpf_dynptr *ptr__uninit) __ksym; + +/* Description + * Obtain a read-only pointer to the dynptr's data + * Returns + * Either a direct pointer to the dynptr data or a pointer to the user-provided + * buffer if unable to obtain a direct pointer + */ +extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, + void *buffer, __u32 buffer__szk) __ksym; + +/* Description + * Obtain a read-write pointer to the dynptr's data + * Returns + * Either a direct pointer to the dynptr data or a pointer to the user-provided + * buffer if unable to obtain a direct pointer + */ +extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset, + void *buffer, __u32 buffer__szk) __ksym; + +#endif diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index 1f0437644186..253821494884 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -176,6 +176,8 @@ CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_NET=y CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y +CONFIG_VSOCKETS_LOOPBACK=y CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index d49f6170e7bd..2ba92167be35 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -140,5 +140,8 @@ CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_BLK=y CONFIG_VIRTIO_NET=y CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y +CONFIG_VSOCKETS=y +CONFIG_VSOCKETS_LOOPBACK=y CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index dd97d61d325c..b650b2e617b8 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -234,7 +234,10 @@ CONFIG_VIRTIO_BLK=y CONFIG_VIRTIO_CONSOLE=y CONFIG_VIRTIO_NET=y CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y +CONFIG_VSOCKETS=y +CONFIG_VSOCKETS_LOOPBACK=y CONFIG_X86_ACPI_CPUFREQ=y CONFIG_X86_CPUID=y CONFIG_X86_MSR=y diff --git a/tools/testing/selftests/bpf/disasm.c b/tools/testing/selftests/bpf/disasm.c new file mode 120000 index 000000000000..b1571927bd54 --- /dev/null +++ b/tools/testing/selftests/bpf/disasm.c @@ -0,0 +1 @@ +../../../../kernel/bpf/disasm.c
\ No newline at end of file diff --git a/tools/testing/selftests/bpf/disasm.h b/tools/testing/selftests/bpf/disasm.h new file mode 120000 index 000000000000..8054fd497340 --- /dev/null +++ b/tools/testing/selftests/bpf/disasm.h @@ -0,0 +1 @@ +../../../../kernel/bpf/disasm.h
\ No newline at end of file diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 4666f88f2bb4..c94fa8d6c4f6 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -660,16 +660,22 @@ static int do_test_single(struct bpf_align_test *test) * func#0 @0 * 0: R1=ctx(off=0,imm=0) R10=fp0 * 0: (b7) r3 = 2 ; R3_w=2 + * + * Sometimes it's actually two lines below, e.g. when + * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))": + * from 4 to 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 + * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 + * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) */ - if (!strstr(line_ptr, m.match)) { + while (!strstr(line_ptr, m.match)) { cur_line = -1; line_ptr = strtok(NULL, "\n"); - sscanf(line_ptr, "%u: ", &cur_line); + sscanf(line_ptr ?: "", "%u: ", &cur_line); + if (!line_ptr || cur_line != m.line) + break; } - if (cur_line != m.line || !line_ptr || - !strstr(line_ptr, m.match)) { - printf("Failed to find match %u: %s\n", - m.line, m.match); + if (cur_line != m.line || !line_ptr || !strstr(line_ptr, m.match)) { + printf("Failed to find match %u: %s\n", m.line, m.match); ret = 1; printf("%s", bpf_vlog); break; diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 56374c8b5436..7175af39134f 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include "test_attach_kprobe_sleepable.skel.h" +#include "test_attach_probe_manual.skel.h" #include "test_attach_probe.skel.h" /* this is how USDT semaphore is actually defined, except volatile modifier */ @@ -23,81 +25,54 @@ static noinline void trigger_func3(void) asm volatile (""); } +/* attach point for ref_ctr */ +static noinline void trigger_func4(void) +{ + asm volatile (""); +} + static char test_data[] = "test_data"; -void test_attach_probe(void) +/* manual attach kprobe/kretprobe/uprobe/uretprobe testings */ +static void test_attach_probe_manual(enum probe_attach_mode attach_mode) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts); struct bpf_link *kprobe_link, *kretprobe_link; struct bpf_link *uprobe_link, *uretprobe_link; - struct test_attach_probe* skel; - ssize_t uprobe_offset, ref_ctr_offset; - struct bpf_link *uprobe_err_link; - FILE *devnull; - bool legacy; - - /* Check if new-style kprobe/uprobe API is supported. - * Kernels that support new FD-based kprobe and uprobe BPF attachment - * through perf_event_open() syscall expose - * /sys/bus/event_source/devices/kprobe/type and - * /sys/bus/event_source/devices/uprobe/type files, respectively. They - * contain magic numbers that are passed as "type" field of - * perf_event_attr. Lack of such file in the system indicates legacy - * kernel with old-style kprobe/uprobe attach interface through - * creating per-probe event through tracefs. For such cases - * ref_ctr_offset feature is not supported, so we don't test it. - */ - legacy = access("/sys/bus/event_source/devices/kprobe/type", F_OK) != 0; + struct test_attach_probe_manual *skel; + ssize_t uprobe_offset; - uprobe_offset = get_uprobe_offset(&trigger_func); - if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset")) + skel = test_attach_probe_manual__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load")) return; - ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr); - if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) - return; - - skel = test_attach_probe__open(); - if (!ASSERT_OK_PTR(skel, "skel_open")) - return; - - /* sleepable kprobe test case needs flags set before loading */ - if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable, - BPF_F_SLEEPABLE), "kprobe_sleepable_flags")) - goto cleanup; - - if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load")) - goto cleanup; - if (!ASSERT_OK_PTR(skel->bss, "check_bss")) + uprobe_offset = get_uprobe_offset(&trigger_func); + if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset")) goto cleanup; /* manual-attach kprobe/kretprobe */ - kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe, - false /* retprobe */, - SYS_NANOSLEEP_KPROBE_NAME); + kprobe_opts.attach_mode = attach_mode; + kprobe_opts.retprobe = false; + kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe, + SYS_NANOSLEEP_KPROBE_NAME, + &kprobe_opts); if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe")) goto cleanup; skel->links.handle_kprobe = kprobe_link; - kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe, - true /* retprobe */, - SYS_NANOSLEEP_KPROBE_NAME); + kprobe_opts.retprobe = true; + kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe, + SYS_NANOSLEEP_KPROBE_NAME, + &kprobe_opts); if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe")) goto cleanup; skel->links.handle_kretprobe = kretprobe_link; - /* auto-attachable kprobe and kretprobe */ - skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto); - ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto"); - - skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto); - ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto"); - - if (!legacy) - ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before"); - + /* manual-attach uprobe/uretprobe */ + uprobe_opts.attach_mode = attach_mode; + uprobe_opts.ref_ctr_offset = 0; uprobe_opts.retprobe = false; - uprobe_opts.ref_ctr_offset = legacy ? 0 : ref_ctr_offset; uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, 0 /* self pid */, "/proc/self/exe", @@ -107,12 +82,7 @@ void test_attach_probe(void) goto cleanup; skel->links.handle_uprobe = uprobe_link; - if (!legacy) - ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after"); - - /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */ uprobe_opts.retprobe = true; - uprobe_opts.ref_ctr_offset = legacy ? 0 : ref_ctr_offset; uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, -1 /* any pid */, "/proc/self/exe", @@ -121,12 +91,7 @@ void test_attach_probe(void) goto cleanup; skel->links.handle_uretprobe = uretprobe_link; - /* verify auto-attach fails for old-style uprobe definition */ - uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname); - if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP, - "auto-attach should fail for old-style name")) - goto cleanup; - + /* attach uprobe by function name manually */ uprobe_opts.func_name = "trigger_func2"; uprobe_opts.retprobe = false; uprobe_opts.ref_ctr_offset = 0; @@ -138,11 +103,63 @@ void test_attach_probe(void) if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname, "attach_uprobe_byname")) goto cleanup; + /* trigger & validate kprobe && kretprobe */ + usleep(1); + + /* trigger & validate uprobe & uretprobe */ + trigger_func(); + + /* trigger & validate uprobe attached by name */ + trigger_func2(); + + ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res"); + ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res"); + ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res"); + ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res"); + ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res"); + +cleanup: + test_attach_probe_manual__destroy(skel); +} + +static void test_attach_probe_auto(struct test_attach_probe *skel) +{ + struct bpf_link *uprobe_err_link; + + /* auto-attachable kprobe and kretprobe */ + skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto); + ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto"); + + skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto); + ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto"); + + /* verify auto-attach fails for old-style uprobe definition */ + uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname); + if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP, + "auto-attach should fail for old-style name")) + return; + /* verify auto-attach works */ skel->links.handle_uretprobe_byname = bpf_program__attach(skel->progs.handle_uretprobe_byname); if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname, "attach_uretprobe_byname")) - goto cleanup; + return; + + /* trigger & validate kprobe && kretprobe */ + usleep(1); + + /* trigger & validate uprobe attached by name */ + trigger_func2(); + + ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res"); + ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res"); + ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res"); +} + +static void test_uprobe_lib(struct test_attach_probe *skel) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + FILE *devnull; /* test attach by name for a library function, using the library * as the binary argument. libc.so.6 will be resolved via dlopen()/dlinfo(). @@ -155,7 +172,7 @@ void test_attach_probe(void) "libc.so.6", 0, &uprobe_opts); if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname2, "attach_uprobe_byname2")) - goto cleanup; + return; uprobe_opts.func_name = "fclose"; uprobe_opts.retprobe = true; @@ -165,62 +182,144 @@ void test_attach_probe(void) "libc.so.6", 0, &uprobe_opts); if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2")) + return; + + /* trigger & validate shared library u[ret]probes attached by name */ + devnull = fopen("/dev/null", "r"); + fclose(devnull); + + ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res"); + ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res"); +} + +static void test_uprobe_ref_ctr(struct test_attach_probe *skel) +{ + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); + struct bpf_link *uprobe_link, *uretprobe_link; + ssize_t uprobe_offset, ref_ctr_offset; + + uprobe_offset = get_uprobe_offset(&trigger_func4); + if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset_ref_ctr")) + return; + + ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr); + if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) + return; + + ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before"); + + uprobe_opts.retprobe = false; + uprobe_opts.ref_ctr_offset = ref_ctr_offset; + uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_ref_ctr, + 0 /* self pid */, + "/proc/self/exe", + uprobe_offset, + &uprobe_opts); + if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_ref_ctr")) + return; + skel->links.handle_uprobe_ref_ctr = uprobe_link; + + ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after"); + + /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */ + uprobe_opts.retprobe = true; + uprobe_opts.ref_ctr_offset = ref_ctr_offset; + uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_ref_ctr, + -1 /* any pid */, + "/proc/self/exe", + uprobe_offset, &uprobe_opts); + if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_ref_ctr")) + return; + skel->links.handle_uretprobe_ref_ctr = uretprobe_link; +} + +static void test_kprobe_sleepable(void) +{ + struct test_attach_kprobe_sleepable *skel; + + skel = test_attach_kprobe_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_kprobe_sleepable_open")) + return; + + /* sleepable kprobe test case needs flags set before loading */ + if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable, + BPF_F_SLEEPABLE), "kprobe_sleepable_flags")) + goto cleanup; + + if (!ASSERT_OK(test_attach_kprobe_sleepable__load(skel), + "skel_kprobe_sleepable_load")) goto cleanup; /* sleepable kprobes should not attach successfully */ skel->links.handle_kprobe_sleepable = bpf_program__attach(skel->progs.handle_kprobe_sleepable); - if (!ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable")) - goto cleanup; + ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable"); +cleanup: + test_attach_kprobe_sleepable__destroy(skel); +} + +static void test_uprobe_sleepable(struct test_attach_probe *skel) +{ /* test sleepable uprobe and uretprobe variants */ skel->links.handle_uprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uprobe_byname3_sleepable); if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3_sleepable, "attach_uprobe_byname3_sleepable")) - goto cleanup; + return; skel->links.handle_uprobe_byname3 = bpf_program__attach(skel->progs.handle_uprobe_byname3); if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3, "attach_uprobe_byname3")) - goto cleanup; + return; skel->links.handle_uretprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uretprobe_byname3_sleepable); if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3_sleepable, "attach_uretprobe_byname3_sleepable")) - goto cleanup; + return; skel->links.handle_uretprobe_byname3 = bpf_program__attach(skel->progs.handle_uretprobe_byname3); if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3, "attach_uretprobe_byname3")) - goto cleanup; + return; skel->bss->user_ptr = test_data; - /* trigger & validate kprobe && kretprobe */ - usleep(1); - - /* trigger & validate shared library u[ret]probes attached by name */ - devnull = fopen("/dev/null", "r"); - fclose(devnull); - - /* trigger & validate uprobe & uretprobe */ - trigger_func(); - - /* trigger & validate uprobe attached by name */ - trigger_func2(); - /* trigger & validate sleepable uprobe attached by name */ trigger_func3(); - ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res"); - ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res"); - ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res"); - ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res"); - ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res"); - ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res"); - ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res"); - ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res"); - ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res"); - ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res"); ASSERT_EQ(skel->bss->uprobe_byname3_sleepable_res, 9, "check_uprobe_byname3_sleepable_res"); ASSERT_EQ(skel->bss->uprobe_byname3_res, 10, "check_uprobe_byname3_res"); ASSERT_EQ(skel->bss->uretprobe_byname3_sleepable_res, 11, "check_uretprobe_byname3_sleepable_res"); ASSERT_EQ(skel->bss->uretprobe_byname3_res, 12, "check_uretprobe_byname3_res"); +} + +void test_attach_probe(void) +{ + struct test_attach_probe *skel; + + skel = test_attach_probe__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load")) + goto cleanup; + if (!ASSERT_OK_PTR(skel->bss, "check_bss")) + goto cleanup; + + if (test__start_subtest("manual-default")) + test_attach_probe_manual(PROBE_ATTACH_MODE_DEFAULT); + if (test__start_subtest("manual-legacy")) + test_attach_probe_manual(PROBE_ATTACH_MODE_LEGACY); + if (test__start_subtest("manual-perf")) + test_attach_probe_manual(PROBE_ATTACH_MODE_PERF); + if (test__start_subtest("manual-link")) + test_attach_probe_manual(PROBE_ATTACH_MODE_LINK); + + if (test__start_subtest("auto")) + test_attach_probe_auto(skel); + if (test__start_subtest("kprobe-sleepable")) + test_kprobe_sleepable(); + if (test__start_subtest("uprobe-lib")) + test_uprobe_lib(skel); + if (test__start_subtest("uprobe-sleepable")) + test_uprobe_sleepable(skel); + if (test__start_subtest("uprobe-ref_ctr")) + test_uprobe_ref_ctr(skel); cleanup: test_attach_probe__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c index b3f7985c8504..adda85f97058 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c @@ -84,6 +84,7 @@ static const char * const success_tests[] = { "test_cgrp_xchg_release", "test_cgrp_get_release", "test_cgrp_get_ancestors", + "test_cgrp_from_id", }; void test_cgrp_kfunc(void) diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 2cc759956e3b..63e776f4176e 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -193,7 +193,7 @@ out: cgrp_ls_sleepable__destroy(skel); } -static void test_no_rcu_lock(__u64 cgroup_id) +static void test_yes_rcu_lock(__u64 cgroup_id) { struct cgrp_ls_sleepable *skel; int err; @@ -204,7 +204,7 @@ static void test_no_rcu_lock(__u64 cgroup_id) skel->bss->target_pid = syscall(SYS_gettid); - bpf_program__set_autoload(skel->progs.no_rcu_lock, true); + bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); err = cgrp_ls_sleepable__load(skel); if (!ASSERT_OK(err, "skel_load")) goto out; @@ -220,7 +220,7 @@ out: cgrp_ls_sleepable__destroy(skel); } -static void test_rcu_lock(void) +static void test_no_rcu_lock(void) { struct cgrp_ls_sleepable *skel; int err; @@ -229,7 +229,7 @@ static void test_rcu_lock(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); + bpf_program__set_autoload(skel->progs.no_rcu_lock, true); err = cgrp_ls_sleepable__load(skel); ASSERT_ERR(err, "skel_load"); @@ -256,10 +256,10 @@ void test_cgrp_local_storage(void) test_negative(); if (test__start_subtest("cgroup_iter_sleepable")) test_cgroup_iter_sleepable(cgroup_fd, cgroup_id); + if (test__start_subtest("yes_rcu_lock")) + test_yes_rcu_lock(cgroup_id); if (test__start_subtest("no_rcu_lock")) - test_no_rcu_lock(cgroup_id); - if (test__start_subtest("rcu_lock")) - test_rcu_lock(); + test_no_rcu_lock(); close(cgroup_fd); } diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index 224f016b0a53..2a55f717fc07 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -13,6 +13,7 @@ #include "progs/test_cls_redirect.h" #include "test_cls_redirect.skel.h" +#include "test_cls_redirect_dynptr.skel.h" #include "test_cls_redirect_subprogs.skel.h" #define ENCAP_IP INADDR_LOOPBACK @@ -446,6 +447,28 @@ cleanup: close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0])); } +static void test_cls_redirect_dynptr(void) +{ + struct test_cls_redirect_dynptr *skel; + int err; + + skel = test_cls_redirect_dynptr__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP); + skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT); + + err = test_cls_redirect_dynptr__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + test_cls_redirect_common(skel->progs.cls_redirect); + +cleanup: + test_cls_redirect_dynptr__destroy(skel); +} + static void test_cls_redirect_inlined(void) { struct test_cls_redirect *skel; @@ -496,4 +519,6 @@ void test_cls_redirect(void) test_cls_redirect_inlined(); if (test__start_subtest("cls_redirect_subprogs")) test_cls_redirect_subprogs(); + if (test__start_subtest("cls_redirect_dynptr")) + test_cls_redirect_dynptr(); } diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c new file mode 100644 index 000000000000..d5fe3d4b936c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c @@ -0,0 +1,917 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <limits.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <regex.h> +#include <test_progs.h> + +#include "bpf/btf.h" +#include "bpf_util.h" +#include "linux/filter.h" +#include "disasm.h" + +#define MAX_PROG_TEXT_SZ (32 * 1024) + +/* The code in this file serves the sole purpose of executing test cases + * specified in the test_cases array. Each test case specifies a program + * type, context field offset, and disassembly patterns that correspond + * to read and write instructions generated by + * verifier.c:convert_ctx_access() for accessing that field. + * + * For each test case, up to three programs are created: + * - One that uses BPF_LDX_MEM to read the context field. + * - One that uses BPF_STX_MEM to write to the context field. + * - One that uses BPF_ST_MEM to write to the context field. + * + * The disassembly of each program is then compared with the pattern + * specified in the test case. + */ +struct test_case { + char *name; + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + int field_offset; + int field_sz; + /* Program generated for BPF_ST_MEM uses value 42 by default, + * this field allows to specify custom value. + */ + struct { + bool use; + int value; + } st_value; + /* Pattern for BPF_LDX_MEM(field_sz, dst, ctx, field_offset) */ + char *read; + /* Pattern for BPF_STX_MEM(field_sz, ctx, src, field_offset) and + * BPF_ST_MEM (field_sz, ctx, src, field_offset) + */ + char *write; + /* Pattern for BPF_ST_MEM(field_sz, ctx, src, field_offset), + * takes priority over `write`. + */ + char *write_st; + /* Pattern for BPF_STX_MEM (field_sz, ctx, src, field_offset), + * takes priority over `write`. + */ + char *write_stx; +}; + +#define N(_prog_type, type, field, name_extra...) \ + .name = #_prog_type "." #field name_extra, \ + .prog_type = BPF_PROG_TYPE_##_prog_type, \ + .field_offset = offsetof(type, field), \ + .field_sz = sizeof(typeof(((type *)NULL)->field)) + +static struct test_case test_cases[] = { +/* Sign extension on s390 changes the pattern */ +#if defined(__x86_64__) || defined(__aarch64__) + { + N(SCHED_CLS, struct __sk_buff, tstamp), + .read = "r11 = *(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset);" + "w11 &= 160;" + "if w11 != 0xa0 goto pc+2;" + "$dst = 0;" + "goto pc+1;" + "$dst = *(u64 *)($ctx + sk_buff::tstamp);", + .write = "r11 = *(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset);" + "if w11 & 0x80 goto pc+1;" + "goto pc+2;" + "w11 &= -33;" + "*(u8 *)($ctx + sk_buff::__pkt_vlan_present_offset) = r11;" + "*(u64 *)($ctx + sk_buff::tstamp) = $src;", + }, +#endif + { + N(SCHED_CLS, struct __sk_buff, priority), + .read = "$dst = *(u32 *)($ctx + sk_buff::priority);", + .write = "*(u32 *)($ctx + sk_buff::priority) = $src;", + }, + { + N(SCHED_CLS, struct __sk_buff, mark), + .read = "$dst = *(u32 *)($ctx + sk_buff::mark);", + .write = "*(u32 *)($ctx + sk_buff::mark) = $src;", + }, + { + N(SCHED_CLS, struct __sk_buff, cb[0]), + .read = "$dst = *(u32 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::data));", + .write = "*(u32 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::data)) = $src;", + }, + { + N(SCHED_CLS, struct __sk_buff, tc_classid), + .read = "$dst = *(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid));", + .write = "*(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid)) = $src;", + }, + { + N(SCHED_CLS, struct __sk_buff, tc_index), + .read = "$dst = *(u16 *)($ctx + sk_buff::tc_index);", + .write = "*(u16 *)($ctx + sk_buff::tc_index) = $src;", + }, + { + N(SCHED_CLS, struct __sk_buff, queue_mapping), + .read = "$dst = *(u16 *)($ctx + sk_buff::queue_mapping);", + .write_stx = "if $src >= 0xffff goto pc+1;" + "*(u16 *)($ctx + sk_buff::queue_mapping) = $src;", + .write_st = "*(u16 *)($ctx + sk_buff::queue_mapping) = $src;", + }, + { + /* This is a corner case in filter.c:bpf_convert_ctx_access() */ + N(SCHED_CLS, struct __sk_buff, queue_mapping, ".ushrt_max"), + .st_value = { true, USHRT_MAX }, + .write_st = "goto pc+0;", + }, + { + N(CGROUP_SOCK, struct bpf_sock, bound_dev_if), + .read = "$dst = *(u32 *)($ctx + sock_common::skc_bound_dev_if);", + .write = "*(u32 *)($ctx + sock_common::skc_bound_dev_if) = $src;", + }, + { + N(CGROUP_SOCK, struct bpf_sock, mark), + .read = "$dst = *(u32 *)($ctx + sock::sk_mark);", + .write = "*(u32 *)($ctx + sock::sk_mark) = $src;", + }, + { + N(CGROUP_SOCK, struct bpf_sock, priority), + .read = "$dst = *(u32 *)($ctx + sock::sk_priority);", + .write = "*(u32 *)($ctx + sock::sk_priority) = $src;", + }, + { + N(SOCK_OPS, struct bpf_sock_ops, replylong[0]), + .read = "$dst = *(u32 *)($ctx + bpf_sock_ops_kern::replylong);", + .write = "*(u32 *)($ctx + bpf_sock_ops_kern::replylong) = $src;", + }, + { + N(CGROUP_SYSCTL, struct bpf_sysctl, file_pos), +#if __BYTE_ORDER == __LITTLE_ENDIAN + .read = "$dst = *(u64 *)($ctx + bpf_sysctl_kern::ppos);" + "$dst = *(u32 *)($dst +0);", + .write = "*(u64 *)($ctx + bpf_sysctl_kern::tmp_reg) = r9;" + "r9 = *(u64 *)($ctx + bpf_sysctl_kern::ppos);" + "*(u32 *)(r9 +0) = $src;" + "r9 = *(u64 *)($ctx + bpf_sysctl_kern::tmp_reg);", +#else + .read = "$dst = *(u64 *)($ctx + bpf_sysctl_kern::ppos);" + "$dst = *(u32 *)($dst +4);", + .write = "*(u64 *)($ctx + bpf_sysctl_kern::tmp_reg) = r9;" + "r9 = *(u64 *)($ctx + bpf_sysctl_kern::ppos);" + "*(u32 *)(r9 +4) = $src;" + "r9 = *(u64 *)($ctx + bpf_sysctl_kern::tmp_reg);", +#endif + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, sk), + .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::sk);", + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, level), + .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::level);", + .write = "*(u32 *)($ctx + bpf_sockopt_kern::level) = $src;", + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, optname), + .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::optname);", + .write = "*(u32 *)($ctx + bpf_sockopt_kern::optname) = $src;", + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, optlen), + .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::optlen);", + .write = "*(u32 *)($ctx + bpf_sockopt_kern::optlen) = $src;", + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, retval), + .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::current_task);" + "$dst = *(u64 *)($dst + task_struct::bpf_ctx);" + "$dst = *(u32 *)($dst + bpf_cg_run_ctx::retval);", + .write = "*(u64 *)($ctx + bpf_sockopt_kern::tmp_reg) = r9;" + "r9 = *(u64 *)($ctx + bpf_sockopt_kern::current_task);" + "r9 = *(u64 *)(r9 + task_struct::bpf_ctx);" + "*(u32 *)(r9 + bpf_cg_run_ctx::retval) = $src;" + "r9 = *(u64 *)($ctx + bpf_sockopt_kern::tmp_reg);", + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, optval), + .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::optval);", + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + }, + { + N(CGROUP_SOCKOPT, struct bpf_sockopt, optval_end), + .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::optval_end);", + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + }, +}; + +#undef N + +static regex_t *ident_regex; +static regex_t *field_regex; + +static char *skip_space(char *str) +{ + while (*str && isspace(*str)) + ++str; + return str; +} + +static char *skip_space_and_semi(char *str) +{ + while (*str && (isspace(*str) || *str == ';')) + ++str; + return str; +} + +static char *match_str(char *str, char *prefix) +{ + while (*str && *prefix && *str == *prefix) { + ++str; + ++prefix; + } + if (*prefix) + return NULL; + return str; +} + +static char *match_number(char *str, int num) +{ + char *next; + int snum = strtol(str, &next, 10); + + if (next - str == 0 || num != snum) + return NULL; + + return next; +} + +static int find_field_offset_aux(struct btf *btf, int btf_id, char *field_name, int off) +{ + const struct btf_type *type = btf__type_by_id(btf, btf_id); + const struct btf_member *m; + __u16 mnum; + int i; + + if (!type) { + PRINT_FAIL("Can't find btf_type for id %d\n", btf_id); + return -1; + } + + if (!btf_is_struct(type) && !btf_is_union(type)) { + PRINT_FAIL("BTF id %d is not struct or union\n", btf_id); + return -1; + } + + m = btf_members(type); + mnum = btf_vlen(type); + + for (i = 0; i < mnum; ++i, ++m) { + const char *mname = btf__name_by_offset(btf, m->name_off); + + if (strcmp(mname, "") == 0) { + int msize = find_field_offset_aux(btf, m->type, field_name, + off + m->offset); + if (msize >= 0) + return msize; + } + + if (strcmp(mname, field_name)) + continue; + + return (off + m->offset) / 8; + } + + return -1; +} + +static int find_field_offset(struct btf *btf, char *pattern, regmatch_t *matches) +{ + int type_sz = matches[1].rm_eo - matches[1].rm_so; + int field_sz = matches[2].rm_eo - matches[2].rm_so; + char *type = pattern + matches[1].rm_so; + char *field = pattern + matches[2].rm_so; + char field_str[128] = {}; + char type_str[128] = {}; + int btf_id, field_offset; + + if (type_sz >= sizeof(type_str)) { + PRINT_FAIL("Malformed pattern: type ident is too long: %d\n", type_sz); + return -1; + } + + if (field_sz >= sizeof(field_str)) { + PRINT_FAIL("Malformed pattern: field ident is too long: %d\n", field_sz); + return -1; + } + + strncpy(type_str, type, type_sz); + strncpy(field_str, field, field_sz); + btf_id = btf__find_by_name(btf, type_str); + if (btf_id < 0) { + PRINT_FAIL("No BTF info for type %s\n", type_str); + return -1; + } + + field_offset = find_field_offset_aux(btf, btf_id, field_str, 0); + if (field_offset < 0) { + PRINT_FAIL("No BTF info for field %s::%s\n", type_str, field_str); + return -1; + } + + return field_offset; +} + +static regex_t *compile_regex(char *pat) +{ + regex_t *re; + int err; + + re = malloc(sizeof(regex_t)); + if (!re) { + PRINT_FAIL("Can't alloc regex\n"); + return NULL; + } + + err = regcomp(re, pat, REG_EXTENDED); + if (err) { + char errbuf[512]; + + regerror(err, re, errbuf, sizeof(errbuf)); + PRINT_FAIL("Can't compile regex: %s\n", errbuf); + free(re); + return NULL; + } + + return re; +} + +static void free_regex(regex_t *re) +{ + if (!re) + return; + + regfree(re); + free(re); +} + +static u32 max_line_len(char *str) +{ + u32 max_line = 0; + char *next = str; + + while (next) { + next = strchr(str, '\n'); + if (next) { + max_line = max_t(u32, max_line, (next - str)); + str = next + 1; + } else { + max_line = max_t(u32, max_line, strlen(str)); + } + } + + return min(max_line, 60u); +} + +/* Print strings `pattern_origin` and `text_origin` side by side, + * assume `pattern_pos` and `text_pos` designate location within + * corresponding origin string where match diverges. + * The output should look like: + * + * Can't match disassembly(left) with pattern(right): + * r2 = *(u64 *)(r1 +0) ; $dst = *(u64 *)($ctx + bpf_sockopt_kern::sk1) + * ^ ^ + * r0 = 0 ; + * exit ; + */ +static void print_match_error(FILE *out, + char *pattern_origin, char *text_origin, + char *pattern_pos, char *text_pos) +{ + char *pattern = pattern_origin; + char *text = text_origin; + int middle = max_line_len(text) + 2; + + fprintf(out, "Can't match disassembly(left) with pattern(right):\n"); + while (*pattern || *text) { + int column = 0; + int mark1 = -1; + int mark2 = -1; + + /* Print one line from text */ + while (*text && *text != '\n') { + if (text == text_pos) + mark1 = column; + fputc(*text, out); + ++text; + ++column; + } + if (text == text_pos) + mark1 = column; + + /* Pad to the middle */ + while (column < middle) { + fputc(' ', out); + ++column; + } + fputs("; ", out); + column += 3; + + /* Print one line from pattern, pattern lines are terminated by ';' */ + while (*pattern && *pattern != ';') { + if (pattern == pattern_pos) + mark2 = column; + fputc(*pattern, out); + ++pattern; + ++column; + } + if (pattern == pattern_pos) + mark2 = column; + + fputc('\n', out); + if (*pattern) + ++pattern; + if (*text) + ++text; + + /* If pattern and text diverge at this line, print an + * additional line with '^' marks, highlighting + * positions where match fails. + */ + if (mark1 > 0 || mark2 > 0) { + for (column = 0; column <= max(mark1, mark2); ++column) { + if (column == mark1 || column == mark2) + fputc('^', out); + else + fputc(' ', out); + } + fputc('\n', out); + } + } +} + +/* Test if `text` matches `pattern`. Pattern consists of the following elements: + * + * - Field offset references: + * + * <type>::<field> + * + * When such reference is encountered BTF is used to compute numerical + * value for the offset of <field> in <type>. The `text` is expected to + * contain matching numerical value. + * + * - Field groups: + * + * $(<type>::<field> [+ <type>::<field>]*) + * + * Allows to specify an offset that is a sum of multiple field offsets. + * The `text` is expected to contain matching numerical value. + * + * - Variable references, e.g. `$src`, `$dst`, `$ctx`. + * These are substitutions specified in `reg_map` array. + * If a substring of pattern is equal to `reg_map[i][0]` the `text` is + * expected to contain `reg_map[i][1]` in the matching position. + * + * - Whitespace is ignored, ';' counts as whitespace for `pattern`. + * + * - Any other characters, `pattern` and `text` should match one-to-one. + * + * Example of a pattern: + * + * __________ fields group ________________ + * ' ' + * *(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid)) = $src; + * ^^^^ '______________________' + * variable reference field offset reference + */ +static bool match_pattern(struct btf *btf, char *pattern, char *text, char *reg_map[][2]) +{ + char *pattern_origin = pattern; + char *text_origin = text; + regmatch_t matches[3]; + +_continue: + while (*pattern) { + if (!*text) + goto err; + + /* Skip whitespace */ + if (isspace(*pattern) || *pattern == ';') { + if (!isspace(*text) && text != text_origin && isalnum(text[-1])) + goto err; + pattern = skip_space_and_semi(pattern); + text = skip_space(text); + continue; + } + + /* Check for variable references */ + for (int i = 0; reg_map[i][0]; ++i) { + char *pattern_next, *text_next; + + pattern_next = match_str(pattern, reg_map[i][0]); + if (!pattern_next) + continue; + + text_next = match_str(text, reg_map[i][1]); + if (!text_next) + goto err; + + pattern = pattern_next; + text = text_next; + goto _continue; + } + + /* Match field group: + * $(sk_buff::cb + qdisc_skb_cb::tc_classid) + */ + if (strncmp(pattern, "$(", 2) == 0) { + char *group_start = pattern, *text_next; + int acc_offset = 0; + + pattern += 2; + + for (;;) { + int field_offset; + + pattern = skip_space(pattern); + if (!*pattern) { + PRINT_FAIL("Unexpected end of pattern\n"); + goto err; + } + + if (*pattern == ')') { + ++pattern; + break; + } + + if (*pattern == '+') { + ++pattern; + continue; + } + + printf("pattern: %s\n", pattern); + if (regexec(field_regex, pattern, 3, matches, 0) != 0) { + PRINT_FAIL("Field reference expected\n"); + goto err; + } + + field_offset = find_field_offset(btf, pattern, matches); + if (field_offset < 0) + goto err; + + pattern += matches[0].rm_eo; + acc_offset += field_offset; + } + + text_next = match_number(text, acc_offset); + if (!text_next) { + PRINT_FAIL("No match for group offset %.*s (%d)\n", + (int)(pattern - group_start), + group_start, + acc_offset); + goto err; + } + text = text_next; + } + + /* Match field reference: + * sk_buff::cb + */ + if (regexec(field_regex, pattern, 3, matches, 0) == 0) { + int field_offset; + char *text_next; + + field_offset = find_field_offset(btf, pattern, matches); + if (field_offset < 0) + goto err; + + text_next = match_number(text, field_offset); + if (!text_next) { + PRINT_FAIL("No match for field offset %.*s (%d)\n", + (int)matches[0].rm_eo, pattern, field_offset); + goto err; + } + + pattern += matches[0].rm_eo; + text = text_next; + continue; + } + + /* If pattern points to identifier not followed by '::' + * skip the identifier to avoid n^2 application of the + * field reference rule. + */ + if (regexec(ident_regex, pattern, 1, matches, 0) == 0) { + if (strncmp(pattern, text, matches[0].rm_eo) != 0) + goto err; + + pattern += matches[0].rm_eo; + text += matches[0].rm_eo; + continue; + } + + /* Match literally */ + if (*pattern != *text) + goto err; + + ++pattern; + ++text; + } + + return true; + +err: + test__fail(); + print_match_error(stdout, pattern_origin, text_origin, pattern, text); + return false; +} + +/* Request BPF program instructions after all rewrites are applied, + * e.g. verifier.c:convert_ctx_access() is done. + */ +static int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + __u32 xlated_prog_len; + __u32 buf_element_size = sizeof(struct bpf_insn); + + if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("bpf_prog_get_info_by_fd failed"); + return -1; + } + + xlated_prog_len = info.xlated_prog_len; + if (xlated_prog_len % buf_element_size) { + printf("Program length %d is not multiple of %d\n", + xlated_prog_len, buf_element_size); + return -1; + } + + *cnt = xlated_prog_len / buf_element_size; + *buf = calloc(*cnt, buf_element_size); + if (!buf) { + perror("can't allocate xlated program buffer"); + return -ENOMEM; + } + + bzero(&info, sizeof(info)); + info.xlated_prog_len = xlated_prog_len; + info.xlated_prog_insns = (__u64)(unsigned long)*buf; + if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("second bpf_prog_get_info_by_fd failed"); + goto out_free_buf; + } + + return 0; + +out_free_buf: + free(*buf); + return -1; +} + +static void print_insn(void *private_data, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vfprintf((FILE *)private_data, fmt, args); + va_end(args); +} + +/* Disassemble instructions to a stream */ +static void print_xlated(FILE *out, struct bpf_insn *insn, __u32 len) +{ + const struct bpf_insn_cbs cbs = { + .cb_print = print_insn, + .cb_call = NULL, + .cb_imm = NULL, + .private_data = out, + }; + bool double_insn = false; + int i; + + for (i = 0; i < len; i++) { + if (double_insn) { + double_insn = false; + continue; + } + + double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW); + print_bpf_insn(&cbs, insn + i, true); + } +} + +/* We share code with kernel BPF disassembler, it adds '(FF) ' prefix + * for each instruction (FF stands for instruction `code` byte). + * This function removes the prefix inplace for each line in `str`. + */ +static void remove_insn_prefix(char *str, int size) +{ + const int prefix_size = 5; + + int write_pos = 0, read_pos = prefix_size; + int len = strlen(str); + char c; + + size = min(size, len); + + while (read_pos < size) { + c = str[read_pos++]; + if (c == 0) + break; + str[write_pos++] = c; + if (c == '\n') + read_pos += prefix_size; + } + str[write_pos] = 0; +} + +struct prog_info { + char *prog_kind; + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + struct bpf_insn *prog; + u32 prog_len; +}; + +static void match_program(struct btf *btf, + struct prog_info *pinfo, + char *pattern, + char *reg_map[][2], + bool skip_first_insn) +{ + struct bpf_insn *buf = NULL; + int err = 0, prog_fd = 0; + FILE *prog_out = NULL; + char *text = NULL; + __u32 cnt = 0; + + text = calloc(MAX_PROG_TEXT_SZ, 1); + if (!text) { + PRINT_FAIL("Can't allocate %d bytes\n", MAX_PROG_TEXT_SZ); + goto out; + } + + // TODO: log level + LIBBPF_OPTS(bpf_prog_load_opts, opts); + opts.log_buf = text; + opts.log_size = MAX_PROG_TEXT_SZ; + opts.log_level = 1 | 2 | 4; + opts.expected_attach_type = pinfo->expected_attach_type; + + prog_fd = bpf_prog_load(pinfo->prog_type, NULL, "GPL", + pinfo->prog, pinfo->prog_len, &opts); + if (prog_fd < 0) { + PRINT_FAIL("Can't load program, errno %d (%s), verifier log:\n%s\n", + errno, strerror(errno), text); + goto out; + } + + memset(text, 0, MAX_PROG_TEXT_SZ); + + err = get_xlated_program(prog_fd, &buf, &cnt); + if (err) { + PRINT_FAIL("Can't load back BPF program\n"); + goto out; + } + + prog_out = fmemopen(text, MAX_PROG_TEXT_SZ - 1, "w"); + if (!prog_out) { + PRINT_FAIL("Can't open memory stream\n"); + goto out; + } + if (skip_first_insn) + print_xlated(prog_out, buf + 1, cnt - 1); + else + print_xlated(prog_out, buf, cnt); + fclose(prog_out); + remove_insn_prefix(text, MAX_PROG_TEXT_SZ); + + ASSERT_TRUE(match_pattern(btf, pattern, text, reg_map), + pinfo->prog_kind); + +out: + if (prog_fd) + close(prog_fd); + free(buf); + free(text); +} + +static void run_one_testcase(struct btf *btf, struct test_case *test) +{ + struct prog_info pinfo = {}; + int bpf_sz; + + if (!test__start_subtest(test->name)) + return; + + switch (test->field_sz) { + case 8: + bpf_sz = BPF_DW; + break; + case 4: + bpf_sz = BPF_W; + break; + case 2: + bpf_sz = BPF_H; + break; + case 1: + bpf_sz = BPF_B; + break; + default: + PRINT_FAIL("Unexpected field size: %d, want 8,4,2 or 1\n", test->field_sz); + return; + } + + pinfo.prog_type = test->prog_type; + pinfo.expected_attach_type = test->expected_attach_type; + + if (test->read) { + struct bpf_insn ldx_prog[] = { + BPF_LDX_MEM(bpf_sz, BPF_REG_2, BPF_REG_1, test->field_offset), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + char *reg_map[][2] = { + { "$ctx", "r1" }, + { "$dst", "r2" }, + {} + }; + + pinfo.prog_kind = "LDX"; + pinfo.prog = ldx_prog; + pinfo.prog_len = ARRAY_SIZE(ldx_prog); + match_program(btf, &pinfo, test->read, reg_map, false); + } + + if (test->write || test->write_st || test->write_stx) { + struct bpf_insn stx_prog[] = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_STX_MEM(bpf_sz, BPF_REG_1, BPF_REG_2, test->field_offset), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + char *stx_reg_map[][2] = { + { "$ctx", "r1" }, + { "$src", "r2" }, + {} + }; + struct bpf_insn st_prog[] = { + BPF_ST_MEM(bpf_sz, BPF_REG_1, test->field_offset, + test->st_value.use ? test->st_value.value : 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + char *st_reg_map[][2] = { + { "$ctx", "r1" }, + { "$src", "42" }, + {} + }; + + if (test->write || test->write_stx) { + char *pattern = test->write_stx ? test->write_stx : test->write; + + pinfo.prog_kind = "STX"; + pinfo.prog = stx_prog; + pinfo.prog_len = ARRAY_SIZE(stx_prog); + match_program(btf, &pinfo, pattern, stx_reg_map, true); + } + + if (test->write || test->write_st) { + char *pattern = test->write_st ? test->write_st : test->write; + + pinfo.prog_kind = "ST"; + pinfo.prog = st_prog; + pinfo.prog_len = ARRAY_SIZE(st_prog); + match_program(btf, &pinfo, pattern, st_reg_map, false); + } + } + + test__end_subtest(); +} + +void test_ctx_rewrite(void) +{ + struct btf *btf; + int i; + + field_regex = compile_regex("^([[:alpha:]_][[:alnum:]_]+)::([[:alpha:]_][[:alnum:]_]+)"); + ident_regex = compile_regex("^[[:alpha:]_][[:alnum:]_]+"); + if (!field_regex || !ident_regex) + return; + + btf = btf__load_vmlinux_btf(); + if (!btf) { + PRINT_FAIL("Can't load vmlinux BTF, errno %d (%s)\n", errno, strerror(errno)); + goto out; + } + + for (i = 0; i < ARRAY_SIZE(test_cases); ++i) + run_one_testcase(btf, &test_cases[i]); + +out: + btf__free(btf); + free_regex(field_regex); + free_regex(ident_regex); +} diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c index 2853883b7cbb..5c0ebe6ba866 100644 --- a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c +++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c @@ -10,14 +10,6 @@ #include "network_helpers.h" #include "decap_sanity.skel.h" -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto fail; \ - }) - #define NS_TEST "decap_sanity_ns" #define IPV6_IFACE_ADDR "face::1" #define UDP_TEST_PORT 7777 @@ -37,9 +29,9 @@ void test_decap_sanity(void) if (!ASSERT_OK_PTR(skel, "skel open_and_load")) return; - SYS("ip netns add %s", NS_TEST); - SYS("ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR); - SYS("ip -net %s link set dev lo up", NS_TEST); + SYS(fail, "ip netns add %s", NS_TEST); + SYS(fail, "ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR); + SYS(fail, "ip -net %s link set dev lo up", NS_TEST); nstoken = open_netns(NS_TEST); if (!ASSERT_OK_PTR(nstoken, "open_netns")) @@ -80,6 +72,6 @@ fail: bpf_tc_hook_destroy(&qdisc_hook); close_netns(nstoken); } - system("ip netns del " NS_TEST " &> /dev/null"); + SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); decap_sanity__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index b99264ec0d9c..d176c34a7d2e 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -2,20 +2,32 @@ /* Copyright (c) 2022 Facebook */ #include <test_progs.h> +#include <network_helpers.h> #include "dynptr_fail.skel.h" #include "dynptr_success.skel.h" -static const char * const success_tests[] = { - "test_read_write", - "test_data_slice", - "test_ringbuf", +enum test_setup_type { + SETUP_SYSCALL_SLEEP, + SETUP_SKB_PROG, }; -static void verify_success(const char *prog_name) +static struct { + const char *prog_name; + enum test_setup_type type; +} success_tests[] = { + {"test_read_write", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_data", SETUP_SYSCALL_SLEEP}, + {"test_ringbuf", SETUP_SYSCALL_SLEEP}, + {"test_skb_readonly", SETUP_SKB_PROG}, + {"test_dynptr_skb_data", SETUP_SKB_PROG}, +}; + +static void verify_success(const char *prog_name, enum test_setup_type setup_type) { struct dynptr_success *skel; struct bpf_program *prog; struct bpf_link *link; + int err; skel = dynptr_success__open(); if (!ASSERT_OK_PTR(skel, "dynptr_success__open")) @@ -23,23 +35,53 @@ static void verify_success(const char *prog_name) skel->bss->pid = getpid(); - dynptr_success__load(skel); - if (!ASSERT_OK_PTR(skel, "dynptr_success__load")) - goto cleanup; - prog = bpf_object__find_program_by_name(skel->obj, prog_name); if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) goto cleanup; - link = bpf_program__attach(prog); - if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + bpf_program__set_autoload(prog, true); + + err = dynptr_success__load(skel); + if (!ASSERT_OK(err, "dynptr_success__load")) goto cleanup; - usleep(1); + switch (setup_type) { + case SETUP_SYSCALL_SLEEP: + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + goto cleanup; - ASSERT_EQ(skel->bss->err, 0, "err"); + usleep(1); + + bpf_link__destroy(link); + break; + case SETUP_SKB_PROG: + { + int prog_fd; + char buf[64]; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 1, + ); - bpf_link__destroy(link); + prog_fd = bpf_program__fd(prog); + if (!ASSERT_GE(prog_fd, 0, "prog_fd")) + goto cleanup; + + err = bpf_prog_test_run_opts(prog_fd, &topts); + + if (!ASSERT_OK(err, "test_run")) + goto cleanup; + + break; + } + } + + ASSERT_EQ(skel->bss->err, 0, "err"); cleanup: dynptr_success__destroy(skel); @@ -50,10 +92,10 @@ void test_dynptr(void) int i; for (i = 0; i < ARRAY_SIZE(success_tests); i++) { - if (!test__start_subtest(success_tests[i])) + if (!test__start_subtest(success_tests[i].prog_name)) continue; - verify_success(success_tests[i]); + verify_success(success_tests[i].prog_name, success_tests[i].type); } RUN_TESTS(dynptr_fail); diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c index 32dd731e9070..3b77d8a422db 100644 --- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c @@ -4,11 +4,6 @@ #include <net/if.h> #include "empty_skb.skel.h" -#define SYS(cmd) ({ \ - if (!ASSERT_OK(system(cmd), (cmd))) \ - goto out; \ -}) - void test_empty_skb(void) { LIBBPF_OPTS(bpf_test_run_opts, tattr); @@ -93,18 +88,18 @@ void test_empty_skb(void) }, }; - SYS("ip netns add empty_skb"); + SYS(out, "ip netns add empty_skb"); tok = open_netns("empty_skb"); - SYS("ip link add veth0 type veth peer veth1"); - SYS("ip link set dev veth0 up"); - SYS("ip link set dev veth1 up"); - SYS("ip addr add 10.0.0.1/8 dev veth0"); - SYS("ip addr add 10.0.0.2/8 dev veth1"); + SYS(out, "ip link add veth0 type veth peer veth1"); + SYS(out, "ip link set dev veth0 up"); + SYS(out, "ip link set dev veth1 up"); + SYS(out, "ip addr add 10.0.0.1/8 dev veth0"); + SYS(out, "ip addr add 10.0.0.2/8 dev veth1"); veth_ifindex = if_nametoindex("veth0"); - SYS("ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2"); - SYS("ip link set ipip0 up"); - SYS("ip addr add 192.168.1.1/16 dev ipip0"); + SYS(out, "ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2"); + SYS(out, "ip link set ipip0 up"); + SYS(out, "ip addr add 192.168.1.1/16 dev ipip0"); ipip_ifindex = if_nametoindex("ipip0"); bpf_obj = empty_skb__open_and_load(); @@ -142,5 +137,5 @@ out: empty_skb__destroy(bpf_obj); if (tok) close_netns(tok); - system("ip netns del empty_skb"); + SYS_NOFAIL("ip netns del empty_skb"); } diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c index 61ccddccf485..429393caf612 100644 --- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c @@ -8,14 +8,6 @@ #include "network_helpers.h" #include "fib_lookup.skel.h" -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto fail; \ - }) - #define NS_TEST "fib_lookup_ns" #define IPV6_IFACE_ADDR "face::face" #define IPV6_NUD_FAILED_ADDR "face::1" @@ -59,16 +51,16 @@ static int setup_netns(void) { int err; - SYS("ip link add veth1 type veth peer name veth2"); - SYS("ip link set dev veth1 up"); + SYS(fail, "ip link add veth1 type veth peer name veth2"); + SYS(fail, "ip link set dev veth1 up"); - SYS("ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR); - SYS("ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR); - SYS("ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC); + SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR); + SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR); + SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC); - SYS("ip addr add %s/24 dev veth1 nodad", IPV4_IFACE_ADDR); - SYS("ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); - SYS("ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); + SYS(fail, "ip addr add %s/24 dev veth1 nodad", IPV4_IFACE_ADDR); + SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); + SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); err = write_sysctl("/proc/sys/net/ipv4/conf/veth1/forwarding", "1"); if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth1.forwarding)")) @@ -140,7 +132,7 @@ void test_fib_lookup(void) return; prog_fd = bpf_program__fd(skel->progs.fib_lookup); - SYS("ip netns add %s", NS_TEST); + SYS(fail, "ip netns add %s", NS_TEST); nstoken = open_netns(NS_TEST); if (!ASSERT_OK_PTR(nstoken, "open_netns")) @@ -182,6 +174,6 @@ void test_fib_lookup(void) fail: if (nstoken) close_netns(nstoken); - system("ip netns del " NS_TEST " &> /dev/null"); + SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); fib_lookup__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 7acca37a3d2b..c4773173a4e4 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -346,6 +346,30 @@ struct test tests[] = { .retval = BPF_OK, }, { + .name = "ipv6-empty-flow-label", + .pkt.ipv6 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_TCP, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .iph.flow_lbl = { 0x00, 0x00, 0x00 }, + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct ipv6hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), + .sport = 80, + .dport = 8080, + }, + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, + .retval = BPF_OK, + }, + { .name = "ipip-encap", .pkt.ipip = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c index 9c1a18573ffd..1eab286b14fe 100644 --- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c +++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c @@ -93,4 +93,6 @@ void test_l4lb_all(void) test_l4lb("test_l4lb.bpf.o"); if (test__start_subtest("l4lb_noinline")) test_l4lb("test_l4lb_noinline.bpf.o"); + if (test__start_subtest("l4lb_noinline_dynptr")) + test_l4lb("test_l4lb_noinline_dynptr.bpf.o"); } diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c index f4ffdcabf4e4..239e1c5753b0 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -141,7 +141,7 @@ void test_log_fixup(void) if (test__start_subtest("bad_core_relo_trunc_partial")) bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */); if (test__start_subtest("bad_core_relo_trunc_full")) - bad_core_relo(250, TRUNC_FULL /* truncate also libbpf's message patch */); + bad_core_relo(210, TRUNC_FULL /* truncate also libbpf's message patch */); if (test__start_subtest("bad_core_relo_subprog")) bad_core_relo_subprog(); if (test__start_subtest("missing_map")) diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c index 3533a4ecad01..8743df599567 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c @@ -4,70 +4,160 @@ #include "map_kptr.skel.h" #include "map_kptr_fail.skel.h" +#include "rcu_tasks_trace_gp.skel.h" static void test_map_kptr_success(bool test_run) { + LIBBPF_OPTS(bpf_test_run_opts, lopts); LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), .repeat = 1, ); + int key = 0, ret, cpu; struct map_kptr *skel; - int key = 0, ret; - char buf[16]; + char buf[16], *pbuf; skel = map_kptr__open_and_load(); if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load")) return; - ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref), &opts); - ASSERT_OK(ret, "test_map_kptr_ref refcount"); - ASSERT_OK(opts.retval, "test_map_kptr_ref retval"); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref1), &opts); + ASSERT_OK(ret, "test_map_kptr_ref1 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref1 retval"); ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref2), &opts); ASSERT_OK(ret, "test_map_kptr_ref2 refcount"); ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval"); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref1), &lopts); + ASSERT_OK(ret, "test_ls_map_kptr_ref1 refcount"); + ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref1 retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref2), &lopts); + ASSERT_OK(ret, "test_ls_map_kptr_ref2 refcount"); + ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref2 retval"); + if (test_run) goto exit; + cpu = libbpf_num_possible_cpus(); + if (!ASSERT_GT(cpu, 0, "libbpf_num_possible_cpus")) + goto exit; + + pbuf = calloc(cpu, sizeof(buf)); + if (!ASSERT_OK_PTR(pbuf, "calloc(pbuf)")) + goto exit; + ret = bpf_map__update_elem(skel->maps.array_map, &key, sizeof(key), buf, sizeof(buf), 0); ASSERT_OK(ret, "array_map update"); - ret = bpf_map__update_elem(skel->maps.array_map, - &key, sizeof(key), buf, sizeof(buf), 0); - ASSERT_OK(ret, "array_map update2"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_map__update_elem(skel->maps.pcpu_array_map, + &key, sizeof(key), pbuf, cpu * sizeof(buf), 0); + ASSERT_OK(ret, "pcpu_array_map update"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); - ret = bpf_map__update_elem(skel->maps.hash_map, - &key, sizeof(key), buf, sizeof(buf), 0); - ASSERT_OK(ret, "hash_map update"); ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_map__delete_elem(skel->maps.pcpu_hash_map, &key, sizeof(key), 0); + ASSERT_OK(ret, "pcpu_hash_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); - ret = bpf_map__update_elem(skel->maps.hash_malloc_map, - &key, sizeof(key), buf, sizeof(buf), 0); - ASSERT_OK(ret, "hash_malloc_map update"); ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0); ASSERT_OK(ret, "hash_malloc_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_map__delete_elem(skel->maps.pcpu_hash_malloc_map, &key, sizeof(key), 0); + ASSERT_OK(ret, "pcpu_hash_malloc_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); - ret = bpf_map__update_elem(skel->maps.lru_hash_map, - &key, sizeof(key), buf, sizeof(buf), 0); - ASSERT_OK(ret, "lru_hash_map update"); ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0); ASSERT_OK(ret, "lru_hash_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + + ret = bpf_map__delete_elem(skel->maps.lru_pcpu_hash_map, &key, sizeof(key), 0); + ASSERT_OK(ret, "lru_pcpu_hash_map delete"); + skel->data->ref--; + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts); + ASSERT_OK(ret, "test_map_kptr_ref3 refcount"); + ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval"); + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref_del), &lopts); + ASSERT_OK(ret, "test_ls_map_kptr_ref_del delete"); + skel->data->ref--; + ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref_del retval"); + + free(pbuf); exit: map_kptr__destroy(skel); } -void test_map_kptr(void) +static int kern_sync_rcu_tasks_trace(struct rcu_tasks_trace_gp *rcu) { - if (test__start_subtest("success")) { + long gp_seq = READ_ONCE(rcu->bss->gp_seq); + LIBBPF_OPTS(bpf_test_run_opts, opts); + + if (!ASSERT_OK(bpf_prog_test_run_opts(bpf_program__fd(rcu->progs.do_call_rcu_tasks_trace), + &opts), "do_call_rcu_tasks_trace")) + return -EFAULT; + if (!ASSERT_OK(opts.retval, "opts.retval == 0")) + return -EFAULT; + while (gp_seq == READ_ONCE(rcu->bss->gp_seq)) + sched_yield(); + return 0; +} + +void serial_test_map_kptr(void) +{ + struct rcu_tasks_trace_gp *skel; + + RUN_TESTS(map_kptr_fail); + + skel = rcu_tasks_trace_gp__open_and_load(); + if (!ASSERT_OK_PTR(skel, "rcu_tasks_trace_gp__open_and_load")) + return; + if (!ASSERT_OK(rcu_tasks_trace_gp__attach(skel), "rcu_tasks_trace_gp__attach")) + goto end; + + if (test__start_subtest("success-map")) { + test_map_kptr_success(true); + + ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace"); + ASSERT_OK(kern_sync_rcu(), "sync rcu"); + /* Observe refcount dropping to 1 on bpf_map_free_deferred */ test_map_kptr_success(false); - /* Do test_run twice, so that we see refcount going back to 1 - * after we leave it in map from first iteration. - */ + + ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace"); + ASSERT_OK(kern_sync_rcu(), "sync rcu"); + /* Observe refcount dropping to 1 on synchronous delete elem */ test_map_kptr_success(true); } - RUN_TESTS(map_kptr_fail); +end: + rcu_tasks_trace_gp__destroy(skel); + return; } diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 59f08d6d1d53..cd0c42fff7c0 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -7,6 +7,8 @@ #include "network_helpers.h" #include "mptcp_sock.skel.h" +#define NS_TEST "mptcp_ns" + #ifndef TCP_CA_NAME_MAX #define TCP_CA_NAME_MAX 16 #endif @@ -138,12 +140,20 @@ out: static void test_base(void) { + struct nstoken *nstoken = NULL; int server_fd, cgroup_fd; cgroup_fd = test__join_cgroup("/mptcp"); if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return; + SYS(fail, "ip netns add %s", NS_TEST); + SYS(fail, "ip -net %s link set dev lo up", NS_TEST); + + nstoken = open_netns(NS_TEST); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto fail; + /* without MPTCP */ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (!ASSERT_GE(server_fd, 0, "start_server")) @@ -157,13 +167,18 @@ with_mptcp: /* with MPTCP */ server_fd = start_mptcp_server(AF_INET, NULL, 0, 0); if (!ASSERT_GE(server_fd, 0, "start_mptcp_server")) - goto close_cgroup_fd; + goto fail; ASSERT_OK(run_test(cgroup_fd, server_fd, true), "run_test mptcp"); close(server_fd); -close_cgroup_fd: +fail: + if (nstoken) + close_netns(nstoken); + + SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); + close(cgroup_fd); } diff --git a/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c new file mode 100644 index 000000000000..daa952711d8f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include <network_helpers.h> +#include "test_parse_tcp_hdr_opt.skel.h" +#include "test_parse_tcp_hdr_opt_dynptr.skel.h" +#include "test_tcp_hdr_options.h" + +struct test_pkt { + struct ipv6_packet pk6_v6; + u8 options[16]; +} __packed; + +struct test_pkt pkt = { + .pk6_v6.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .pk6_v6.iph.nexthdr = IPPROTO_TCP, + .pk6_v6.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .pk6_v6.tcp.urg_ptr = 123, + .pk6_v6.tcp.doff = 9, /* 16 bytes of options */ + + .options = { + TCPOPT_MSS, 4, 0x05, 0xB4, TCPOPT_NOP, TCPOPT_NOP, + 0, 6, 0xBB, 0xBB, 0xBB, 0xBB, TCPOPT_EOL + }, +}; + +static void test_parse_opt(void) +{ + struct test_parse_tcp_hdr_opt *skel; + struct bpf_program *prog; + char buf[128]; + int err; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt, + .data_size_in = sizeof(pkt), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 3, + ); + + skel = test_parse_tcp_hdr_opt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr; + prog = skel->progs.xdp_ingress_v6; + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts); + ASSERT_OK(err, "ipv6 test_run"); + ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval"); + ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id"); + + test_parse_tcp_hdr_opt__destroy(skel); +} + +static void test_parse_opt_dynptr(void) +{ + struct test_parse_tcp_hdr_opt_dynptr *skel; + struct bpf_program *prog; + char buf[128]; + int err; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt, + .data_size_in = sizeof(pkt), + .data_out = buf, + .data_size_out = sizeof(buf), + .repeat = 3, + ); + + skel = test_parse_tcp_hdr_opt_dynptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr; + prog = skel->progs.xdp_ingress_v6; + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts); + ASSERT_OK(err, "ipv6 test_run"); + ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval"); + ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id"); + + test_parse_tcp_hdr_opt_dynptr__destroy(skel); +} + +void test_parse_tcp_hdr_opt(void) +{ + if (test__start_subtest("parse_tcp_hdr_opt")) + test_parse_opt(); + if (test__start_subtest("parse_tcp_hdr_opt_dynptr")) + test_parse_opt_dynptr(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c index 447d8560ecb6..3f1f58d3a729 100644 --- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -25,10 +25,10 @@ static void test_success(void) bpf_program__set_autoload(skel->progs.get_cgroup_id, true); bpf_program__set_autoload(skel->progs.task_succ, true); - bpf_program__set_autoload(skel->progs.no_lock, true); bpf_program__set_autoload(skel->progs.two_regions, true); bpf_program__set_autoload(skel->progs.non_sleepable_1, true); bpf_program__set_autoload(skel->progs.non_sleepable_2, true); + bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true); err = rcu_read_lock__load(skel); if (!ASSERT_OK(err, "skel_load")) goto out; @@ -69,6 +69,7 @@ out: static const char * const inproper_region_tests[] = { "miss_lock", + "no_lock", "miss_unlock", "non_sleepable_rcu_mismatch", "inproper_sleepable_helper", @@ -99,7 +100,6 @@ out: } static const char * const rcuptr_misuse_tests[] = { - "task_untrusted_non_rcuptr", "task_untrusted_rcuptr", "cross_rcu_region", }; @@ -128,17 +128,8 @@ out: void test_rcu_read_lock(void) { - struct btf *vmlinux_btf; int cgroup_fd; - vmlinux_btf = btf__load_vmlinux_btf(); - if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF")) - return; - if (btf__find_by_name_kind(vmlinux_btf, "rcu", BTF_KIND_TYPE_TAG) < 0) { - test__skip(); - goto out; - } - cgroup_fd = test__join_cgroup("/rcu_read_lock"); if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /rcu_read_lock")) goto out; @@ -153,6 +144,5 @@ void test_rcu_read_lock(void) if (test__start_subtest("negative_tests_rcuptr_misuse")) test_rcuptr_misuse(); close(cgroup_fd); -out: - btf__free(vmlinux_btf); +out:; } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 567e07c19ecc..8f09e1ea3ba7 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -18,6 +18,7 @@ #include <string.h> #include <sys/select.h> #include <unistd.h> +#include <linux/vm_sockets.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> @@ -251,6 +252,16 @@ static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len) *len = sizeof(*addr6); } +static void init_addr_loopback_vsock(struct sockaddr_storage *ss, socklen_t *len) +{ + struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss)); + + addr->svm_family = AF_VSOCK; + addr->svm_port = VMADDR_PORT_ANY; + addr->svm_cid = VMADDR_CID_LOCAL; + *len = sizeof(*addr); +} + static void init_addr_loopback(int family, struct sockaddr_storage *ss, socklen_t *len) { @@ -261,6 +272,9 @@ static void init_addr_loopback(int family, struct sockaddr_storage *ss, case AF_INET6: init_addr_loopback6(ss, len); return; + case AF_VSOCK: + init_addr_loopback_vsock(ss, len); + return; default: FAIL("unsupported address family %d", family); } @@ -1478,6 +1492,8 @@ static const char *family_str(sa_family_t family) return "IPv6"; case AF_UNIX: return "Unix"; + case AF_VSOCK: + return "VSOCK"; default: return "unknown"; } @@ -1689,6 +1705,151 @@ static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *ma unix_skb_redir_to_connected(skel, map, sotype); } +/* Returns two connected loopback vsock sockets */ +static int vsock_socketpair_connectible(int sotype, int *v0, int *v1) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int s, p, c; + + s = socket_loopback(AF_VSOCK, sotype); + if (s < 0) + return -1; + + c = xsocket(AF_VSOCK, sotype | SOCK_NONBLOCK, 0); + if (c == -1) + goto close_srv; + + if (getsockname(s, sockaddr(&addr), &len) < 0) + goto close_cli; + + if (connect(c, sockaddr(&addr), len) < 0 && errno != EINPROGRESS) { + FAIL_ERRNO("connect"); + goto close_cli; + } + + len = sizeof(addr); + p = accept_timeout(s, sockaddr(&addr), &len, IO_TIMEOUT_SEC); + if (p < 0) + goto close_cli; + + *v0 = p; + *v1 = c; + + return 0; + +close_cli: + close(c); +close_srv: + close(s); + + return -1; +} + +static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd, + enum redir_mode mode, int sotype) +{ + const char *log_prefix = redir_mode_str(mode); + char a = 'a', b = 'b'; + int u0, u1, v0, v1; + int sfd[2]; + unsigned int pass; + int err, n; + u32 key; + + zero_verdict_count(verd_mapfd); + + if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd)) + return; + + u0 = sfd[0]; + u1 = sfd[1]; + + err = vsock_socketpair_connectible(sotype, &v0, &v1); + if (err) { + FAIL("vsock_socketpair_connectible() failed"); + goto close_uds; + } + + err = add_to_sockmap(sock_mapfd, u0, v0); + if (err) { + FAIL("add_to_sockmap failed"); + goto close_vsock; + } + + n = write(v1, &a, sizeof(a)); + if (n < 0) + FAIL_ERRNO("%s: write", log_prefix); + if (n == 0) + FAIL("%s: incomplete write", log_prefix); + if (n < 1) + goto out; + + n = recv(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), MSG_DONTWAIT); + if (n < 0) + FAIL("%s: recv() err, errno=%d", log_prefix, errno); + if (n == 0) + FAIL("%s: incomplete recv", log_prefix); + if (b != a) + FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b); + + key = SK_PASS; + err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); + if (err) + goto out; + if (pass != 1) + FAIL("%s: want pass count 1, have %d", log_prefix, pass); +out: + key = 0; + bpf_map_delete_elem(sock_mapfd, &key); + key = 1; + bpf_map_delete_elem(sock_mapfd, &key); + +close_vsock: + close(v0); + close(v1); + +close_uds: + close(u0); + close(u1); +} + +static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, + int sotype) +{ + int verdict = bpf_program__fd(skel->progs.prog_skb_verdict); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0); + if (err) + return; + + skel->bss->test_ingress = false; + vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype); + skel->bss->test_ingress = true; + vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT); +} + +static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map) +{ + const char *family_name, *map_name; + char s[MAX_TEST_NAME]; + + family_name = family_str(AF_VSOCK); + map_name = map_type_str(map); + snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__); + if (!test__start_subtest(s)) + return; + + vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM); + vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET); +} + static void test_reuseport(struct test_sockmap_listen *skel, struct bpf_map *map, int family, int sotype) { @@ -2060,12 +2221,14 @@ void serial_test_sockmap_listen(void) run_tests(skel, skel->maps.sock_map, AF_INET6); test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM); test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM); + test_vsock_redir(skel, skel->maps.sock_map); skel->bss->test_sockmap = false; run_tests(skel, skel->maps.sock_hash, AF_INET); run_tests(skel, skel->maps.sock_hash, AF_INET6); test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM); test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM); + test_vsock_redir(skel, skel->maps.sock_hash); test_sockmap_listen__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index bca5e6839ac4..6ee22c3b251a 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -137,24 +137,16 @@ static int get_ifaddr(const char *name, char *ifaddr) return 0; } -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto fail; \ - }) - static int netns_setup_links_and_routes(struct netns_setup_result *result) { struct nstoken *nstoken = NULL; char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {}; - SYS("ip link add veth_src type veth peer name veth_src_fwd"); - SYS("ip link add veth_dst type veth peer name veth_dst_fwd"); + SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd"); + SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd"); - SYS("ip link set veth_dst_fwd address " MAC_DST_FWD); - SYS("ip link set veth_dst address " MAC_DST); + SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD); + SYS(fail, "ip link set veth_dst address " MAC_DST); if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) goto fail; @@ -175,27 +167,27 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd")) goto fail; - SYS("ip link set veth_src netns " NS_SRC); - SYS("ip link set veth_src_fwd netns " NS_FWD); - SYS("ip link set veth_dst_fwd netns " NS_FWD); - SYS("ip link set veth_dst netns " NS_DST); + SYS(fail, "ip link set veth_src netns " NS_SRC); + SYS(fail, "ip link set veth_src_fwd netns " NS_FWD); + SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD); + SYS(fail, "ip link set veth_dst netns " NS_DST); /** setup in 'src' namespace */ nstoken = open_netns(NS_SRC); if (!ASSERT_OK_PTR(nstoken, "setns src")) goto fail; - SYS("ip addr add " IP4_SRC "/32 dev veth_src"); - SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad"); - SYS("ip link set dev veth_src up"); + SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src"); + SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad"); + SYS(fail, "ip link set dev veth_src up"); - SYS("ip route add " IP4_DST "/32 dev veth_src scope global"); - SYS("ip route add " IP4_NET "/16 dev veth_src scope global"); - SYS("ip route add " IP6_DST "/128 dev veth_src scope global"); + SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global"); + SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global"); + SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global"); - SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s", + SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s", veth_src_fwd_addr); - SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s", + SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s", veth_src_fwd_addr); close_netns(nstoken); @@ -209,15 +201,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) * needs v4 one in order to start ARP probing. IP4_NET route is added * to the endpoints so that the ARP processing will reply. */ - SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd"); - SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); - SYS("ip link set dev veth_src_fwd up"); - SYS("ip link set dev veth_dst_fwd up"); + SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd"); + SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); + SYS(fail, "ip link set dev veth_src_fwd up"); + SYS(fail, "ip link set dev veth_dst_fwd up"); - SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); - SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); - SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); - SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); + SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); + SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); + SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); + SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); close_netns(nstoken); @@ -226,16 +218,16 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) if (!ASSERT_OK_PTR(nstoken, "setns dst")) goto fail; - SYS("ip addr add " IP4_DST "/32 dev veth_dst"); - SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad"); - SYS("ip link set dev veth_dst up"); + SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst"); + SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad"); + SYS(fail, "ip link set dev veth_dst up"); - SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global"); - SYS("ip route add " IP4_NET "/16 dev veth_dst scope global"); - SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global"); + SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global"); + SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope global"); + SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global"); - SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); - SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD); close_netns(nstoken); @@ -375,7 +367,7 @@ done: static int test_ping(int family, const char *addr) { - SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr); + SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr); return 0; fail: return -1; @@ -953,7 +945,7 @@ static int tun_open(char *name) if (!ASSERT_OK(err, "ioctl TUNSETIFF")) goto fail; - SYS("ip link set dev %s up", name); + SYS(fail, "ip link set dev %s up", name); return fd; fail: @@ -1076,23 +1068,23 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); /* Setup route and neigh tables */ - SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); - SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24"); + SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); + SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24"); - SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); - SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); + SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); + SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); - SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global"); - SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD + SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global"); + SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD " dev tun_src scope global"); - SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); - SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); - SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD + SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); + SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); + SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD " dev tun_src scope global"); - SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); + SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); - SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); - SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) goto fail; diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c index b13feceb38f1..810b14981c2e 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_ima.c +++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c @@ -70,7 +70,7 @@ void test_test_ima(void) u64 bin_true_sample; char cmd[256]; - int err, duration = 0; + int err, duration = 0, fresh_digest_idx = 0; struct ima *skel = NULL; skel = ima__open_and_load(); @@ -129,7 +129,15 @@ void test_test_ima(void) /* * Test #3 * - Goal: confirm that bpf_ima_inode_hash() returns a non-fresh digest - * - Expected result: 2 samples (/bin/true: non-fresh, fresh) + * - Expected result: + * 1 sample (/bin/true: fresh) if commit 62622dab0a28 applied + * 2 samples (/bin/true: non-fresh, fresh) if commit 62622dab0a28 is + * not applied + * + * If commit 62622dab0a28 ("ima: return IMA digest value only when + * IMA_COLLECTED flag is set") is applied, bpf_ima_inode_hash() refuses + * to give a non-fresh digest, hence the correct result is 1 instead of + * 2. */ test_init(skel->bss); @@ -144,13 +152,18 @@ void test_test_ima(void) goto close_clean; err = ring_buffer__consume(ringbuf); - ASSERT_EQ(err, 2, "num_samples_or_err"); - ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash"); - ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash"); - ASSERT_EQ(ima_hash_from_bpf[0], bin_true_sample, "sample_equal_or_err"); + ASSERT_GE(err, 1, "num_samples_or_err"); + if (err == 2) { + ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash"); + ASSERT_EQ(ima_hash_from_bpf[0], bin_true_sample, + "sample_equal_or_err"); + fresh_digest_idx = 1; + } + + ASSERT_NEQ(ima_hash_from_bpf[fresh_digest_idx], 0, "ima_hash"); /* IMA refreshed the digest. */ - ASSERT_NEQ(ima_hash_from_bpf[1], bin_true_sample, - "sample_different_or_err"); + ASSERT_NEQ(ima_hash_from_bpf[fresh_digest_idx], bin_true_sample, + "sample_equal_or_err"); /* * Test #4 diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index 07ad457f3370..47f1d482fe39 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -91,30 +91,15 @@ #define PING_ARGS "-i 0.01 -c 3 -w 10 -q" -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto fail; \ - }) - -#define SYS_NOFAIL(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - system(cmd); \ - }) - static int config_device(void) { - SYS("ip netns add at_ns0"); - SYS("ip link add veth0 address " MAC_VETH1 " type veth peer name veth1"); - SYS("ip link set veth0 netns at_ns0"); - SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1"); - SYS("ip link set dev veth1 up mtu 1500"); - SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0"); - SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500"); + SYS(fail, "ip netns add at_ns0"); + SYS(fail, "ip link add veth0 address " MAC_VETH1 " type veth peer name veth1"); + SYS(fail, "ip link set veth0 netns at_ns0"); + SYS(fail, "ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1"); + SYS(fail, "ip link set dev veth1 up mtu 1500"); + SYS(fail, "ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0"); + SYS(fail, "ip netns exec at_ns0 ip link set dev veth0 up mtu 1500"); return 0; fail: @@ -132,23 +117,23 @@ static void cleanup(void) static int add_vxlan_tunnel(void) { /* at_ns0 namespace */ - SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789", + SYS(fail, "ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789", VXLAN_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip link set dev %s address %s up", + SYS(fail, "ip netns exec at_ns0 ip link set dev %s address %s up", VXLAN_TUNL_DEV0, MAC_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip addr add dev %s %s/24", + SYS(fail, "ip netns exec at_ns0 ip addr add dev %s %s/24", VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s", + SYS(fail, "ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s", IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0", + SYS(fail, "ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0", IP4_ADDR2_VETH1, MAC_VETH1); /* root namespace */ - SYS("ip link add dev %s type vxlan external gbp dstport 4789", + SYS(fail, "ip link add dev %s type vxlan external gbp dstport 4789", VXLAN_TUNL_DEV1); - SYS("ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1); - SYS("ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); - SYS("ip neigh add %s lladdr %s dev %s", + SYS(fail, "ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1); + SYS(fail, "ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); + SYS(fail, "ip neigh add %s lladdr %s dev %s", IP4_ADDR_TUNL_DEV0, MAC_TUNL_DEV0, VXLAN_TUNL_DEV1); return 0; @@ -165,26 +150,26 @@ static void delete_vxlan_tunnel(void) static int add_ip6vxlan_tunnel(void) { - SYS("ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0", + SYS(fail, "ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0", IP6_ADDR_VETH0); - SYS("ip netns exec at_ns0 ip link set dev veth0 up"); - SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1); - SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1); - SYS("ip link set dev veth1 up"); + SYS(fail, "ip netns exec at_ns0 ip link set dev veth0 up"); + SYS(fail, "ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1); + SYS(fail, "ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1); + SYS(fail, "ip link set dev veth1 up"); /* at_ns0 namespace */ - SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789", + SYS(fail, "ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789", IP6VXLAN_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip addr add dev %s %s/24", + SYS(fail, "ip netns exec at_ns0 ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); - SYS("ip netns exec at_ns0 ip link set dev %s address %s up", + SYS(fail, "ip netns exec at_ns0 ip link set dev %s address %s up", IP6VXLAN_TUNL_DEV0, MAC_TUNL_DEV0); /* root namespace */ - SYS("ip link add dev %s type vxlan external dstport 4789", + SYS(fail, "ip link add dev %s type vxlan external dstport 4789", IP6VXLAN_TUNL_DEV1); - SYS("ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); - SYS("ip link set dev %s address %s up", + SYS(fail, "ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1); + SYS(fail, "ip link set dev %s address %s up", IP6VXLAN_TUNL_DEV1, MAC_TUNL_DEV1); return 0; @@ -205,7 +190,7 @@ static void delete_ip6vxlan_tunnel(void) static int test_ping(int family, const char *addr) { - SYS("%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); + SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); return 0; fail: return -1; diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 7eb049214859..290c21dbe65a 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -29,6 +29,9 @@ static int timer(struct timer *timer_skel) /* check that timer_cb2() was executed twice */ ASSERT_EQ(timer_skel->bss->bss_data, 10, "bss_data"); + /* check that timer_cb3() was executed twice */ + ASSERT_EQ(timer_skel->bss->abs_data, 12, "abs_data"); + /* check that there were no errors in timer execution */ ASSERT_EQ(timer_skel->bss->err, 0, "err"); diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c index 3a13e102c149..e51721df14fc 100644 --- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c @@ -590,7 +590,7 @@ static void *kick_kernel_cb(void *arg) /* Kick the kernel, causing it to drain the ring buffer and then wake * up the test thread waiting on epoll. */ - syscall(__NR_getrlimit); + syscall(__NR_prlimit64); return NULL; } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c index d4cd9f873c14..fa3cac5488f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c @@ -4,11 +4,10 @@ #define IFINDEX_LO 1 #define XDP_FLAGS_REPLACE (1U << 4) -void serial_test_xdp_attach(void) +static void test_xdp_attach(const char *file) { __u32 duration = 0, id1, id2, id0 = 0, len; struct bpf_object *obj1, *obj2, *obj3; - const char *file = "./test_xdp.bpf.o"; struct bpf_prog_info info = {}; int err, fd1, fd2, fd3; LIBBPF_OPTS(bpf_xdp_attach_opts, opts); @@ -85,3 +84,11 @@ out_2: out_1: bpf_object__close(obj1); } + +void serial_test_xdp_attach(void) +{ + if (test__start_subtest("xdp_attach")) + test_xdp_attach("./test_xdp.bpf.o"); + if (test__start_subtest("xdp_attach_dynptr")) + test_xdp_attach("./test_xdp_dynptr.bpf.o"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index 5e3a26b15ec6..d19f79048ff6 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -141,41 +141,33 @@ static const char * const xmit_policy_names[] = { static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy, int bond_both_attach) { -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - return -1; \ - }) - - SYS("ip netns add ns_dst"); - SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst"); - SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst"); - - SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s", + SYS(fail, "ip netns add ns_dst"); + SYS(fail, "ip link add veth1_1 type veth peer name veth2_1 netns ns_dst"); + SYS(fail, "ip link add veth1_2 type veth peer name veth2_2 netns ns_dst"); + + SYS(fail, "ip link add bond1 type bond mode %s xmit_hash_policy %s", mode_names[mode], xmit_policy_names[xmit_policy]); - SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none"); - SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s", + SYS(fail, "ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none"); + SYS(fail, "ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s", mode_names[mode], xmit_policy_names[xmit_policy]); - SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none"); + SYS(fail, "ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none"); - SYS("ip link set veth1_1 master bond1"); + SYS(fail, "ip link set veth1_1 master bond1"); if (bond_both_attach == BOND_BOTH_AND_ATTACH) { - SYS("ip link set veth1_2 master bond1"); + SYS(fail, "ip link set veth1_2 master bond1"); } else { - SYS("ip link set veth1_2 up addrgenmode none"); + SYS(fail, "ip link set veth1_2 up addrgenmode none"); if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2")) return -1; } - SYS("ip -netns ns_dst link set veth2_1 master bond2"); + SYS(fail, "ip -netns ns_dst link set veth2_1 master bond2"); if (bond_both_attach == BOND_BOTH_AND_ATTACH) - SYS("ip -netns ns_dst link set veth2_2 master bond2"); + SYS(fail, "ip -netns ns_dst link set veth2_2 master bond2"); else - SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none"); + SYS(fail, "ip -netns ns_dst link set veth2_2 up addrgenmode none"); /* Load a dummy program on sending side as with veth peer needs to have a * XDP program loaded as well. @@ -194,8 +186,8 @@ static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy, } return 0; - -#undef SYS +fail: + return -1; } static void bonding_cleanup(struct skeletons *skeletons) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index 7271a18ab3e2..662b6c6c5ed7 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -12,14 +12,6 @@ #include <uapi/linux/netdev.h> #include "test_xdp_do_redirect.skel.h" -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto out; \ - }) - struct udp_packet { struct ethhdr eth; struct ipv6hdr iph; @@ -127,19 +119,19 @@ void test_xdp_do_redirect(void) * iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP * payload. */ - SYS("ip netns add testns"); + SYS(out, "ip netns add testns"); nstoken = open_netns("testns"); if (!ASSERT_OK_PTR(nstoken, "setns")) goto out; - SYS("ip link add veth_src type veth peer name veth_dst"); - SYS("ip link set dev veth_src address 00:11:22:33:44:55"); - SYS("ip link set dev veth_dst address 66:77:88:99:aa:bb"); - SYS("ip link set dev veth_src up"); - SYS("ip link set dev veth_dst up"); - SYS("ip addr add dev veth_src fc00::1/64"); - SYS("ip addr add dev veth_dst fc00::2/64"); - SYS("ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb"); + SYS(out, "ip link add veth_src type veth peer name veth_dst"); + SYS(out, "ip link set dev veth_src address 00:11:22:33:44:55"); + SYS(out, "ip link set dev veth_dst address 66:77:88:99:aa:bb"); + SYS(out, "ip link set dev veth_src up"); + SYS(out, "ip link set dev veth_dst up"); + SYS(out, "ip addr add dev veth_src fc00::1/64"); + SYS(out, "ip addr add dev veth_dst fc00::2/64"); + SYS(out, "ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb"); /* We enable forwarding in the test namespace because that will cause * the packets that go through the kernel stack (with XDP_PASS) to be @@ -152,7 +144,7 @@ void test_xdp_do_redirect(void) * code didn't have this, so we keep the test behaviour to make sure the * bug doesn't resurface. */ - SYS("sysctl -qw net.ipv6.conf.all.forwarding=1"); + SYS(out, "sysctl -qw net.ipv6.conf.all.forwarding=1"); ifindex_src = if_nametoindex("veth_src"); ifindex_dst = if_nametoindex("veth_dst"); @@ -226,6 +218,6 @@ out_tc: out: if (nstoken) close_netns(nstoken); - system("ip netns del testns"); + SYS_NOFAIL("ip netns del testns"); test_xdp_do_redirect__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index aa4beae99f4f..490e851dc27d 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -34,11 +34,6 @@ #define PREFIX_LEN "8" #define FAMILY AF_INET -#define SYS(cmd) ({ \ - if (!ASSERT_OK(system(cmd), (cmd))) \ - goto out; \ -}) - struct xsk { void *umem_area; struct xsk_umem *umem; @@ -298,16 +293,16 @@ void test_xdp_metadata(void) /* Setup new networking namespace, with a veth pair. */ - SYS("ip netns add xdp_metadata"); + SYS(out, "ip netns add xdp_metadata"); tok = open_netns("xdp_metadata"); - SYS("ip link add numtxqueues 1 numrxqueues 1 " TX_NAME + SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1"); - SYS("ip link set dev " TX_NAME " address 00:00:00:00:00:01"); - SYS("ip link set dev " RX_NAME " address 00:00:00:00:00:02"); - SYS("ip link set dev " TX_NAME " up"); - SYS("ip link set dev " RX_NAME " up"); - SYS("ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); - SYS("ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); + SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01"); + SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02"); + SYS(out, "ip link set dev " TX_NAME " up"); + SYS(out, "ip link set dev " RX_NAME " up"); + SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); + SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); rx_ifindex = if_nametoindex(RX_NAME); tx_ifindex = if_nametoindex(TX_NAME); @@ -405,5 +400,5 @@ out: xdp_metadata__destroy(bpf_obj); if (tok) close_netns(tok); - system("ip netns del xdp_metadata"); + SYS_NOFAIL("ip netns del xdp_metadata"); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c index c72083885b6d..8b50a992d233 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c @@ -8,11 +8,6 @@ #define CMD_OUT_BUF_SIZE 1023 -#define SYS(cmd) ({ \ - if (!ASSERT_OK(system(cmd), (cmd))) \ - goto out; \ -}) - #define SYS_OUT(cmd, ...) ({ \ char buf[1024]; \ snprintf(buf, sizeof(buf), (cmd), ##__VA_ARGS__); \ @@ -69,37 +64,37 @@ static void test_synproxy(bool xdp) char buf[CMD_OUT_BUF_SIZE]; size_t size; - SYS("ip netns add synproxy"); + SYS(out, "ip netns add synproxy"); - SYS("ip link add tmp0 type veth peer name tmp1"); - SYS("ip link set tmp1 netns synproxy"); - SYS("ip link set tmp0 up"); - SYS("ip addr replace 198.18.0.1/24 dev tmp0"); + SYS(out, "ip link add tmp0 type veth peer name tmp1"); + SYS(out, "ip link set tmp1 netns synproxy"); + SYS(out, "ip link set tmp0 up"); + SYS(out, "ip addr replace 198.18.0.1/24 dev tmp0"); /* When checksum offload is enabled, the XDP program sees wrong * checksums and drops packets. */ - SYS("ethtool -K tmp0 tx off"); + SYS(out, "ethtool -K tmp0 tx off"); if (xdp) /* Workaround required for veth. */ - SYS("ip link set tmp0 xdp object xdp_dummy.bpf.o section xdp 2> /dev/null"); + SYS(out, "ip link set tmp0 xdp object xdp_dummy.bpf.o section xdp 2> /dev/null"); ns = open_netns("synproxy"); if (!ASSERT_OK_PTR(ns, "setns")) goto out; - SYS("ip link set lo up"); - SYS("ip link set tmp1 up"); - SYS("ip addr replace 198.18.0.2/24 dev tmp1"); - SYS("sysctl -w net.ipv4.tcp_syncookies=2"); - SYS("sysctl -w net.ipv4.tcp_timestamps=1"); - SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0"); - SYS("iptables-legacy -t raw -I PREROUTING \ + SYS(out, "ip link set lo up"); + SYS(out, "ip link set tmp1 up"); + SYS(out, "ip addr replace 198.18.0.2/24 dev tmp1"); + SYS(out, "sysctl -w net.ipv4.tcp_syncookies=2"); + SYS(out, "sysctl -w net.ipv4.tcp_timestamps=1"); + SYS(out, "sysctl -w net.netfilter.nf_conntrack_tcp_loose=0"); + SYS(out, "iptables-legacy -t raw -I PREROUTING \ -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack"); - SYS("iptables-legacy -t filter -A INPUT \ + SYS(out, "iptables-legacy -t filter -A INPUT \ -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \ -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460"); - SYS("iptables-legacy -t filter -A INPUT \ + SYS(out, "iptables-legacy -t filter -A INPUT \ -i tmp1 -m state --state INVALID -j DROP"); ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \ @@ -170,8 +165,8 @@ out: if (ns) close_netns(ns); - system("ip link del tmp0"); - system("ip netns del synproxy"); + SYS_NOFAIL("ip link del tmp0"); + SYS_NOFAIL("ip netns del synproxy"); } void test_xdp_synproxy(void) diff --git a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c index 8b03c9bb4862..d37f5394e199 100644 --- a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c +++ b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c @@ -69,21 +69,6 @@ "proto esp aead 'rfc4106(gcm(aes))' " \ "0xe4d8f4b4da1df18a3510b3781496daa82488b713 128 mode tunnel " -#define SYS(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - if (!ASSERT_OK(system(cmd), cmd)) \ - goto fail; \ - }) - -#define SYS_NOFAIL(fmt, ...) \ - ({ \ - char cmd[1024]; \ - snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ - system(cmd); \ - }) - static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd) { LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, .priority = 1, @@ -126,23 +111,23 @@ static void cleanup(void) static int config_underlay(void) { - SYS("ip netns add " NS0); - SYS("ip netns add " NS1); - SYS("ip netns add " NS2); + SYS(fail, "ip netns add " NS0); + SYS(fail, "ip netns add " NS1); + SYS(fail, "ip netns add " NS2); /* NS0 <-> NS1 [veth01 <-> veth10] */ - SYS("ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1); - SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01"); - SYS("ip -net " NS0 " link set dev veth01 up"); - SYS("ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10"); - SYS("ip -net " NS1 " link set dev veth10 up"); + SYS(fail, "ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1); + SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01"); + SYS(fail, "ip -net " NS0 " link set dev veth01 up"); + SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10"); + SYS(fail, "ip -net " NS1 " link set dev veth10 up"); /* NS0 <-> NS2 [veth02 <-> veth20] */ - SYS("ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2); - SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02"); - SYS("ip -net " NS0 " link set dev veth02 up"); - SYS("ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20"); - SYS("ip -net " NS2 " link set dev veth20 up"); + SYS(fail, "ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2); + SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02"); + SYS(fail, "ip -net " NS0 " link set dev veth02 up"); + SYS(fail, "ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20"); + SYS(fail, "ip -net " NS2 " link set dev veth20 up"); return 0; fail: @@ -153,20 +138,20 @@ static int setup_xfrm_tunnel_ns(const char *ns, const char *ipv4_local, const char *ipv4_remote, int if_id) { /* State: local -> remote */ - SYS("ip -net %s xfrm state add src %s dst %s spi 1 " + SYS(fail, "ip -net %s xfrm state add src %s dst %s spi 1 " ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_local, ipv4_remote, if_id); /* State: local <- remote */ - SYS("ip -net %s xfrm state add src %s dst %s spi 1 " + SYS(fail, "ip -net %s xfrm state add src %s dst %s spi 1 " ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_remote, ipv4_local, if_id); /* Policy: local -> remote */ - SYS("ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 " + SYS(fail, "ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 " "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns, if_id, ipv4_local, ipv4_remote, if_id); /* Policy: local <- remote */ - SYS("ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 " + SYS(fail, "ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 " "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns, if_id, ipv4_remote, ipv4_local, if_id); @@ -274,16 +259,16 @@ static int config_overlay(void) if (!ASSERT_OK(setup_xfrmi_external_dev(NS0), "xfrmi")) goto fail; - SYS("ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0"); - SYS("ip -net " NS0 " link set dev ipsec0 up"); + SYS(fail, "ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0"); + SYS(fail, "ip -net " NS0 " link set dev ipsec0 up"); - SYS("ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1); - SYS("ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0"); - SYS("ip -net " NS1 " link set dev ipsec0 up"); + SYS(fail, "ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1); + SYS(fail, "ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0"); + SYS(fail, "ip -net " NS1 " link set dev ipsec0 up"); - SYS("ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2); - SYS("ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0"); - SYS("ip -net " NS2 " link set dev ipsec0 up"); + SYS(fail, "ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2); + SYS(fail, "ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0"); + SYS(fail, "ip -net " NS2 " link set dev ipsec0 up"); return 0; fail: @@ -294,7 +279,7 @@ static int test_xfrm_ping(struct xfrm_info *skel, u32 if_id) { skel->bss->req_if_id = if_id; - SYS("ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null"); + SYS(fail, "ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null"); if (!ASSERT_EQ(skel->bss->resp_if_id, if_id, "if_id")) goto fail; diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index a20c5ed5e454..b04e092fac94 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -337,7 +337,7 @@ PROG(IPV6)(struct __sk_buff *skb) keys->ip_proto = ip6h->nexthdr; keys->flow_label = ip6_flowlabel(ip6h); - if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) + if (keys->flow_label && keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) return export_flow_keys(keys, BPF_OK); return parse_ipv6_proto(skb, ip6h->nexthdr); diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 14e28f991451..f704885aa534 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -2,10 +2,33 @@ #ifndef __BPF_MISC_H__ #define __BPF_MISC_H__ +/* This set of attributes controls behavior of the + * test_loader.c:test_loader__run_subtests(). + * + * __msg Message expected to be found in the verifier log. + * Multiple __msg attributes could be specified. + * + * __success Expect program load success in privileged mode. + * + * __failure Expect program load failure in privileged mode. + * + * __log_level Log level to use for the program, numeric value expected. + * + * __flag Adds one flag use for the program, the following values are valid: + * - BPF_F_STRICT_ALIGNMENT; + * - BPF_F_TEST_RND_HI32; + * - BPF_F_TEST_STATE_FREQ; + * - BPF_F_SLEEPABLE; + * - BPF_F_XDP_HAS_FRAGS; + * - A numeric value. + * Multiple __flag attributes could be specified, the final flags + * value is derived by applying binary "or" to all specified values. + */ #define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg))) #define __failure __attribute__((btf_decl_tag("comment:test_expect_failure"))) #define __success __attribute__((btf_decl_tag("comment:test_expect_success"))) #define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl))) +#define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag))) /* Convenience macro for use with 'asm volatile' blocks */ #define __naked __attribute__((naked)) diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index 7653df1bc787..ce96b33e38d6 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -4,7 +4,7 @@ #include <bpf/bpf_helpers.h> struct map_value { - struct prog_test_ref_kfunc __kptr_ref *ptr; + struct prog_test_ref_kfunc __kptr *ptr; }; struct { diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h index 7d30855bfe78..d0b7cd0d09d7 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h @@ -10,7 +10,7 @@ #include <bpf/bpf_tracing.h> struct __cgrps_kfunc_map_value { - struct cgroup __kptr_ref * cgrp; + struct cgroup __kptr * cgrp; }; struct hash_map { @@ -24,6 +24,7 @@ struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym; struct cgroup *bpf_cgroup_kptr_get(struct cgroup **pp) __ksym; void bpf_cgroup_release(struct cgroup *p) __ksym; struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym; +struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym; static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp) { diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c index 4ad7fe24966d..b42291ed9586 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -205,7 +205,7 @@ int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path) } SEC("tp_btf/cgroup_mkdir") -__failure __msg("arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket") +__failure __msg("expects refcounted") int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path) { struct __cgrps_kfunc_map_value *v; diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c index 0c23ea32df9f..030aff700084 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c @@ -61,7 +61,7 @@ int BPF_PROG(test_cgrp_acquire_leave_in_map, struct cgroup *cgrp, const char *pa SEC("tp_btf/cgroup_mkdir") int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path) { - struct cgroup *kptr; + struct cgroup *kptr, *cg; struct __cgrps_kfunc_map_value *v; long status; @@ -80,6 +80,16 @@ int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path) return 0; } + kptr = v->cgrp; + if (!kptr) { + err = 4; + return 0; + } + + cg = bpf_cgroup_ancestor(kptr, 1); + if (cg) /* verifier only check */ + bpf_cgroup_release(cg); + kptr = bpf_kptr_xchg(&v->cgrp, NULL); if (!kptr) { err = 3; @@ -168,3 +178,45 @@ int BPF_PROG(test_cgrp_get_ancestors, struct cgroup *cgrp, const char *path) return 0; } + +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_from_id, struct cgroup *cgrp, const char *path) +{ + struct cgroup *parent, *res; + u64 parent_cgid; + + if (!is_test_kfunc_task()) + return 0; + + /* @cgrp's ID is not visible yet, let's test with the parent */ + parent = bpf_cgroup_ancestor(cgrp, cgrp->level - 1); + if (!parent) { + err = 1; + return 0; + } + + parent_cgid = parent->kn->id; + bpf_cgroup_release(parent); + + res = bpf_cgroup_from_id(parent_cgid); + if (!res) { + err = 2; + return 0; + } + + bpf_cgroup_release(res); + + if (res != parent) { + err = 3; + return 0; + } + + res = bpf_cgroup_from_id((u64)-1); + if (res) { + bpf_cgroup_release(res); + err = 4; + return 0; + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c index 2d11ed528b6f..7615dc23d301 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -49,7 +49,7 @@ int no_rcu_lock(void *ctx) if (task->pid != target_pid) return 0; - /* ptr_to_btf_id semantics. should work. */ + /* task->cgroups is untrusted in sleepable prog outside of RCU CS */ cgrp = task->cgroups->dfl_cgrp; ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); @@ -71,7 +71,7 @@ int yes_rcu_lock(void *ctx) bpf_rcu_read_lock(); cgrp = task->cgroups->dfl_cgrp; - /* cgrp is untrusted and cannot pass to bpf_cgrp_storage_get() helper. */ + /* cgrp is trusted under RCU CS */ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) cgroup_id = cgrp->kn->id; diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index ad34f3b602be..65e5496ca1b2 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -10,7 +10,7 @@ int err; struct __cpumask_map_value { - struct bpf_cpumask __kptr_ref * cpumask; + struct bpf_cpumask __kptr * cpumask; }; struct array_map { diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index 33e8e86dd090..c16f7563b84e 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -44,7 +44,7 @@ int BPF_PROG(test_alloc_double_release, struct task_struct *task, u64 clone_flag } SEC("tp_btf/task_newtask") -__failure __msg("bpf_cpumask_acquire args#0 expected pointer to STRUCT bpf_cpumask") +__failure __msg("must be referenced") int BPF_PROG(test_acquire_wrong_cpumask, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *cpumask; diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index aa5b69354b91..20ce920d891d 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -5,7 +5,9 @@ #include <string.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include <linux/if_ether.h> #include "bpf_misc.h" +#include "bpf_kfuncs.h" char _license[] SEC("license") = "GPL"; @@ -244,6 +246,27 @@ done: return 0; } +/* A data slice can't be accessed out of bounds */ +SEC("?tc") +__failure __msg("value is outside of the allowed memory range") +int data_slice_out_of_bounds_skb(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + /* this should fail */ + *(__u8*)(hdr + 1) = 1; + + return SK_PASS; +} + SEC("?raw_tp") __failure __msg("value is outside of the allowed memory range") int data_slice_out_of_bounds_map_value(void *ctx) @@ -399,7 +422,6 @@ int invalid_helper2(void *ctx) /* this should fail */ bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0, 0); - return 0; } @@ -1044,6 +1066,193 @@ int dynptr_read_into_slot(void *ctx) return 0; } +/* bpf_dynptr_slice()s are read-only and cannot be written to */ +SEC("?tc") +__failure __msg("R0 cannot write into rdonly_mem") +int skb_invalid_slice_write(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + /* this should fail */ + hdr->h_proto = 1; + + return SK_PASS; +} + +/* The read-only data slice is invalidated whenever a helper changes packet data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_invalid_data_slice1(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + val = hdr->h_proto; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + val = hdr->h_proto; + + return SK_PASS; +} + +/* The read-write data slice is invalidated whenever a helper changes packet data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_invalid_data_slice2(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + hdr->h_proto = 123; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + hdr->h_proto = 1; + + return SK_PASS; +} + +/* The read-only data slice is invalidated whenever bpf_dynptr_write() is called */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_invalid_data_slice3(struct __sk_buff *skb) +{ + char write_data[64] = "hello there, world!!"; + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + val = hdr->h_proto; + + bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0); + + /* this should fail */ + val = hdr->h_proto; + + return SK_PASS; +} + +/* The read-write data slice is invalidated whenever bpf_dynptr_write() is called */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int skb_invalid_data_slice4(struct __sk_buff *skb) +{ + char write_data[64] = "hello there, world!!"; + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + hdr->h_proto = 123; + + bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0); + + /* this should fail */ + hdr->h_proto = 1; + + return SK_PASS; +} + +/* The read-only data slice is invalidated whenever a helper changes packet data */ +SEC("?xdp") +__failure __msg("invalid mem access 'scalar'") +int xdp_invalid_data_slice1(struct xdp_md *xdp) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_xdp(xdp, 0, &ptr); + hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + val = hdr->h_proto; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr))) + return XDP_DROP; + + /* this should fail */ + val = hdr->h_proto; + + return XDP_PASS; +} + +/* The read-write data slice is invalidated whenever a helper changes packet data */ +SEC("?xdp") +__failure __msg("invalid mem access 'scalar'") +int xdp_invalid_data_slice2(struct xdp_md *xdp) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_xdp(xdp, 0, &ptr); + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!hdr) + return SK_DROP; + + hdr->h_proto = 9; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr))) + return XDP_DROP; + + /* this should fail */ + hdr->h_proto = 1; + + return XDP_PASS; +} + +/* Only supported prog type can create skb-type dynptrs */ +SEC("?raw_tp") +__failure __msg("calling kernel function bpf_dynptr_from_skb is not allowed") +int skb_invalid_ctx(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_from_skb(ctx, 0, &ptr); + + return 0; +} + /* Reject writes to dynptr slot for uninit arg */ SEC("?raw_tp") __failure __msg("potential write to dynptr at off=-16") @@ -1061,6 +1270,61 @@ int uninit_write_into_slot(void *ctx) return 0; } +/* Only supported prog type can create xdp-type dynptrs */ +SEC("?raw_tp") +__failure __msg("calling kernel function bpf_dynptr_from_xdp is not allowed") +int xdp_invalid_ctx(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_from_xdp(ctx, 0, &ptr); + + return 0; +} + +__u32 hdr_size = sizeof(struct ethhdr); +/* Can't pass in variable-sized len to bpf_dynptr_slice */ +SEC("?tc") +__failure __msg("unbounded memory access") +int dynptr_slice_var_len1(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + struct ethhdr *hdr; + char buffer[sizeof(*hdr)] = {}; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + /* this should fail */ + hdr = bpf_dynptr_slice(&ptr, 0, buffer, hdr_size); + if (!hdr) + return SK_DROP; + + return SK_PASS; +} + +/* Can't pass in variable-sized len to bpf_dynptr_slice */ +SEC("?tc") +__failure __msg("must be a known constant") +int dynptr_slice_var_len2(struct __sk_buff *skb) +{ + char buffer[sizeof(struct ethhdr)] = {}; + struct bpf_dynptr ptr; + struct ethhdr *hdr; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + if (hdr_size <= sizeof(buffer)) { + /* this should fail */ + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, hdr_size); + if (!hdr) + return SK_DROP; + hdr->h_proto = 12; + } + + return SK_PASS; +} + static int callback(__u32 index, void *data) { *(__u32 *)data = 123; @@ -1092,3 +1356,24 @@ int invalid_data_slices(void *ctx) return 0; } + +/* Program types that don't allow writes to packet data should fail if + * bpf_dynptr_slice_rdwr is called + */ +SEC("cgroup_skb/ingress") +__failure __msg("the prog does not allow writes to packet data") +int invalid_slice_rdwr_rdonly(struct __sk_buff *skb) +{ + char buffer[sizeof(struct ethhdr)] = {}; + struct bpf_dynptr ptr; + struct ethhdr *hdr; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + /* this should fail since cgroup_skb doesn't allow + * changing packet data + */ + hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 35db7c6c1fc7..c8358a7c7924 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -5,6 +5,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#include "bpf_kfuncs.h" #include "errno.h" char _license[] SEC("license") = "GPL"; @@ -30,7 +31,7 @@ struct { __type(value, __u32); } array_map SEC(".maps"); -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int test_read_write(void *ctx) { char write_data[64] = "hello there, world!!"; @@ -61,8 +62,8 @@ int test_read_write(void *ctx) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") -int test_data_slice(void *ctx) +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_data(void *ctx) { __u32 key = 0, val = 235, *map_val; struct bpf_dynptr ptr; @@ -131,7 +132,7 @@ static int ringbuf_callback(__u32 index, void *data) return 0; } -SEC("tp/syscalls/sys_enter_nanosleep") +SEC("?tp/syscalls/sys_enter_nanosleep") int test_ringbuf(void *ctx) { struct bpf_dynptr ptr; @@ -163,3 +164,49 @@ done: bpf_ringbuf_discard_dynptr(&ptr, 0); return 0; } + +SEC("?cgroup_skb/egress") +int test_skb_readonly(struct __sk_buff *skb) +{ + __u8 write_data[2] = {1, 2}; + struct bpf_dynptr ptr; + __u64 *data; + int ret; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) { + err = 1; + return 1; + } + + /* since cgroup skbs are read only, writes should fail */ + ret = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0); + if (ret != -EINVAL) { + err = 2; + return 1; + } + + return 1; +} + +SEC("?cgroup_skb/egress") +int test_dynptr_skb_data(struct __sk_buff *skb) +{ + __u8 write_data[2] = {1, 2}; + struct bpf_dynptr ptr; + __u64 *data; + int ret; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) { + err = 1; + return 1; + } + + /* This should return NULL. Must use bpf_dynptr_slice API */ + data = bpf_dynptr_data(&ptr, 0, 1); + if (data) { + err = 2; + return 1; + } + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c index 6dab9cffda13..7ba9a428f228 100644 --- a/tools/testing/selftests/bpf/progs/find_vma_fail1.c +++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c @@ -14,7 +14,7 @@ static long write_vma(struct task_struct *task, struct vm_area_struct *vma, struct callback_ctx *data) { /* writing to vma, which is illegal */ - vma->vm_flags |= 0x55; + vma->vm_start = 0xffffffffff600000; return 0; } diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c index 2d2e61470794..13f00ca2ed0a 100644 --- a/tools/testing/selftests/bpf/progs/jit_probe_mem.c +++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c @@ -4,7 +4,7 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> -static struct prog_test_ref_kfunc __kptr_ref *v; +static struct prog_test_ref_kfunc __kptr *v; long total_sum = -1; extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; diff --git a/tools/testing/selftests/bpf/progs/lru_bug.c b/tools/testing/selftests/bpf/progs/lru_bug.c index 687081a724b3..ad73029cb1e3 100644 --- a/tools/testing/selftests/bpf/progs/lru_bug.c +++ b/tools/testing/selftests/bpf/progs/lru_bug.c @@ -4,7 +4,7 @@ #include <bpf/bpf_helpers.h> struct map_value { - struct task_struct __kptr *ptr; + struct task_struct __kptr_untrusted *ptr; }; struct { diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index 228ec45365a8..3903d30217b8 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -4,8 +4,8 @@ #include <bpf/bpf_helpers.h> struct map_value { - struct prog_test_ref_kfunc __kptr *unref_ptr; - struct prog_test_ref_kfunc __kptr_ref *ref_ptr; + struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr; + struct prog_test_ref_kfunc __kptr *ref_ptr; }; struct array_map { @@ -15,6 +15,13 @@ struct array_map { __uint(max_entries, 1); } array_map SEC(".maps"); +struct pcpu_array_map { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} pcpu_array_map SEC(".maps"); + struct hash_map { __uint(type, BPF_MAP_TYPE_HASH); __type(key, int); @@ -22,6 +29,13 @@ struct hash_map { __uint(max_entries, 1); } hash_map SEC(".maps"); +struct pcpu_hash_map { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} pcpu_hash_map SEC(".maps"); + struct hash_malloc_map { __uint(type, BPF_MAP_TYPE_HASH); __type(key, int); @@ -30,6 +44,14 @@ struct hash_malloc_map { __uint(map_flags, BPF_F_NO_PREALLOC); } hash_malloc_map SEC(".maps"); +struct pcpu_hash_malloc_map { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); + __uint(map_flags, BPF_F_NO_PREALLOC); +} pcpu_hash_malloc_map SEC(".maps"); + struct lru_hash_map { __uint(type, BPF_MAP_TYPE_LRU_HASH); __type(key, int); @@ -37,6 +59,41 @@ struct lru_hash_map { __uint(max_entries, 1); } lru_hash_map SEC(".maps"); +struct lru_pcpu_hash_map { + __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} lru_pcpu_hash_map SEC(".maps"); + +struct cgrp_ls_map { + __uint(type, BPF_MAP_TYPE_CGRP_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} cgrp_ls_map SEC(".maps"); + +struct task_ls_map { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} task_ls_map SEC(".maps"); + +struct inode_ls_map { + __uint(type, BPF_MAP_TYPE_INODE_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} inode_ls_map SEC(".maps"); + +struct sk_ls_map { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct map_value); +} sk_ls_map SEC(".maps"); + #define DEFINE_MAP_OF_MAP(map_type, inner_map_type, name) \ struct { \ __uint(type, map_type); \ @@ -61,6 +118,7 @@ extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp extern struct prog_test_ref_kfunc * bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym; extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; +void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym; #define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val)) @@ -90,12 +148,23 @@ static void test_kptr_ref(struct map_value *v) WRITE_ONCE(v->unref_ptr, p); if (!p) return; + /* + * p is rcu_ptr_prog_test_ref_kfunc, + * because bpf prog is non-sleepable and runs in RCU CS. + * p can be passed to kfunc that requires KF_RCU. + */ + bpf_kfunc_call_test_ref(p); if (p->a + p->b > 100) return; /* store NULL */ p = bpf_kptr_xchg(&v->ref_ptr, NULL); if (!p) return; + /* + * p is trusted_ptr_prog_test_ref_kfunc. + * p can be passed to kfunc that requires KF_RCU. + */ + bpf_kfunc_call_test_ref(p); if (p->a + p->b > 100) { bpf_kfunc_call_test_release(p); return; @@ -160,6 +229,58 @@ int test_map_kptr(struct __sk_buff *ctx) return 0; } +SEC("tp_btf/cgroup_mkdir") +int BPF_PROG(test_cgrp_map_kptr, struct cgroup *cgrp, const char *path) +{ + struct map_value *v; + + v = bpf_cgrp_storage_get(&cgrp_ls_map, cgrp, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + +SEC("lsm/inode_unlink") +int BPF_PROG(test_task_map_kptr, struct inode *inode, struct dentry *victim) +{ + struct task_struct *task; + struct map_value *v; + + task = bpf_get_current_task_btf(); + if (!task) + return 0; + v = bpf_task_storage_get(&task_ls_map, task, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + +SEC("lsm/inode_unlink") +int BPF_PROG(test_inode_map_kptr, struct inode *inode, struct dentry *victim) +{ + struct map_value *v; + + v = bpf_inode_storage_get(&inode_ls_map, inode, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + +SEC("tc") +int test_sk_map_kptr(struct __sk_buff *ctx) +{ + struct map_value *v; + struct bpf_sock *sk; + + sk = ctx->sk; + if (!sk) + return 0; + v = bpf_sk_storage_get(&sk_ls_map, sk, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (v) + test_kptr(v); + return 0; +} + SEC("tc") int test_map_in_map_kptr(struct __sk_buff *ctx) { @@ -189,106 +310,257 @@ int test_map_in_map_kptr(struct __sk_buff *ctx) return 0; } -SEC("tc") -int test_map_kptr_ref(struct __sk_buff *ctx) +int ref = 1; + +static __always_inline +int test_map_kptr_ref_pre(struct map_value *v) { struct prog_test_ref_kfunc *p, *p_st; unsigned long arg = 0; - struct map_value *v; - int key = 0, ret; + int ret; p = bpf_kfunc_call_test_acquire(&arg); if (!p) return 1; + ref++; p_st = p->next; - if (p_st->cnt.refs.counter != 2) { + if (p_st->cnt.refs.counter != ref) { ret = 2; goto end; } - v = bpf_map_lookup_elem(&array_map, &key); - if (!v) { - ret = 3; - goto end; - } - p = bpf_kptr_xchg(&v->ref_ptr, p); if (p) { - ret = 4; + ret = 3; goto end; } - if (p_st->cnt.refs.counter != 2) - return 5; + if (p_st->cnt.refs.counter != ref) + return 4; p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0); if (!p) - return 6; - if (p_st->cnt.refs.counter != 3) { - ret = 7; + return 5; + ref++; + if (p_st->cnt.refs.counter != ref) { + ret = 6; goto end; } bpf_kfunc_call_test_release(p); - if (p_st->cnt.refs.counter != 2) - return 8; + ref--; + if (p_st->cnt.refs.counter != ref) + return 7; p = bpf_kptr_xchg(&v->ref_ptr, NULL); if (!p) - return 9; + return 8; bpf_kfunc_call_test_release(p); - if (p_st->cnt.refs.counter != 1) - return 10; + ref--; + if (p_st->cnt.refs.counter != ref) + return 9; p = bpf_kfunc_call_test_acquire(&arg); if (!p) - return 11; + return 10; + ref++; p = bpf_kptr_xchg(&v->ref_ptr, p); if (p) { - ret = 12; + ret = 11; goto end; } - if (p_st->cnt.refs.counter != 2) - return 13; + if (p_st->cnt.refs.counter != ref) + return 12; /* Leave in map */ return 0; end: + ref--; bpf_kfunc_call_test_release(p); return ret; } -SEC("tc") -int test_map_kptr_ref2(struct __sk_buff *ctx) +static __always_inline +int test_map_kptr_ref_post(struct map_value *v) { struct prog_test_ref_kfunc *p, *p_st; - struct map_value *v; - int key = 0; - - v = bpf_map_lookup_elem(&array_map, &key); - if (!v) - return 1; p_st = v->ref_ptr; - if (!p_st || p_st->cnt.refs.counter != 2) - return 2; + if (!p_st || p_st->cnt.refs.counter != ref) + return 1; p = bpf_kptr_xchg(&v->ref_ptr, NULL); if (!p) - return 3; - if (p_st->cnt.refs.counter != 2) { + return 2; + if (p_st->cnt.refs.counter != ref) { bpf_kfunc_call_test_release(p); - return 4; + return 3; } p = bpf_kptr_xchg(&v->ref_ptr, p); if (p) { bpf_kfunc_call_test_release(p); - return 5; + return 4; } - if (p_st->cnt.refs.counter != 2) - return 6; + if (p_st->cnt.refs.counter != ref) + return 5; + + return 0; +} + +#define TEST(map) \ + v = bpf_map_lookup_elem(&map, &key); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_pre(v); \ + if (ret) \ + return ret; + +#define TEST_PCPU(map) \ + v = bpf_map_lookup_percpu_elem(&map, &key, 0); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_pre(v); \ + if (ret) \ + return ret; + +SEC("tc") +int test_map_kptr_ref1(struct __sk_buff *ctx) +{ + struct map_value *v, val = {}; + int key = 0, ret; + + bpf_map_update_elem(&hash_map, &key, &val, 0); + bpf_map_update_elem(&hash_malloc_map, &key, &val, 0); + bpf_map_update_elem(&lru_hash_map, &key, &val, 0); + + bpf_map_update_elem(&pcpu_hash_map, &key, &val, 0); + bpf_map_update_elem(&pcpu_hash_malloc_map, &key, &val, 0); + bpf_map_update_elem(&lru_pcpu_hash_map, &key, &val, 0); + + TEST(array_map); + TEST(hash_map); + TEST(hash_malloc_map); + TEST(lru_hash_map); + + TEST_PCPU(pcpu_array_map); + TEST_PCPU(pcpu_hash_map); + TEST_PCPU(pcpu_hash_malloc_map); + TEST_PCPU(lru_pcpu_hash_map); + + return 0; +} + +#undef TEST +#undef TEST_PCPU + +#define TEST(map) \ + v = bpf_map_lookup_elem(&map, &key); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_post(v); \ + if (ret) \ + return ret; + +#define TEST_PCPU(map) \ + v = bpf_map_lookup_percpu_elem(&map, &key, 0); \ + if (!v) \ + return -1; \ + ret = test_map_kptr_ref_post(v); \ + if (ret) \ + return ret; + +SEC("tc") +int test_map_kptr_ref2(struct __sk_buff *ctx) +{ + struct map_value *v; + int key = 0, ret; + + TEST(array_map); + TEST(hash_map); + TEST(hash_malloc_map); + TEST(lru_hash_map); + + TEST_PCPU(pcpu_array_map); + TEST_PCPU(pcpu_hash_map); + TEST_PCPU(pcpu_hash_malloc_map); + TEST_PCPU(lru_pcpu_hash_map); return 0; } +#undef TEST +#undef TEST_PCPU + +SEC("tc") +int test_map_kptr_ref3(struct __sk_buff *ctx) +{ + struct prog_test_ref_kfunc *p; + unsigned long sp = 0; + + p = bpf_kfunc_call_test_acquire(&sp); + if (!p) + return 1; + ref++; + if (p->cnt.refs.counter != ref) { + bpf_kfunc_call_test_release(p); + return 2; + } + bpf_kfunc_call_test_release(p); + ref--; + return 0; +} + +SEC("syscall") +int test_ls_map_kptr_ref1(void *ctx) +{ + struct task_struct *current; + struct map_value *v; + int ret; + + current = bpf_get_current_task_btf(); + if (!current) + return 100; + v = bpf_task_storage_get(&task_ls_map, current, NULL, 0); + if (v) + return 150; + v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!v) + return 200; + return test_map_kptr_ref_pre(v); +} + +SEC("syscall") +int test_ls_map_kptr_ref2(void *ctx) +{ + struct task_struct *current; + struct map_value *v; + int ret; + + current = bpf_get_current_task_btf(); + if (!current) + return 100; + v = bpf_task_storage_get(&task_ls_map, current, NULL, 0); + if (!v) + return 200; + return test_map_kptr_ref_post(v); +} + +SEC("syscall") +int test_ls_map_kptr_ref_del(void *ctx) +{ + struct task_struct *current; + struct map_value *v; + int ret; + + current = bpf_get_current_task_btf(); + if (!current) + return 100; + v = bpf_task_storage_get(&task_ls_map, current, NULL, 0); + if (!v) + return 200; + if (!v->ref_ptr) + return 300; + return bpf_task_storage_delete(&task_ls_map, current); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index 760e41e1a632..08f9ec18c345 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -7,9 +7,9 @@ struct map_value { char buf[8]; - struct prog_test_ref_kfunc __kptr *unref_ptr; - struct prog_test_ref_kfunc __kptr_ref *ref_ptr; - struct prog_test_member __kptr_ref *ref_memb_ptr; + struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr; + struct prog_test_ref_kfunc __kptr *ref_ptr; + struct prog_test_member __kptr *ref_memb_ptr; }; struct array_map { @@ -281,7 +281,7 @@ int reject_kptr_get_bad_type_match(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_") +__failure __msg("R1 type=rcu_ptr_or_null_ expected=percpu_ptr_") int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx) { struct map_value *v; @@ -316,7 +316,7 @@ int reject_untrusted_store_to_ref(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("R2 type=untrusted_ptr_ expected=ptr_") +__failure __msg("R2 must be referenced") int reject_untrusted_xchg(struct __sk_buff *ctx) { struct prog_test_ref_kfunc *p; diff --git a/tools/testing/selftests/bpf/progs/nested_trust_failure.c b/tools/testing/selftests/bpf/progs/nested_trust_failure.c index 14aff7676436..0d1aa6bbace4 100644 --- a/tools/testing/selftests/bpf/progs/nested_trust_failure.c +++ b/tools/testing/selftests/bpf/progs/nested_trust_failure.c @@ -17,7 +17,7 @@ char _license[] SEC("license") = "GPL"; */ SEC("tp_btf/task_newtask") -__failure __msg("R2 must be referenced or trusted") +__failure __msg("R2 must be") int BPF_PROG(test_invalid_nested_user_cpus, struct task_struct *task, u64 clone_flags) { bpf_cpumask_test_cpu(0, task->user_cpus_ptr); diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c index e5db1a4287e5..4c90aa6abddd 100644 --- a/tools/testing/selftests/bpf/progs/rbtree.c +++ b/tools/testing/selftests/bpf/progs/rbtree.c @@ -75,7 +75,7 @@ SEC("tc") long rbtree_add_and_remove(void *ctx) { struct bpf_rb_node *res = NULL; - struct node_data *n, *m; + struct node_data *n, *m = NULL; n = bpf_obj_new(typeof(*n)); if (!n) diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c index bf3cba115897..1ced900f3fce 100644 --- a/tools/testing/selftests/bpf/progs/rbtree_fail.c +++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c @@ -232,8 +232,11 @@ long rbtree_api_first_release_unlock_escape(void *ctx) bpf_spin_lock(&glock); res = bpf_rbtree_first(&groot); - if (res) - n = container_of(res, struct node_data, node); + if (!res) { + bpf_spin_unlock(&glock); + return 1; + } + n = container_of(res, struct node_data, node); bpf_spin_unlock(&glock); bpf_spin_lock(&glock); diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 5cecbdbbb16e..7250bb76d18a 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -81,7 +81,7 @@ int no_lock(void *ctx) { struct task_struct *task, *real_parent; - /* no bpf_rcu_read_lock(), old code still works */ + /* old style ptr_to_btf_id is not allowed in sleepable */ task = bpf_get_current_task_btf(); real_parent = task->real_parent; (void)bpf_task_storage_get(&map_a, real_parent, 0, 0); @@ -286,13 +286,13 @@ out: } SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") -int task_untrusted_non_rcuptr(void *ctx) +int task_trusted_non_rcuptr(void *ctx) { struct task_struct *task, *group_leader; task = bpf_get_current_task_btf(); bpf_rcu_read_lock(); - /* the pointer group_leader marked as untrusted */ + /* the pointer group_leader is explicitly marked as trusted */ group_leader = task->real_parent->group_leader; (void)bpf_task_storage_get(&map_a, group_leader, 0, 0); bpf_rcu_read_unlock(); diff --git a/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c new file mode 100644 index 000000000000..df4873558634 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +struct task_ls_map { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} task_ls_map SEC(".maps"); + +long gp_seq; + +SEC("syscall") +int do_call_rcu_tasks_trace(void *ctx) +{ + struct task_struct *current; + int *v; + + current = bpf_get_current_task_btf(); + v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!v) + return 1; + /* Invoke call_rcu_tasks_trace */ + return bpf_task_storage_delete(&task_ls_map, current); +} + +SEC("kprobe/rcu_tasks_trace_postgp") +int rcu_tasks_trace_postgp(void *ctx) +{ + __sync_add_and_fetch(&gp_seq, 1); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h index c0ffd171743e..4c2a4b0e3a25 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h @@ -10,7 +10,7 @@ #include <bpf/bpf_tracing.h> struct __tasks_kfunc_map_value { - struct task_struct __kptr_ref * task; + struct task_struct __kptr * task; }; struct hash_map { diff --git a/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c b/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c new file mode 100644 index 000000000000..f548b7446218 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" + +int kprobe_res = 0; + +/** + * This program will be manually made sleepable on the userspace side + * and should thus be unattachable. + */ +SEC("kprobe/" SYS_PREFIX "sys_nanosleep") +int handle_kprobe_sleepable(struct pt_regs *ctx) +{ + kprobe_res = 1; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index 3b5dc34d23e9..68466a6ad18c 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -7,12 +7,8 @@ #include <bpf/bpf_core_read.h> #include "bpf_misc.h" -int kprobe_res = 0; int kprobe2_res = 0; -int kretprobe_res = 0; int kretprobe2_res = 0; -int uprobe_res = 0; -int uretprobe_res = 0; int uprobe_byname_res = 0; int uretprobe_byname_res = 0; int uprobe_byname2_res = 0; @@ -23,13 +19,6 @@ int uretprobe_byname3_sleepable_res = 0; int uretprobe_byname3_res = 0; void *user_ptr = 0; -SEC("kprobe") -int handle_kprobe(struct pt_regs *ctx) -{ - kprobe_res = 1; - return 0; -} - SEC("ksyscall/nanosleep") int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __kernel_timespec *rem) { @@ -37,24 +26,6 @@ int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __ker return 0; } -/** - * This program will be manually made sleepable on the userspace side - * and should thus be unattachable. - */ -SEC("kprobe/" SYS_PREFIX "sys_nanosleep") -int handle_kprobe_sleepable(struct pt_regs *ctx) -{ - kprobe_res = 2; - return 0; -} - -SEC("kretprobe") -int handle_kretprobe(struct pt_regs *ctx) -{ - kretprobe_res = 2; - return 0; -} - SEC("kretsyscall/nanosleep") int BPF_KRETPROBE(handle_kretprobe_auto, int ret) { @@ -63,16 +34,14 @@ int BPF_KRETPROBE(handle_kretprobe_auto, int ret) } SEC("uprobe") -int handle_uprobe(struct pt_regs *ctx) +int handle_uprobe_ref_ctr(struct pt_regs *ctx) { - uprobe_res = 3; return 0; } SEC("uretprobe") -int handle_uretprobe(struct pt_regs *ctx) +int handle_uretprobe_ref_ctr(struct pt_regs *ctx) { - uretprobe_res = 4; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c b/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c new file mode 100644 index 000000000000..7f08bce94596 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" + +int kprobe_res = 0; +int kretprobe_res = 0; +int uprobe_res = 0; +int uretprobe_res = 0; +int uprobe_byname_res = 0; +void *user_ptr = 0; + +SEC("kprobe") +int handle_kprobe(struct pt_regs *ctx) +{ + kprobe_res = 1; + return 0; +} + +SEC("kretprobe") +int handle_kretprobe(struct pt_regs *ctx) +{ + kretprobe_res = 2; + return 0; +} + +SEC("uprobe") +int handle_uprobe(struct pt_regs *ctx) +{ + uprobe_res = 3; + return 0; +} + +SEC("uretprobe") +int handle_uretprobe(struct pt_regs *ctx) +{ + uretprobe_res = 4; + return 0; +} + +SEC("uprobe") +int handle_uprobe_byname(struct pt_regs *ctx) +{ + uprobe_byname_res = 5; + return 0; +} + + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c new file mode 100644 index 000000000000..f45a7095de7a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c @@ -0,0 +1,980 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2019, 2020 Cloudflare + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include <linux/bpf.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/pkt_cls.h> +#include <linux/tcp.h> +#include <linux/udp.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#include "test_cls_redirect.h" +#include "bpf_kfuncs.h" + +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) + +#define IP_OFFSET_MASK (0x1FFF) +#define IP_MF (0x2000) + +char _license[] SEC("license") = "Dual BSD/GPL"; + +/** + * Destination port and IP used for UDP encapsulation. + */ +volatile const __be16 ENCAPSULATION_PORT; +volatile const __be32 ENCAPSULATION_IP; + +typedef struct { + uint64_t processed_packets_total; + uint64_t l3_protocol_packets_total_ipv4; + uint64_t l3_protocol_packets_total_ipv6; + uint64_t l4_protocol_packets_total_tcp; + uint64_t l4_protocol_packets_total_udp; + uint64_t accepted_packets_total_syn; + uint64_t accepted_packets_total_syn_cookies; + uint64_t accepted_packets_total_last_hop; + uint64_t accepted_packets_total_icmp_echo_request; + uint64_t accepted_packets_total_established; + uint64_t forwarded_packets_total_gue; + uint64_t forwarded_packets_total_gre; + + uint64_t errors_total_unknown_l3_proto; + uint64_t errors_total_unknown_l4_proto; + uint64_t errors_total_malformed_ip; + uint64_t errors_total_fragmented_ip; + uint64_t errors_total_malformed_icmp; + uint64_t errors_total_unwanted_icmp; + uint64_t errors_total_malformed_icmp_pkt_too_big; + uint64_t errors_total_malformed_tcp; + uint64_t errors_total_malformed_udp; + uint64_t errors_total_icmp_echo_replies; + uint64_t errors_total_malformed_encapsulation; + uint64_t errors_total_encap_adjust_failed; + uint64_t errors_total_encap_buffer_too_small; + uint64_t errors_total_redirect_loop; + uint64_t errors_total_encap_mtu_violate; +} metrics_t; + +typedef enum { + INVALID = 0, + UNKNOWN, + ECHO_REQUEST, + SYN, + SYN_COOKIE, + ESTABLISHED, +} verdict_t; + +typedef struct { + uint16_t src, dst; +} flow_ports_t; + +_Static_assert( + sizeof(flow_ports_t) != + offsetofend(struct bpf_sock_tuple, ipv4.dport) - + offsetof(struct bpf_sock_tuple, ipv4.sport) - 1, + "flow_ports_t must match sport and dport in struct bpf_sock_tuple"); +_Static_assert( + sizeof(flow_ports_t) != + offsetofend(struct bpf_sock_tuple, ipv6.dport) - + offsetof(struct bpf_sock_tuple, ipv6.sport) - 1, + "flow_ports_t must match sport and dport in struct bpf_sock_tuple"); + +struct iphdr_info { + void *hdr; + __u64 len; +}; + +typedef int ret_t; + +/* This is a bit of a hack. We need a return value which allows us to + * indicate that the regular flow of the program should continue, + * while allowing functions to use XDP_PASS and XDP_DROP, etc. + */ +static const ret_t CONTINUE_PROCESSING = -1; + +/* Convenience macro to call functions which return ret_t. + */ +#define MAYBE_RETURN(x) \ + do { \ + ret_t __ret = x; \ + if (__ret != CONTINUE_PROCESSING) \ + return __ret; \ + } while (0) + +static bool ipv4_is_fragment(const struct iphdr *ip) +{ + uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK); + return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0; +} + +static int pkt_parse_ipv4(struct bpf_dynptr *dynptr, __u64 *offset, struct iphdr *iphdr) +{ + if (bpf_dynptr_read(iphdr, sizeof(*iphdr), dynptr, *offset, 0)) + return -1; + + *offset += sizeof(*iphdr); + + if (iphdr->ihl < 5) + return -1; + + /* skip ipv4 options */ + *offset += (iphdr->ihl - 5) * 4; + + return 0; +} + +/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */ +static bool pkt_parse_icmp_l4_ports(struct bpf_dynptr *dynptr, __u64 *offset, flow_ports_t *ports) +{ + if (bpf_dynptr_read(ports, sizeof(*ports), dynptr, *offset, 0)) + return false; + + *offset += sizeof(*ports); + + /* Ports in the L4 headers are reversed, since we are parsing an ICMP + * payload which is going towards the eyeball. + */ + uint16_t dst = ports->src; + ports->src = ports->dst; + ports->dst = dst; + return true; +} + +static uint16_t pkt_checksum_fold(uint32_t csum) +{ + /* The highest reasonable value for an IPv4 header + * checksum requires two folds, so we just do that always. + */ + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + return (uint16_t)~csum; +} + +static void pkt_ipv4_checksum(struct iphdr *iph) +{ + iph->check = 0; + + /* An IP header without options is 20 bytes. Two of those + * are the checksum, which we always set to zero. Hence, + * the maximum accumulated value is 18 / 2 * 0xffff = 0x8fff7, + * which fits in 32 bit. + */ + _Static_assert(sizeof(struct iphdr) == 20, "iphdr must be 20 bytes"); + uint32_t acc = 0; + uint16_t *ipw = (uint16_t *)iph; + + for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) + acc += ipw[i]; + + iph->check = pkt_checksum_fold(acc); +} + +static bool pkt_skip_ipv6_extension_headers(struct bpf_dynptr *dynptr, __u64 *offset, + const struct ipv6hdr *ipv6, uint8_t *upper_proto, + bool *is_fragment) +{ + /* We understand five extension headers. + * https://tools.ietf.org/html/rfc8200#section-4.1 states that all + * headers should occur once, except Destination Options, which may + * occur twice. Hence we give up after 6 headers. + */ + struct { + uint8_t next; + uint8_t len; + } exthdr = { + .next = ipv6->nexthdr, + }; + *is_fragment = false; + + for (int i = 0; i < 6; i++) { + switch (exthdr.next) { + case IPPROTO_FRAGMENT: + *is_fragment = true; + /* NB: We don't check that hdrlen == 0 as per spec. */ + /* fallthrough; */ + + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + case IPPROTO_MH: + if (bpf_dynptr_read(&exthdr, sizeof(exthdr), dynptr, *offset, 0)) + return false; + + /* hdrlen is in 8-octet units, and excludes the first 8 octets. */ + *offset += (exthdr.len + 1) * 8; + + /* Decode next header */ + break; + + default: + /* The next header is not one of the known extension + * headers, treat it as the upper layer header. + * + * This handles IPPROTO_NONE. + * + * Encapsulating Security Payload (50) and Authentication + * Header (51) also end up here (and will trigger an + * unknown proto error later). They have a custom header + * format and seem too esoteric to care about. + */ + *upper_proto = exthdr.next; + return true; + } + } + + /* We never found an upper layer header. */ + return false; +} + +static int pkt_parse_ipv6(struct bpf_dynptr *dynptr, __u64 *offset, struct ipv6hdr *ipv6, + uint8_t *proto, bool *is_fragment) +{ + if (bpf_dynptr_read(ipv6, sizeof(*ipv6), dynptr, *offset, 0)) + return -1; + + *offset += sizeof(*ipv6); + + if (!pkt_skip_ipv6_extension_headers(dynptr, offset, ipv6, proto, is_fragment)) + return -1; + + return 0; +} + +/* Global metrics, per CPU + */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, unsigned int); + __type(value, metrics_t); +} metrics_map SEC(".maps"); + +static metrics_t *get_global_metrics(void) +{ + uint64_t key = 0; + return bpf_map_lookup_elem(&metrics_map, &key); +} + +static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap) +{ + const int payload_off = + sizeof(*encap) + + sizeof(struct in_addr) * encap->unigue.hop_count; + int32_t encap_overhead = payload_off - sizeof(struct ethhdr); + + /* Changing the ethertype if the encapsulated packet is ipv6 */ + if (encap->gue.proto_ctype == IPPROTO_IPV6) + encap->eth.h_proto = bpf_htons(ETH_P_IPV6); + + if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC, + BPF_F_ADJ_ROOM_FIXED_GSO | + BPF_F_ADJ_ROOM_NO_CSUM_RESET) || + bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC)) + return TC_ACT_SHOT; + + return bpf_redirect(skb->ifindex, BPF_F_INGRESS); +} + +static ret_t forward_with_gre(struct __sk_buff *skb, struct bpf_dynptr *dynptr, + encap_headers_t *encap, struct in_addr *next_hop, + metrics_t *metrics) +{ + const int payload_off = + sizeof(*encap) + + sizeof(struct in_addr) * encap->unigue.hop_count; + int32_t encap_overhead = + payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr); + int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead; + __u8 encap_buffer[sizeof(encap_gre_t)] = {}; + uint16_t proto = ETH_P_IP; + uint32_t mtu_len = 0; + encap_gre_t *encap_gre; + + metrics->forwarded_packets_total_gre++; + + /* Loop protection: the inner packet's TTL is decremented as a safeguard + * against any forwarding loop. As the only interesting field is the TTL + * hop limit for IPv6, it is easier to use bpf_skb_load_bytes/bpf_skb_store_bytes + * as they handle the split packets if needed (no need for the data to be + * in the linear section). + */ + if (encap->gue.proto_ctype == IPPROTO_IPV6) { + proto = ETH_P_IPV6; + uint8_t ttl; + int rc; + + rc = bpf_skb_load_bytes( + skb, payload_off + offsetof(struct ipv6hdr, hop_limit), + &ttl, 1); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (ttl == 0) { + metrics->errors_total_redirect_loop++; + return TC_ACT_SHOT; + } + + ttl--; + rc = bpf_skb_store_bytes( + skb, payload_off + offsetof(struct ipv6hdr, hop_limit), + &ttl, 1, 0); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + } else { + uint8_t ttl; + int rc; + + rc = bpf_skb_load_bytes( + skb, payload_off + offsetof(struct iphdr, ttl), &ttl, + 1); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (ttl == 0) { + metrics->errors_total_redirect_loop++; + return TC_ACT_SHOT; + } + + /* IPv4 also has a checksum to patch. While the TTL is only one byte, + * this function only works for 2 and 4 bytes arguments (the result is + * the same). + */ + rc = bpf_l3_csum_replace( + skb, payload_off + offsetof(struct iphdr, check), ttl, + ttl - 1, 2); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + ttl--; + rc = bpf_skb_store_bytes( + skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 1, + 0); + if (rc != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + } + + if (bpf_check_mtu(skb, skb->ifindex, &mtu_len, delta, 0)) { + metrics->errors_total_encap_mtu_violate++; + return TC_ACT_SHOT; + } + + if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET, + BPF_F_ADJ_ROOM_FIXED_GSO | + BPF_F_ADJ_ROOM_NO_CSUM_RESET) || + bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) { + metrics->errors_total_encap_adjust_failed++; + return TC_ACT_SHOT; + } + + if (bpf_skb_pull_data(skb, sizeof(encap_gre_t))) { + metrics->errors_total_encap_buffer_too_small++; + return TC_ACT_SHOT; + } + + encap_gre = bpf_dynptr_slice_rdwr(dynptr, 0, encap_buffer, sizeof(encap_buffer)); + if (!encap_gre) { + metrics->errors_total_encap_buffer_too_small++; + return TC_ACT_SHOT; + } + + encap_gre->ip.protocol = IPPROTO_GRE; + encap_gre->ip.daddr = next_hop->s_addr; + encap_gre->ip.saddr = ENCAPSULATION_IP; + encap_gre->ip.tot_len = + bpf_htons(bpf_ntohs(encap_gre->ip.tot_len) + delta); + encap_gre->gre.flags = 0; + encap_gre->gre.protocol = bpf_htons(proto); + pkt_ipv4_checksum((void *)&encap_gre->ip); + + if (encap_gre == encap_buffer) + bpf_dynptr_write(dynptr, 0, encap_buffer, sizeof(encap_buffer), 0); + + return bpf_redirect(skb->ifindex, 0); +} + +static ret_t forward_to_next_hop(struct __sk_buff *skb, struct bpf_dynptr *dynptr, + encap_headers_t *encap, struct in_addr *next_hop, + metrics_t *metrics) +{ + /* swap L2 addresses */ + /* This assumes that packets are received from a router. + * So just swapping the MAC addresses here will make the packet go back to + * the router, which will send it to the appropriate machine. + */ + unsigned char temp[ETH_ALEN]; + memcpy(temp, encap->eth.h_dest, sizeof(temp)); + memcpy(encap->eth.h_dest, encap->eth.h_source, + sizeof(encap->eth.h_dest)); + memcpy(encap->eth.h_source, temp, sizeof(encap->eth.h_source)); + + if (encap->unigue.next_hop == encap->unigue.hop_count - 1 && + encap->unigue.last_hop_gre) { + return forward_with_gre(skb, dynptr, encap, next_hop, metrics); + } + + metrics->forwarded_packets_total_gue++; + uint32_t old_saddr = encap->ip.saddr; + encap->ip.saddr = encap->ip.daddr; + encap->ip.daddr = next_hop->s_addr; + if (encap->unigue.next_hop < encap->unigue.hop_count) { + encap->unigue.next_hop++; + } + + /* Remove ip->saddr, add next_hop->s_addr */ + const uint64_t off = offsetof(typeof(*encap), ip.check); + int ret = bpf_l3_csum_replace(skb, off, old_saddr, next_hop->s_addr, 4); + if (ret < 0) { + return TC_ACT_SHOT; + } + + return bpf_redirect(skb->ifindex, 0); +} + +static ret_t skip_next_hops(__u64 *offset, int n) +{ + __u32 res; + switch (n) { + case 1: + *offset += sizeof(struct in_addr); + case 0: + return CONTINUE_PROCESSING; + + default: + return TC_ACT_SHOT; + } +} + +/* Get the next hop from the GLB header. + * + * Sets next_hop->s_addr to 0 if there are no more hops left. + * pkt is positioned just after the variable length GLB header + * iff the call is successful. + */ +static ret_t get_next_hop(struct bpf_dynptr *dynptr, __u64 *offset, encap_headers_t *encap, + struct in_addr *next_hop) +{ + if (encap->unigue.next_hop > encap->unigue.hop_count) + return TC_ACT_SHOT; + + /* Skip "used" next hops. */ + MAYBE_RETURN(skip_next_hops(offset, encap->unigue.next_hop)); + + if (encap->unigue.next_hop == encap->unigue.hop_count) { + /* No more next hops, we are at the end of the GLB header. */ + next_hop->s_addr = 0; + return CONTINUE_PROCESSING; + } + + if (bpf_dynptr_read(next_hop, sizeof(*next_hop), dynptr, *offset, 0)) + return TC_ACT_SHOT; + + *offset += sizeof(*next_hop); + + /* Skip the remainig next hops (may be zero). */ + return skip_next_hops(offset, encap->unigue.hop_count - encap->unigue.next_hop - 1); +} + +/* Fill a bpf_sock_tuple to be used with the socket lookup functions. + * This is a kludge that let's us work around verifier limitations: + * + * fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321) + * + * clang will substitue a costant for sizeof, which allows the verifier + * to track it's value. Based on this, it can figure out the constant + * return value, and calling code works while still being "generic" to + * IPv4 and IPv6. + */ +static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph, + uint64_t iphlen, uint16_t sport, uint16_t dport) +{ + switch (iphlen) { + case sizeof(struct iphdr): { + struct iphdr *ipv4 = (struct iphdr *)iph; + tuple->ipv4.daddr = ipv4->daddr; + tuple->ipv4.saddr = ipv4->saddr; + tuple->ipv4.sport = sport; + tuple->ipv4.dport = dport; + return sizeof(tuple->ipv4); + } + + case sizeof(struct ipv6hdr): { + struct ipv6hdr *ipv6 = (struct ipv6hdr *)iph; + memcpy(&tuple->ipv6.daddr, &ipv6->daddr, + sizeof(tuple->ipv6.daddr)); + memcpy(&tuple->ipv6.saddr, &ipv6->saddr, + sizeof(tuple->ipv6.saddr)); + tuple->ipv6.sport = sport; + tuple->ipv6.dport = dport; + return sizeof(tuple->ipv6); + } + + default: + return 0; + } +} + +static verdict_t classify_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, + uint64_t tuplen, void *iph, struct tcphdr *tcp) +{ + struct bpf_sock *sk = + bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); + + if (sk == NULL) + return UNKNOWN; + + if (sk->state != BPF_TCP_LISTEN) { + bpf_sk_release(sk); + return ESTABLISHED; + } + + if (iph != NULL && tcp != NULL) { + /* Kludge: we've run out of arguments, but need the length of the ip header. */ + uint64_t iphlen = sizeof(struct iphdr); + + if (tuplen == sizeof(tuple->ipv6)) + iphlen = sizeof(struct ipv6hdr); + + if (bpf_tcp_check_syncookie(sk, iph, iphlen, tcp, + sizeof(*tcp)) == 0) { + bpf_sk_release(sk); + return SYN_COOKIE; + } + } + + bpf_sk_release(sk); + return UNKNOWN; +} + +static verdict_t classify_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, uint64_t tuplen) +{ + struct bpf_sock *sk = + bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); + + if (sk == NULL) + return UNKNOWN; + + if (sk->state == BPF_TCP_ESTABLISHED) { + bpf_sk_release(sk); + return ESTABLISHED; + } + + bpf_sk_release(sk); + return UNKNOWN; +} + +static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, struct bpf_sock_tuple *tuple, + uint64_t tuplen, metrics_t *metrics) +{ + switch (proto) { + case IPPROTO_TCP: + return classify_tcp(skb, tuple, tuplen, NULL, NULL); + + case IPPROTO_UDP: + return classify_udp(skb, tuple, tuplen); + + default: + metrics->errors_total_malformed_icmp++; + return INVALID; + } +} + +static verdict_t process_icmpv4(struct __sk_buff *skb, struct bpf_dynptr *dynptr, __u64 *offset, + metrics_t *metrics) +{ + struct icmphdr icmp; + struct iphdr ipv4; + + if (bpf_dynptr_read(&icmp, sizeof(icmp), dynptr, *offset, 0)) { + metrics->errors_total_malformed_icmp++; + return INVALID; + } + + *offset += sizeof(icmp); + + /* We should never receive encapsulated echo replies. */ + if (icmp.type == ICMP_ECHOREPLY) { + metrics->errors_total_icmp_echo_replies++; + return INVALID; + } + + if (icmp.type == ICMP_ECHO) + return ECHO_REQUEST; + + if (icmp.type != ICMP_DEST_UNREACH || icmp.code != ICMP_FRAG_NEEDED) { + metrics->errors_total_unwanted_icmp++; + return INVALID; + } + + if (pkt_parse_ipv4(dynptr, offset, &ipv4)) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + /* The source address in the outer IP header is from the entity that + * originated the ICMP message. Use the original IP header to restore + * the correct flow tuple. + */ + struct bpf_sock_tuple tuple; + tuple.ipv4.saddr = ipv4.daddr; + tuple.ipv4.daddr = ipv4.saddr; + + if (!pkt_parse_icmp_l4_ports(dynptr, offset, (flow_ports_t *)&tuple.ipv4.sport)) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + return classify_icmp(skb, ipv4.protocol, &tuple, + sizeof(tuple.ipv4), metrics); +} + +static verdict_t process_icmpv6(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb, + metrics_t *metrics) +{ + struct bpf_sock_tuple tuple; + struct ipv6hdr ipv6; + struct icmp6hdr icmp6; + bool is_fragment; + uint8_t l4_proto; + + if (bpf_dynptr_read(&icmp6, sizeof(icmp6), dynptr, *offset, 0)) { + metrics->errors_total_malformed_icmp++; + return INVALID; + } + + /* We should never receive encapsulated echo replies. */ + if (icmp6.icmp6_type == ICMPV6_ECHO_REPLY) { + metrics->errors_total_icmp_echo_replies++; + return INVALID; + } + + if (icmp6.icmp6_type == ICMPV6_ECHO_REQUEST) { + return ECHO_REQUEST; + } + + if (icmp6.icmp6_type != ICMPV6_PKT_TOOBIG) { + metrics->errors_total_unwanted_icmp++; + return INVALID; + } + + if (pkt_parse_ipv6(dynptr, offset, &ipv6, &l4_proto, &is_fragment)) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + if (is_fragment) { + metrics->errors_total_fragmented_ip++; + return INVALID; + } + + /* Swap source and dest addresses. */ + memcpy(&tuple.ipv6.saddr, &ipv6.daddr, sizeof(tuple.ipv6.saddr)); + memcpy(&tuple.ipv6.daddr, &ipv6.saddr, sizeof(tuple.ipv6.daddr)); + + if (!pkt_parse_icmp_l4_ports(dynptr, offset, (flow_ports_t *)&tuple.ipv6.sport)) { + metrics->errors_total_malformed_icmp_pkt_too_big++; + return INVALID; + } + + return classify_icmp(skb, l4_proto, &tuple, sizeof(tuple.ipv6), + metrics); +} + +static verdict_t process_tcp(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb, + struct iphdr_info *info, metrics_t *metrics) +{ + struct bpf_sock_tuple tuple; + struct tcphdr tcp; + uint64_t tuplen; + + metrics->l4_protocol_packets_total_tcp++; + + if (bpf_dynptr_read(&tcp, sizeof(tcp), dynptr, *offset, 0)) { + metrics->errors_total_malformed_tcp++; + return INVALID; + } + + *offset += sizeof(tcp); + + if (tcp.syn) + return SYN; + + tuplen = fill_tuple(&tuple, info->hdr, info->len, tcp.source, tcp.dest); + return classify_tcp(skb, &tuple, tuplen, info->hdr, &tcp); +} + +static verdict_t process_udp(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb, + struct iphdr_info *info, metrics_t *metrics) +{ + struct bpf_sock_tuple tuple; + struct udphdr udph; + uint64_t tuplen; + + metrics->l4_protocol_packets_total_udp++; + + if (bpf_dynptr_read(&udph, sizeof(udph), dynptr, *offset, 0)) { + metrics->errors_total_malformed_udp++; + return INVALID; + } + *offset += sizeof(udph); + + tuplen = fill_tuple(&tuple, info->hdr, info->len, udph.source, udph.dest); + return classify_udp(skb, &tuple, tuplen); +} + +static verdict_t process_ipv4(struct __sk_buff *skb, struct bpf_dynptr *dynptr, + __u64 *offset, metrics_t *metrics) +{ + struct iphdr ipv4; + struct iphdr_info info = { + .hdr = &ipv4, + .len = sizeof(ipv4), + }; + + metrics->l3_protocol_packets_total_ipv4++; + + if (pkt_parse_ipv4(dynptr, offset, &ipv4)) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (ipv4.version != 4) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (ipv4_is_fragment(&ipv4)) { + metrics->errors_total_fragmented_ip++; + return INVALID; + } + + switch (ipv4.protocol) { + case IPPROTO_ICMP: + return process_icmpv4(skb, dynptr, offset, metrics); + + case IPPROTO_TCP: + return process_tcp(dynptr, offset, skb, &info, metrics); + + case IPPROTO_UDP: + return process_udp(dynptr, offset, skb, &info, metrics); + + default: + metrics->errors_total_unknown_l4_proto++; + return INVALID; + } +} + +static verdict_t process_ipv6(struct __sk_buff *skb, struct bpf_dynptr *dynptr, + __u64 *offset, metrics_t *metrics) +{ + struct ipv6hdr ipv6; + struct iphdr_info info = { + .hdr = &ipv6, + .len = sizeof(ipv6), + }; + uint8_t l4_proto; + bool is_fragment; + + metrics->l3_protocol_packets_total_ipv6++; + + if (pkt_parse_ipv6(dynptr, offset, &ipv6, &l4_proto, &is_fragment)) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (ipv6.version != 6) { + metrics->errors_total_malformed_ip++; + return INVALID; + } + + if (is_fragment) { + metrics->errors_total_fragmented_ip++; + return INVALID; + } + + switch (l4_proto) { + case IPPROTO_ICMPV6: + return process_icmpv6(dynptr, offset, skb, metrics); + + case IPPROTO_TCP: + return process_tcp(dynptr, offset, skb, &info, metrics); + + case IPPROTO_UDP: + return process_udp(dynptr, offset, skb, &info, metrics); + + default: + metrics->errors_total_unknown_l4_proto++; + return INVALID; + } +} + +SEC("tc") +int cls_redirect(struct __sk_buff *skb) +{ + __u8 encap_buffer[sizeof(encap_headers_t)] = {}; + struct bpf_dynptr dynptr; + struct in_addr next_hop; + /* Tracks offset of the dynptr. This will be unnecessary once + * bpf_dynptr_advance() is available. + */ + __u64 off = 0; + ret_t ret; + + bpf_dynptr_from_skb(skb, 0, &dynptr); + + metrics_t *metrics = get_global_metrics(); + if (metrics == NULL) + return TC_ACT_SHOT; + + metrics->processed_packets_total++; + + /* Pass bogus packets as long as we're not sure they're + * destined for us. + */ + if (skb->protocol != bpf_htons(ETH_P_IP)) + return TC_ACT_OK; + + encap_headers_t *encap; + + /* Make sure that all encapsulation headers are available in + * the linear portion of the skb. This makes it easy to manipulate them. + */ + if (bpf_skb_pull_data(skb, sizeof(*encap))) + return TC_ACT_OK; + + encap = bpf_dynptr_slice_rdwr(&dynptr, 0, encap_buffer, sizeof(encap_buffer)); + if (!encap) + return TC_ACT_OK; + + off += sizeof(*encap); + + if (encap->ip.ihl != 5) + /* We never have any options. */ + return TC_ACT_OK; + + if (encap->ip.daddr != ENCAPSULATION_IP || + encap->ip.protocol != IPPROTO_UDP) + return TC_ACT_OK; + + /* TODO Check UDP length? */ + if (encap->udp.dest != ENCAPSULATION_PORT) + return TC_ACT_OK; + + /* We now know that the packet is destined to us, we can + * drop bogus ones. + */ + if (ipv4_is_fragment((void *)&encap->ip)) { + metrics->errors_total_fragmented_ip++; + return TC_ACT_SHOT; + } + + if (encap->gue.variant != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->gue.control != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->gue.flags != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->gue.hlen != + sizeof(encap->unigue) / 4 + encap->unigue.hop_count) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->unigue.version != 0) { + metrics->errors_total_malformed_encapsulation++; + return TC_ACT_SHOT; + } + + if (encap->unigue.reserved != 0) + return TC_ACT_SHOT; + + MAYBE_RETURN(get_next_hop(&dynptr, &off, encap, &next_hop)); + + if (next_hop.s_addr == 0) { + metrics->accepted_packets_total_last_hop++; + return accept_locally(skb, encap); + } + + verdict_t verdict; + switch (encap->gue.proto_ctype) { + case IPPROTO_IPIP: + verdict = process_ipv4(skb, &dynptr, &off, metrics); + break; + + case IPPROTO_IPV6: + verdict = process_ipv6(skb, &dynptr, &off, metrics); + break; + + default: + metrics->errors_total_unknown_l3_proto++; + return TC_ACT_SHOT; + } + + switch (verdict) { + case INVALID: + /* metrics have already been bumped */ + return TC_ACT_SHOT; + + case UNKNOWN: + return forward_to_next_hop(skb, &dynptr, encap, &next_hop, metrics); + + case ECHO_REQUEST: + metrics->accepted_packets_total_icmp_echo_request++; + break; + + case SYN: + if (encap->unigue.forward_syn) { + return forward_to_next_hop(skb, &dynptr, encap, &next_hop, + metrics); + } + + metrics->accepted_packets_total_syn++; + break; + + case SYN_COOKIE: + metrics->accepted_packets_total_syn_cookies++; + break; + + case ESTABLISHED: + metrics->accepted_packets_total_established++; + break; + } + + ret = accept_locally(skb, encap); + + if (encap == encap_buffer) + bpf_dynptr_write(&dynptr, 0, encap_buffer, sizeof(encap_buffer), 0); + + return ret; +} diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c index 2fbef3cc7ad8..2dde8e3fe4c9 100644 --- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c +++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c @@ -48,7 +48,7 @@ SEC("?lsm.s/bpf") __failure __msg("arg#0 expected pointer to stack or dynptr_ptr") int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size) { - unsigned long val; + unsigned long val = 0; return bpf_verify_pkcs7_signature((struct bpf_dynptr *)val, (struct bpf_dynptr *)val, NULL); diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c new file mode 100644 index 000000000000..f997f5080748 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c @@ -0,0 +1,487 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <bpf/bpf_helpers.h> +#include "test_iptunnel_common.h" +#include <bpf/bpf_endian.h> + +#include "bpf_kfuncs.h" + +static __always_inline __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static __noinline u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +#define PCKT_FRAGMENTED 65343 +#define IPV4_HDR_LEN_NO_OPT 20 +#define IPV4_PLUS_ICMP_HDR 28 +#define IPV6_PLUS_ICMP_HDR 48 +#define RING_SIZE 2 +#define MAX_VIPS 12 +#define MAX_REALS 5 +#define CTL_MAP_SIZE 16 +#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE) +#define F_IPV6 (1 << 0) +#define F_HASH_NO_SRC_PORT (1 << 0) +#define F_ICMP (1 << 0) +#define F_SYN_SET (1 << 1) + +struct packet_description { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct vip_stats { + __u64 bytes; + __u64 pkts; +}; + +struct eth_hdr { + unsigned char eth_dest[ETH_ALEN]; + unsigned char eth_source[ETH_ALEN]; + unsigned short eth_proto; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_VIPS); + __type(key, struct vip); + __type(value, struct vip_meta); +} vip_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, CH_RINGS_SIZE); + __type(key, __u32); + __type(value, __u32); +} ch_rings SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, MAX_REALS); + __type(key, __u32); + __type(value, struct real_definition); +} reals SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, MAX_VIPS); + __type(key, __u32); + __type(value, struct vip_stats); +} stats SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, CTL_MAP_SIZE); + __type(key, __u32); + __type(value, struct ctl_value); +} ctl_array SEC(".maps"); + +static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6) +{ + if (ipv6) + return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS), + pckt->ports, CH_RINGS_SIZE); + else + return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE); +} + +static __noinline bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6) +{ + __u32 hash = get_packet_hash(pckt, is_ipv6); + __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE; + __u32 *real_pos; + + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) + return false; + + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return false; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return false; + return true; +} + +static __noinline int parse_icmpv6(struct bpf_dynptr *skb_ptr, __u64 off, + struct packet_description *pckt) +{ + __u8 buffer[sizeof(struct ipv6hdr)] = {}; + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!icmp_hdr) + return TC_ACT_SHOT; + + if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG) + return TC_ACT_OK; + off += sizeof(struct icmp6hdr); + ip6h = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!ip6h) + return TC_ACT_SHOT; + pckt->proto = ip6h->nexthdr; + pckt->flags |= F_ICMP; + memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16); + memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16); + return TC_ACT_UNSPEC; +} + +static __noinline int parse_icmp(struct bpf_dynptr *skb_ptr, __u64 off, + struct packet_description *pckt) +{ + __u8 buffer_icmp[sizeof(struct iphdr)] = {}; + __u8 buffer_ip[sizeof(struct iphdr)] = {}; + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = bpf_dynptr_slice(skb_ptr, off, buffer_icmp, sizeof(buffer_icmp)); + if (!icmp_hdr) + return TC_ACT_SHOT; + if (icmp_hdr->type != ICMP_DEST_UNREACH || + icmp_hdr->code != ICMP_FRAG_NEEDED) + return TC_ACT_OK; + off += sizeof(struct icmphdr); + iph = bpf_dynptr_slice(skb_ptr, off, buffer_ip, sizeof(buffer_ip)); + if (!iph || iph->ihl != 5) + return TC_ACT_SHOT; + pckt->proto = iph->protocol; + pckt->flags |= F_ICMP; + pckt->src = iph->daddr; + pckt->dst = iph->saddr; + return TC_ACT_UNSPEC; +} + +static __noinline bool parse_udp(struct bpf_dynptr *skb_ptr, __u64 off, + struct packet_description *pckt) +{ + __u8 buffer[sizeof(struct udphdr)] = {}; + struct udphdr *udp; + + udp = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!udp) + return false; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = udp->source; + pckt->port16[1] = udp->dest; + } else { + pckt->port16[0] = udp->dest; + pckt->port16[1] = udp->source; + } + return true; +} + +static __noinline bool parse_tcp(struct bpf_dynptr *skb_ptr, __u64 off, + struct packet_description *pckt) +{ + __u8 buffer[sizeof(struct tcphdr)] = {}; + struct tcphdr *tcp; + + tcp = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!tcp) + return false; + + if (tcp->syn) + pckt->flags |= F_SYN_SET; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = tcp->source; + pckt->port16[1] = tcp->dest; + } else { + pckt->port16[0] = tcp->dest; + pckt->port16[1] = tcp->source; + } + return true; +} + +static __noinline int process_packet(struct bpf_dynptr *skb_ptr, + struct eth_hdr *eth, __u64 off, + bool is_ipv6, struct __sk_buff *skb) +{ + struct packet_description pckt = {}; + struct bpf_tunnel_key tkey = {}; + struct vip_stats *data_stats; + struct real_definition *dst; + struct vip_meta *vip_info; + struct ctl_value *cval; + __u32 v4_intf_pos = 1; + __u32 v6_intf_pos = 2; + struct ipv6hdr *ip6h; + struct vip vip = {}; + struct iphdr *iph; + int tun_flag = 0; + __u16 pkt_bytes; + __u64 iph_len; + __u32 ifindex; + __u8 protocol; + __u32 vip_num; + int action; + + tkey.tunnel_ttl = 64; + if (is_ipv6) { + __u8 buffer[sizeof(struct ipv6hdr)] = {}; + + ip6h = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!ip6h) + return TC_ACT_SHOT; + + iph_len = sizeof(struct ipv6hdr); + protocol = ip6h->nexthdr; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(ip6h->payload_len); + off += iph_len; + if (protocol == IPPROTO_FRAGMENT) { + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_ICMPV6) { + action = parse_icmpv6(skb_ptr, off, &pckt); + if (action >= 0) + return action; + off += IPV6_PLUS_ICMP_HDR; + } else { + memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16); + memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16); + } + } else { + __u8 buffer[sizeof(struct iphdr)] = {}; + + iph = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer)); + if (!iph || iph->ihl != 5) + return TC_ACT_SHOT; + + protocol = iph->protocol; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(iph->tot_len); + off += IPV4_HDR_LEN_NO_OPT; + + if (iph->frag_off & PCKT_FRAGMENTED) + return TC_ACT_SHOT; + if (protocol == IPPROTO_ICMP) { + action = parse_icmp(skb_ptr, off, &pckt); + if (action >= 0) + return action; + off += IPV4_PLUS_ICMP_HDR; + } else { + pckt.src = iph->saddr; + pckt.dst = iph->daddr; + } + } + protocol = pckt.proto; + + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(skb_ptr, off, &pckt)) + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(skb_ptr, off, &pckt)) + return TC_ACT_SHOT; + } else { + return TC_ACT_SHOT; + } + + if (is_ipv6) + memcpy(vip.daddr.v6, pckt.dstv6, 16); + else + vip.daddr.v4 = pckt.dst; + + vip.dport = pckt.port16[1]; + vip.protocol = pckt.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.dport = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return TC_ACT_SHOT; + pckt.port16[1] = 0; + } + + if (vip_info->flags & F_HASH_NO_SRC_PORT) + pckt.port16[0] = 0; + + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6)) + return TC_ACT_SHOT; + + if (dst->flags & F_IPV6) { + cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + memcpy(tkey.remote_ipv6, dst->dstv6, 16); + tun_flag = BPF_F_TUNINFO_IPV6; + } else { + cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + tkey.remote_ipv4 = dst->dst; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return TC_ACT_SHOT; + data_stats->pkts++; + data_stats->bytes += pkt_bytes; + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag); + *(u32 *)eth->eth_dest = tkey.remote_ipv4; + return bpf_redirect(ifindex, 0); +} + +SEC("tc") +int balancer_ingress(struct __sk_buff *ctx) +{ + __u8 buffer[sizeof(struct eth_hdr)] = {}; + struct bpf_dynptr ptr; + struct eth_hdr *eth; + __u32 eth_proto; + __u32 nh_off; + int err; + + nh_off = sizeof(struct eth_hdr); + + bpf_dynptr_from_skb(ctx, 0, &ptr); + eth = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer)); + if (!eth) + return TC_ACT_SHOT; + eth_proto = eth->eth_proto; + if (eth_proto == bpf_htons(ETH_P_IP)) + err = process_packet(&ptr, eth, nh_off, false, ctx); + else if (eth_proto == bpf_htons(ETH_P_IPV6)) + err = process_packet(&ptr, eth, nh_off, true, ctx); + else + return TC_ACT_SHOT; + + if (eth == buffer) + bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0); + + return err; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c new file mode 100644 index 000000000000..79bab9b50e9e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* This parsing logic is taken from the open source library katran, a layer 4 + * load balancer. + * + * This code logic using dynptrs can be found in test_parse_tcp_hdr_opt_dynptr.c + * + * https://github.com/facebookincubator/katran/blob/main/katran/lib/bpf/pckt_parsing.h + */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <linux/tcp.h> +#include <stdbool.h> +#include <linux/ipv6.h> +#include <linux/if_ether.h> +#include "test_tcp_hdr_options.h" + +char _license[] SEC("license") = "GPL"; + +/* Kind number used for experiments */ +const __u32 tcp_hdr_opt_kind_tpr = 0xFD; +/* Length of the tcp header option */ +const __u32 tcp_hdr_opt_len_tpr = 6; +/* maximum number of header options to check to lookup server_id */ +const __u32 tcp_hdr_opt_max_opt_checks = 15; + +__u32 server_id; + +struct hdr_opt_state { + __u32 server_id; + __u8 byte_offset; + __u8 hdr_bytes_remaining; +}; + +static int parse_hdr_opt(const struct xdp_md *xdp, struct hdr_opt_state *state) +{ + const void *data = (void *)(long)xdp->data; + const void *data_end = (void *)(long)xdp->data_end; + __u8 *tcp_opt, kind, hdr_len; + + tcp_opt = (__u8 *)(data + state->byte_offset); + if (tcp_opt + 1 > data_end) + return -1; + + kind = tcp_opt[0]; + + if (kind == TCPOPT_EOL) + return -1; + + if (kind == TCPOPT_NOP) { + state->hdr_bytes_remaining--; + state->byte_offset++; + return 0; + } + + if (state->hdr_bytes_remaining < 2 || + tcp_opt + sizeof(__u8) + sizeof(__u8) > data_end) + return -1; + + hdr_len = tcp_opt[1]; + if (hdr_len > state->hdr_bytes_remaining) + return -1; + + if (kind == tcp_hdr_opt_kind_tpr) { + if (hdr_len != tcp_hdr_opt_len_tpr) + return -1; + + if (tcp_opt + tcp_hdr_opt_len_tpr > data_end) + return -1; + + state->server_id = *(__u32 *)&tcp_opt[2]; + return 1; + } + + state->hdr_bytes_remaining -= hdr_len; + state->byte_offset += hdr_len; + return 0; +} + +SEC("xdp") +int xdp_ingress_v6(struct xdp_md *xdp) +{ + const void *data = (void *)(long)xdp->data; + const void *data_end = (void *)(long)xdp->data_end; + struct hdr_opt_state opt_state = {}; + __u8 tcp_hdr_opt_len = 0; + struct tcphdr *tcp_hdr; + __u64 tcp_offset = 0; + __u32 off; + int err; + + tcp_offset = sizeof(struct ethhdr) + sizeof(struct ipv6hdr); + tcp_hdr = (struct tcphdr *)(data + tcp_offset); + if (tcp_hdr + 1 > data_end) + return XDP_DROP; + + tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr); + if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr) + return XDP_DROP; + + opt_state.hdr_bytes_remaining = tcp_hdr_opt_len; + opt_state.byte_offset = sizeof(struct tcphdr) + tcp_offset; + + /* max number of bytes of options in tcp header is 40 bytes */ + for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) { + err = parse_hdr_opt(xdp, &opt_state); + + if (err || !opt_state.hdr_bytes_remaining) + break; + } + + if (!opt_state.server_id) + return XDP_DROP; + + server_id = opt_state.server_id; + + return XDP_PASS; +} diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c new file mode 100644 index 000000000000..d3b319722e30 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* This logic is lifted from a real-world use case of packet parsing, used in + * the open source library katran, a layer 4 load balancer. + * + * This test demonstrates how to parse packet contents using dynptrs. The + * original code (parsing without dynptrs) can be found in test_parse_tcp_hdr_opt.c + */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <linux/tcp.h> +#include <stdbool.h> +#include <linux/ipv6.h> +#include <linux/if_ether.h> +#include "test_tcp_hdr_options.h" +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +/* Kind number used for experiments */ +const __u32 tcp_hdr_opt_kind_tpr = 0xFD; +/* Length of the tcp header option */ +const __u32 tcp_hdr_opt_len_tpr = 6; +/* maximum number of header options to check to lookup server_id */ +const __u32 tcp_hdr_opt_max_opt_checks = 15; + +__u32 server_id; + +static int parse_hdr_opt(struct bpf_dynptr *ptr, __u32 *off, __u8 *hdr_bytes_remaining, + __u32 *server_id) +{ + __u8 *tcp_opt, kind, hdr_len; + __u8 buffer[sizeof(kind) + sizeof(hdr_len) + sizeof(*server_id)]; + __u8 *data; + + __builtin_memset(buffer, 0, sizeof(buffer)); + + data = bpf_dynptr_slice(ptr, *off, buffer, sizeof(buffer)); + if (!data) + return -1; + + kind = data[0]; + + if (kind == TCPOPT_EOL) + return -1; + + if (kind == TCPOPT_NOP) { + *off += 1; + *hdr_bytes_remaining -= 1; + return 0; + } + + if (*hdr_bytes_remaining < 2) + return -1; + + hdr_len = data[1]; + if (hdr_len > *hdr_bytes_remaining) + return -1; + + if (kind == tcp_hdr_opt_kind_tpr) { + if (hdr_len != tcp_hdr_opt_len_tpr) + return -1; + + __builtin_memcpy(server_id, (__u32 *)(data + 2), sizeof(*server_id)); + return 1; + } + + *off += hdr_len; + *hdr_bytes_remaining -= hdr_len; + return 0; +} + +SEC("xdp") +int xdp_ingress_v6(struct xdp_md *xdp) +{ + __u8 buffer[sizeof(struct tcphdr)] = {}; + __u8 hdr_bytes_remaining; + struct tcphdr *tcp_hdr; + __u8 tcp_hdr_opt_len; + int err = 0; + __u32 off; + + struct bpf_dynptr ptr; + + bpf_dynptr_from_xdp(xdp, 0, &ptr); + + off = sizeof(struct ethhdr) + sizeof(struct ipv6hdr); + + tcp_hdr = bpf_dynptr_slice(&ptr, off, buffer, sizeof(buffer)); + if (!tcp_hdr) + return XDP_DROP; + + tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr); + if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr) + return XDP_DROP; + + hdr_bytes_remaining = tcp_hdr_opt_len; + + off += sizeof(struct tcphdr); + + /* max number of bytes of options in tcp header is 40 bytes */ + for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) { + err = parse_hdr_opt(&ptr, &off, &hdr_bytes_remaining, &server_id); + + if (err || !hdr_bytes_remaining) + break; + } + + if (!server_id) + return XDP_DROP; + + return XDP_PASS; +} diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index b502e5c92e33..6ccf6d546074 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -23,8 +23,8 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off, bool *ipv4) { struct bpf_sock_tuple *result; + __u64 ihl_len = 0; __u8 proto = 0; - __u64 ihl_len; if (eth_proto == bpf_htons(ETH_P_IP)) { struct iphdr *iph = (struct iphdr *)(data + nh_off); diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 508da4a23c4f..95b4aa0928ba 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -324,11 +324,11 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb) SEC("tc") int vxlan_set_tunnel_dst(struct __sk_buff *skb) { - int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; __u32 index = 0; __u32 *local_ip = NULL; + int ret = 0; local_ip = bpf_map_lookup_elem(&local_ip_map, &index); if (!local_ip) { @@ -363,11 +363,11 @@ int vxlan_set_tunnel_dst(struct __sk_buff *skb) SEC("tc") int vxlan_set_tunnel_src(struct __sk_buff *skb) { - int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; __u32 index = 0; __u32 *local_ip = NULL; + int ret = 0; local_ip = bpf_map_lookup_elem(&local_ip_map, &index); if (!local_ip) { @@ -494,9 +494,9 @@ SEC("tc") int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb) { struct bpf_tunnel_key key; - int ret; __u32 index = 0; __u32 *local_ip; + int ret = 0; local_ip = bpf_map_lookup_elem(&local_ip_map, &index); if (!local_ip) { @@ -525,9 +525,9 @@ SEC("tc") int ip6vxlan_set_tunnel_src(struct __sk_buff *skb) { struct bpf_tunnel_key key; - int ret; __u32 index = 0; __u32 *local_ip; + int ret = 0; local_ip = bpf_map_lookup_elem(&local_ip_map, &index); if (!local_ip) { @@ -556,9 +556,9 @@ SEC("tc") int ip6vxlan_get_tunnel_src(struct __sk_buff *skb) { struct bpf_tunnel_key key; - int ret; __u32 index = 0; __u32 *local_ip; + int ret = 0; local_ip = bpf_map_lookup_elem(&local_ip_map, &index); if (!local_ip) { diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c new file mode 100644 index 000000000000..7521a805b506 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta */ +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/udp.h> +#include <linux/tcp.h> +#include <linux/pkt_cls.h> +#include <sys/socket.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include "test_iptunnel_common.h" +#include "bpf_kfuncs.h" + +const size_t tcphdr_sz = sizeof(struct tcphdr); +const size_t udphdr_sz = sizeof(struct udphdr); +const size_t ethhdr_sz = sizeof(struct ethhdr); +const size_t iphdr_sz = sizeof(struct iphdr); +const size_t ipv6hdr_sz = sizeof(struct ipv6hdr); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 256); + __type(key, __u32); + __type(value, __u64); +} rxcnt SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, MAX_IPTNL_ENTRIES); + __type(key, struct vip); + __type(value, struct iptnl_info); +} vip2tnl SEC(".maps"); + +static __always_inline void count_tx(__u32 protocol) +{ + __u64 *rxcnt_count; + + rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol); + if (rxcnt_count) + *rxcnt_count += 1; +} + +static __always_inline int get_dport(void *trans_data, __u8 protocol) +{ + struct tcphdr *th; + struct udphdr *uh; + + switch (protocol) { + case IPPROTO_TCP: + th = (struct tcphdr *)trans_data; + return th->dest; + case IPPROTO_UDP: + uh = (struct udphdr *)trans_data; + return uh->dest; + default: + return 0; + } +} + +static __always_inline void set_ethhdr(struct ethhdr *new_eth, + const struct ethhdr *old_eth, + const struct iptnl_info *tnl, + __be16 h_proto) +{ + memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source)); + memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest)); + new_eth->h_proto = h_proto; +} + +static __always_inline int handle_ipv4(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr) +{ + __u8 eth_buffer[ethhdr_sz + iphdr_sz + ethhdr_sz]; + __u8 iph_buffer_tcp[iphdr_sz + tcphdr_sz]; + __u8 iph_buffer_udp[iphdr_sz + udphdr_sz]; + struct bpf_dynptr new_xdp_ptr; + struct iptnl_info *tnl; + struct ethhdr *new_eth; + struct ethhdr *old_eth; + __u32 transport_hdr_sz; + struct iphdr *iph; + __u16 *next_iph; + __u16 payload_len; + struct vip vip = {}; + int dport; + __u32 csum = 0; + int i; + + __builtin_memset(eth_buffer, 0, sizeof(eth_buffer)); + __builtin_memset(iph_buffer_tcp, 0, sizeof(iph_buffer_tcp)); + __builtin_memset(iph_buffer_udp, 0, sizeof(iph_buffer_udp)); + + if (ethhdr_sz + iphdr_sz + tcphdr_sz > xdp->data_end - xdp->data) + iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_udp, sizeof(iph_buffer_udp)); + else + iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_tcp, sizeof(iph_buffer_tcp)); + + if (!iph) + return XDP_DROP; + + dport = get_dport(iph + 1, iph->protocol); + if (dport == -1) + return XDP_DROP; + + vip.protocol = iph->protocol; + vip.family = AF_INET; + vip.daddr.v4 = iph->daddr; + vip.dport = dport; + payload_len = bpf_ntohs(iph->tot_len); + + tnl = bpf_map_lookup_elem(&vip2tnl, &vip); + /* It only does v4-in-v4 */ + if (!tnl || tnl->family != AF_INET) + return XDP_PASS; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)iphdr_sz)) + return XDP_DROP; + + bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr); + new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer)); + if (!new_eth) + return XDP_DROP; + + iph = (struct iphdr *)(new_eth + 1); + old_eth = (struct ethhdr *)(iph + 1); + + set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP)); + + if (new_eth == eth_buffer) + bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0); + + iph->version = 4; + iph->ihl = iphdr_sz >> 2; + iph->frag_off = 0; + iph->protocol = IPPROTO_IPIP; + iph->check = 0; + iph->tos = 0; + iph->tot_len = bpf_htons(payload_len + iphdr_sz); + iph->daddr = tnl->daddr.v4; + iph->saddr = tnl->saddr.v4; + iph->ttl = 8; + + next_iph = (__u16 *)iph; + for (i = 0; i < iphdr_sz >> 1; i++) + csum += *next_iph++; + + iph->check = ~((csum & 0xffff) + (csum >> 16)); + + count_tx(vip.protocol); + + return XDP_TX; +} + +static __always_inline int handle_ipv6(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr) +{ + __u8 eth_buffer[ethhdr_sz + ipv6hdr_sz + ethhdr_sz]; + __u8 ip6h_buffer_tcp[ipv6hdr_sz + tcphdr_sz]; + __u8 ip6h_buffer_udp[ipv6hdr_sz + udphdr_sz]; + struct bpf_dynptr new_xdp_ptr; + struct iptnl_info *tnl; + struct ethhdr *new_eth; + struct ethhdr *old_eth; + __u32 transport_hdr_sz; + struct ipv6hdr *ip6h; + __u16 payload_len; + struct vip vip = {}; + int dport; + + __builtin_memset(eth_buffer, 0, sizeof(eth_buffer)); + __builtin_memset(ip6h_buffer_tcp, 0, sizeof(ip6h_buffer_tcp)); + __builtin_memset(ip6h_buffer_udp, 0, sizeof(ip6h_buffer_udp)); + + if (ethhdr_sz + iphdr_sz + tcphdr_sz > xdp->data_end - xdp->data) + ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_udp, sizeof(ip6h_buffer_udp)); + else + ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_tcp, sizeof(ip6h_buffer_tcp)); + + if (!ip6h) + return XDP_DROP; + + dport = get_dport(ip6h + 1, ip6h->nexthdr); + if (dport == -1) + return XDP_DROP; + + vip.protocol = ip6h->nexthdr; + vip.family = AF_INET6; + memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr)); + vip.dport = dport; + payload_len = ip6h->payload_len; + + tnl = bpf_map_lookup_elem(&vip2tnl, &vip); + /* It only does v6-in-v6 */ + if (!tnl || tnl->family != AF_INET6) + return XDP_PASS; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)ipv6hdr_sz)) + return XDP_DROP; + + bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr); + new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer)); + if (!new_eth) + return XDP_DROP; + + ip6h = (struct ipv6hdr *)(new_eth + 1); + old_eth = (struct ethhdr *)(ip6h + 1); + + set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6)); + + if (new_eth == eth_buffer) + bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0); + + ip6h->version = 6; + ip6h->priority = 0; + memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); + ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + ipv6hdr_sz); + ip6h->nexthdr = IPPROTO_IPV6; + ip6h->hop_limit = 8; + memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6)); + memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6)); + + count_tx(vip.protocol); + + return XDP_TX; +} + +SEC("xdp") +int _xdp_tx_iptunnel(struct xdp_md *xdp) +{ + __u8 buffer[ethhdr_sz]; + struct bpf_dynptr ptr; + struct ethhdr *eth; + __u16 h_proto; + + __builtin_memset(buffer, 0, sizeof(buffer)); + + bpf_dynptr_from_xdp(xdp, 0, &ptr); + eth = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer)); + if (!eth) + return XDP_DROP; + + h_proto = eth->h_proto; + + if (h_proto == bpf_htons(ETH_P_IP)) + return handle_ipv4(xdp, &ptr); + else if (h_proto == bpf_htons(ETH_P_IPV6)) + + return handle_ipv6(xdp, &ptr); + else + return XDP_DROP; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c index acda5c9cea93..9a16d95213e1 100644 --- a/tools/testing/selftests/bpf/progs/timer.c +++ b/tools/testing/selftests/bpf/progs/timer.c @@ -46,7 +46,15 @@ struct { __type(value, struct elem); } lru SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} abs_timer SEC(".maps"); + __u64 bss_data; +__u64 abs_data; __u64 err; __u64 ok; __u64 callback_check = 52; @@ -284,3 +292,40 @@ int BPF_PROG2(test2, int, a, int, b) return bpf_timer_test(); } + +/* callback for absolute timer */ +static int timer_cb3(void *map, int *key, struct bpf_timer *timer) +{ + abs_data += 6; + + if (abs_data < 12) { + bpf_timer_start(timer, bpf_ktime_get_boot_ns() + 1000, + BPF_F_TIMER_ABS); + } else { + /* Re-arm timer ~35 seconds in future */ + bpf_timer_start(timer, bpf_ktime_get_boot_ns() + (1ull << 35), + BPF_F_TIMER_ABS); + } + + return 0; +} + +SEC("fentry/bpf_fentry_test3") +int BPF_PROG2(test3, int, a) +{ + int key = 0; + struct bpf_timer *timer; + + bpf_printk("test3"); + + timer = bpf_map_lookup_elem(&abs_timer, &key); + if (timer) { + if (bpf_timer_init(timer, &abs_timer, CLOCK_BOOTTIME) != 0) + err |= 2048; + bpf_timer_set_callback(timer, timer_cb3); + bpf_timer_start(timer, bpf_ktime_get_boot_ns() + 1000, + BPF_F_TIMER_ABS); + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c index b39093dd5715..0ade1110613b 100644 --- a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c @@ -202,7 +202,7 @@ do_nothing_cb(struct bpf_dynptr *dynptr, void *context) return 0; } -SEC("fentry/" SYS_PREFIX "sys_getrlimit") +SEC("fentry/" SYS_PREFIX "sys_prlimit64") int test_user_ringbuf_epoll(void *ctx) { long num_samples; diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 679efb3aa785..bf41390157bf 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -13,12 +13,15 @@ #define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success" #define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg=" #define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level=" +#define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags=" struct test_spec { const char *name; bool expect_failure; - const char *expect_msg; + const char **expect_msgs; + size_t expect_msg_cnt; int log_level; + int prog_flags; }; static int tester_init(struct test_loader *tester) @@ -67,7 +70,8 @@ static int parse_test_spec(struct test_loader *tester, for (i = 1; i < btf__type_cnt(btf); i++) { const struct btf_type *t; - const char *s; + const char *s, *val; + char *e; t = btf__type_by_id(btf, i); if (!btf_is_decl_tag(t)) @@ -82,14 +86,48 @@ static int parse_test_spec(struct test_loader *tester, } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS) == 0) { spec->expect_failure = false; } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) { - spec->expect_msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1; + void *tmp; + const char **msg; + + tmp = realloc(spec->expect_msgs, + (1 + spec->expect_msg_cnt) * sizeof(void *)); + if (!tmp) { + ASSERT_FAIL("failed to realloc memory for messages\n"); + return -ENOMEM; + } + spec->expect_msgs = tmp; + msg = &spec->expect_msgs[spec->expect_msg_cnt++]; + *msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1; } else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) { + val = s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1; errno = 0; - spec->log_level = strtol(s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1, NULL, 0); - if (errno) { + spec->log_level = strtol(val, &e, 0); + if (errno || e[0] != '\0') { ASSERT_FAIL("failed to parse test log level from '%s'", s); return -EINVAL; } + } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) { + val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1; + if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) { + spec->prog_flags |= BPF_F_STRICT_ALIGNMENT; + } else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) { + spec->prog_flags |= BPF_F_ANY_ALIGNMENT; + } else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) { + spec->prog_flags |= BPF_F_TEST_RND_HI32; + } else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) { + spec->prog_flags |= BPF_F_TEST_STATE_FREQ; + } else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) { + spec->prog_flags |= BPF_F_SLEEPABLE; + } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) { + spec->prog_flags |= BPF_F_XDP_HAS_FRAGS; + } else /* assume numeric value */ { + errno = 0; + spec->prog_flags |= strtol(val, &e, 0); + if (errno || e[0] != '\0') { + ASSERT_FAIL("failed to parse test prog flags from '%s'", s); + return -EINVAL; + } + } } } @@ -101,7 +139,7 @@ static void prepare_case(struct test_loader *tester, struct bpf_object *obj, struct bpf_program *prog) { - int min_log_level = 0; + int min_log_level = 0, prog_flags; if (env.verbosity > VERBOSE_NONE) min_log_level = 1; @@ -119,7 +157,11 @@ static void prepare_case(struct test_loader *tester, else bpf_program__set_log_level(prog, spec->log_level); + prog_flags = bpf_program__flags(prog); + bpf_program__set_flags(prog, prog_flags | spec->prog_flags); + tester->log_buf[0] = '\0'; + tester->next_match_pos = 0; } static void emit_verifier_log(const char *log_buf, bool force) @@ -135,17 +177,26 @@ static void validate_case(struct test_loader *tester, struct bpf_program *prog, int load_err) { - if (spec->expect_msg) { + int i, j; + + for (i = 0; i < spec->expect_msg_cnt; i++) { char *match; + const char *expect_msg; + + expect_msg = spec->expect_msgs[i]; - match = strstr(tester->log_buf, spec->expect_msg); + match = strstr(tester->log_buf + tester->next_match_pos, expect_msg); if (!ASSERT_OK_PTR(match, "expect_msg")) { /* if we are in verbose mode, we've already emitted log */ if (env.verbosity == VERBOSE_NONE) emit_verifier_log(tester->log_buf, true /*force*/); - fprintf(stderr, "EXPECTED MSG: '%s'\n", spec->expect_msg); + for (j = 0; j < i; j++) + fprintf(stderr, "MATCHED MSG: '%s'\n", spec->expect_msgs[j]); + fprintf(stderr, "EXPECTED MSG: '%s'\n", expect_msg); return; } + + tester->next_match_pos = match - tester->log_buf + strlen(expect_msg); } } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index d5d51ec97ec8..3cbf005747ed 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -376,6 +376,21 @@ int test__join_cgroup(const char *path); ___ok; \ }) +#define SYS(goto_label, fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto goto_label; \ + }) + +#define SYS_NOFAIL(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + system(cmd); \ + }) + static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; @@ -412,6 +427,7 @@ int get_bpf_max_tramp_links(void); struct test_loader { char *log_buf; size_t log_buf_sz; + size_t next_match_pos; struct bpf_object *obj; }; diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h index 6118e3ab61fc..56c9f8a3ad3d 100644 --- a/tools/testing/selftests/bpf/test_tcp_hdr_options.h +++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h @@ -50,6 +50,7 @@ struct linum_err { #define TCPOPT_EOL 0 #define TCPOPT_NOP 1 +#define TCPOPT_MSS 2 #define TCPOPT_WINDOW 3 #define TCPOPT_EXP 254 diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 8b9949bb833d..49a70d9beb0b 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -699,13 +699,13 @@ static int create_cgroup_storage(bool percpu) * struct bpf_timer t; * }; * struct btf_ptr { + * struct prog_test_ref_kfunc __kptr_untrusted *ptr; * struct prog_test_ref_kfunc __kptr *ptr; - * struct prog_test_ref_kfunc __kptr_ref *ptr; - * struct prog_test_member __kptr_ref *ptr; + * struct prog_test_member __kptr *ptr; * } */ static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t" - "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_ref" + "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_untrusted" "\0prog_test_member"; static __u32 btf_raw_types[] = { /* int */ @@ -724,20 +724,20 @@ static __u32 btf_raw_types[] = { BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */ /* struct prog_test_ref_kfunc */ /* [6] */ BTF_STRUCT_ENC(51, 0, 0), - BTF_STRUCT_ENC(89, 0, 0), /* [7] */ + BTF_STRUCT_ENC(95, 0, 0), /* [7] */ + /* type tag "kptr_untrusted" */ + BTF_TYPE_TAG_ENC(80, 6), /* [8] */ /* type tag "kptr" */ - BTF_TYPE_TAG_ENC(75, 6), /* [8] */ - /* type tag "kptr_ref" */ - BTF_TYPE_TAG_ENC(80, 6), /* [9] */ - BTF_TYPE_TAG_ENC(80, 7), /* [10] */ + BTF_TYPE_TAG_ENC(75, 6), /* [9] */ + BTF_TYPE_TAG_ENC(75, 7), /* [10] */ BTF_PTR_ENC(8), /* [11] */ BTF_PTR_ENC(9), /* [12] */ BTF_PTR_ENC(10), /* [13] */ /* struct btf_ptr */ /* [14] */ BTF_STRUCT_ENC(43, 3, 24), - BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr *ptr; */ - BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr_ref *ptr; */ - BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */ + BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr_untrusted *ptr; */ + BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr *ptr; */ + BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr *ptr; */ }; static char bpf_vlog[UINT_MAX >> 8]; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 289ed202ec66..5702fc9761ef 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -181,7 +181,7 @@ }, .result_unpriv = REJECT, .result = REJECT, - .errstr = "negative offset ptr_ ptr R1 off=-4 disallowed", + .errstr = "ptr R1 off=-4 disallowed", }, { "calls: invalid kfunc call: PTR_TO_BTF_ID with variable offset", @@ -243,7 +243,7 @@ }, .result_unpriv = REJECT, .result = REJECT, - .errstr = "R1 must be referenced", + .errstr = "R1 must be", }, { "calls: valid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID", diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c index c8eaf0536c24..2fd31612c0b8 100644 --- a/tools/testing/selftests/bpf/verifier/ctx.c +++ b/tools/testing/selftests/bpf/verifier/ctx.c @@ -1,15 +1,4 @@ { - "context stores via ST", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0), - BPF_EXIT_INSN(), - }, - .errstr = "BPF_ST stores into R1 ctx is not allowed", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, -}, -{ "context stores via BPF_ATOMIC", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c index 6914904344c0..d775ccb01989 100644 --- a/tools/testing/selftests/bpf/verifier/map_kptr.c +++ b/tools/testing/selftests/bpf/verifier/map_kptr.c @@ -336,7 +336,7 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .fixup_map_kptr = { 1 }, .result = REJECT, - .errstr = "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_", + .errstr = "R1 type=rcu_ptr_or_null_ expected=percpu_ptr_", }, { "map_kptr: ref: reject off != 0", diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index 878ca26c3f0a..af0c0f336625 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -240,6 +240,29 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + /* Same as above, but use BPF_ST_MEM to save 42 + * instead of BPF_STX_MEM. + */ + "unpriv: spill/fill of different pointers st", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + BPF_ST_MEM(BPF_W, BPF_REG_1, offsetof(struct __sk_buff, mark), 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "same insn cannot be used with different pointers", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, +{ "unpriv: spill/fill of different pointers stx - ctx and sock", .insns = { BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 80fbfe0330f6..1de34ec99290 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -48,6 +48,7 @@ TEST_PROGS += l2_tos_ttl_inherit.sh TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh +TEST_PROGS += big_tcp.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest @@ -81,13 +82,14 @@ TEST_GEN_FILES += csum TEST_GEN_FILES += nat6to4.o TEST_GEN_FILES += ip_local_port_range TEST_GEN_FILES += bind_wildcard +TEST_PROGS += test_vxlan_mdb.sh TEST_FILES := settings include ../lib.mk $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma -$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread +$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto $(OUTPUT)/tcp_inq: LDLIBS += -lpthread $(OUTPUT)/bind_bhash: LDLIBS += -lpthread diff --git a/tools/testing/selftests/net/big_tcp.sh b/tools/testing/selftests/net/big_tcp.sh new file mode 100755 index 000000000000..cde9a91c4797 --- /dev/null +++ b/tools/testing/selftests/net/big_tcp.sh @@ -0,0 +1,180 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For IPv4 and IPv6 BIG TCP. +# TOPO: CLIENT_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) SERVER_NS + +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +CLIENT_IP4="198.51.100.1" +CLIENT_IP6="2001:db8:1::1" + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +SERVER_IP4="203.0.113.1" +SERVER_IP6="2001:db8:2::1" + +ROUTER_NS=$(mktemp -u router-XXXXXXXX) +SERVER_GW4="203.0.113.2" +CLIENT_GW4="198.51.100.2" +SERVER_GW6="2001:db8:2::2" +CLIENT_GW6="2001:db8:1::2" + +MAX_SIZE=128000 +CHK_SIZE=65535 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +setup() { + ip netns add $CLIENT_NS + ip netns add $SERVER_NS + ip netns add $ROUTER_NS + ip -net $ROUTER_NS link add link1 type veth peer name link0 netns $CLIENT_NS + ip -net $ROUTER_NS link add link2 type veth peer name link3 netns $SERVER_NS + + ip -net $CLIENT_NS link set link0 up + ip -net $CLIENT_NS link set link0 mtu 1442 + ip -net $CLIENT_NS addr add $CLIENT_IP4/24 dev link0 + ip -net $CLIENT_NS addr add $CLIENT_IP6/64 dev link0 nodad + ip -net $CLIENT_NS route add $SERVER_IP4 dev link0 via $CLIENT_GW4 + ip -net $CLIENT_NS route add $SERVER_IP6 dev link0 via $CLIENT_GW6 + ip -net $CLIENT_NS link set dev link0 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $CLIENT_NS link set dev link0 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + ip net exec $CLIENT_NS sysctl -wq net.ipv4.tcp_window_scaling=10 + + ip -net $ROUTER_NS link set link1 up + ip -net $ROUTER_NS link set link2 up + ip -net $ROUTER_NS addr add $CLIENT_GW4/24 dev link1 + ip -net $ROUTER_NS addr add $CLIENT_GW6/64 dev link1 nodad + ip -net $ROUTER_NS addr add $SERVER_GW4/24 dev link2 + ip -net $ROUTER_NS addr add $SERVER_GW6/64 dev link2 nodad + ip -net $ROUTER_NS link set dev link1 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $ROUTER_NS link set dev link2 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $ROUTER_NS link set dev link1 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + ip -net $ROUTER_NS link set dev link2 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + # test for nf_ct_skb_network_trim in nf_conntrack_ovs used by TC ct action. + ip net exec $ROUTER_NS tc qdisc add dev link1 ingress + ip net exec $ROUTER_NS tc filter add dev link1 ingress \ + proto ip flower ip_proto tcp action ct + ip net exec $ROUTER_NS tc filter add dev link1 ingress \ + proto ipv6 flower ip_proto tcp action ct + ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1 + ip net exec $ROUTER_NS sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip -net $SERVER_NS link set link3 up + ip -net $SERVER_NS addr add $SERVER_IP4/24 dev link3 + ip -net $SERVER_NS addr add $SERVER_IP6/64 dev link3 nodad + ip -net $SERVER_NS route add $CLIENT_IP4 dev link3 via $SERVER_GW4 + ip -net $SERVER_NS route add $CLIENT_IP6 dev link3 via $SERVER_GW6 + ip -net $SERVER_NS link set dev link3 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $SERVER_NS link set dev link3 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + ip net exec $SERVER_NS sysctl -wq net.ipv4.tcp_window_scaling=10 + ip net exec $SERVER_NS netserver 2>&1 >/dev/null +} + +cleanup() { + ip net exec $SERVER_NS pkill netserver + ip -net $ROUTER_NS link del link1 + ip -net $ROUTER_NS link del link2 + ip netns del "$CLIENT_NS" + ip netns del "$SERVER_NS" + ip netns del "$ROUTER_NS" +} + +start_counter() { + local ipt="iptables" + local iface=$1 + local netns=$2 + + [ "$NF" = "6" ] && ipt="ip6tables" + ip net exec $netns $ipt -t raw -A PREROUTING -i $iface \ + -m length ! --length 0:$CHK_SIZE -j ACCEPT +} + +check_counter() { + local ipt="iptables" + local iface=$1 + local netns=$2 + + [ "$NF" = "6" ] && ipt="ip6tables" + test `ip net exec $netns $ipt -t raw -L -v |grep $iface | awk '{print $1}'` != "0" +} + +stop_counter() { + local ipt="iptables" + local iface=$1 + local netns=$2 + + [ "$NF" = "6" ] && ipt="ip6tables" + ip net exec $netns $ipt -t raw -D PREROUTING -i $iface \ + -m length ! --length 0:$CHK_SIZE -j ACCEPT +} + +do_netperf() { + local serip=$SERVER_IP4 + local netns=$1 + + [ "$NF" = "6" ] && serip=$SERVER_IP6 + ip net exec $netns netperf -$NF -t TCP_STREAM -H $serip 2>&1 >/dev/null +} + +do_test() { + local cli_tso=$1 + local gw_gro=$2 + local gw_tso=$3 + local ser_gro=$4 + local ret="PASS" + + ip net exec $CLIENT_NS ethtool -K link0 tso $cli_tso + ip net exec $ROUTER_NS ethtool -K link1 gro $gw_gro + ip net exec $ROUTER_NS ethtool -K link2 tso $gw_tso + ip net exec $SERVER_NS ethtool -K link3 gro $ser_gro + + start_counter link1 $ROUTER_NS + start_counter link3 $SERVER_NS + do_netperf $CLIENT_NS + + if check_counter link1 $ROUTER_NS; then + check_counter link3 $SERVER_NS || ret="FAIL_on_link3" + else + ret="FAIL_on_link1" + fi + + stop_counter link1 $ROUTER_NS + stop_counter link3 $SERVER_NS + printf "%-9s %-8s %-8s %-8s: [%s]\n" \ + $cli_tso $gw_gro $gw_tso $ser_gro $ret + test $ret = "PASS" +} + +testup() { + echo "CLI GSO | GW GRO | GW GSO | SER GRO" && \ + do_test "on" "on" "on" "on" && \ + do_test "on" "off" "on" "off" && \ + do_test "off" "on" "on" "on" && \ + do_test "on" "on" "off" "on" && \ + do_test "off" "on" "off" "on" +} + +if ! netperf -V &> /dev/null; then + echo "SKIP: Could not run test without netperf tool" + exit $ksft_skip +fi + +if ! ip link help 2>&1 | grep gso_ipv4_max_size &> /dev/null; then + echo "SKIP: Could not run test without gso/gro_ipv4_max_size supported in ip-link" + exit $ksft_skip +fi + +trap cleanup EXIT +setup && echo "Testing for BIG TCP:" && \ +NF=4 testup && echo "***v4 Tests Done***" && \ +NF=6 testup && echo "***v6 Tests Done***" +exit $? diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index cc9fd55ab869..4c7ce07afa2f 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -48,3 +48,4 @@ CONFIG_BAREUDP=m CONFIG_IPV6_IOAM6_LWTUNNEL=y CONFIG_CRYPTO_SM4_GENERIC=y CONFIG_AMT=m +CONFIG_VXLAN=m diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 42e3bd1a05f5..fafd19ec7e1f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1719,6 +1719,46 @@ chk_subflow_nr() fi } +chk_mptcp_info() +{ + local nr_info=$1 + local info + local cnt1 + local cnt2 + local dump_stats + + if [[ $nr_info = "subflows_"* ]]; then + info="subflows" + nr_info=${nr_info:9} + else + echo "[fail] unsupported argument: $nr_info" + fail_test + return 1 + fi + + printf "%-${nr_blank}s %-30s" " " "mptcp_info $info=$nr_info" + + cnt1=$(ss -N $ns1 -inmHM | grep "$info:" | + sed -n 's/.*\('"$info"':\)\([[:digit:]]*\).*$/\2/p;q') + [ -z "$cnt1" ] && cnt1=0 + cnt2=$(ss -N $ns2 -inmHM | grep "$info:" | + sed -n 's/.*\('"$info"':\)\([[:digit:]]*\).*$/\2/p;q') + [ -z "$cnt2" ] && cnt2=0 + if [ "$cnt1" != "$nr_info" ] || [ "$cnt2" != "$nr_info" ]; then + echo "[fail] got $cnt1:$cnt2 $info expected $nr_info" + fail_test + dump_stats=1 + else + echo "[ ok ]" + fi + + if [ "$dump_stats" = 1 ]; then + ss -N $ns1 -inmHM + ss -N $ns2 -inmHM + dump_stats + fi +} + chk_link_usage() { local ns=$1 @@ -3118,13 +3158,18 @@ endpoint_tests() run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 2>/dev/null & wait_mpj $ns2 + chk_subflow_nr needtitle "before delete" 2 + chk_mptcp_info subflows_1 + pm_nl_del_endpoint $ns2 2 10.0.2.2 sleep 0.5 - chk_subflow_nr needtitle "after delete" 1 + chk_subflow_nr "" "after delete" 1 + chk_mptcp_info subflows_0 pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow wait_mpj $ns2 chk_subflow_nr "" "after re-add" 2 + chk_mptcp_info subflows_1 kill_tests_wait fi } diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 275491be3da2..3b15c686c03f 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -4,6 +4,31 @@ # # set -e +ALL_TESTS=" + kci_test_polrouting + kci_test_route_get + kci_test_addrlft + kci_test_promote_secondaries + kci_test_tc + kci_test_gre + kci_test_gretap + kci_test_ip6gretap + kci_test_erspan + kci_test_ip6erspan + kci_test_bridge + kci_test_addrlabel + kci_test_ifalias + kci_test_vrf + kci_test_encap + kci_test_macsec + kci_test_ipsec + kci_test_ipsec_offload + kci_test_fdb_get + kci_test_neigh_get + kci_test_bridge_parent_id + kci_test_address_proto +" + devdummy="test-dummy0" # Kselftest framework requirement - SKIP code is 4. @@ -1225,62 +1250,126 @@ kci_test_bridge_parent_id() echo "PASS: bridge_parent_id" } -kci_test_rtnl() +address_get_proto() +{ + local addr=$1; shift + + ip -N -j address show dev "$devdummy" | + jq -e -r --arg addr "${addr%/*}" \ + '.[].addr_info[] | select(.local == $addr) | .protocol' +} + +address_count() +{ + ip -N -j address show dev "$devdummy" "$@" | + jq -e -r '[.[].addr_info[] | .local | select(. != null)] | length' +} + +do_test_address_proto() { + local what=$1; shift + local addr=$1; shift + local addr2=${addr%/*}2/${addr#*/} + local addr3=${addr%/*}3/${addr#*/} + local proto + local count local ret=0 - kci_add_dummy - if [ $ret -ne 0 ];then - echo "FAIL: cannot add dummy interface" - return 1 - fi + local err - kci_test_polrouting - check_err $? - kci_test_route_get - check_err $? - kci_test_addrlft - check_err $? - kci_test_promote_secondaries + ip address add dev "$devdummy" "$addr3" check_err $? - kci_test_tc + proto=$(address_get_proto "$addr3") + [[ "$proto" == null ]] check_err $? - kci_test_gre - check_err $? - kci_test_gretap - check_err $? - kci_test_ip6gretap - check_err $? - kci_test_erspan - check_err $? - kci_test_ip6erspan + + ip address add dev "$devdummy" "$addr2" proto 0x99 check_err $? - kci_test_bridge + proto=$(address_get_proto "$addr2") + [[ "$proto" == 0x99 ]] check_err $? - kci_test_addrlabel + + ip address add dev "$devdummy" "$addr" proto 0xab check_err $? - kci_test_ifalias + proto=$(address_get_proto "$addr") + [[ "$proto" == 0xab ]] check_err $? - kci_test_vrf + + ip address replace dev "$devdummy" "$addr" proto 0x11 + proto=$(address_get_proto "$addr") check_err $? - kci_test_encap + [[ "$proto" == 0x11 ]] check_err $? - kci_test_macsec + + count=$(address_count) check_err $? - kci_test_ipsec + (( count == 3 )) # $addr, $addr2 and $addr3 + + count=$(address_count proto 0) check_err $? - kci_test_ipsec_offload + (( count == 1 )) # just $addr2 + + count=$(address_count proto 0x11) check_err $? - kci_test_fdb_get + (( count == 2 )) # $addr and $addr2 + + count=$(address_count proto 0xab) check_err $? - kci_test_neigh_get + (( count == 1 )) # just $addr2 + + ip address del dev "$devdummy" "$addr" + ip address del dev "$devdummy" "$addr2" + ip address del dev "$devdummy" "$addr3" + + if [ $ret -ne 0 ]; then + echo "FAIL: address proto $what" + return 1 + fi + echo "PASS: address proto $what" +} + +kci_test_address_proto() +{ + local ret=0 + + do_test_address_proto IPv4 192.0.2.1/28 check_err $? - kci_test_bridge_parent_id + + do_test_address_proto IPv6 2001:db8:1::1/64 check_err $? + return $ret +} + +kci_test_rtnl() +{ + local current_test + local ret=0 + + kci_add_dummy + if [ $ret -ne 0 ];then + echo "FAIL: cannot add dummy interface" + return 1 + fi + + for current_test in ${TESTS:-$ALL_TESTS}; do + $current_test + check_err $? + done + kci_del_dummy return $ret } +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $(echo $ALL_TESTS)) +EOF +} + #check for needed privileges if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" @@ -1295,6 +1384,14 @@ for x in ip tc;do fi done +while getopts t:h o; do + case $o in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + kci_test_rtnl exit $? diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index 46a02bbd31d0..607cc9ad8d1b 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -66,11 +66,16 @@ #include <poll.h> #include <linux/tcp.h> #include <assert.h> +#include <openssl/pem.h> #ifndef MSG_ZEROCOPY #define MSG_ZEROCOPY 0x4000000 #endif +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + #define FILE_SZ (1ULL << 35) static int cfg_family = AF_INET6; static socklen_t cfg_alen = sizeof(struct sockaddr_in6); @@ -81,12 +86,14 @@ static int sndbuf; /* Default: autotuning. Can be set with -w <integer> option static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for receiver */ static int xflg; /* hash received data (simple xor) (-h option) */ static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */ +static int integrity; /* -i option: sender and receiver compute sha256 over the data.*/ static size_t chunk_size = 512*1024; static size_t map_align; unsigned long htotal; +unsigned int digest_len; static inline void prefetch(const void *x) { @@ -148,12 +155,14 @@ static void *mmap_large_buffer(size_t need, size_t *allocated) void *child_thread(void *arg) { + unsigned char digest[SHA256_DIGEST_LENGTH]; unsigned long total_mmap = 0, total = 0; struct tcp_zerocopy_receive zc; + unsigned char *buffer = NULL; unsigned long delta_usec; + EVP_MD_CTX *ctx = NULL; int flags = MAP_SHARED; struct timeval t0, t1; - char *buffer = NULL; void *raddr = NULL; void *addr = NULL; double throughput; @@ -180,6 +189,14 @@ void *child_thread(void *arg) addr = ALIGN_PTR_UP(raddr, map_align); } } + if (integrity) { + ctx = EVP_MD_CTX_new(); + if (!ctx) { + perror("cannot enable SHA computing"); + goto error; + } + EVP_DigestInit_ex(ctx, EVP_sha256(), NULL); + } while (1) { struct pollfd pfd = { .fd = fd, .events = POLLIN, }; int sub; @@ -191,7 +208,7 @@ void *child_thread(void *arg) memset(&zc, 0, sizeof(zc)); zc.address = (__u64)((unsigned long)addr); - zc.length = chunk_size; + zc.length = min(chunk_size, FILE_SZ - lu); res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len); @@ -200,6 +217,8 @@ void *child_thread(void *arg) if (zc.length) { assert(zc.length <= chunk_size); + if (integrity) + EVP_DigestUpdate(ctx, addr, zc.length); total_mmap += zc.length; if (xflg) hash_zone(addr, zc.length); @@ -211,22 +230,30 @@ void *child_thread(void *arg) } if (zc.recv_skip_hint) { assert(zc.recv_skip_hint <= chunk_size); - lu = read(fd, buffer, zc.recv_skip_hint); + lu = read(fd, buffer, min(zc.recv_skip_hint, + FILE_SZ - total)); if (lu > 0) { + if (integrity) + EVP_DigestUpdate(ctx, buffer, lu); if (xflg) hash_zone(buffer, lu); total += lu; } + if (lu == 0) + goto end; } continue; } sub = 0; while (sub < chunk_size) { - lu = read(fd, buffer + sub, chunk_size - sub); + lu = read(fd, buffer + sub, min(chunk_size - sub, + FILE_SZ - total)); if (lu == 0) goto end; if (lu < 0) break; + if (integrity) + EVP_DigestUpdate(ctx, buffer + sub, lu); if (xflg) hash_zone(buffer + sub, lu); total += lu; @@ -237,6 +264,20 @@ end: gettimeofday(&t1, NULL); delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec; + if (integrity) { + fcntl(fd, F_SETFL, 0); + EVP_DigestFinal_ex(ctx, digest, &digest_len); + lu = read(fd, buffer, SHA256_DIGEST_LENGTH); + if (lu != SHA256_DIGEST_LENGTH) + perror("Error: Cannot read SHA256\n"); + + if (memcmp(digest, buffer, + SHA256_DIGEST_LENGTH)) + fprintf(stderr, "Error: SHA256 of the data is not right\n"); + else + printf("\nSHA256 is correct\n"); + } + throughput = 0; if (delta_usec) throughput = total * 8.0 / (double)delta_usec / 1000.0; @@ -368,19 +409,38 @@ static unsigned long default_huge_page_size(void) return hps; } +static void randomize(void *target, size_t count) +{ + static int urandom = -1; + ssize_t got; + + urandom = open("/dev/urandom", O_RDONLY); + if (urandom < 0) { + perror("open /dev/urandom"); + exit(1); + } + got = read(urandom, target, count); + if (got != count) { + perror("read /dev/urandom"); + exit(1); + } +} + int main(int argc, char *argv[]) { + unsigned char digest[SHA256_DIGEST_LENGTH]; struct sockaddr_storage listenaddr, addr; unsigned int max_pacing_rate = 0; + EVP_MD_CTX *ctx = NULL; + unsigned char *buffer; uint64_t total = 0; char *host = NULL; int fd, c, on = 1; size_t buffer_sz; - char *buffer; int sflg = 0; int mss = 0; - while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:")) != -1) { + while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:i")) != -1) { switch (c) { case '4': cfg_family = PF_INET; @@ -426,6 +486,9 @@ int main(int argc, char *argv[]) case 'a': map_align = atol(optarg); break; + case 'i': + integrity = 1; + break; default: exit(1); } @@ -468,7 +531,7 @@ int main(int argc, char *argv[]) } buffer = mmap_large_buffer(chunk_size, &buffer_sz); - if (buffer == (char *)-1) { + if (buffer == (unsigned char *)-1) { perror("mmap"); exit(1); } @@ -501,17 +564,34 @@ int main(int argc, char *argv[]) perror("setsockopt SO_ZEROCOPY, (-z option disabled)"); zflg = 0; } + if (integrity) { + randomize(buffer, buffer_sz); + ctx = EVP_MD_CTX_new(); + if (!ctx) { + perror("cannot enable SHA computing"); + exit(1); + } + EVP_DigestInit_ex(ctx, EVP_sha256(), NULL); + } while (total < FILE_SZ) { + size_t offset = total % chunk_size; int64_t wr = FILE_SZ - total; - if (wr > chunk_size) - wr = chunk_size; - /* Note : we just want to fill the pipe with 0 bytes */ - wr = send(fd, buffer, (size_t)wr, zflg ? MSG_ZEROCOPY : 0); + if (wr > chunk_size - offset) + wr = chunk_size - offset; + /* Note : we just want to fill the pipe with random bytes */ + wr = send(fd, buffer + offset, + (size_t)wr, zflg ? MSG_ZEROCOPY : 0); if (wr <= 0) break; + if (integrity) + EVP_DigestUpdate(ctx, buffer + offset, wr); total += wr; } + if (integrity && total == FILE_SZ) { + EVP_DigestFinal_ex(ctx, digest, &digest_len); + send(fd, digest, (size_t)SHA256_DIGEST_LENGTH, 0); + } close(fd); munmap(buffer, buffer_sz); return 0; diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh new file mode 100755 index 000000000000..31e5f0f8859d --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_mdb.sh @@ -0,0 +1,2318 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking VXLAN MDB functionality. The topology consists of +# two sets of namespaces: One for the testing of IPv4 underlay and another for +# IPv6. In both cases, both IPv4 and IPv6 overlay traffic are tested. +# +# Data path functionality is tested by sending traffic from one of the upper +# namespaces and checking using ingress tc filters that the expected traffic +# was received by one of the lower namespaces. +# +# +------------------------------------+ +------------------------------------+ +# | ns1_v4 | | ns1_v6 | +# | | | | +# | br0.10 br0.4000 br0.20 | | br0.10 br0.4000 br0.20 | +# | + + + | | + + + | +# | | | | | | | | | | +# | | | | | | | | | | +# | +---------+---------+ | | +---------+---------+ | +# | | | | | | +# | | | | | | +# | + | | + | +# | br0 | | br0 | +# | + | | + | +# | | | | | | +# | | | | | | +# | + | | + | +# | vx0 | | vx0 | +# | | | | +# | | | | +# | veth0 | | veth0 | +# | + | | + | +# +-----------------|------------------+ +-----------------|------------------+ +# | | +# +-----------------|------------------+ +-----------------|------------------+ +# | + | | + | +# | veth0 | | veth0 | +# | | | | +# | | | | +# | vx0 | | vx0 | +# | + | | + | +# | | | | | | +# | | | | | | +# | + | | + | +# | br0 | | br0 | +# | + | | + | +# | | | | | | +# | | | | | | +# | +---------+---------+ | | +---------+---------+ | +# | | | | | | | | | | +# | | | | | | | | | | +# | + + + | | + + + | +# | br0.10 br0.4000 br0.10 | | br0.10 br0.4000 br0.20 | +# | | | | +# | ns2_v4 | | ns2_v6 | +# +------------------------------------+ +------------------------------------+ + +ret=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +CONTROL_PATH_TESTS=" + basic_star_g_ipv4_ipv4 + basic_star_g_ipv6_ipv4 + basic_star_g_ipv4_ipv6 + basic_star_g_ipv6_ipv6 + basic_sg_ipv4_ipv4 + basic_sg_ipv6_ipv4 + basic_sg_ipv4_ipv6 + basic_sg_ipv6_ipv6 + star_g_ipv4_ipv4 + star_g_ipv6_ipv4 + star_g_ipv4_ipv6 + star_g_ipv6_ipv6 + sg_ipv4_ipv4 + sg_ipv6_ipv4 + sg_ipv4_ipv6 + sg_ipv6_ipv6 + dump_ipv4_ipv4 + dump_ipv6_ipv4 + dump_ipv4_ipv6 + dump_ipv6_ipv6 +" + +DATA_PATH_TESTS=" + encap_params_ipv4_ipv4 + encap_params_ipv6_ipv4 + encap_params_ipv4_ipv6 + encap_params_ipv6_ipv6 + starg_exclude_ir_ipv4_ipv4 + starg_exclude_ir_ipv6_ipv4 + starg_exclude_ir_ipv4_ipv6 + starg_exclude_ir_ipv6_ipv6 + starg_include_ir_ipv4_ipv4 + starg_include_ir_ipv6_ipv4 + starg_include_ir_ipv4_ipv6 + starg_include_ir_ipv6_ipv6 + starg_exclude_p2mp_ipv4_ipv4 + starg_exclude_p2mp_ipv6_ipv4 + starg_exclude_p2mp_ipv4_ipv6 + starg_exclude_p2mp_ipv6_ipv6 + starg_include_p2mp_ipv4_ipv4 + starg_include_p2mp_ipv6_ipv4 + starg_include_p2mp_ipv4_ipv6 + starg_include_p2mp_ipv6_ipv6 + egress_vni_translation_ipv4_ipv4 + egress_vni_translation_ipv6_ipv4 + egress_vni_translation_ipv4_ipv6 + egress_vni_translation_ipv6_ipv6 + all_zeros_mdb_ipv4 + all_zeros_mdb_ipv6 + mdb_fdb_ipv4_ipv4 + mdb_fdb_ipv6_ipv4 + mdb_fdb_ipv4_ipv6 + mdb_fdb_ipv6_ipv6 + mdb_torture_ipv4_ipv4 + mdb_torture_ipv6_ipv4 + mdb_torture_ipv4_ipv6 + mdb_torture_ipv6_ipv6 +" + +# All tests in this script. Can be overridden with -t option. +TESTS=" + $CONTROL_PATH_TESTS + $DATA_PATH_TESTS +" +VERBOSE=0 +PAUSE_ON_FAIL=no +PAUSE=no + +################################################################################ +# Utilities + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "$VERBOSE" = "1" ]; then + echo " rc=$rc, expected $expected" + fi + + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + + [ "$VERBOSE" = "1" ] && echo +} + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf "COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +tc_check_packets() +{ + local ns=$1; shift + local id=$1; shift + local handle=$1; shift + local count=$1; shift + local pkts + + sleep 0.1 + pkts=$(tc -n $ns -j -s filter show $id \ + | jq ".[] | select(.options.handle == $handle) | \ + .options.actions[0].stats.packets") + [[ $pkts == $count ]] +} + +################################################################################ +# Setup + +setup_common_ns() +{ + local ns=$1; shift + local local_addr=$1; shift + + ip netns exec $ns sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -qw net.ipv4.fib_multipath_use_neigh=1 + ip netns exec $ns sysctl -qw net.ipv4.conf.default.ignore_routes_with_linkdown=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.forwarding=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.forwarding=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0 + + ip -n $ns link set dev lo up + ip -n $ns address add $local_addr dev lo + + ip -n $ns link set dev veth0 up + + ip -n $ns link add name br0 up type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + + ip -n $ns link add link br0 name br0.10 up type vlan id 10 + bridge -n $ns vlan add vid 10 dev br0 self + + ip -n $ns link add link br0 name br0.20 up type vlan id 20 + bridge -n $ns vlan add vid 20 dev br0 self + + ip -n $ns link add link br0 name br0.4000 up type vlan id 4000 + bridge -n $ns vlan add vid 4000 dev br0 self + + ip -n $ns link add name vx0 up master br0 type vxlan \ + local $local_addr dstport 4789 external vnifilter + bridge -n $ns link set dev vx0 vlan_tunnel on + + bridge -n $ns vlan add vid 10 dev vx0 + bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010 + bridge -n $ns vni add vni 10010 dev vx0 + + bridge -n $ns vlan add vid 20 dev vx0 + bridge -n $ns vlan add vid 20 dev vx0 tunnel_info id 10020 + bridge -n $ns vni add vni 10020 dev vx0 + + bridge -n $ns vlan add vid 4000 dev vx0 pvid + bridge -n $ns vlan add vid 4000 dev vx0 tunnel_info id 14000 + bridge -n $ns vni add vni 14000 dev vx0 +} + +setup_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local local_addr1=$1; shift + local local_addr2=$1; shift + + ip netns add $ns1 + ip netns add $ns2 + + ip link add name veth0 type veth peer name veth1 + ip link set dev veth0 netns $ns1 name veth0 + ip link set dev veth1 netns $ns2 name veth0 + + setup_common_ns $ns1 $local_addr1 + setup_common_ns $ns2 $local_addr2 +} + +setup_v4() +{ + setup_common ns1_v4 ns2_v4 192.0.2.1 192.0.2.2 + + ip -n ns1_v4 address add 192.0.2.17/28 dev veth0 + ip -n ns2_v4 address add 192.0.2.18/28 dev veth0 + + ip -n ns1_v4 route add default via 192.0.2.18 + ip -n ns2_v4 route add default via 192.0.2.17 +} + +cleanup_v4() +{ + ip netns del ns2_v4 + ip netns del ns1_v4 +} + +setup_v6() +{ + setup_common ns1_v6 ns2_v6 2001:db8:1::1 2001:db8:1::2 + + ip -n ns1_v6 address add 2001:db8:2::1/64 dev veth0 nodad + ip -n ns2_v6 address add 2001:db8:2::2/64 dev veth0 nodad + + ip -n ns1_v6 route add default via 2001:db8:2::2 + ip -n ns2_v6 route add default via 2001:db8:2::1 +} + +cleanup_v6() +{ + ip netns del ns2_v6 + ip netns del ns1_v6 +} + +setup() +{ + set -e + + setup_v4 + setup_v6 + + sleep 5 + + set +e +} + +cleanup() +{ + cleanup_v6 &> /dev/null + cleanup_v4 &> /dev/null +} + +################################################################################ +# Tests - Control path + +basic_common() +{ + local ns1=$1; shift + local grp_key=$1; shift + local vtep_ip=$1; shift + + # Test basic control path operations common to all MDB entry types. + + # Basic add, replace and delete behavior. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 0 "MDB entry addition" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\"" + log_test $? 0 "MDB entry presence after addition" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 0 "MDB entry replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\"" + log_test $? 0 "MDB entry presence after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + log_test $? 0 "MDB entry deletion" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\"" + log_test $? 1 "MDB entry presence after deletion" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + log_test $? 255 "Non-existent MDB entry deletion" + + # Default protocol and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"proto static\"" + log_test $? 0 "MDB entry default protocol" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent proto 123 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"proto 123\"" + log_test $? 0 "MDB entry protocol replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Default destination port and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" dst_port \"" + log_test $? 1 "MDB entry default destination port" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip dst_port 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"dst_port 1234\"" + log_test $? 0 "MDB entry destination port replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Default destination VNI and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" vni \"" + log_test $? 1 "MDB entry default destination VNI" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"vni 1234\"" + log_test $? 0 "MDB entry destination VNI replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Default outgoing interface and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" via \"" + log_test $? 1 "MDB entry default outgoing interface" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010 via veth0" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"via veth0\"" + log_test $? 0 "MDB entry outgoing interface replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" + + # Common error cases. + run_cmd "bridge -n $ns1 mdb add dev vx0 port veth0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with mismatch between device and port" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key temp dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with temp state" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent vid 10 dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with VLAN" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp 01:02:03:04:05:06 permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry MAC address" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent" + log_test $? 255 "MDB entry without extended parameters" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent proto 3 dst $vtep_ip src_vni 10010" + log_test $? 255 "MDB entry with an invalid protocol" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni $((2 ** 24)) src_vni 10010" + log_test $? 255 "MDB entry with an invalid destination VNI" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni $((2 ** 24))" + log_test $? 255 "MDB entry with an invalid source VNI" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent src_vni 10010" + log_test $? 255 "MDB entry without a remote destination IP" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "Duplicate MDB entries" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010" +} + +basic_star_g_ipv4_ipv4() +{ + local ns1=ns1_v4 + local grp_key="grp 239.1.1.1" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (*, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_star_g_ipv6_ipv4() +{ + local ns1=ns1_v4 + local grp_key="grp ff0e::1" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (*, G) operations - IPv6 overlay / IPv4 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_star_g_ipv4_ipv6() +{ + local ns1=ns1_v6 + local grp_key="grp 239.1.1.1" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (*, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_star_g_ipv6_ipv6() +{ + local ns1=ns1_v6 + local grp_key="grp ff0e::1" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (*, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv4_ipv4() +{ + local ns1=ns1_v4 + local grp_key="grp 239.1.1.1 src 192.0.2.129" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (S, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv6_ipv4() +{ + local ns1=ns1_v4 + local grp_key="grp ff0e::1 src 2001:db8:100::1" + local vtep_ip=198.51.100.100 + + echo + echo "Control path: Basic (S, G) operations - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv4_ipv6() +{ + local ns1=ns1_v6 + local grp_key="grp 239.1.1.1 src 192.0.2.129" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (S, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +basic_sg_ipv6_ipv6() +{ + local ns1=ns1_v6 + local grp_key="grp ff0e::1 src 2001:db8:100::1" + local vtep_ip=2001:db8:1000::1 + + echo + echo "Control path: Basic (S, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------------" + + basic_common $ns1 "$grp_key" $vtep_ip +} + +star_g_common() +{ + local ns1=$1; shift + local grp=$1; shift + local src1=$1; shift + local src2=$1; shift + local src3=$1; shift + local vtep_ip=$1; shift + local all_zeros_grp=$1; shift + + # Test control path operations specific to (*, G) entries. + + # Basic add, replace and delete behavior. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 0 "(*, G) MDB entry addition with source list" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \"" + log_test $? 0 "(*, G) MDB entry presence after addition" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry presence after addition" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 0 "(*, G) MDB entry replacement with source list" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \"" + log_test $? 0 "(*, G) MDB entry presence after replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry presence after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + log_test $? 0 "(*, G) MDB entry deletion" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \"" + log_test $? 1 "(*, G) MDB entry presence after deletion" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 1 "(S, G) MDB entry presence after deletion" + + # Default filter mode and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep exclude" + log_test $? 0 "(*, G) MDB entry default filter mode" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep include" + log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"include\"" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"include\"" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\" | grep blocked" + log_test $? 1 "\"blocked\" flag after replacing filter mode to \"include\"" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep exclude" + log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"exclude\"" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"exclude\"" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\" | grep blocked" + log_test $? 0 "\"blocked\" flag after replacing filter mode to \"exclude\"" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default source list and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep source_list" + log_test $? 1 "(*, G) MDB entry default source list" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src2,$src3 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry of 1st source after replacing source list" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src2\"" + log_test $? 0 "(S, G) MDB entry of 2nd source after replacing source list" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src3\"" + log_test $? 0 "(S, G) MDB entry of 3rd source after replacing source list" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src3 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\"" + log_test $? 0 "(S, G) MDB entry of 1st source after removing source" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src2\"" + log_test $? 1 "(S, G) MDB entry of 2nd source after removing source" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src3\"" + log_test $? 0 "(S, G) MDB entry of 3rd source after removing source" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default protocol and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \"proto static\"" + log_test $? 0 "(*, G) MDB entry default protocol" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \"proto static\"" + log_test $? 0 "(S, G) MDB entry default protocol" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 proto bgp dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \"proto bgp\"" + log_test $? 0 "(*, G) MDB entry protocol after replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \"proto bgp\"" + log_test $? 0 "(S, G) MDB entry protocol after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default destination port and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" dst_port \"" + log_test $? 1 "(*, G) MDB entry default destination port" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" dst_port \"" + log_test $? 1 "(S, G) MDB entry default destination port" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip dst_port 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" dst_port 1234 \"" + log_test $? 0 "(*, G) MDB entry destination port after replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" dst_port 1234 \"" + log_test $? 0 "(S, G) MDB entry destination port after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default destination VNI and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" vni \"" + log_test $? 1 "(*, G) MDB entry default destination VNI" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" vni \"" + log_test $? 1 "(S, G) MDB entry default destination VNI" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip vni 1234 src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" vni 1234 \"" + log_test $? 0 "(*, G) MDB entry destination VNI after replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" vni 1234 \"" + log_test $? 0 "(S, G) MDB entry destination VNI after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Default outgoing interface and replacement. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" via \"" + log_test $? 1 "(*, G) MDB entry default outgoing interface" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" via \"" + log_test $? 1 "(S, G) MDB entry default outgoing interface" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010 via veth0" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" via veth0 \"" + log_test $? 0 "(*, G) MDB entry outgoing interface after replacement" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" via veth0 \"" + log_test $? 0 "(S, G) MDB entry outgoing interface after replacement" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010" + + # Error cases. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp permanent filter_mode exclude dst $vtep_ip src_vni 10010" + log_test $? 255 "All-zeros group with filter mode" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp permanent source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 255 "All-zeros group with source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode include dst $vtep_ip src_vni 10010" + log_test $? 255 "(*, G) INCLUDE with an empty source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $grp dst $vtep_ip src_vni 10010" + log_test $? 255 "Invalid source in source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent source_list $src1 dst $vtep_ip src_vni 10010" + log_test $? 255 "Source list without filter mode" +} + +star_g_ipv4_ipv4() +{ + local ns1=ns1_v4 + local grp=239.1.1.1 + local src1=192.0.2.129 + local src2=192.0.2.130 + local src3=192.0.2.131 + local vtep_ip=198.51.100.100 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (*, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +star_g_ipv6_ipv4() +{ + local ns1=ns1_v4 + local grp=ff0e::1 + local src1=2001:db8:100::1 + local src2=2001:db8:100::2 + local src3=2001:db8:100::3 + local vtep_ip=198.51.100.100 + local all_zeros_grp=:: + + echo + echo "Control path: (*, G) operations - IPv6 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +star_g_ipv4_ipv6() +{ + local ns1=ns1_v6 + local grp=239.1.1.1 + local src1=192.0.2.129 + local src2=192.0.2.130 + local src3=192.0.2.131 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (*, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +star_g_ipv6_ipv6() +{ + local ns1=ns1_v6 + local grp=ff0e::1 + local src1=2001:db8:100::1 + local src2=2001:db8:100::2 + local src3=2001:db8:100::3 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=:: + + echo + echo "Control path: (*, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp +} + +sg_common() +{ + local ns1=$1; shift + local grp=$1; shift + local src=$1; shift + local vtep_ip=$1; shift + local all_zeros_grp=$1; shift + + # Test control path operations specific to (S, G) entries. + + # Default filter mode. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010" + run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep include" + log_test $? 0 "(S, G) MDB entry default filter mode" + + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010" + + # Error cases. + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent filter_mode include dst $vtep_ip src_vni 10010" + log_test $? 255 "(S, G) with filter mode" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent source_list $src dst $vtep_ip src_vni 10010" + log_test $? 255 "(S, G) with source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $grp permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "(S, G) with an invalid source list" + + run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp src $src permanent dst $vtep_ip src_vni 10010" + log_test $? 255 "All-zeros group with source" +} + +sg_ipv4_ipv4() +{ + local ns1=ns1_v4 + local grp=239.1.1.1 + local src=192.0.2.129 + local vtep_ip=198.51.100.100 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (S, G) operations - IPv4 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +sg_ipv6_ipv4() +{ + local ns1=ns1_v4 + local grp=ff0e::1 + local src=2001:db8:100::1 + local vtep_ip=198.51.100.100 + local all_zeros_grp=:: + + echo + echo "Control path: (S, G) operations - IPv6 overlay / IPv4 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +sg_ipv4_ipv6() +{ + local ns1=ns1_v6 + local grp=239.1.1.1 + local src=192.0.2.129 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=0.0.0.0 + + echo + echo "Control path: (S, G) operations - IPv4 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +sg_ipv6_ipv6() +{ + local ns1=ns1_v6 + local grp=ff0e::1 + local src=2001:db8:100::1 + local vtep_ip=2001:db8:1000::1 + local all_zeros_grp=:: + + echo + echo "Control path: (S, G) operations - IPv6 overlay / IPv6 underlay" + echo "--------------------------------------------------------------" + + sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp +} + +ipv4_grps_get() +{ + local max_grps=$1; shift + local i + + for i in $(seq 0 $((max_grps - 1))); do + echo "239.1.1.$i" + done +} + +ipv6_grps_get() +{ + local max_grps=$1; shift + local i + + for i in $(seq 0 $((max_grps - 1))); do + echo "ff0e::$(printf %x $i)" + done +} + +dump_common() +{ + local ns1=$1; shift + local local_addr=$1; shift + local remote_prefix=$1; shift + local fn=$1; shift + local max_vxlan_devs=2 + local max_remotes=64 + local max_grps=256 + local num_entries + local batch_file + local grp + local i j + + # The kernel maintains various markers for the MDB dump. Add a test for + # large scale MDB dump to make sure that all the configured entries are + # dumped and that the markers are used correctly. + + # Create net devices. + for i in $(seq 1 $max_vxlan_devs); do + ip -n $ns1 link add name vx-test${i} up type vxlan \ + local $local_addr dstport 4789 external vnifilter + done + + # Create batch file with MDB entries. + batch_file=$(mktemp) + for i in $(seq 1 $max_vxlan_devs); do + for j in $(seq 1 $max_remotes); do + for grp in $($fn $max_grps); do + echo "mdb add dev vx-test${i} port vx-test${i} grp $grp permanent dst ${remote_prefix}${j}" >> $batch_file + done + done + done + + # Program the batch file and check for expected number of entries. + bridge -n $ns1 -b $batch_file + for i in $(seq 1 $max_vxlan_devs); do + num_entries=$(bridge -n $ns1 mdb show dev vx-test${i} | grep "permanent" | wc -l) + [[ $num_entries -eq $((max_grps * max_remotes)) ]] + log_test $? 0 "Large scale dump - VXLAN device #$i" + done + + rm -rf $batch_file +} + +dump_ipv4_ipv4() +{ + local ns1=ns1_v4 + local local_addr=192.0.2.1 + local remote_prefix=198.51.100. + local fn=ipv4_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv4 overlay / IPv4 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +dump_ipv6_ipv4() +{ + local ns1=ns1_v4 + local local_addr=192.0.2.1 + local remote_prefix=198.51.100. + local fn=ipv6_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv6 overlay / IPv4 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +dump_ipv4_ipv6() +{ + local ns1=ns1_v6 + local local_addr=2001:db8:1::1 + local remote_prefix=2001:db8:1000:: + local fn=ipv4_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv4 overlay / IPv6 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +dump_ipv6_ipv6() +{ + local ns1=ns1_v6 + local local_addr=2001:db8:1::1 + local remote_prefix=2001:db8:1000:: + local fn=ipv6_grps_get + + echo + echo "Control path: Large scale MDB dump - IPv6 overlay / IPv6 underlay" + echo "-----------------------------------------------------------------" + + dump_common $ns1 $local_addr $remote_prefix $fn +} + +################################################################################ +# Tests - Data path + +encap_params_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local enc_ethtype=$1; shift + local grp=$1; shift + local src=$1; shift + local mz=$1; shift + + # Test that packets forwarded by the VXLAN MDB are encapsulated with + # the correct parameters. Transmit packets from the first namespace and + # check that they hit the corresponding filters on the ingress of the + # second namespace. + + run_cmd "tc -n $ns2 qdisc replace dev veth0 clsact" + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + # Check destination IP. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Destination IP - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Destination IP - no match" + + run_cmd "tc -n $ns2 filter del dev vx0 ingress pref 1 handle 101 flower" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10020" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" + + # Check destination port. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Default destination port - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Default destination port - no match" + + run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass" + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Non-default destination port - match" + + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev veth0 ingress" 101 1 + log_test $? 0 "Non-default destination port - no match" + + run_cmd "tc -n $ns2 filter del dev veth0 ingress pref 1 handle 101 flower" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10020" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" + + # Check default VNI. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Default destination VNI - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Default destination VNI - no match" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10020 src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Non-default destination VNI - match" + + run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Non-default destination VNI - no match" + + run_cmd "tc -n $ns2 filter del dev vx0 ingress pref 1 handle 101 flower" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10020" + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" +} + +encap_params_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local enc_ethtype="ip" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Encapsulation parameters - IPv4 overlay / IPv4 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn" +} + +encap_params_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local enc_ethtype="ip" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Encapsulation parameters - IPv6 overlay / IPv4 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn -6" +} + +encap_params_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local enc_ethtype="ipv6" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Encapsulation parameters - IPv4 overlay / IPv6 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn" +} + +encap_params_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local enc_ethtype="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Encapsulation parameters - IPv6 overlay / IPv6 underlay" + echo "------------------------------------------------------------------" + + encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ + $grp $src "mausezahn -6" +} + +starg_exclude_ir_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) EXCLUDE MDB entry with one source and two remote + # VTEPs. Make sure that the source in the source list is not forwarded + # and that a source not in the list is forwarded. Remove one of the + # VTEPs from the entry and make sure that packets are only forwarded to + # the remaining VTEP. + + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Block excluded source - second VTEP" + + # Check that valid source is forwarded to both VTEPs. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Forward valid source - second VTEP" + + # Remove second VTEP. + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Block excluded source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Block excluded source after removal - second VTEP" + + # Check that valid source is forwarded to the remaining VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Forward valid source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Forward valid source after removal - second VTEP" +} + +starg_exclude_ir_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv4 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_ir_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv6 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_exclude_ir_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv4 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_ir_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - IR - IPv6 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_ir_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) INCLUDE MDB entry with one source and two remote + # VTEPs. Make sure that the source in the source list is forwarded and + # that a source not in the list is not forwarded. Remove one of the + # VTEPs from the entry and make sure that packets are only forwarded to + # the remaining VTEP. + + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Block excluded source - second VTEP" + + # Check that valid source is forwarded to both VTEPs. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Forward valid source - second VTEP" + + # Remove second VTEP. + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" + + # Check that invalid source is not forwarded to any VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Block excluded source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Block excluded source after removal - second VTEP" + + # Check that valid source is forwarded to the remaining VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Forward valid source after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Forward valid source after removal - second VTEP" +} + +starg_include_ir_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv4 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_ir_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv6 overlay / IPv4 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_ir_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv4 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_ir_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - IR - IPv6 overlay / IPv6 underlay" + echo "-------------------------------------------------------------" + + starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_exclude_p2mp_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local mcast_grp=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) EXCLUDE MDB entry with one source and one multicast + # group to which packets are sent. Make sure that the source in the + # source list is not forwarded and that a source not in the list is + # forwarded. + + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $mcast_grp action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0" + + # Check that invalid source is not forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source" + + # Check that valid source is forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source" + + # Remove the VTEP from the multicast group. + run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" + + # Check that valid source is not received anymore. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Receive of valid source after removal from group" +} + +starg_exclude_p2mp_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_p2mp_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_exclude_p2mp_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_exclude_p2mp_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_p2mp_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local mcast_grp=$1; shift + local plen=$1; shift + local grp=$1; shift + local valid_src=$1; shift + local invalid_src=$1; shift + local mz=$1; shift + + # Install a (*, G) INCLUDE MDB entry with one source and one multicast + # group to which packets are sent. Make sure that the source in the + # source list is forwarded and that a source not in the list is not + # forwarded. + + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin" + + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $mcast_grp action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0" + + # Check that invalid source is not forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "Block excluded source" + + # Check that valid source is forwarded. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Forward valid source" + + # Remove the VTEP from the multicast group. + run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" + + # Check that valid source is not received anymore. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Receive of valid source after removal from group" +} + +starg_include_p2mp_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_p2mp_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +starg_include_p2mp_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=239.1.1.1 + local valid_src=192.0.2.129 + local invalid_src=192.0.2.145 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn" +} + +starg_include_p2mp_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local grp=ff0e::1 + local valid_src=2001:db8:100::1 + local invalid_src=2001:db8:200::1 + + echo + echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay" + echo "---------------------------------------------------------------" + + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + $valid_src $invalid_src "mausezahn -6" +} + +egress_vni_translation_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local mcast_grp=$1; shift + local plen=$1; shift + local proto=$1; shift + local grp=$1; shift + local src=$1; shift + local mz=$1; shift + + # When P2MP tunnels are used with optimized inter-subnet multicast + # (OISM) [1], the ingress VTEP does not perform VNI translation and + # uses the VNI of the source broadcast domain (BD). If the egress VTEP + # is a member in the source BD, then no VNI translation is needed. + # Otherwise, the egress VTEP needs to translate the VNI to the + # supplementary broadcast domain (SBD) VNI, which is usually the L3VNI. + # + # In this test, remove the VTEP in the second namespace from VLAN 10 + # (VNI 10010) and make sure that a packet sent from this VLAN on the + # first VTEP is received by the SVI corresponding to the L3VNI (14000 / + # VLAN 4000) on the second VTEP. + # + # The second VTEP will be able to decapsulate the packet with VNI 10010 + # because this VNI is configured on its shared VXLAN device. Later, + # when ingressing the bridge, the VNI to VLAN lookup will fail because + # the VTEP is not a member in VLAN 10, which will cause the packet to + # be tagged with VLAN 4000 since it is configured as PVID. + # + # [1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast + + run_cmd "tc -n $ns2 qdisc replace dev br0.4000 clsact" + run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin" + run_cmd "tc -n $ns2 filter replace dev br0.4000 ingress pref 1 handle 101 proto $proto flower src_ip $src dst_ip $grp action pass" + + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp src $src permanent dst $mcast_grp src_vni 10010 via veth0" + + # Remove the second VTEP from VLAN 10. + run_cmd "bridge -n $ns2 vlan del vid 10 dev vx0" + + # Make sure that packets sent from the first VTEP over VLAN 10 are + # received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on + # the second VTEP, since it is configured as PVID. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 + log_test $? 0 "Egress VNI translation - PVID configured" + + # Remove PVID flag from VLAN 4000 on the second VTEP and make sure + # packets are no longer received by the SVI interface. + run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 + log_test $? 0 "Egress VNI translation - no PVID configured" + + # Reconfigure the PVID and make sure packets are received again. + run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2 + log_test $? 0 "Egress VNI translation - PVID reconfigured" +} + +egress_vni_translation_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Egress VNI translation - IPv4 overlay / IPv4 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn" +} + +egress_vni_translation_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local mcast_grp=238.1.1.1 + local plen=32 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Egress VNI translation - IPv6 overlay / IPv4 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn -6" +} + +egress_vni_translation_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: Egress VNI translation - IPv4 overlay / IPv6 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn" +} + +egress_vni_translation_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local mcast_grp=ff0e::2 + local plen=128 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: Egress VNI translation - IPv6 overlay / IPv6 underlay" + echo "----------------------------------------------------------------" + + egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ + $src "mausezahn -6" +} + +all_zeros_mdb_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local vtep3_ip=$1; shift + local vtep4_ip=$1; shift + local plen=$1; shift + local ipv4_grp=239.1.1.1 + local ipv4_unreg_grp=239.2.2.2 + local ipv4_ll_grp=224.0.0.100 + local ipv4_src=192.0.2.129 + local ipv6_grp=ff0e::1 + local ipv6_unreg_grp=ff0e::2 + local ipv6_ll_grp=ff02::1 + local ipv6_src=2001:db8:100::1 + + # Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic + # and make sure they only forward unregistered IP multicast traffic + # which is not link-local. Also make sure that each entry only forwards + # traffic from the matching address family. + + # Associate two different VTEPs with one all-zeros MDB entry: Two with + # the IPv4 entry (0.0.0.0) and another two with the IPv6 one (::). + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp 0.0.0.0 permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp 0.0.0.0 permanent dst $vtep2_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp :: permanent dst $vtep3_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp :: permanent dst $vtep4_ip src_vni 10010" + + # Associate one VTEP from each set with a regular MDB entry: One with + # an IPv4 entry and another with an IPv6 one. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $ipv4_grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $ipv6_grp permanent dst $vtep3_ip src_vni 10010" + + # Add filters to match on decapsulated traffic in the second namespace. + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 103 proto all flower enc_dst_ip $vtep3_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 104 proto all flower enc_dst_ip $vtep4_ip action pass" + + # Configure the VTEP addresses in the second namespace to enable + # decapsulation. + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep3_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep4_ip/$plen dev lo" + + # Send registered IPv4 multicast and make sure it only arrives to the + # first VTEP. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Registered IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "Registered IPv4 multicast - second VTEP" + + # Send unregistered IPv4 multicast that is not link-local and make sure + # it arrives to the first and second VTEPs. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Unregistered IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Unregistered IPv4 multicast - second VTEP" + + # Send IPv4 link-local multicast traffic and make sure it does not + # arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Link-local IPv4 multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Link-local IPv4 multicast - second VTEP" + + # Send registered IPv4 multicast using a unicast MAC address and make + # sure it does not arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b 00:11:22:33:44:55 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Registered IPv4 multicast with a unicast MAC - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Registered IPv4 multicast with a unicast MAC - second VTEP" + + # Send registered IPv4 multicast using a broadcast MAC address and make + # sure it does not arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b bcast -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 2 + log_test $? 0 "Registered IPv4 multicast with a broadcast MAC - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Registered IPv4 multicast with a broadcast MAC - second VTEP" + + # Make sure IPv4 traffic did not reach the VTEPs associated with + # IPv6 entries. + tc_check_packets "$ns2" "dev vx0 ingress" 103 0 + log_test $? 0 "IPv4 traffic - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 0 + log_test $? 0 "IPv4 traffic - fourth VTEP" + + # Reset IPv4 filters before testing IPv6 traffic. + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass" + + # Send registered IPv6 multicast and make sure it only arrives to the + # third VTEP. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 1 + log_test $? 0 "Registered IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 0 + log_test $? 0 "Registered IPv6 multicast - fourth VTEP" + + # Send unregistered IPv6 multicast that is not link-local and make sure + # it arrives to the third and fourth VTEPs. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Unregistered IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Unregistered IPv6 multicast - fourth VTEP" + + # Send IPv6 link-local multicast traffic and make sure it does not + # arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Link-local IPv6 multicast - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Link-local IPv6 multicast - fourth VTEP" + + # Send registered IPv6 multicast using a unicast MAC address and make + # sure it does not arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b 00:11:22:33:44:55 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Registered IPv6 multicast with a unicast MAC - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Registered IPv6 multicast with a unicast MAC - fourth VTEP" + + # Send registered IPv6 multicast using a broadcast MAC address and make + # sure it does not arrive to any VTEP. + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b bcast -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 103 2 + log_test $? 0 "Registered IPv6 multicast with a broadcast MAC - third VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 104 1 + log_test $? 0 "Registered IPv6 multicast with a broadcast MAC - fourth VTEP" + + # Make sure IPv6 traffic did not reach the VTEPs associated with + # IPv4 entries. + tc_check_packets "$ns2" "dev vx0 ingress" 101 0 + log_test $? 0 "IPv6 traffic - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "IPv6 traffic - second VTEP" +} + +all_zeros_mdb_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.101 + local vtep2_ip=198.51.100.102 + local vtep3_ip=198.51.100.103 + local vtep4_ip=198.51.100.104 + local plen=32 + + echo + echo "Data path: All-zeros MDB entry - IPv4 underlay" + echo "----------------------------------------------" + + all_zeros_mdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $vtep3_ip \ + $vtep4_ip $plen +} + +all_zeros_mdb_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local vtep3_ip=2001:db8:3000::1 + local vtep4_ip=2001:db8:4000::1 + local plen=128 + + echo + echo "Data path: All-zeros MDB entry - IPv6 underlay" + echo "----------------------------------------------" + + all_zeros_mdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $vtep3_ip \ + $vtep4_ip $plen +} + +mdb_fdb_common() +{ + local ns1=$1; shift + local ns2=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local plen=$1; shift + local proto=$1; shift + local grp=$1; shift + local src=$1; shift + local mz=$1; shift + + # Install an MDB entry and an FDB entry and make sure that the FDB + # entry only forwards traffic that was not forwarded by the MDB. + + # Associate the MDB entry with one VTEP and the FDB entry with another + # VTEP. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 fdb add 00:00:00:00:00:00 dev vx0 self static dst $vtep2_ip src_vni 10010" + + # Add filters to match on decapsulated traffic in the second namespace. + run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto $proto flower ip_proto udp dst_port 54321 enc_dst_ip $vtep1_ip action pass" + run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto $proto flower ip_proto udp dst_port 54321 enc_dst_ip $vtep2_ip action pass" + + # Configure the VTEP addresses in the second namespace to enable + # decapsulation. + run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo" + run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo" + + # Send IP multicast traffic and make sure it is forwarded by the MDB + # and only arrives to the first VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "IP multicast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 0 + log_test $? 0 "IP multicast - second VTEP" + + # Send broadcast traffic and make sure it is forwarded by the FDB and + # only arrives to the second VTEP. + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b bcast -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "Broadcast - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 1 + log_test $? 0 "Broadcast - second VTEP" + + # Remove the MDB entry and make sure that IP multicast is now forwarded + # by the FDB to the second VTEP. + run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" + run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + tc_check_packets "$ns2" "dev vx0 ingress" 101 1 + log_test $? 0 "IP multicast after removal - first VTEP" + tc_check_packets "$ns2" "dev vx0 ingress" 102 2 + log_test $? 0 "IP multicast after removal - second VTEP" +} + +mdb_fdb_ipv4_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn" +} + +mdb_fdb_ipv6_ipv4() +{ + local ns1=ns1_v4 + local ns2=ns2_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local plen=32 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn -6" +} + +mdb_fdb_ipv4_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local proto="ipv4" + local grp=239.1.1.1 + local src=192.0.2.129 + + echo + echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn" +} + +mdb_fdb_ipv6_ipv6() +{ + local ns1=ns1_v6 + local ns2=ns2_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local plen=128 + local proto="ipv6" + local grp=ff0e::1 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay" + echo "------------------------------------------------------" + + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ + "mausezahn -6" +} + +mdb_grp1_loop() +{ + local ns1=$1; shift + local vtep1_ip=$1; shift + local grp1=$1; shift + + while true; do + bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp1 dst $vtep1_ip src_vni 10010 + bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp1 permanent dst $vtep1_ip src_vni 10010 + done >/dev/null 2>&1 +} + +mdb_grp2_loop() +{ + local ns1=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local grp2=$1; shift + + while true; do + bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp2 dst $vtep1_ip src_vni 10010 + bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp2 permanent dst $vtep1_ip src_vni 10010 + bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep2_ip src_vni 10010 + done >/dev/null 2>&1 +} + +mdb_torture_common() +{ + local ns1=$1; shift + local vtep1_ip=$1; shift + local vtep2_ip=$1; shift + local grp1=$1; shift + local grp2=$1; shift + local src=$1; shift + local mz=$1; shift + local pid1 + local pid2 + local pid3 + local pid4 + + # Continuously send two streams that are forwarded by two different MDB + # entries. The first entry will be added and deleted in a loop. This + # allows us to test that the data path does not use freed MDB entry + # memory. The second entry will have two remotes, one that is added and + # deleted in a loop and another that is replaced in a loop. This allows + # us to test that the data path does not use freed remote entry memory. + # The test is considered successful if nothing crashed. + + # Create the MDB entries that will be continuously deleted / replaced. + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp1 permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep1_ip src_vni 10010" + run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep2_ip src_vni 10010" + + mdb_grp1_loop $ns1 $vtep1_ip $grp1 & + pid1=$! + mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 & + pid2=$! + ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + pid3=$! + ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + pid4=$! + + sleep 30 + kill -9 $pid1 $pid2 $pid3 $pid4 + wait $pid1 $pid2 $pid3 $pid4 2>/dev/null + + log_test 0 0 "Torture test" +} + +mdb_torture_ipv4_ipv4() +{ + local ns1=ns1_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local grp1=239.1.1.1 + local grp2=239.2.2.2 + local src=192.0.2.129 + + echo + echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn" +} + +mdb_torture_ipv6_ipv4() +{ + local ns1=ns1_v4 + local vtep1_ip=198.51.100.100 + local vtep2_ip=198.51.100.200 + local grp1=ff0e::1 + local grp2=ff0e::2 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn -6" +} + +mdb_torture_ipv4_ipv6() +{ + local ns1=ns1_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local grp1=239.1.1.1 + local grp2=239.2.2.2 + local src=192.0.2.129 + + echo + echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn" +} + +mdb_torture_ipv6_ipv6() +{ + local ns1=ns1_v6 + local vtep1_ip=2001:db8:1000::1 + local vtep2_ip=2001:db8:2000::1 + local grp1=ff0e::1 + local grp2=ff0e::2 + local src=2001:db8:100::1 + + echo + echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay" + echo "----------------------------------------------------------" + + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ + "mausezahn -6" +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -c Control path tests only + -d Data path tests only + -p Pause on fail + -P Pause after each test before cleanup + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +trap cleanup EXIT + +while getopts ":t:cdpPvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + c) TESTS=${CONTROL_PATH_TESTS};; + d) TESTS=${DATA_PATH_TESTS};; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# Make sure we don't pause twice. +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v bridge)" ]; then + echo "SKIP: Could not run test without bridge tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test without jq tool" + exit $ksft_skip +fi + +bridge mdb help 2>&1 | grep -q "src_vni" +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 bridge too old, missing VXLAN MDB support" + exit $ksft_skip +fi + +# Start clean. +cleanup + +for t in $TESTS +do + setup; $t; cleanup; +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + +exit $ret diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 2cbb12736596..e699548d4247 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1820,4 +1820,49 @@ TEST(tls_v6ops) { close(sfd); } +TEST(prequeue) { + struct tls_crypto_info_keys tls12; + char buf[20000], buf2[20000]; + struct sockaddr_in addr; + int sfd, cfd, ret, fd; + socklen_t len; + + len = sizeof(addr); + memrnd(buf, sizeof(buf)); + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12); + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + + fd = socket(AF_INET, SOCK_STREAM, 0); + sfd = socket(AF_INET, SOCK_STREAM, 0); + + ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0); + ASSERT_EQ(listen(sfd, 10), 0); + ASSERT_EQ(getsockname(sfd, &addr, &len), 0); + ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0); + ASSERT_GE(cfd = accept(sfd, &addr, &len), 0); + close(sfd); + + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + if (ret) { + ASSERT_EQ(errno, ENOENT); + SKIP(return, "no TLS support"); + } + + ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + EXPECT_EQ(send(fd, buf, sizeof(buf), MSG_DONTWAIT), sizeof(buf)); + + ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0); + ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_WAITALL), sizeof(buf2)); + + EXPECT_EQ(memcmp(buf, buf2, sizeof(buf)), 0); + + close(fd); + close(cfd); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json new file mode 100644 index 000000000000..16f3a83605e4 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json @@ -0,0 +1,416 @@ +[ + { + "id": "abdc", + "name": "Reference pedit action object in filter", + "category": [ + "infra", + "pedit" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC action add action pedit munge offset 0 u8 clear index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action pedit index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action pedit" + ] + }, + { + "id": "7a70", + "name": "Reference mpls action object in filter", + "category": [ + "infra", + "mpls" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC action add action mpls pop protocol ipv4 index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action mpls index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action mpls" + ] + }, + { + "id": "d241", + "name": "Reference bpf action object in filter", + "category": [ + "infra", + "bpf" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action bpf index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action bpf" + ] + }, + { + "id": "383a", + "name": "Reference connmark action object in filter", + "category": [ + "infra", + "connmark" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action connmark" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action connmark index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action connmark" + ] + }, + { + "id": "c619", + "name": "Reference csum action object in filter", + "category": [ + "infra", + "csum" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action csum ip4h index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action csum index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action csum" + ] + }, + { + "id": "a93d", + "name": "Reference ct action object in filter", + "category": [ + "infra", + "ct" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action ct index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ct index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action ct" + ] + }, + { + "id": "8bb5", + "name": "Reference ctinfo action object in filter", + "category": [ + "infra", + "ctinfo" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC action add action ctinfo index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ctinfo index 10", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action ctinfo" + ] + }, + { + "id": "2241", + "name": "Reference gact action object in filter", + "category": [ + "infra", + "gact" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action pass index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action gact index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action gact" + ] + }, + { + "id": "35e9", + "name": "Reference gate action object in filter", + "category": [ + "infra", + "gate" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC action add action gate priority 1 sched-entry close 100000000ns index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action gate index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action gate" + ] + }, + { + "id": "b22e", + "name": "Reference ife action object in filter", + "category": [ + "infra", + "ife" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action ife encode allow mark pass index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ife index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action ife" + ] + }, + { + "id": "ef74", + "name": "Reference mirred action object in filter", + "category": [ + "infra", + "mirred" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action mirred egress mirror index 1 dev lo" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action mirred index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action mirred" + ] + }, + { + "id": "2c81", + "name": "Reference nat action object in filter", + "category": [ + "infra", + "nat" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action nat ingress 192.168.1.1 200.200.200.1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action nat index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action nat" + ] + }, + { + "id": "ac9d", + "name": "Reference police action object in filter", + "category": [ + "infra", + "police" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action police rate 1kbit burst 10k index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action police index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action police" + ] + }, + { + "id": "68be", + "name": "Reference sample action object in filter", + "category": [ + "infra", + "sample" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action sample rate 10 group 1 index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action sample index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action sample" + ] + }, + { + "id": "cf01", + "name": "Reference skbedit action object in filter", + "category": [ + "infra", + "skbedit" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action skbedit mark 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action skbedit index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action skbedit" + ] + }, + { + "id": "c109", + "name": "Reference skbmod action object in filter", + "category": [ + "infra", + "skbmod" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action skbmod set dmac 11:22:33:44:55:66 index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action skbmod index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action skbmod" + ] + }, + { + "id": "4abc", + "name": "Reference tunnel_key action object in filter", + "category": [ + "infra", + "tunnel_key" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1 index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action tunnel_key index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action tunnel_key" + ] + }, + { + "id": "dadd", + "name": "Reference vlan action object in filter", + "category": [ + "infra", + "tunnel_key" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions add action vlan pop pipe index 1" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action vlan index 1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions flush action vlan" + ] + } +] diff --git a/tools/testing/vsock/.gitignore b/tools/testing/vsock/.gitignore index 87ca2731cff9..a8adcfdc292b 100644 --- a/tools/testing/vsock/.gitignore +++ b/tools/testing/vsock/.gitignore @@ -2,3 +2,4 @@ *.d vsock_test vsock_diag_test +vsock_perf |